Changeset 5937


Ignore:
Timestamp:
May 15, 2020, 11:38:47 AM (3 weeks ago)
Author:
Nicklas Nordborg
Message:

References #1239: Include Single Sample Predictor models in the StringTie pipeline

Implemented support for a "Scores" annotation type that is paired with the result annotation type for each SSP. The "Scores" annotation will contain a semicolon-separated list with scores for all possible classes. Each score is given as a key=value entry in the list. For example LumA=0.001; LumB=0.99; ...and so on.

Location:
extensions/net.sf.basedb.reggie/trunk
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/config/reggie-config.xml

    r5927 r5937  
    8787        <!-- Each entry should be a filename of the *.RData object representing the model. -->
    8888        <!-- Each entry should have a 'name' and an associated 'annotation-type'. -->
     89        <!-- The 'annotation-type-scores' is optional. If provided, it is used to store a list with all classes and scores. -->
    8990        <!-- A 'description' is optional. -->
    9091        <!-- The annotation type needs to be created manually. -->
    91         <model name="Subtype" annotation-type="SSP_Subtype" description="">
     92        <model name="Subtype" annotation-type="SSP_Subtype" annotation-type-scores="SSP_Subtype_Scores" description="">
    9293          Training_Run19081Genes_noNorm_SSP.subtypeMost.Fcc15_5x5foldCV.num.rules.50_24.selRules.AIMS.GS.RData
    9394        </model>
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/ssp/SspModel.java

    r5927 r5937  
    8686  private final String modelData;
    8787  private final String annotationTypeName;
     88  private final String annotationTypeNameScores;
    8889  private AnnotationType annotationType;
    89   private boolean hasSearchedForAnnotationType;
     90  private AnnotationType annotationTypeScores;
     91  private boolean hasSearchedForAnnotationTypes;
    9092 
    9193  SspModel(Element e)
     
    9597    this.modelData = e.getTextTrim();
    9698    this.annotationTypeName = e.getAttributeValue("annotation-type");
     99    this.annotationTypeNameScores = e.getAttributeValue("annotation-type-scores");
    97100  }
    98101 
     
    133136 
    134137  /**
     138    Get the name of the mapped annotation type for scores.
     139    Configured in reggie-config.xml, "annotation-type-scores" attribute of the "model" tag.
     140  */
     141  public String getAnnotationTypeNameScores()
     142  {
     143    return annotationTypeNameScores;
     144  }
     145 
     146  /**
    135147    Get the mapped annotation type. Returns null if an
    136148    annotation type with expected name can't be found.
     
    138150  public AnnotationType getAnnotationType(DbControl dc)
    139151  {
    140     if (hasSearchedForAnnotationType) return annotationType;
    141 
    142     hasSearchedForAnnotationType = true;
     152    if (!hasSearchedForAnnotationTypes) searchForAnnotationTypes(dc);
     153    return annotationType;
     154  }
     155
     156  public AnnotationType getAnnotationTypeScores(DbControl dc)
     157  {
     158    if (!hasSearchedForAnnotationTypes) searchForAnnotationTypes(dc);
     159    return annotationTypeScores;
     160  }
     161 
     162  private void searchForAnnotationTypes(DbControl dc)
     163  {
     164    hasSearchedForAnnotationTypes = true;
    143165    if (annotationTypeName != null)
    144166    {
    145       ItemQuery<AnnotationType> query = AnnotationType.getQuery(Item.RAWBIOASSAY);
    146       query.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT);
    147       query.restrict(
    148         Restrictions.eq(
    149           Hql.property("name"),
    150           Expressions.parameter("name", annotationTypeName, Type.STRING)
    151       ));
    152       query.setMaxResults(1);
     167      ItemQuery<AnnotationType> query = getQuery();
     168      query.setParameter("name", annotationTypeName, Type.STRING);
    153169      List<AnnotationType> list = query.list(dc);
    154       if (list.size() > 0)
     170      if (list.size() > 0) annotationType = list.get(0);
     171     
     172      if (annotationTypeNameScores != null)
    155173      {
    156         annotationType = list.get(0);
     174        query.setParameter("name", annotationTypeNameScores, Type.STRING);
     175        list = query.list(dc);
     176        if (list.size() > 0) annotationTypeScores = list.get(0);
    157177      }
    158178    }
    159     return annotationType;
    160   }
    161 
     179  }
     180 
     181  private ItemQuery<AnnotationType> getQuery()
     182  {
     183    ItemQuery<AnnotationType> query = AnnotationType.getQuery(Item.RAWBIOASSAY);
     184    query.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT);
     185    query.restrict(
     186      Restrictions.eq(
     187        Hql.property("name"),
     188        Expressions.parameter("name", Type.STRING)
     189    ));
     190    query.setMaxResults(1);
     191    return query;
     192  }
     193 
    162194  @Override
    163195  public int hashCode()
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/ssp/SspModelResult.java

    r5668 r5937  
    1414  private final String result;
    1515 
     16  private final String resultClass;
     17  private final String allScores;
     18  //private final Map<String, Float> scores;
     19 
    1620  public SspModelResult(Rawbioassay raw, SspModel model, String result)
    1721  {
     
    1923    this.model = model;
    2024    this.result = result;
     25   
     26    String[] tmp = result.split("\\;", 2);
     27    this.resultClass = tmp[0];
     28    this.allScores = tmp[1].strip();
     29    /*
     30    TODO -- extract individual scores ??
     31    tmp = allScores.split("\\;");
     32    this.scores = new TreeMap<String, Float>();
     33    for (String t : tmp)
     34    {
     35      String[] s = t.split("\\=");
     36      scores.put(s[0], Float.parseFloat(s[1]));
     37    }
     38    */
    2139  }
    2240 
     
    3654  }
    3755 
     56  public String getResultClass()
     57  {
     58    return resultClass;
     59  }
     60 
     61  public String getAllScores()
     62  {
     63    return allScores;
     64  }
     65  /*
     66  public Set<String> getClasses()
     67  {
     68    return Collections.unmodifiableSet(scores.keySet());
     69  }
     70 
     71  public Float getScore(String className)
     72  {
     73    return scores.get(className);
     74  }
     75  */
    3876}
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/ssp/SspPlugin.java

    r5929 r5937  
    99import java.util.Map;
    1010import java.util.Set;
     11import java.util.regex.Matcher;
     12import java.util.regex.Pattern;
    1113
    1214import net.sf.basedb.core.AnnotationBatcher;
     
    167169          batcher.addAnnotationTypes(Collections.singleton(at));
    168170          logMsg.append("\t").append(name);
     171         
     172          AnnotationType scores = model.getAnnotationTypeScores(dc);
     173          if (scores != null)
     174          {
     175            batcher.addAnnotationTypes(Collections.singleton(scores));
     176            logMsg.append("\t").append(name).append("_Scores");
     177          }
    169178        }
    170179      }
     
    200209          {
    201210            SspModel model = modelResult.getModel();
     211            AnnotationType at = model.getAnnotationType(dc);
     212            AnnotationType scores = model.getAnnotationTypeScores(dc);
    202213            try
    203214            {
    204               Object value = model.getAnnotationType(dc).getValueType().parseString(translate(modelResult.getResult()));
    205               Change change = batcher.setValue(model.getAnnotationType(dc), value, null, false);
     215              String resultClass = translate(modelResult.getResultClass());
     216              Object value = at.getValueType().parseString(resultClass);
     217              Change change = batcher.setValue(at, value, null, false);
    206218              if (change != Change.NO_CHANGE) numAnnotations++;
    207219              logMsg.append("\t").append(value);
     220             
     221              if (scores != null)
     222              {
     223                String resultScores = translateScores(modelResult.getAllScores());
     224                change = batcher.setValue(scores, resultScores, null, false);
     225                if (change != Change.NO_CHANGE) numAnnotations++;
     226                logMsg.append("\t").append(resultScores);
     227              }
    208228            }
    209229            catch (RuntimeException ex)
     
    296316 
    297317  private Map<String, String> translations;
     318  // Translate some Swedish to English
    298319  private String translate(String word)
    299320  {
     
    309330  }
    310331
     332  private Pattern translatePattern;
     333  // Translate each class in the scores list
     334  private String translateScores(String scores)
     335  {
     336    if (translatePattern == null)
     337    {
     338      // Pattern to match ClassA=ScoreA; ClassB=ScoreB; ...ClassN=ScoreN
     339      // We catch 'Class' in $1 and rest in $2
     340      translatePattern = Pattern.compile("(\\w+)(=.+?(;|$))");
     341    }
     342    Matcher m = translatePattern.matcher(scores);
     343    return m.replaceAll(mr -> translate(mr.group(1))+mr.group(2));
     344  }
     345 
    311346  // -------------------------------------
    312347  /*
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/ssp/runSSP.R

    r5667 r5937  
    3232    # apply ssp model
    3333    resultslist <- applyAIMS(mymatrix, myid, aims.gs);
    34 
    35     # print result to stdout
    36     cat("<", resultslist$cl[[1]], ">", "\n", sep="");
     34    all.probs <- c(resultslist$all.probs[[1]])
     35    names(all.probs) <- colnames(resultslist$all.probs[[1]])
     36   
     37    # print result to stdout (semicolon-separated list):
     38    # result class;class1=score;class2=score;...;classN=score
     39    cat("<", resultslist$cl[[1]], sep="");
     40    for (n in names(all.probs))
     41    {
     42      cat("; ", n, "=", all.probs[n], sep="");
     43    }
     44    cat(">\n");
    3745  }
    3846}
Note: See TracChangeset for help on using the changeset viewer.