Changeset 4535


Ignore:
Timestamp:
Nov 22, 2013, 4:39:31 PM (9 years ago)
Author:
Fredrik Levander
Message:

Refs #819. Adding support for matching low-confidence identifications to features in the presence of high-scoring hits with the same sequence. Setting clusterIds for features with a sequence (features with the same sequence and charge get the same cluster id).

Location:
trunk
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/api/core/conf/common-queries.xml

    r4506 r4535  
    255255    </description>
    256256  </query>
     257
     258  <query id="GET_UNIQUE_PEPTIDE_SEQUENCES_BELOW_FDR_IN_HITS_FOR_PROJECT" type="HQL">
     259    <sql>
     260      SELECT DISTINCT h.description
     261      FROM HitData h
     262      WHERE h.project = :project AND h.protein = false AND h.combinedFDR &lt;= :combinedFDR
     263      ORDER by h.description
     264    </sql>
     265    <description>
     266      Load all distinct (unique) peptide sequences from non-protein entries in the Hits table for a project, restricted to entries with a combined FDR at or below the given cutoff.
     267    </description>
     268  </query>
    257269 
    258270  <query id="GET_UNIQUE_CHARGES_IN_HITS_FOR_PROJECT" type="HQL">
  • trunk/api/core/src/org/proteios/core/Hit.java

    r4506 r4535  
    145145      "GET_UNIQUE_PEPTIDE_SEQUENCES_IN_HITS_FOR_PROJECT");
    146146    query.setEntity("project", project.getData());
     147    return query.list();
     148  }
     149 
     150  @SuppressWarnings("unchecked")
     151  public static List<String> getUniquePeptideSequencesBelowFDR(Project project,
     152      DbControl dc,float combinedFDR)
     153  {
     154    org.hibernate.Query query = HibernateUtil.getPredefinedQuery(dc
     155      .getHibernateSession(),
     156      "GET_UNIQUE_PEPTIDE_SEQUENCES_BELOW_FDR_IN_HITS_FOR_PROJECT");
     157    query.setEntity("project", project.getData());
     158    query.setFloat("combinedFDR", combinedFDR);
    147159    return query.list();
    148160  }
  • trunk/client/servlet/src/locale/en/dictionary

    r4522 r4535  
    848848SearchXTandemUsingLocalInstallation=Search X!Tandem using local installation
    849849SearchXTandemViaWebInterface=Search X!Tandem via web interface
     850SecondaryPeptideCutOff=Secondary peptide cutoff
    850851SelectFileType=Select File Type
    851852SelectFractionIdEntryMode=Fraction ID input:
  • trunk/client/servlet/src/org/proteios/action/feature/CreateFeatureHitMatchJob.java

    r4339 r4535  
    5757  public static final VFloat VALIGNTOL = new VFloat("aligntol",true);
    5858  public static final VFloat VFDRCUTOFF = new VFloat("fdrCutOff",true);
     59  public static final VFloat VSECONDARYFDRCUTOFF = new VFloat("secondaryFdrCutOff",true);
    5960 
    6061  /*
     
    7879    Float alignTimeTol = getValidFloat(VALIGNTOL);
    7980    Float fdrCutOff = getValidFloat(VFDRCUTOFF);
     81    Float secondaryFdrCutOff = getValidFloat(VSECONDARYFDRCUTOFF);
    8082    Boolean isSecondary = getValidBoolean(VSECONDARY);
    8183    if (isSecondary == null)
     
    115117      isSecondary);
    116118    job.setParameterValue("fdrCutOff", new FloatParameterType(), fdrCutOff);
     119    job.setParameterValue("secondaryFdrCutOff", new FloatParameterType(), secondaryFdrCutOff);
    117120    job.setName("Match features and hits");
    118121    job.setDescription("Matching in project:" + project+ ", FDR cutoff:" + fdrCutOff + ", tolerance:"+tolerance + ", secondary:"+isSecondary);
  • trunk/client/servlet/src/org/proteios/action/feature/FeatureHitMatchForm.java

    r4339 r4535  
    5757        CreateFeatureHitMatchJob.VFDRCUTOFF);
    5858    tolFDRF.setValue(new Float(0.01));
    59     tolFDRF.setLabel("peptideCutOff");
     59    tolFDRF.setLabel("PeptideCutOff");
    6060    tolFDRF.setHelp("Peptide identification FDR cutoff.");
    6161    properties.add(tolFDRF);
     62    TextField<Float> stolFDRF = new TextField<Float>(
     63        CreateFeatureHitMatchJob.VSECONDARYFDRCUTOFF);
     64    stolFDRF.setValue(new Float(1f));
     65    stolFDRF.setLabel("SecondaryPeptideCutOff");
     66    stolFDRF.setHelp("FDR cutoff for peptides that pass the primary FDR cutoff elsewhere in the project.");
     67    properties.add(stolFDRF);
    6268    Checkbox<VBoolean> isSecondaryF = new Checkbox<VBoolean>(
    6369        CreateFeatureHitMatchJob.VSECONDARY);
  • trunk/plugin/src/org/proteios/plugins/FeatureHitMatcher.java

    r4351 r4535  
    6161import java.io.OutputStream;
    6262import java.io.PrintWriter;
     63import java.sql.Timestamp;
    6364import java.util.ArrayList;
    6465import java.util.Arrays;
     66import java.util.Date;
    6567import java.util.List;
     68import java.util.TreeMap;
    6669
    6770/**
     
    7679  static float MONOISOTOPIC_PROTON_MASS = (float) 1.007276035;
    7780  boolean jobAborted = false;
     81  long currentCluster =0;
     82  TreeMap<String,Long> clusterMap = new TreeMap<String,Long>();
    7883
    7984  /**
     
    9398  public About getAbout() {
    9499    return new AboutImpl("Match features to hits",
    95         "Match features to hits for a project", "0.4",
    96         "2009-2012, Marianne Sandin, Fredrik Levander", null, null,
     100        "Match features to hits for a project", "0.5",
     101        "2009-2013, Marianne Sandin, Fredrik Levander", null, null,
    97102        "http://www.proteios.org");
    98103  }
     
    113118      double alignTimeTol = 4;
    114119      float fdrCutOff = 0.01f;
     120      float secondaryFdrCutOff = 1f;
    115121      int count = 0;
    116122      DbControl dc = sc.newDbControl();   
     
    146152      alignTimeTol = ((Float) job.getValue("alignTimeTol")).doubleValue();
    147153      fdrCutOff = ((Float) job.getValue("fdrCutOff")).floatValue();
     154      secondaryFdrCutOff = ((Float) job.getValue("secondaryFdrCutOff")).floatValue();
     155
    148156      boolean matchRelated = ((Boolean) job.getValue("secondary"))
    149157          .booleanValue();
     
    165173
    166174        writer.println("Hit FDR cutoff:" + fdrCutOff);
     175        writer.println("Secondary hit FDR cutoff:" + secondaryFdrCutOff);
    167176        writer.println("m/z tolerance:" + tolerance);
    168177        writer.println("RT (minutes) tolerance - start:" + startTimeTol
    169178            + " end:" + endTimeTol);
    170         writer.println("Matching with related files" + matchRelated);
     179        writer.println("Matching with related files:" + matchRelated);
    171180       
    172181        if (matchRelated) {
     
    185194            + "\n");
    186195        ItemQuery<Feature> featureQuery = qf.select(Feature.class);
     196        List<String> peptideSequenceList = Hit.getUniquePeptideSequencesBelowFDR(project, dc, fdrCutOff);
     197        for (int i=0;i<peptideSequenceList.size();i++)
     198        {
     199          String s = peptideSequenceList.get(i);
     200          if (s.contains("delta")) {
     201            peptideSequenceList.set(i,s.substring(0, s.indexOf("delta")));
     202          }
     203        }
     204        Timestamp tStmp = new Timestamp(new Date().getTime());
     205        currentCluster = tStmp.getTime();
    187206        featureQuery.restrictPermanent(Restrictions.eq(
    188207            Hql.property("project"), Hql.entity(project)));
     
    215234          writer.println("File:" + currentMsFile);
    216235          List<Hit> hits = retrieveHits(currentMsFile, project, dc2,
    217               fdrCutOff, matchRelated);
     236              fdrCutOff, secondaryFdrCutOff, peptideSequenceList, matchRelated);
    218237          List<Feature> features = featureQuery.list(dc2);
    219238          PolynomialSplineFunction func = null;
     
    387406          log.debug("m/z matching:" + f.getMassToChargeRatio()
    388407              + " hit:" + hitMz);
    389           // In rare cases the end retention is null!
     408          // In rare cases the start or end retention time can be null!
    390409          if (f.getEndRetentionTimeInMinutes() == null)
    391410            f.setEndRetentionTimeInMinutes(new Float(f
    392411                .getApexRetentionTimeInMinutes() + endTimeTol));
     412          if (f.getStartRetentionTimeInMinutes() == null)
     413            f.setStartRetentionTimeInMinutes(new Float(f
     414              .getApexRetentionTimeInMinutes() - startTimeTol));
    393415          if (hitRt >= f.getStartRetentionTimeInMinutes()
    394416              && hitRt <= f.getEndRetentionTimeInMinutes()) {
     
    413435                replaced++;
    414436              }
    415               f.setPeptideSequence(h.getDescription().substring(0,h.getDescription().contains("delta") ? h.getDescription().lastIndexOf(" ") : h.getDescription().length()));
     437              setFeatureSequenceAndClusterId(f, h);
    416438              f.addHit(h);
    417439              newmatches++;
     
    470492                  if (best) {
    471493                   
    472                     f.setPeptideSequence(h.getDescription().substring(0, h.getDescription().contains("delta") ? h.getDescription().lastIndexOf(" ") : h.getDescription().length()));
     494                    setFeatureSequenceAndClusterId(f, h);
    473495                    writer.println("Better scoring hit sequence replacing old");
    474496                    // We only want the precursor quantity
     
    526548              h.setPrecursorQuantity(f.getIntegratedIntensity());
    527549              f.addHit(h);
    528               f.setPeptideSequence(h.getDescription().substring(0, h.getDescription().contains("delta") ? h.getDescription().lastIndexOf(" ") : h.getDescription().length()));
     550              setFeatureSequenceAndClusterId(f, h);
    529551              count++;
    530552              newmatches++;
     
    579601    return count;
    580602  }
     603 
     604  private void setFeatureSequenceAndClusterId(Feature f, Hit h)
     605  {
     606    f.setPeptideSequence(h.getDescription().substring(0,h.getDescription().contains("delta") ? h.getDescription().lastIndexOf(" ") : h.getDescription().length()));
     607    String comb = f.getPeptideSequence()+","+f.getChargeState();
     608    if (clusterMap.containsKey(comb))
     609    {
     610      f.setClusterId(clusterMap.get(comb));
     611    }
     612    else
     613    {
     614      f.setClusterId(currentCluster);
     615      clusterMap.put(comb, currentCluster);
     616      currentCluster++;
     617    }
     618  }
    581619
    582620  private int getFirstHitForChargeState(List<Hit> hits, int charge) {
     
    590628
    591629  private List<Hit> retrieveHits(File msFile, Project project, DbControl dc,
    592       float fdrCutOff, boolean matchRelated) {
     630      float fdrCutOff, float secondaryFdrCutOff, List<String> peptideSequenceList, boolean matchRelated) {
    593631    List<Hit> hits = new ArrayList<Hit>();
    594632    ItemQuery<Hit> hitQuery = Hit.getQuery(project);
     633    int notSoGoodHits = 0;
    595634    // First check if the same msXML file can be found in the hits table
    596635    hitQuery.restrict(Restrictions.eq(Hql.property("peakListFile"),
     
    601640      hitQuery.setPermanentParameter("protein", false, null);
    602641      hitQuery.restrictPermanent(Restrictions.lteq(
    603           Hql.property("combinedFDR"), Expressions.aFloat(fdrCutOff)));
     642          Hql.property("combinedFDR"), Expressions.aFloat(secondaryFdrCutOff)));
    604643    }
    605644    if (hitQuery.count(dc) == 0) {
     
    668707        hitQuery.restrictPermanent(Restrictions.lteq(
    669708            Hql.property("combinedFDR"),
    670             Expressions.aFloat(fdrCutOff)));
     709            Expressions.aFloat(secondaryFdrCutOff)));
    671710        log.debug("Hit peakListFile:" + h.getPeakListFile());
    672711      }
     
    676715    ItemResultList<Hit> hitlist = hitQuery.list(dc);
    677716    for (Hit h : hitlist) {
    678       hits.add(h);
    679     }
    680     log.debug("Hits found:" + hits.size());
     717      if (h.getCombinedFDR()<=fdrCutOff)
     718        hits.add(h);
     719      else {
     720        String s = h.getDescription();
     721        if (s.contains("delta"))
     722        {
     723          s = s.substring(0, s.indexOf("delta"));
     724        }
     725        if (peptideSequenceList.contains(s))
     726        {
     727          hits.add(h);
     728          notSoGoodHits++;
     729        }
     730      }
     731    }
     732    log.debug("Hits found:" + hits.size() + ". " + notSoGoodHits + " retained above primary FDR threshold.");
    681733    return hits;
    682734  }
Note: See TracChangeset for help on using the changeset viewer.