Changeset 975


Ignore:
Timestamp:
Feb 26, 2009, 12:13:51 PM (15 years ago)
Author:
Martin Svensson
Message:

References #119. Some changes in the calculation part. The plug-in runs through, but the result has not been verified.

Location:
plugins/base2/net.sf.basedb.normalizers/trunk/src/net/sf/basedb/plugins
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • plugins/base2/net.sf.basedb.normalizers/trunk/src/net/sf/basedb/plugins/AbstractNormalizationPlugin.java

    r950 r975  
    165165      // Load spot data for this bioassay
    166166      short bioassayColumn = assay.getDataCubeColumnNo();
     167      if (!query.getParameterNames().contains("bioAssayColumn"))
     168      {
     169        // Create restriction: column = :bioAssayColumn
     170        Restriction bioAssayRestriction = Restrictions.eq(
     171            Dynamic.column(VirtualColumn.COLUMN),
     172            Expressions.parameter("bioAssayColumn")
     173          );
     174        query.restrict(bioAssayRestriction);
     175      }
    167176      query.setParameter("bioAssayColumn", (int)bioassayColumn, Type.INT);
    168177      DynamicResultIterator it = query.iterate(dc);
  • plugins/base2/net.sf.basedb.normalizers/trunk/src/net/sf/basedb/plugins/RankInvariantNormalization.java

    r967 r975  
    3939import net.sf.basedb.core.Transformation;
    4040import net.sf.basedb.core.Type;
     41import net.sf.basedb.core.VirtualColumn;
    4142import net.sf.basedb.core.plugin.About;
    4243import net.sf.basedb.core.plugin.AboutImpl;
     
    4546import net.sf.basedb.core.plugin.Request;
    4647import net.sf.basedb.core.plugin.Response;
     48import net.sf.basedb.core.query.Dynamic;
     49import net.sf.basedb.core.query.Expressions;
     50import net.sf.basedb.core.query.Restriction;
     51import net.sf.basedb.core.query.Restrictions;
    4752import net.sf.basedb.core.query.SqlResult;
    4853import net.sf.basedb.core.signal.SignalHandler;
     
    130135    storeValue(job, request, ri.getParameter(CHILD_DESCRIPTION));
    131136   
    132     storeValue(job, request, ri.getParameter("masterSample"));
     137    storeValues(job, request, ri.getParameter("masterSample"));
    133138    storeValue(job, request, ri.getParameter("numIterations"));
    134139   
     
    286291        );
    287292        parameters.add(masterSampleParameter);
    288         // TODO Add parameter to set the number of iterations to get the least square fit?
    289        
    290         //TODO More parameters?
     293   
    291294        String description = "TODO";
    292295       
     
    334337    query.reset();
    335338       
     339   
     340    // Create restriction: column = :bioAssayColumn
     341    Restriction bioAssayRestriction = Restrictions.eq(
     342        Dynamic.column(VirtualColumn.COLUMN),
     343        Expressions.parameter("bioAssayColumn")
     344      );
     345    query.restrictPermanent(bioAssayRestriction);
     346   
    336347    // Get spot data for the master sample
    337348    if (progress != null) progress.display(0, "Calculate master sample");
     
    343354    for (BioAssay assay : assays)
    344355    {
     356      // Update progress reporter
     357      if(progress != null) progress.display(
     358          (int)((100L * normalizedSpots)/numSpots),
     359        normalizedSpots + " spots normalized");
     360     
    345361      short bioassayColumn = assay.getDataCubeColumnNo();
    346362      List<AbstractSpotData> data = getSpots(dc, assay, query, assay.getRawDataType().getChannels());
     
    349365       
    350366      // Pick out rank invariant genes from the highRank(90th percentile) and lowRank(50th-25th percentile).
    351       float rankQuot = 0;
    352       int highRankIndex = Math.round(((data.size() / 100)*highRank)+0.5f);
     367      double rankQuot = 0.0;
     368      int highRankIndex = Math.round(((data.size() / 100f)*highRank)+0.5f);
    353369      int lowRankIndex;
    354370      int decrLowRank = 0;
    355       List<SpotData1Ch> rankInvSampleGenes;
    356       List<Double> rankInvMasterGenes;
     371      List<SpotData1Ch> rankInvSampleGenes = new ArrayList<SpotData1Ch>();
     372      List<Double> rankInvMasterGenes = new ArrayList<Double>();
    357373      // Iterate until at least 2% of the genes are considered to be rank invariant.
    358374      // LowRank is decreased with 5 steps each time.
    359       do
    360       {
    361         rankInvSampleGenes  = new ArrayList<SpotData1Ch>();
     375      while (rankQuot < 0.02f && ((lowRank-decrLowRank) >= 25))
     376      {
     377        lowRankIndex = Math.round(((data.size() / 100)*(lowRank-decrLowRank))+0.5f);
     378
     379        rankInvSampleGenes = new ArrayList<SpotData1Ch>();
    362380        rankInvMasterGenes = new ArrayList<Double>();
    363        
    364         lowRankIndex = Math.round(((data.size() / 100)*(lowRank-decrLowRank))+0.5f);
    365        
    366         // Map<position, rank>   
    367         Map<Integer, Integer> sampleRankMap = getRankedPositions(data, highRankIndex, lowRankIndex);       
    368         Map<Integer, Integer> masterRankMap = getRankedPositions(masterSample, highRankIndex, lowRankIndex);
    369        
    370         for (int i=0; i < dataSize ; i++)
    371         {           
    372           AbstractSpotData d = data.get(i);
    373           // Gets the gene's ranking in master and current sample
    374           Integer masterRank = masterRankMap.get(d.getPosition());
    375           Integer sampleRank = sampleRankMap.get(d.getPosition());
     381        //Map<position, rank>
     382        Map<Integer, Integer> sampleRanks = new HashMap<Integer, Integer>();
     383        Map<Integer, Integer> masterRanks = new HashMap<Integer, Integer>();
     384       
     385        for (int i=lowRankIndex; i<=highRankIndex; i++)
     386        {
     387          sampleRanks.put(data.get(i).getPosition(), i);
     388          masterRanks.put(masterSample.get(i).getPosition(), i);
    376389         
    377           // Check the relative rank between master and sample if
    378           //the position exists in both of them
    379           if (masterRank != null && sampleRank != null)
     390        }
     391        for (Integer pos : sampleRanks.keySet())
     392        {
     393          Integer sRank = sampleRanks.get(pos);
     394          Integer mRank = masterRanks.get(pos);
     395          float distance = (sRank != null && mRank != null) ? Math.abs((sRank - mRank) / mRank) : 1f;
     396          if (distance < 0.05)
    380397          {
    381             float div = (Math.abs(sampleRank-masterRank) / masterRank);
    382             if (div < 0.05)
    383             {
    384               rankInvSampleGenes.add((SpotData1Ch)d);
    385               rankInvMasterGenes.add((double)masterSample.get(masterRank).getNormalizableData());
    386             }
     398            rankInvSampleGenes.add((SpotData1Ch)data.get(sRank));
     399            rankInvMasterGenes.add((double)masterSample.get(mRank).getNormalizableData());
    387400          }
    388         }       
    389         rankQuot = rankInvSampleGenes.size() / data.size();
     401        }
     402        float numRankedGenes = rankInvSampleGenes.size();       
     403        rankQuot = numRankedGenes / new Float(highRankIndex-lowRankIndex+1);
    390404        decrLowRank += 5;
    391       }while (rankQuot < 0.02 || (lowRank-decrLowRank) >= 25);
     405      }
    392406
    393407      for (SpotData1Ch spot : rankInvSampleGenes)
     
    395409        List<Double> weights = getBiSquareWeights(rankInvSampleGenes, spot.getNormalizableData());
    396410        double[] km = getNormCoeffs(rankInvMasterGenes, rankInvSampleGenes, weights);
    397         float newCh1 = new Float((spot.getNormalizableData()-km[1]) / km[0]);
    398        
    399         batcher.insert(bioassayColumn, spot.getPosition(), newCh1);       
    400       }
    401       normalizedSpots += rankInvSampleGenes.size();
    402      
    403       // Update progress reporter
    404       if(progress != null) progress.display(
    405           (int)((100L * normalizedSpots)/numSpots),
    406         normalizedSpots + " spots normalized");
     411        float newIntensity = new Float((spot.getNormalizableData()-km[1]) / km[0]);
     412       
     413        batcher.insert(bioassayColumn, spot.getPosition(), newIntensity);       
     414      }
     415      normalizedSpots += rankInvSampleGenes.size();     
    407416    }
    408417    batcher.flush();
     
    410419   
    411420    return child;
    412   }
    413 
    414   /*
    415     Gets positions of those genes that are ranked between two values.
    416     The data-parameter must be list sorted asc.
    417    */
    418   @SuppressWarnings("unchecked")
    419   private Map<Integer, Integer> getRankedPositions(List<? extends AbstractSpotData> data, int highRankIndex, int lowRankIndex)
    420   {   
    421     Map<Integer, Integer> rankedPos = new HashMap<Integer, Integer>();
    422     for (AbstractSpotData spd : data)
    423     {
    424       int dataIndex = data.indexOf(spd);
    425       if (dataIndex <= highRankIndex && dataIndex >= lowRankIndex)
    426       {
    427         rankedPos.put(spd.getPosition(), data.indexOf(spd));
    428       }
    429     }
    430     return rankedPos;
    431421  }
    432422
     
    440430    {
    441431      BioAssay assay = (BioAssay)obj;
     432      assay = BioAssay.getById(dc, assay.getId());
    442433      try
    443434      {
Note: See TracChangeset for help on using the changeset viewer.