Changeset 3878


Ignore:
Timestamp:
Oct 25, 2007, 11:57:31 AM (14 years ago)
Author:
Nicklas Nordborg
Message:

References #797: Enhance performance for LOWESS and Median-ratio plug-ins

Median-ratio has now been optimized to use fewer database queries. Initial test results indicates at
least 90% reduced execution time, but I have only tested on a 1-4 bioassays.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/plugins/core/net/sf/basedb/plugins/MedianRatioNormalization.java

    r3679 r3878  
    2626package net.sf.basedb.plugins;
    2727
     28import java.sql.SQLException;
    2829import java.util.ArrayList;
    2930import java.util.Arrays;
     
    6061import net.sf.basedb.core.plugin.Request;
    6162import net.sf.basedb.core.plugin.Response;
    62 import net.sf.basedb.core.query.Aggregations;
    6363import net.sf.basedb.core.query.Dynamic;
     64import net.sf.basedb.core.query.Expression;
    6465import net.sf.basedb.core.query.Expressions;
    65 import net.sf.basedb.core.query.JoinType;
    6666import net.sf.basedb.core.query.Orders;
    6767import net.sf.basedb.core.query.Restriction;
    6868import net.sf.basedb.core.query.Restrictions;
    69 import net.sf.basedb.core.query.Select;
    7069import net.sf.basedb.core.query.Selects;
    7170import net.sf.basedb.core.query.SqlResult;
     
    7372
    7473/**
    75    @author enell
     74   @author enell, Nicklas
    7675   @version 2.0
    7776   @base.modified $Date$
     
    9392    "The normalizer lets you exclude a percentage of the highest and lowest intensities as wel" +
    9493    "l as all spots with at least one intensity below a fixed value.",
    95     "1.0",
     94    "2.5",
    9695    "2006, Base 2 development team",
    9796    null,
     
    189188        String childName = Values.getString((String)job.getValue(CHILD_NAME), source.getName());
    190189        String childDescription = (String)job.getValue(CHILD_DESCRIPTION);
    191        
    192         // Create Transformation
    193         Transformation t = source.newTransformation(getCurrentJob(dc));
    194         t.setName(about.getName());
    195         dc.saveItem(t);
    196        
    197         // Create the normalized bioassay set
    198         BioAssaySet child = t.newProduct(null, "new", true);
     190        int blockGroupSize = (Integer) job.getValue(blockGroupParameter.getName());
     191        float minIntensity = (Float)job.getValue(minIntensityParameter.getName());
     192        float lowExclude = (Float) job.getValue(lowExcludeParameter.getName());
     193        float highExclude = (Float) job.getValue(highExcludeParameter.getName());
     194        Job thisJob = getCurrentJob(dc);
     195       
     196        BioAssaySet child = normalize(dc, source, thisJob, minIntensity, lowExclude, highExclude, blockGroupSize, progress);
    199197        child.setName(childName);
    200198        child.setDescription(childDescription);
    201         dc.saveItem(child);
    202        
    203         // Batcher for inserting normalized data
    204         batcher = child.getSpotBatcher();
    205        
    206         Select ratio = Selects.expression(Expressions.divide(Dynamic.column(VirtualColumn.channel(1)), Dynamic.column(VirtualColumn.channel(2))), "ratio");
    207         Restriction intensityRestriction = Restrictions.and
    208         (
    209           Restrictions.gt(Dynamic.column(VirtualColumn.channel(1)), Expressions.aFloat((Float) job.getValue(minIntensityParameter.getName()))),
    210           Restrictions.gt(Dynamic.column(VirtualColumn.channel(2)), Expressions.aFloat((Float) job.getValue(minIntensityParameter.getName())))
    211         );
    212         int blockGroupSize = (Integer) job.getValue(blockGroupParameter.getName());
    213         float lowExclude = (Float) job.getValue(lowExcludeParameter.getName());
    214         float highExclude = (Float) job.getValue(highExcludeParameter.getName());
    215        
    216         DynamicSpotQuery query;
    217         DynamicResultIterator resultIter;
    218         query = source.getSpotData();
    219         query.restrict(intensityRestriction);
    220         long numSpots = query.count(dc);
    221         int normalizedSpots = 0;
    222         if (progress != null) progress.display((int)(normalizedSpots / numSpots * 100), normalizedSpots + " spots normalized");
    223 
    224         List<BioAssay> assays = source.getBioAssays().list(dc);
    225         for (BioAssay assay : assays)
    226         {
    227           query = assay.getSpotData();
    228           query.restrict(intensityRestriction);
    229           query.joinRawData(JoinType.LEFT);
    230           query.select(Selects.expression(Aggregations.max(Dynamic.rawData("block")), "max"));
    231           query.select(Selects.expression(Aggregations.min(Dynamic.rawData("block")), "min"));
    232           resultIter = query.iterate(dc);
    233           SqlResult result = resultIter.next();
    234           int maxBlock = result.getInt(resultIter.getIndex("max"));
    235           int minBlock = result.getInt(resultIter.getIndex("min"));
    236 
    237           for (int i = minBlock; i <= maxBlock; i += blockGroupSize)
    238           {
    239             Restriction blockRestriction = Restrictions.between
    240             (
    241               Expressions.selected(Dynamic.selectRawData("block")),
    242               Expressions.integer(i),
    243               Expressions.integer(i+blockGroupSize)
    244             );
    245            
    246             query = assay.getSpotData();
    247             query.joinRawData(JoinType.LEFT);
    248             query.select(Dynamic.select(VirtualColumn.channel(1)));
    249             query.select(Dynamic.select(VirtualColumn.channel(2)));
    250             query.select(Dynamic.selectRawData("block"));
    251             query.select(ratio);
    252             query.restrict(intensityRestriction);
    253             query.restrict(blockRestriction);
    254             query.order(Orders.asc(Expressions.selected(ratio)));
    255            
    256             int count = (int)query.count(dc);
    257             if (count <= 0) continue;
    258             int lowCount = (int) Math.floor(count * lowExclude / 100);
    259             int highCount = (int) Math.floor(count * highExclude / 100);
    260 
    261             // check count > 0
    262             int include = count - lowCount - highCount;
    263             if (include <= 0) continue;
    264             double median;
    265             if (include % 2 == 0)
    266             {
    267               query.setFirstResult((include / 2) - 1 + lowCount);
    268               query.setMaxResults(2);
    269               DynamicResultIterator iter = query.iterate(dc);
    270               median = Math.sqrt(iter.next().getFloat(iter.getIndex("ratio")) * iter.next().getFloat(iter.getIndex("ratio")));
    271             }
    272             else
    273             {
    274               query.setFirstResult(((include - 1) / 2) + lowCount);
    275               query.setMaxResults(1);
    276               DynamicResultIterator iter = query.iterate(dc);
    277               median = iter.next().getFloat(iter.getIndex("ratio"));
    278             }
    279 
    280             float sqrtMedRatio = (float) Math.sqrt(median);
    281             float invSqrtMedRatio = 1 / sqrtMedRatio;
    282             Select newCh1 = Selects.expression(Expressions.multiply(Expressions.selected(Dynamic.select(VirtualColumn.channel(1))),
    283               Expressions.aFloat(invSqrtMedRatio)), "newCh1");
    284             Select newCh2 = Selects.expression(Expressions.multiply(Expressions.selected(Dynamic.select(VirtualColumn.channel(2))),
    285               Expressions.aFloat(sqrtMedRatio)), "newCh2");
    286 
    287             query = assay.getSpotData();
    288             query.joinRawData(JoinType.LEFT);
    289             query.select(Dynamic.select(VirtualColumn.COLUMN));
    290             query.select(Dynamic.select(VirtualColumn.POSITION));
    291             query.select(newCh1);
    292             query.select(newCh2);
    293             query.restrict(intensityRestriction);
    294             query.restrict(blockRestriction);
    295             query.setParameter("low", new Integer(i), Type.INT);
    296             query.setParameter("high", new Integer(i + blockGroupSize), Type.INT);
    297 
    298             DynamicResultIterator iter = query.iterate(dc);
    299             int column = iter.getIndex(VirtualColumn.COLUMN.getName());
    300             int position = iter.getIndex(VirtualColumn.POSITION.getName());
    301             int ch1 = iter.getIndex("newCh1");
    302             int ch2 = iter.getIndex("newCh2");
    303             while (iter.hasNext())
    304             {
    305               SqlResult r = iter.next();
    306               batcher.insert(r.getShort(column), r.getInt(position), r.getFloat(ch1), r.getFloat(ch2));
    307               normalizedSpots++;
    308             }
    309             if (progress != null) progress.display((int)(normalizedSpots * 100 / numSpots), normalizedSpots + " spots normalized");
    310           }
    311         }
    312         if (progress != null) progress.append("\n");
    313         batcher.close();
    314199        dc.commit();
     200        int normalizedSpots = child.getNumSpots();
    315201        response.setDone(normalizedSpots + " spots normalized, " + (source.getNumSpots() - normalizedSpots) + " spots removed");
    316202      }
     
    408294  // -------------------------------------------
    409295 
     296  /**
     297    Normalise the source bioassay set using MEDIAN-RATIO normalization.
     298    @param dc The DbControl to use for database access
     299    @param source The source bioassay set that is going to be normalized
     300    @param job The job that is doing the normalization, or null
     301    @param minIntensity All spots which have a lower intensity value in either channel
     302      will be filtered out
     303    @param lowExclude A percentage of the spots with lowest ratio that are excuded in the
     304      median calculation
     305    @param highExclude A percentage of the spots with highest ratio that are excluded in the
     306      median calculation
     307    @param blockGroupSize
     308    @return The normalized bioassayset
     309    @since 2.5
     310  */
     311  public BioAssaySet normalize(DbControl dc, BioAssaySet source, Job job, float minIntensity, float lowExclude, float highExclude, int blockGroupSize, ProgressReporter progress)
     312  {
     313    if (progress != null) progress.display(0, "Preparing to normalize...");
     314
     315    // Create Transformation
     316    Transformation t = source.newTransformation(getCurrentJob(dc));
     317    t.setName(about.getName());
     318    dc.saveItem(t);
     319   
     320    // Create the normalized bioassay set
     321    BioAssaySet child = t.newProduct(null, "new", true);
     322    dc.saveItem(child);
     323   
     324    // Batcher for inserting normalized data
     325    SpotBatcher batcher = child.getSpotBatcher();
     326   
     327    // Expressions used to get data
     328    Expression block = Dynamic.rawData("block");
     329    Expression ch1 = Dynamic.column(VirtualColumn.channel(1));
     330    Expression ch2 = Dynamic.column(VirtualColumn.channel(2));
     331    // ratio = ch1 / ch2
     332    Expression ratio = Expressions.divide(ch1, ch2);
     333   
     334    //   Create restriction: ch1 > minIntensity and ch2 > minIntensity
     335    Restriction intensityRestriction = Restrictions.and
     336    (
     337      Restrictions.gt(Dynamic.column(VirtualColumn.channel(1)), Expressions.aFloat(minIntensity)),
     338      Restrictions.gt(Dynamic.column(VirtualColumn.channel(2)), Expressions.aFloat(minIntensity))
     339    );
     340   
     341    // Create restriction: column = :bioAssayColumn
     342    Restriction bioAssayRestriction = Restrictions.eq(
     343        Dynamic.column(VirtualColumn.COLUMN),
     344        Expressions.parameter("bioAssayColumn")
     345      );
     346   
     347    // Count number of spots that is going to be normalized
     348    DynamicSpotQuery query = source.getSpotData();
     349    query.restrict(intensityRestriction);
     350    long numSpots = query.count(dc);
     351    int normalizedSpots = 0;
     352    if (progress != null) progress.display((int)(normalizedSpots / numSpots * 100), normalizedSpots + " spots normalized");
     353   
     354    // Create query to retrieve spot data: COLUMN, POSITION, ch1, ch2, block
     355    // We use a parameter to restrict the query to return data for one bioassay at a time
     356    query.select(Dynamic.select(VirtualColumn.POSITION));
     357    query.select(Selects.expression(ch1, "ch1"));
     358    query.select(Selects.expression(ch2, "ch2"));
     359    query.select(Selects.expression(block, "block"));
     360    query.restrict(bioAssayRestriction);
     361    query.order(Orders.asc(block));
     362    query.order(Orders.asc(ratio));
     363
     364    // Normalize one bioassay at a time
     365    List<BioAssay> assays = source.getBioAssays().list(dc);
     366    try
     367    {
     368      for (BioAssay assay : assays)
     369      {
     370        // Prepare list for holding data
     371        int assaySpots = assay.getNumSpots();
     372        List<SpotData> data = new ArrayList<SpotData>(assaySpots);
     373       
     374        // Load spot data for this bioassay
     375        short bioassayColumn = assay.getDataCubeColumnNo();
     376        query.setParameter("bioAssayColumn", (int)bioassayColumn, Type.INT);
     377       
     378        DynamicResultIterator it = query.iterate(dc);
     379        int positionIndex = it.getIndex(VirtualColumn.POSITION.getName());
     380        int ch1Index = it.getIndex("ch1");
     381        int ch2Index = it.getIndex("ch2");
     382        int blockIndex = it.getIndex("block");
     383       
     384        // Copy bioassay data to SpotData objects
     385        while (it.hasNext())
     386        {
     387          SqlResult r = it.next();
     388          SpotData spot = new SpotData(r.getInt(positionIndex),
     389            r.getFloat(ch1Index), r.getFloat(ch2Index), r.getInt(blockIndex));
     390          data.add(spot);
     391        }
     392        it.close();
     393       
     394        // Continue with next bioassay if there is no data
     395        int dataSize = data.size();
     396        if (dataSize == 0) continue;
     397        // Get range of block numbers - NOTE! query must return spots sorted in block order
     398        int minBlock = data.get(0).block;
     399        int maxBlock = data.get(data.size()-1).block;
     400       
     401        int fromIndex = 0;
     402        int toIndex = 0;
     403        int fromBlock = minBlock;
     404        // Normalize each block range independently: fromBlock + blockGroupSize --> toBlock
     405        while (fromBlock <= maxBlock)
     406        {
     407          // Find start and end index for current block range
     408          int toBlock = fromBlock + blockGroupSize - 1;
     409          if (toBlock > maxBlock) toBlock = maxBlock;
     410          fromIndex = toIndex;
     411          // Data is sorted by block; find index of last spot with: block <= toBlock
     412          // spot given by toIndex should not be included
     413          while (toIndex < dataSize && data.get(toIndex).block <= toBlock)
     414          {
     415            ++toIndex;
     416          }
     417         
     418          if (toIndex > fromIndex)
     419          {
     420            // Exlude low and high values
     421            int count = toIndex - fromIndex;
     422            int lowIndex = fromIndex + (int)Math.floor(count * lowExclude / 100);
     423            int highIndex = toIndex - (int)Math.floor(count * highExclude / 100);
     424           
     425            if (highIndex > lowIndex)
     426            {
     427              // Find median ratio in the current block range
     428              double median = median(data.subList(lowIndex, highIndex));
     429             
     430              // Correct all spot intensities
     431              float sqrtMedRatio = (float)Math.sqrt(median);
     432              float invSqrtMedRatio = 1 / sqrtMedRatio;
     433              for (int j = fromIndex; j < toIndex; ++j)
     434              {
     435                SpotData spot = data.get(j);
     436                double newCh1 = spot.ch1 * invSqrtMedRatio;
     437                double newCh2 = spot.ch2 * sqrtMedRatio;
     438                batcher.insert(bioassayColumn, spot.position, (float)newCh1, (float)newCh2);
     439              }
     440            }
     441            normalizedSpots += toIndex - fromIndex;
     442            if (progress != null) progress.display((int)((normalizedSpots * 100) / numSpots), normalizedSpots + " spots normalized");
     443          }
     444          fromBlock = toBlock + 1;
     445        }
     446      }
     447      batcher.flush();
     448      batcher.close();
     449    }
     450    catch (SQLException e)
     451    {
     452      throw new BaseException(e);
     453    }
     454    return child;
     455  }
     456 
    410457  private RequestInformation getConfigureJobParameters()
    411458  {
     
    444491    }
    445492    return configureJob;
    446   }
    447    
     493  }
     494 
     495  private double median(List<SpotData> data)
     496  {
     497    int size = data.size();
     498    int half = size / 2;
     499    if (size % 2 == 0)
     500    {
     501      return Math.sqrt(data.get(half - 1).ratio * data.get(half).ratio);
     502    }
     503    else
     504    {
     505      return data.get(half).ratio;
     506    }
     507  }
     508 
     509 
     510  private static class SpotData
     511  {
     512    final int position;
     513    final float ch1;
     514    final float ch2;
     515    final double ratio;
     516    final int block;
     517
     518    SpotData(int position, float ch1, float ch2, int block)
     519    {
     520      this.position = position;
     521      this.ch1 = ch1;
     522      this.ch2 = ch2;
     523      this.block = block;
     524      this.ratio = ch1 / ch2;
     525    }
     526  }
    448527}
Note: See TracChangeset for help on using the changeset viewer.