Changeset 1212


Ignore:
Timestamp:
Mar 26, 2010, 12:02:17 PM (12 years ago)
Author:
Jari Häkkinen
Message:

Adding support to save background expression matrix to file.

Location:
plugins/base2/net.sf.basedb.agilent/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • plugins/base2/net.sf.basedb.agilent/trunk/README_PluginDetails

    r1199 r1212  
    1515=== Parameters ===
    1616
    17 The only parameter to set is how background intensities should be
    18 calculated. Allowed values are median or mean, i.e. the background is
    19 either the median or the mean of the negative control spots on the
    20 array.
     17There is one parameter to set that specifies how background
     18intensities should be calculated. Allowed values are median or mean,
     19i.e. the background is either the median or the mean of the negative
     20control spots on the array.
     21
     22The expression of the background probes is optionally saved to a
     23file. The default is not to save the expression matrix but this can be
     24changed during job configuration.
    2125
    2226
  • plugins/base2/net.sf.basedb.agilent/trunk/src/net/sf/basedb/agilent/plugins/BackgroundCorrection.java

    r1199 r1212  
    2424package net.sf.basedb.agilent.plugins;
    2525
     26import net.sf.basedb.core.AnyToAny;
    2627import net.sf.basedb.core.BaseException;
    2728import net.sf.basedb.core.BioAssay;
    2829import net.sf.basedb.core.BioAssaySet;
     30import net.sf.basedb.core.BooleanParameterType;
    2931import net.sf.basedb.core.DbControl;
     32import net.sf.basedb.core.Directory;
    3033import net.sf.basedb.core.DynamicResultIterator;
    3134import net.sf.basedb.core.DynamicSpotQuery;
    3235import net.sf.basedb.core.Experiment;
     36import net.sf.basedb.core.File;
    3337import net.sf.basedb.core.InvalidDataException;
    3438import net.sf.basedb.core.Item;
    3539import net.sf.basedb.core.Job;
     40import net.sf.basedb.core.Location;
    3641import net.sf.basedb.core.Permission;
    3742import net.sf.basedb.core.PluginParameter;
     
    5459import net.sf.basedb.core.query.Expression;
    5560import net.sf.basedb.core.query.Expressions;
     61import net.sf.basedb.core.query.JoinType;
    5662import net.sf.basedb.core.query.Orders;
    5763import net.sf.basedb.core.query.Restriction;
     
    6773import net.sf.basedb.util.Values;
    6874
     75import java.io.PrintWriter;
    6976import java.sql.SQLException;
    7077import java.util.ArrayList;
     
    7279import java.util.Collection;
    7380import java.util.EnumSet;
     81import java.util.HashMap;
    7482import java.util.HashSet;
     83import java.util.Iterator;
    7584import java.util.List;
    7685import java.util.Set;
     86import java.util.TreeSet;
     87import java.util.Vector;
    7788
    7889/**
     
    145156      );
    146157
     158  private static final
     159  PluginParameter<Boolean> saveBgParameter = new PluginParameter<Boolean>
     160    ( "saveBackgroud",
     161      "Save background information",
     162      "Setting this to true instructs the plug-in to generate a file " +
     163      "containing information about the background probes used in the " +
     164      "calculation. The file is self-explanatory and the file format may " +
     165      "change at any time.",
     166      new BooleanParameterType(false, true)
     167      );
     168
    147169  /**
    148170    Make it possible to abort the job.
    149171   */
    150172  private ThreadSignalHandler signalHandler;
     173
     174
     175  /*
     176    Variables to keep track of spot data for negative controls. The
     177    information must be collected before writing it to file in matrix
     178    format.
     179
     180    Spot data collected in the negative control queries are stored in
     181    the SData class.
     182
     183    The allNegPos TreeSet is used to collect positions of all negative
     184    controls occurring in the queries (bioassays may have different
     185    sets of negative control probes due to filtering and
     186    deletion). Each element is the position of a spot.
     187
     188    The negSData Vector keeps track of spot data for negative control
     189    spots per bioassay. The HashMap is just a map from the position to
     190    the data stored about that position.
     191  */
     192  private class SData
     193  {
     194    public String externalId;
     195    public float intensity;
     196
     197    private int ch1Index;
     198    private int externalIdIndex;
     199    private int positionIndex;
     200    // keep track of whether to save to file, since externalId is
     201    // only available when saving of background expression values is
     202    // requested (this avoids left joins in SQL when not saving).
     203    private Boolean saveBG;
     204
     205    /**
     206       This constructor will not create a fully functional SData
     207       object, i.e., extractData cannot be called from objects created
     208       with this constructor. For SData objects to be usable for
     209       SqlResult you must use the SData(DynamicResultIterator,Boolean)
     210       constructor.
     211     */
     212    public SData(SData sd)
     213    {
     214      this.externalId=sd.externalId;
     215      this.intensity=sd.intensity;
     216    }
     217    public SData(DynamicResultIterator dri, Boolean sBG) throws SQLException
     218    {
     219      saveBG=sBG;
     220      ch1Index = dri.getIndex(VirtualColumn.channelIntensity(1).getName());
     221      externalIdIndex = (saveBG ? dri.getIndex("externalId") : -1);
     222      positionIndex = dri.getIndex(VirtualColumn.POSITION.getName());
     223    }
     224    public int extractData(SqlResult sr) throws SQLException
     225    {
     226      intensity  = sr.getFloat(ch1Index);
     227      externalId = (saveBG ? new String(sr.getString(externalIdIndex)) : null);
     228      return sr.getInt(positionIndex);
     229    }
     230  }
     231  private TreeSet<Integer> allNegPos=null;
     232  private Vector<HashMap<Integer,SData>> negSData=null;
    151233
    152234
     
    178260        // Plug-in options
    179261        storeValue(job, request, methodParameter);
     262        storeValue(job, request, saveBgParameter);
    180263
    181264        // Estimate execution time
     
    232315        // background correction method
    233316        parameters.add(methodParameter);
    234  
     317
     318        // whether to save background probe information
     319        parameters.add(saveBgParameter);
     320
    235321        configureJob = new RequestInformation(
    236322          Request.COMMAND_CONFIGURE_JOB, "Agilent background correction",
     
    318404
    319405
    320   private float mean(DynamicResultIterator dri, int index) throws SQLException
     406  private float mean(DynamicResultIterator dri, int index, Boolean saveBG)
     407    throws SQLException
    321408  {
    322409    float mean=0;
    323410    int nofSpots=0;
     411    HashMap<Integer,SData> hmap=(saveBG ? new HashMap<Integer,SData>() : null);
     412    SData sdata=new SData(dri,saveBG);
     413
    324414    while (dri.hasNext())
    325415    {
    326       mean+=dri.next().getFloat(index);
     416      SqlResult sr = dri.next();
     417      mean+=sr.getFloat(index);
    327418      ++nofSpots;
    328     }
    329     dri.close();
     419      if (saveBG)
     420      {
     421        Integer position=sdata.extractData(sr);
     422        allNegPos.add(position);
     423        hmap.put(position,new SData(sdata));
     424      }
     425    }
     426
     427    if (saveBG) negSData.add(hmap);
     428
    330429    return (nofSpots!=0 ? mean/nofSpots : 0);
    331430  }
    332431
    333432
    334   private float median(DynamicResultIterator dri, int index) throws SQLException
     433  private float median(DynamicResultIterator dri, int index, Boolean saveBG)
     434    throws SQLException
    335435  {
    336436    long count=dri.getTotalCount();
     
    341441
    342442    int nofSpots=0;
    343     SqlResult r = null;
    344     while (nofSpots++<count) r=dri.next();
    345 
    346     // odd number, next number is the median
    347     if (!even) return dri.next().getFloat(index);
    348     // even number, median is average of this and next number
    349     return (r.getFloat(index) + dri.next().getFloat(index))/2;
     443    float median=0;
     444    HashMap<Integer,SData> hmap=(saveBG ? new HashMap<Integer,SData>() : null);
     445    SData sdata=new SData(dri,saveBG);
     446
     447    while (dri.hasNext())
     448    {
     449      SqlResult sr=dri.next();
     450      ++nofSpots;
     451      // odd number, median is the centre
     452      // even number, median is the average of two values in the centre
     453      if ((nofSpots==count) || (even && nofSpots==(count+1))) median+=sr.getFloat(index);
     454
     455      if (saveBG)
     456      {
     457        Integer position=sdata.extractData(sr);
     458        allNegPos.add(position);
     459        hmap.put(position,new SData(sdata));
     460      }
     461    }
     462
     463    if (saveBG) negSData.add(hmap);
     464
     465    // even number of spots, median is the average of two values
     466    return (even ? median/2 : median);
    350467  }
    351468
     
    406523      }
    407524
     525      // setup for optionally saving background information in a file
     526      PrintWriter bgWriter = null;
     527      Boolean saveBG = (Boolean)job.getValue(saveBgParameter.getName());
     528      if (saveBG)
     529      {
     530        Directory expDir=source.getExperiment().getDirectory();
     531        File bgData=File.getFile(dc, expDir,
     532                                 about.getName() + "_backgroundProbes_" +
     533                                 thisJob.getId() + ".txt", true);
     534        bgData.setLocation(Location.PRIMARY);
     535        bgData.setMimeType("text/plain");
     536        dc.saveItem(bgData);
     537
     538        // Associate files in a directory with transformation
     539        String root = expDir.getPath().toString();
     540        String ataName=bgData.getPath().toString().replace(root,"linkTo");
     541        AnyToAny ata = AnyToAny.getNew(dc, t, bgData, ataName, false);
     542        dc.saveItem(ata);
     543
     544        bgWriter = new PrintWriter(bgData.getUploadStream(false,true));
     545        bgWriter.println("# File generated by plug-in: " + about.getName());
     546        bgWriter.println("#\n# Background based on " +
     547                         ( method.equals("Illumina BeadStudio median") ?
     548                           "median" : "mean" ) +
     549                         " expression of background probes.");
     550        bgWriter.println("# The probes considered to be background probes are");
     551        bgWriter.println("# listed below in the expression matrix.");
     552        bgWriter.println("#\n# Assay name: Background expression level");
     553
     554        // Initialize negative control spot data collection support
     555        allNegPos=new TreeSet<Integer>();
     556        //  negSData=new Vector<HashMap<Integer,SData>>(size); done below
     557        //  when number of assays is known.
     558
     559        // add reporter externalId to the query
     560        query.joinReporters(JoinType.LEFT);
     561        query.select(Dynamic.selectReporter("externalId"));
     562      }
     563
    408564      try
    409565      {
     
    411567        // Calculate one bioassay at a time
    412568        List<BioAssay> assays = source.getBioAssays().list(dc);
     569        // Initialize negative control spot data collection support
     570        if (saveBG) negSData=new Vector<HashMap<Integer,SData>>(assays.size());
    413571        float[] backgroundCh1=new float[assays.size()];
    414572        int assayNo=0;
     
    423581          backgroundCh1[assayNo]=
    424582            ( method.equals("Median background") ?
    425               median(dri,ch1Index) : mean(dri,ch1Index) );
     583              median(dri,ch1Index,saveBG) : mean(dri,ch1Index,saveBG) );
     584
     585          if (saveBG) bgWriter.println("# " + assay.getName() + ": " +
     586                                       backgroundCh1[assayNo]);
     587
     588          dri.close();
    426589          ++assayNo;
    427590        }
    428591        query.reset();
     592
     593        // Optionally write information to negative control file.
     594        if (saveBG)
     595        {
     596          bgWriter.println("#\n# Number of assays: " + negSData.size());
     597          bgWriter.println("# Number of spots : " + allNegPos.size());
     598          bgWriter.print("#\n# Position\tExternalId");
     599          for (BioAssay assay : assays) bgWriter.print("\t"+assay.getName());
     600          bgWriter.println();
     601
     602          // write expression matrix
     603          Iterator<Integer> position=allNegPos.iterator();
     604          while (position.hasNext())
     605          {
     606            int pos=position.next();
     607            String externalId=null;
     608            for (int i=0; i<assays.size(); ++i)
     609            {
     610              SData sdata=negSData.get(i).get(pos);
     611              if (sdata!=null)
     612              {
     613                externalId=sdata.externalId;
     614                break;
     615              }
     616            }
     617            bgWriter.print(pos + "\t" + externalId);
     618            for (int i=0; i<assays.size(); ++i)
     619            {
     620              SData sdata=negSData.get(i).get(pos);
     621              bgWriter.print("\t" + ((sdata!=null) ? sdata.intensity : ""));
     622            }
     623            bgWriter.println();
     624          }
     625
     626          // cleanup negative control file creation support
     627          bgWriter.close();
     628          allNegPos=null;   // tell GC that allNegPos   is not needed anymore
     629          negSData=null; // tell GC that negSData is not needed anymore
     630        }
     631
    429632        int nofAssays=assayNo;
    430633
Note: See TracChangeset for help on using the changeset viewer.