Changeset 1114 for plugins/base2


Ignore:
Timestamp:
Jun 10, 2009, 12:09:36 AM (14 years ago)
Author:
Jari Häkkinen
Message:

Addresses #113. The Pvalue calculation is in place. Need to figure details of std and average calculations.

Location:
plugins/base2/net.sf.basedb.illumina/trunk
Files:
12 added
3 edited

Legend:

Unmodified
Added
Removed
  • plugins/base2/net.sf.basedb.illumina/trunk/LICENSE

    r940 r1114  
    1 Files are copyright by their respective authors. The contributions to
    2 files where copyright is not explicitly stated can be traced with the
    3 source code revision system.
     1Source code in src/org/apache/commons is unmodified copies of the
     2files retrieved from the repository (revision 783095). The files are
     3licensed under the Apache license 2.0. The license is available at
     4http://www.apache.org/licenses/LICENSE-2.0
     5
     6
     7All other files are copyright by their respective authors. The
     8contributions to files where copyright is not explicitly stated can be
     9traced with the baseplugins source code revision system.
    410
    511This file is part of Illumina plug-in package for BASE.
  • plugins/base2/net.sf.basedb.illumina/trunk/README_PluginDetails

    r1113 r1114  
    5555current bioassay set.
    5656
     57Whole genome beadchips:
     58
    5759For all signals `i` calculate the detection P-value as
    58 `Pvalue = 1-i/N` where `i` is the rank of the signal relative to
    59 the negative controls and `N` is the number of negative controls.
     60`Pvalue = 1-i/N` where `R` is the rank of the signal i relative to the
     61negative controls and `N` is the number of negative controls.
     62
     63Others (DASL, miRNA, VeraCode DASL, and Focused Arrays):
     64
     65For all signals `i` calculate the detection P-value as
     66`Pvalue = 1/2 - erf( [i-AvgControl]/StdControl/sqrt(2) )` where
     67`AvgControl` is the average intensity of the negative controls,
     68`StdControl` is the standard deviation of the ... (to be defined and
     69implemented accordingly), and `erf` is the error function
     70(http://mathworld.wolfram.com/Erf.html). The error function is used
     71for arguments witin the range (-4,4). To save CPU cycles, the function
     72value for arguments outside this range is set to -1 and 1,
     73respectively.
    6074
    6175
  • plugins/base2/net.sf.basedb.illumina/trunk/src/net/sf/basedb/illumina/plugins/DetectionPValue.java

    r1112 r1114  
    6767import net.sf.basedb.util.Values;
    6868
     69import org.apache.commons.math.MathException;
     70import org.apache.commons.math.special.Erf;
     71
    6972import java.sql.SQLException;
    7073import java.util.ArrayList;
     
    117120    ( "arraytype",
    118121      "Array type",
    119       "BeadStudio like detection P-value calculation for array types:\n\n" +
    120       "Illumina Whole Genome BeadChips\n" +
    121       "Illumina Focused Arrays (not yet implemented)",
     122      "Illumina BeadStudio like detection P-value calculation.\n\n" +
     123      "Supported array types:\n\n" +
     124      " - Whole Genome BeadChips\n" +
     125      " - Others (DASL, miRNA, VeraCode DASL, and Focused Arrays)",
    122126      new StringParameterType(255, "Whole Genome BeadChip", true, 1, 0, 0,
    123127        Arrays.asList(
    124           new String[] { "Whole Genome BeadChip", "Focused Array (not yet implemented)" }
     128          new String[] { "Whole Genome BeadChip",
     129                         "Others" }
    125130        )
    126131      )
     
    352357
    353358
    354   float pvalue(float[] sortedArray, int startIndex, int stopIndex, float intensity)
     359  float pvalueOthers(float intensityA, float intensityS, float intensity)
     360    throws MathException
     361  {
     362    double arg=(intensity-intensityA)/intensityS/1.414213562;
     363    // No need to waste iterations in Erf for large or small arguments.
     364    if (arg>4)
     365      return 0;
     366    else if (arg<-4)
     367      return 1;
     368    return (float)(0.5 - Erf.erf(arg)/2);
     369  }
     370
     371
     372  float pvalueWG(float[] sortedArray, int startIndex, int stopIndex, float intensity)
    355373  {
    356374    int i=stopIndex;
     
    461479            (Float)job.getValue(outlierParameter.getName()));
    462480
     481          // Calculate average control intensity and std only if
     482          // needed, i.e., if not calculating detection P-value for
     483          // whole genome beadchips.
     484          float cIntAvg=0;
     485          float cIntStd=0;
     486          if (arrayType.equals("Others")) { // must match arrayTypeParameter
     487            float x=0;
     488            float xx=0;
     489            for (int i=controlStartStop[0]; i<=controlStartStop[1]; ++i)
     490            {
     491              x+=sortedControlIntensity[i];
     492              xx+=sortedControlIntensity[i]*sortedControlIntensity[i];
     493            }
     494            int n=controlStartStop[1]-controlStartStop[0]+1;
     495            cIntAvg=x/n;
     496            cIntStd=(float)Math.sqrt((xx-x*x/n)/n);
     497          }
     498
    463499          checkInterrupted();
    464500
     
    472508          {
    473509            SqlResult result = dri.next();
    474             batcher.insert(result.getShort(columnIndex), result.getInt(positionIndex),
    475               pvalue(sortedControlIntensity, controlStartStop[0],
    476               controlStartStop[1], result.getFloat(ch1Index)));
     510            float pval=( arrayType.equals("Whole Genome BeadChip") ?
     511                         pvalueWG(sortedControlIntensity, controlStartStop[0],
     512                                  controlStartStop[1], result.getFloat(ch1Index)) :
     513                         pvalueOthers(cIntAvg, cIntStd, result.getFloat(ch1Index)) );
     514            batcher.insert(result.getShort(columnIndex), result.getInt(positionIndex), pval);
    477515          }
    478516          dri.close();
Note: See TracChangeset for help on using the changeset viewer.