Changeset 3533


Ignore:
Timestamp:
Oct 8, 2015, 9:44:57 AM (7 years ago)
Author:
Nicklas Nordborg
Message:

References #812: Pilot report wizard

The pilot report now uses the pilot report R script. The molecular subtype values are parsed from the PAM50.txt file. The plots generated from the script are used. The text values on the left side are still based on random numbers.

Location:
extensions/net.sf.basedb.reggie/trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/config/reggie-config.xml

    r3492 r3533  
    2828      <!-- full path to the R script -->
    2929      <path>/path/to/pilot-report.R</path>
    30       <!-- full path to directory with SCAN-B reference data -->
    31       <!-- default is same directory as the R script -->
    32       <ref-dir-scanb></ref-dir-scanb>
     30      <!-- full path to directory with reference data -->
     31      <!-- default is 'referenceData' directory inside -->
     32      <!-- the same directory as the R script -->
     33      <ref-dir></ref-dir>
     34      <!-- full path to directory with source code -->
     35      <!-- default is 'source' directory inside -->
     36      <!-- the same directory as the R script -->
     37      <source-dir></source-dir>
    3338      <!-- full path to the PDF template -->
    3439      <!-- default is 'template.pdf' in the same directory as the R script -->
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/pdf/PilotReportWorker.java

    r3531 r3533  
    44import java.io.FileInputStream;
    55import java.io.IOException;
     6import java.io.InputStream;
    67import java.io.OutputStream;
    78import java.text.DecimalFormat;
     
    1314import java.util.List;
    1415import java.util.Map;
     16import java.util.regex.Pattern;
    1517
    1618import com.itextpdf.text.Element;
     
    3133import net.sf.basedb.reggie.r.PilotReport;
    3234import net.sf.basedb.reggie.r.RResult;
     35import net.sf.basedb.util.FileUtil;
    3336import net.sf.basedb.util.MD5;
     37import net.sf.basedb.util.parser.FlatFileParser;
     38import net.sf.basedb.util.parser.Mapper;
    3439
    3540/**
     
    3742  generates a PDF document with the plots and other information.
    3843 
    39   TODO - this is currently a mix of data from the Gene report (plots),
    40   random values (molecular subtype) and actual information from the
    41   database (header section).
    42 
    4344  @author nicklas
    4445  @since 3.7
     
    119120    {
    120121      // Initialize script in the first call to this method
    121       script = new PilotReport(config, PLOT_WIDTH, PLOT_HEIGHT);
     122      script = new PilotReport(config);
    122123      pdfTemplatePath = Reggie.getConfig().getConfig(config+"/template", null, script.getScriptDir() + "/template.pdf");
    123124      Reggie.checkFile(pdfTemplatePath, false);
     
    145146    BioSource patient = (BioSource)parents.get(Subtype.PATIENT);
    146147    Site site = Site.findByCaseName(raw.getName());
    147    
    148    
    149148   
    150149    PdfUtil pdfUtil = null;
     
    193192      if (site != Site.UNKNOWN) pdfUtil.addText(site.getName(), 12, Element.ALIGN_LEFT, TEXT_X5, TEXT_Y5);
    194193     
    195       // Molecular subtype
    196       String[] subtypeNames = { "LumA", "LumB", "HER2", "Basal", "Normal" };
    197       float[] subtypeScore = new float[5];
    198       int maxIndex = 0;
    199       for (int i = 0; i < 5; i++)
    200       {
    201         subtypeScore[i] = (float)(Math.random()*2-1);
    202         if (subtypeScore[i] > subtypeScore[maxIndex]) maxIndex = i;
    203       }
    204      
    205       for (int i = 0; i < 5; i++)
    206       {
    207         if (i == maxIndex)
     194      // Molecular subtype information is found in the 'PAM50.txt' output file
     195      File workDir = result.getWorkDir();
     196      String[] subtypeNames = { "LumA", "LumB", "Her2", "Basal", "Normal" };
     197      float[] subtypeScores = new float[subtypeNames.length];
     198      String subtypeClass = parsePam50(new File(workDir, "PAM50.txt"), subtypeNames, subtypeScores);
     199      pdfUtil.addText(subtypeClass, 14, Element.ALIGN_LEFT, SUBTYPE_X1, SUBTYPE_Y1);
     200      for (int scoreNo = 0; scoreNo < subtypeScores.length; scoreNo++)
     201      {
     202        if (subtypeClass.equals(subtypeNames[scoreNo]))
    208203        {
    209           pdfUtil.addBoldText(twoDecimals.format(subtypeScore[i]), 12, Element.ALIGN_LEFT, SUBTYPE_X2+i*SUBTYPE_XX, SUBTYPE_Y2);
    210           pdfUtil.addText(subtypeNames[i], 14, Element.ALIGN_LEFT, SUBTYPE_X1, SUBTYPE_Y1);
     204          pdfUtil.addBoldText(twoDecimals.format(subtypeScores[scoreNo]), 12, Element.ALIGN_LEFT, SUBTYPE_X2+scoreNo*SUBTYPE_XX, SUBTYPE_Y2);
    211205        }
    212206        else
    213207        {
    214           pdfUtil.addText(twoDecimals.format(subtypeScore[i]), 12, Element.ALIGN_LEFT, SUBTYPE_X2+i*SUBTYPE_XX, SUBTYPE_Y2);
     208          pdfUtil.addText(twoDecimals.format(subtypeScores[scoreNo]), 12, Element.ALIGN_LEFT, SUBTYPE_X2+scoreNo*SUBTYPE_XX, SUBTYPE_Y2);
    215209        }
    216210      }
     
    219213      float y = PLOT_START_Y;
    220214      float yText = PLOT_TEXT_START_Y;
    221       File workDir = result.getWorkDir();
     215      String[] plots = { "GGI", "ESR1", "PGR", "ERBB2", "MKI67" };
    222216      String[] lowHigh = { "Låg", "Hög" };
    223217      String[] positiveNegative = { "Negativ", "Positiv" };
    224      
    225       for (int plotNo = 0; plotNo < 5; plotNo++)
    226       {
     218      for (int plotNo = 0; plotNo < plots.length; plotNo++)
     219      {
     220        // TODO - the text is from a random number!
    227221        String[] options = plotNo == 0 || plotNo == 4 ? lowHigh : positiveNegative;
    228         pdfUtil.addText(options[subtypeScore[plotNo] < 0 ? 0 : 1], 14, Element.ALIGN_LEFT, PLOT_TEXT_X, yText);
     222        pdfUtil.addText(options[Math.random() > 0.5 ? 0 : 1], 14, Element.ALIGN_LEFT, PLOT_TEXT_X, yText);
    229223        yText -= PLOT_DELTA_Y;
    230224
    231         if (result.genes.size() > plotNo)
     225        String plot = plots[plotNo];
     226        File f2 = new File(workDir, plot+".pdf");
     227        if (f2.exists())
    232228        {
    233           String gene = result.genes.get(plotNo);
    234           File f2 = new File(workDir, "scanb_"+gene+".pdf");
    235           if (f2.exists())
    236           {
    237             pdfUtil.importPdf(new FileInputStream(f2), PLOT_X, y, 1.0f, 1.0f);
    238             y -= PLOT_DELTA_Y;
    239           }
     229          pdfUtil.importPdf(new FileInputStream(f2), PLOT_X, y, 1.0f, 1.0f);
     230          y -= PLOT_DELTA_Y;
    240231        }
    241232      }
     
    248239      if (pdfUtil != null) pdfUtil.close();
    249240    }
    250    
    251   }
    252  
     241  }
     242 
     243  /**
     244    The PAM50.txt file has one header line and one data line.
     245   
     246    @param pam50 The PAM50.txt file
     247    @param subtypes Column headers in the file we are interested in
     248    @param scores Output array for the scores found in the file. Must be
     249      of same length as the subtypes array
     250    @return The data value for the 'class' column
     251  */
     252  private String parsePam50(File pam50, String[] subtypes, float[] scores)
     253    throws IOException
     254  {
     255    FlatFileParser ffp = new FlatFileParser();
     256    ffp.setDataHeaderRegexp(Pattern.compile("class\\tnearest.*"));
     257    ffp.setDataSplitterRegexp(Pattern.compile("\\t"));
     258    InputStream in = null;
     259    String subtypeClass = null;
     260    try
     261    {
     262      in = new FileInputStream(pam50);
     263      ffp.setInputStream(in, "UTF-8");
     264      FlatFileParser.LineType line = ffp.parseHeaders();
     265      // Check that we have found the header and that it is followed by data
     266      if (line != FlatFileParser.LineType.DATA_HEADER)
     267      {
     268        throw new IOException("Can't find data header in file: "+pam50);
     269      }
     270      FlatFileParser.Data data = ffp.nextData();
     271      if (data == null)
     272      {
     273        throw new IOException("Can't find data line in file: "+pam50);
     274      }
     275     
     276      // Get the subtype class
     277      Mapper mapper = ffp.getMapper("\\class\\");
     278      subtypeClass = mapper.getValue(data);
     279      // Get subtype scores
     280      for (int subtypeNo = 0; subtypeNo < subtypes.length; subtypeNo++)
     281      {
     282        mapper = ffp.getMapper("\\"+subtypes[subtypeNo]+"\\");
     283        scores[subtypeNo] = mapper.getFloat(data);
     284      }
     285    }
     286    finally
     287    {
     288      FileUtil.close(in);
     289    }
     290    return subtypeClass;
     291  }
    253292 
    254293  /**
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/r/PilotReport.java

    r3531 r3533  
    55import java.io.InputStream;
    66import java.io.OutputStream;
    7 import java.util.ArrayList;
    8 import java.util.Arrays;
    9 import java.util.List;
    10 import java.util.Map;
    117
    12 import net.sf.basedb.core.DataFileType;
    138import net.sf.basedb.core.DbControl;
    149import net.sf.basedb.core.File;
     
    1813import net.sf.basedb.reggie.dao.Datafiletype;
    1914import net.sf.basedb.reggie.dao.Rawbioassay;
    20 import net.sf.basedb.reggie.pdf.PdfUtil;
    2115import net.sf.basedb.util.FileUtil;
    22 import net.sf.basedb.util.Values;
    23 
    2416
    2517/**
     
    2719  generating a PDF document with the plots.
    2820 
    29   TODO - this is currently a partial copy of the GeneReport.
    30 
    3121  @author nicklas
    3222  @since 3.7
     
    3626{
    3727
    38   private final List<String> genes;
    39   private final float plotWidth;
    40   private final float plotHeight;
    41 
    42   private RFunction scanB;
     28  private RFunction pilotReport;
    4329 
    4430  /**
     
    4632    parameters. Path to R script must be in configuration file at
    4733    <cfg>/path. Directory with reference data can optionally be
    48     specified by <cfg>/ref-dir-scanb
    49     or it will be assumed that it is found in the same directory as the R script.
     34    specified by <cfg>/ref-dir
     35    or it will be assumed that it is found in the 'referenceData' subdirectory
     36    of the directory containing the R script.
    5037  */
    51   public PilotReport(String cfg, float plotWidth, float plotHeight)
     38  public PilotReport(String cfg)
    5239    throws IOException
    5340  {
    54     genes = Arrays.asList("ESR1", "PGR", "ERBB2", "MKI67", "AURKA");
    55     this.plotWidth = plotWidth;
    56     this.plotHeight = plotHeight;
    57 
    5841    XmlConfig config = Reggie.getConfig();
    5942    // Get and check path to script file
     
    6144    Reggie.checkFile(script_path, false);
    6245    setScript(script_path);
    63     String ref_dir_scanb = config.getConfig(cfg+"/ref-dir-scanb", null, getScriptDir());
     46    String ref_dir = config.getConfig(cfg+"/ref-dir", null, getScriptDir() + "/referenceData");
     47    String source_dir = config.getConfig(cfg+"/source-dir", null, getScriptDir() + "/source");
    6448
    6549    // Check that files and directories exists
    66     Reggie.checkFile(ref_dir_scanb, true);
     50    Reggie.checkFile(ref_dir, true);
     51    Reggie.checkFile(source_dir, true);
    6752
    68     scanB = addFunction("geneReport");
    69     setDefaultGeneReportParameters(scanB);
    70     scanB.setParameter("ref.dir", "'" + ref_dir_scanb + "'");
    71     scanB.setParameter("file.prefix", "'scanb'");
     53    pilotReport = addFunction("pilotReport");
     54    setDefaultParameters(pilotReport);
     55    pilotReport.setParameter("datadir", "'" + ref_dir + "'");
     56    pilotReport.setParameter("sourcedir", "'" + source_dir + "'");
    7257  }
    7358
    74   private void setDefaultGeneReportParameters(RFunction f)
    75   {
    76     // These are really required to generate the result that
    77     // is compatible with the template pdf
    78     f.setParameter("width", plotWidth / PdfUtil.DPI);
    79     f.setParameter("height", plotHeight / PdfUtil.DPI);
    80     f.setParameter("pointsize", "6");
    81     f.setParameter("outfile", "'pdf'");
    82     f.setParameter("extra.text", "F");
    83     // These are default in the script but if we set them here
    84     // there is no way to test other combinations
    85     /*
    86     f.setParameter("use.fix.xlim", "T");
    87     f.setParameter("box", "T");
    88     f.setParameter("line", "F");
    89     f.setParameter("density", "T");
    90     f.setParameter("weight.density", "T");
    91     f.setParameter("no.yaxis", "F");
    92     */
    93   }
    94  
    95   /**
    96     Get the list of genes to get a report for.
    97   */
    98   public List<String> getGenes()
    99   {
    100     return genes;
    101   }
     59  private void setDefaultParameters(RFunction f)
     60  {}
    10261 
    10362  /**
     
    11473      throw new ItemNotFoundException(Datafiletype.FPKM.getName() + " for raw bioassay " + raw.getName());
    11574    }
    116        
    117     // Calculate sum(fpkm) for given genes
    118     Map<String, Float> sums = raw.getFpkmSum(dc, genes);
    11975
    120     List<Float> v = new ArrayList<Float>();
    121     for (String gene : genes)
    122     {
    123       Float s = sums.get(gene);
    124       v.add(s == null ? 0 : s);
    125     }
    126 
    127     String values = Values.getString(v, ",", true);
    128     String caseName = checkValidScriptParameter(raw.getName());
     76    pilotReport.setParameter("cufflinksfile", "'${workdir}/"+fpkmFile.getName()+"'");
    12977   
    130     scanB.setParameter("value", "c(" + values + ")");
    131     scanB.setParameter("case", "'"+caseName+"'");
    132     scanB.setParameter("fpkm", "'${workdir}/"+fpkmFile.getName()+"'");
    133    
    134     Result result = run(new Result(raw, fpkmFile, genes));
     78    Result result = run(new Result(raw, fpkmFile));
    13579    return result;
    13680  }
     
    14185    public final Rawbioassay raw;
    14286    public final File fpkmFile;
    143     public final List<String> genes;
    14487   
    14588    /**
    14689      Creates a new result object for the given raw bioassay.
    14790    */
    148     public Result(Rawbioassay raw, File fpkmFile, List<String> genes)
     91    public Result(Rawbioassay raw, File fpkmFile)
    14992    {
    15093      super();
    15194      this.raw = raw;
    15295      this.fpkmFile = fpkmFile;
    153       this.genes = genes;
    15496    }
    15597
Note: See TracChangeset for help on using the changeset viewer.