Changeset 3533
- Timestamp: Oct 8, 2015, 9:44:57 AM (7 years ago)
- Location: extensions/net.sf.basedb.reggie/trunk
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
-
extensions/net.sf.basedb.reggie/trunk/config/reggie-config.xml
r3492 r3533 28 28 <!-- full path to the R script --> 29 29 <path>/path/to/pilot-report.R</path> 30 <!-- full path to directory with SCAN-B reference data --> 31 <!-- default is same directory as the R script --> 32 <ref-dir-scanb></ref-dir-scanb> 30 <!-- full path to directory with reference data --> 31 <!-- default is 'referenceData' directory inside --> 32 <!-- the same directory as the R script --> 33 <ref-dir></ref-dir> 34 <!-- full path to directory with source code --> 35 <!-- default is 'source' directory inside --> 36 <!-- the same directory as the R script --> 37 <source-dir></source-dir> 33 38 <!-- full path to the PDF template --> 34 39 <!-- default is 'template.pdf' in the same directory as the R script --> -
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/pdf/PilotReportWorker.java
r3531 r3533 4 4 import java.io.FileInputStream; 5 5 import java.io.IOException; 6 import java.io.InputStream; 6 7 import java.io.OutputStream; 7 8 import java.text.DecimalFormat; … … 13 14 import java.util.List; 14 15 import java.util.Map; 16 import java.util.regex.Pattern; 15 17 16 18 import com.itextpdf.text.Element; … … 31 33 import net.sf.basedb.reggie.r.PilotReport; 32 34 import net.sf.basedb.reggie.r.RResult; 35 import net.sf.basedb.util.FileUtil; 33 36 import net.sf.basedb.util.MD5; 37 import net.sf.basedb.util.parser.FlatFileParser; 38 import net.sf.basedb.util.parser.Mapper; 34 39 35 40 /** … … 37 42 generates a PDF document with the plots and other information. 38 43 39 TODO - this is currently a mix of data from the Gene report (plots),40 random values (molecular subtype) and actual information from the41 database (header section).42 43 44 @author nicklas 44 45 @since 3.7 … … 119 120 { 120 121 // Initialize script in the first call to this method 121 script = new PilotReport(config , PLOT_WIDTH, PLOT_HEIGHT);122 script = new PilotReport(config); 122 123 pdfTemplatePath = Reggie.getConfig().getConfig(config+"/template", null, script.getScriptDir() + "/template.pdf"); 123 124 Reggie.checkFile(pdfTemplatePath, false); … … 145 146 BioSource patient = (BioSource)parents.get(Subtype.PATIENT); 146 147 Site site = Site.findByCaseName(raw.getName()); 147 148 149 148 150 149 PdfUtil pdfUtil = null; … … 193 192 if (site != Site.UNKNOWN) pdfUtil.addText(site.getName(), 12, Element.ALIGN_LEFT, TEXT_X5, TEXT_Y5); 194 193 195 // Molecular subtype 196 String[] subtypeNames = { "LumA", "LumB", "HER2", "Basal", "Normal" }; 197 float[] subtypeScore = new float[5]; 198 int maxIndex = 0; 199 for (int i = 0; i < 5; i++) 200 { 201 subtypeScore[i] = (float)(Math.random()*2-1); 202 if (subtypeScore[i] > subtypeScore[maxIndex]) maxIndex = i; 203 } 204 205 for (int i = 0; i < 5; i++) 206 { 207 if (i == maxIndex) 194 // Molecular subtype information is found in the 'PAM50.txt' 
output file 195 File workDir = result.getWorkDir(); 196 String[] subtypeNames = { "LumA", "LumB", "Her2", "Basal", "Normal" }; 197 float[] subtypeScores = new float[subtypeNames.length]; 198 String subtypeClass = parsePam50(new File(workDir, "PAM50.txt"), subtypeNames, subtypeScores); 199 pdfUtil.addText(subtypeClass, 14, Element.ALIGN_LEFT, SUBTYPE_X1, SUBTYPE_Y1); 200 for (int scoreNo = 0; scoreNo < subtypeScores.length; scoreNo++) 201 { 202 if (subtypeClass.equals(subtypeNames[scoreNo])) 208 203 { 209 pdfUtil.addBoldText(twoDecimals.format(subtypeScore[i]), 12, Element.ALIGN_LEFT, SUBTYPE_X2+i*SUBTYPE_XX, SUBTYPE_Y2); 210 pdfUtil.addText(subtypeNames[i], 14, Element.ALIGN_LEFT, SUBTYPE_X1, SUBTYPE_Y1); 204 pdfUtil.addBoldText(twoDecimals.format(subtypeScores[scoreNo]), 12, Element.ALIGN_LEFT, SUBTYPE_X2+scoreNo*SUBTYPE_XX, SUBTYPE_Y2); 211 205 } 212 206 else 213 207 { 214 pdfUtil.addText(twoDecimals.format(subtypeScore [i]), 12, Element.ALIGN_LEFT, SUBTYPE_X2+i*SUBTYPE_XX, SUBTYPE_Y2);208 pdfUtil.addText(twoDecimals.format(subtypeScores[scoreNo]), 12, Element.ALIGN_LEFT, SUBTYPE_X2+scoreNo*SUBTYPE_XX, SUBTYPE_Y2); 215 209 } 216 210 } … … 219 213 float y = PLOT_START_Y; 220 214 float yText = PLOT_TEXT_START_Y; 221 File workDir = result.getWorkDir();215 String[] plots = { "GGI", "ESR1", "PGR", "ERBB2", "MKI67" }; 222 216 String[] lowHigh = { "Låg", "Hög" }; 223 217 String[] positiveNegative = { "Negativ", "Positiv" }; 224 225 for (int plotNo = 0; plotNo < 5; plotNo++)226 {218 for (int plotNo = 0; plotNo < plots.length; plotNo++) 219 { 220 // TODO - the text is from a random number! 227 221 String[] options = plotNo == 0 || plotNo == 4 ? lowHigh : positiveNegative; 228 pdfUtil.addText(options[ subtypeScore[plotNo] < 0? 0 : 1], 14, Element.ALIGN_LEFT, PLOT_TEXT_X, yText);222 pdfUtil.addText(options[Math.random() > 0.5 ? 
0 : 1], 14, Element.ALIGN_LEFT, PLOT_TEXT_X, yText); 229 223 yText -= PLOT_DELTA_Y; 230 224 231 if (result.genes.size() > plotNo) 225 String plot = plots[plotNo]; 226 File f2 = new File(workDir, plot+".pdf"); 227 if (f2.exists()) 232 228 { 233 String gene = result.genes.get(plotNo); 234 File f2 = new File(workDir, "scanb_"+gene+".pdf"); 235 if (f2.exists()) 236 { 237 pdfUtil.importPdf(new FileInputStream(f2), PLOT_X, y, 1.0f, 1.0f); 238 y -= PLOT_DELTA_Y; 239 } 229 pdfUtil.importPdf(new FileInputStream(f2), PLOT_X, y, 1.0f, 1.0f); 230 y -= PLOT_DELTA_Y; 240 231 } 241 232 } … … 248 239 if (pdfUtil != null) pdfUtil.close(); 249 240 } 250 251 } 252 241 } 242 243 /** 244 The PAM50.txt file has one header line and one data line. 245 246 @param pam50 The PAM50.txt file 247 @param subtypes Column headers in the file we are interested in 248 @param scores Output array for the scores found in the file. Must be 249 of same length as the subtypes array 250 @return The data value for the 'class' column 251 */ 252 private String parsePam50(File pam50, String[] subtypes, float[] scores) 253 throws IOException 254 { 255 FlatFileParser ffp = new FlatFileParser(); 256 ffp.setDataHeaderRegexp(Pattern.compile("class\\tnearest.*")); 257 ffp.setDataSplitterRegexp(Pattern.compile("\\t")); 258 InputStream in = null; 259 String subtypeClass = null; 260 try 261 { 262 in = new FileInputStream(pam50); 263 ffp.setInputStream(in, "UTF-8"); 264 FlatFileParser.LineType line = ffp.parseHeaders(); 265 // Check that we have found the header and has data 266 if (line != FlatFileParser.LineType.DATA_HEADER) 267 { 268 throw new IOException("Can't find data header in file: "+pam50); 269 } 270 FlatFileParser.Data data = ffp.nextData(); 271 if (data == null) 272 { 273 throw new IOException("Can't find data line in file: "+pam50); 274 } 275 276 // Get the subtype class 277 Mapper mapper = ffp.getMapper("\\class\\"); 278 subtypeClass = mapper.getValue(data); 279 // Get subtype scores 280 for (int subtypeNo 
= 0; subtypeNo < subtypes.length; subtypeNo++) 281 { 282 mapper = ffp.getMapper("\\"+subtypes[subtypeNo]+"\\"); 283 scores[subtypeNo] = mapper.getFloat(data); 284 } 285 } 286 finally 287 { 288 FileUtil.close(in); 289 } 290 return subtypeClass; 291 } 253 292 254 293 /** -
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/r/PilotReport.java
r3531 r3533 5 5 import java.io.InputStream; 6 6 import java.io.OutputStream; 7 import java.util.ArrayList;8 import java.util.Arrays;9 import java.util.List;10 import java.util.Map;11 7 12 import net.sf.basedb.core.DataFileType;13 8 import net.sf.basedb.core.DbControl; 14 9 import net.sf.basedb.core.File; … … 18 13 import net.sf.basedb.reggie.dao.Datafiletype; 19 14 import net.sf.basedb.reggie.dao.Rawbioassay; 20 import net.sf.basedb.reggie.pdf.PdfUtil;21 15 import net.sf.basedb.util.FileUtil; 22 import net.sf.basedb.util.Values;23 24 16 25 17 /** … … 27 19 generating a PDF document with the plots. 28 20 29 TODO - this is currently a partial copy of the GeneReport.30 31 21 @author nicklas 32 22 @since 3.7 … … 36 26 { 37 27 38 private final List<String> genes; 39 private final float plotWidth; 40 private final float plotHeight; 41 42 private RFunction scanB; 28 private RFunction pilotReport; 43 29 44 30 /** … … 46 32 parameters. Path to R script must be in configuration file at 47 33 <cfg>/path. Directory with reference data can optionally be 48 specified by <cfg>/ref-dir-scanb 49 or it will be assumed that it is found in the same directory as the R script. 34 specified by <cfg>/ref-dir 35 or it will be assumed that it is found in the 'referenceData' subdirectory 36 to the R script. 
50 37 */ 51 public PilotReport(String cfg , float plotWidth, float plotHeight)38 public PilotReport(String cfg) 52 39 throws IOException 53 40 { 54 genes = Arrays.asList("ESR1", "PGR", "ERBB2", "MKI67", "AURKA");55 this.plotWidth = plotWidth;56 this.plotHeight = plotHeight;57 58 41 XmlConfig config = Reggie.getConfig(); 59 42 // Get and check path to script file … … 61 44 Reggie.checkFile(script_path, false); 62 45 setScript(script_path); 63 String ref_dir_scanb = config.getConfig(cfg+"/ref-dir-scanb", null, getScriptDir()); 46 String ref_dir = config.getConfig(cfg+"/ref-dir", null, getScriptDir() + "/referenceData"); 47 String source_dir = config.getConfig(cfg+"/source-dir", null, getScriptDir() + "/source"); 64 48 65 49 // Check that files and directories exists 66 Reggie.checkFile(ref_dir_scanb, true); 50 Reggie.checkFile(ref_dir, true); 51 Reggie.checkFile(source_dir, true); 67 52 68 scanB = addFunction("geneReport");69 setDefault GeneReportParameters(scanB);70 scanB.setParameter("ref.dir", "'" + ref_dir_scanb+ "'");71 scanB.setParameter("file.prefix", "'scanb'");53 pilotReport = addFunction("pilotReport"); 54 setDefaultParameters(pilotReport); 55 pilotReport.setParameter("datadir", "'" + ref_dir + "'"); 56 pilotReport.setParameter("sourcedir", "'" + source_dir + "'"); 72 57 } 73 58 74 private void setDefaultGeneReportParameters(RFunction f) 75 { 76 // These are really required to generate the result that 77 // is compatible with the template pdf 78 f.setParameter("width", plotWidth / PdfUtil.DPI); 79 f.setParameter("height", plotHeight / PdfUtil.DPI); 80 f.setParameter("pointsize", "6"); 81 f.setParameter("outfile", "'pdf'"); 82 f.setParameter("extra.text", "F"); 83 // These are default in the script but if we set them here 84 // there is no way to test other combinations 85 /* 86 f.setParameter("use.fix.xlim", "T"); 87 f.setParameter("box", "T"); 88 f.setParameter("line", "F"); 89 f.setParameter("density", "T"); 90 f.setParameter("weight.density", "T"); 91 
f.setParameter("no.yaxis", "F"); 92 */ 93 } 94 95 /** 96 Get the list of genes to get a report for. 97 */ 98 public List<String> getGenes() 99 { 100 return genes; 101 } 59 private void setDefaultParameters(RFunction f) 60 {} 102 61 103 62 /** … … 114 73 throw new ItemNotFoundException(Datafiletype.FPKM.getName() + " for raw bioassay " + raw.getName()); 115 74 } 116 117 // Calculate sum(fpkm) for given genes118 Map<String, Float> sums = raw.getFpkmSum(dc, genes);119 75 120 List<Float> v = new ArrayList<Float>(); 121 for (String gene : genes) 122 { 123 Float s = sums.get(gene); 124 v.add(s == null ? 0 : s); 125 } 126 127 String values = Values.getString(v, ",", true); 128 String caseName = checkValidScriptParameter(raw.getName()); 76 pilotReport.setParameter("cufflinksfile", "'${workdir}/"+fpkmFile.getName()+"'"); 129 77 130 scanB.setParameter("value", "c(" + values + ")"); 131 scanB.setParameter("case", "'"+caseName+"'"); 132 scanB.setParameter("fpkm", "'${workdir}/"+fpkmFile.getName()+"'"); 133 134 Result result = run(new Result(raw, fpkmFile, genes)); 78 Result result = run(new Result(raw, fpkmFile)); 135 79 return result; 136 80 } … … 141 85 public final Rawbioassay raw; 142 86 public final File fpkmFile; 143 public final List<String> genes;144 87 145 88 /** 146 89 Creates a new result object for the given raw bioassay. 147 90 */ 148 public Result(Rawbioassay raw, File fpkmFile , List<String> genes)91 public Result(Rawbioassay raw, File fpkmFile) 149 92 { 150 93 super(); 151 94 this.raw = raw; 152 95 this.fpkmFile = fpkmFile; 153 this.genes = genes;154 96 } 155 97
Note: See TracChangeset for help on using the changeset viewer.