Changeset 5773
- Timestamp: Sep 29, 2011, 3:14:56 PM (11 years ago)
- Location: trunk
- Files: 1 added, 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/config/dist/raw-data-types.xml
r4888 r5773 3536 3536 </intensity-formula> 3537 3537 </raw-data-type> 3538 <raw-data-type 3539 id="cufflinks" 3540 channels="1" 3541 name="Cufflinks" 3542 table="RawDataCufflinks" 3543 description="Cufflinks isoforms/gene-level expression values in FPKM tracking format"> 3544 <property 3545 name="coverage" 3546 title="Coverage" 3547 description="Estimate for the absolute depth of read coverage across the object." 3548 column="coverage" 3549 type="float" 3550 averagemethod="arithmetic_mean" 3551 /> 3552 <property 3553 name="fpkm" 3554 title="FPKM" 3555 description="Fragments Per Kilobase of exon per Million fragments mapped." 3556 column="fpkm" 3557 type="float" 3558 averagemethod="geometric_mean" 3559 /> 3560 <property 3561 name="fpkm_lo" 3562 title="FPKM lo" 3563 description="The lower bound of the 95% confidence interval on the FPKM." 3564 column="fpkm_lo" 3565 type="float" 3566 averagemethod="geometric_mean" 3567 /> 3568 <property 3569 name="fpkm_hi" 3570 title="FPKM hi" 3571 description="The upper bound of the 95% confidence interval on the FPKM." 3572 column="fpkm_hi" 3573 type="float" 3574 averagemethod="geometric_mean" 3575 /> 3576 <property 3577 name="status" 3578 title="Status" 3579 description="Quantification status. Can be one of OK (deconvolution successful), LOWDATA (too complex or shallowly sequenced), HIDATA (too many fragments in locus), or FAIL, when an ill-conditioned covariance matrix or other numerical exception prevents deconvolution." 3580 column="status" 3581 type="string" 3582 length="255" 3583 averagemethod="none" 3584 /> 3585 <intensity-formula 3586 name="fpkm" 3587 title="FPKM" 3588 description="Fragments Per Kilobase of exon per Million fragments mapped." 3589 > 3590 <formula 3591 channel="1" 3592 expression="raw('fpkm')" 3593 /> 3594 </intensity-formula> 3595 3596 </raw-data-type> 3538 3597 </raw-data-types> 3539 3598 -
trunk/data/plugin_configfile.xml
r5772 r5773 1232 1232 </parameter> 1233 1233 </configuration> 1234 <configuration pluginClassName="net.sf.basedb.plugins.RawDataFlatFileImporter"> 1235 <configname>Cufflinks isoform FPKM (transcript_id@seqname; no prefix)</configname> 1236 <description>A configuration that import isoforms.fpkm_tracking files and uses <transcript_id>@<seqname> as reporter and feature id.</description> 1237 <parameter> 1238 <name>dataHeaderRegexp</name> 1239 <label>Data header</label> 1240 <description>A regular expression that matches the header line just before the data begins. For example: Block\tRow\tColumn.*</description> 1241 <class>java.lang.String</class> 1242 <value>tracking_id\t.*FPKM.*</value> 1243 </parameter> 1244 <parameter> 1245 <name>complexExpressions</name> 1246 <label>Complex column mappings</label> 1247 <description>disallow = Only allow simple mappings that are constant value or pick the value from one column only, for example, '1.6' or '\Row\' 1248 allow = Allow expression and complex mappings, for example, '\Row\, \Column\' or '=2*col('radius')'</description> 1249 <class>java.lang.String</class> 1250 <value>allow</value> 1251 </parameter> 1252 <parameter> 1253 <name>propertyMapping.status</name> 1254 <label>Status</label> 1255 <description>Quantification status. Can be one of OK (deconvolution successful), LOWDATA (too complex or shallowly sequenced), HIDATA (too many fragments in locus), or FAIL, when an ill-conditioned covariance matrix or other numerical exception prevents deconvolution.</description> 1256 <class>java.lang.String</class> 1257 <value>\status\</value> 1258 </parameter> 1259 <parameter> 1260 <name>charset</name> 1261 <label>Character set</label> 1262 <description>The character set to use when reading the file. This setting overrides the character set specified by the file. 
If neither this parameter nor the file specifies a character set, the system default is used (ISO-8859-1).</description> 1263 <class>java.lang.String</class> 1264 <value>ISO-8859-1</value> 1265 </parameter> 1266 <parameter> 1267 <name>featureIdColumnMapping</name> 1268 <label>Feature ID</label> 1269 <description>Mapping that picks the spot's feature ID from the data columns. This column is only used when the raw data is connected to an array design which uses the FEATURE_ID method for identifying features. The value is not saved to the database.For example: \Feature ID\</description> 1270 <class>java.lang.String</class> 1271 <value>=col('tracking_id')+'@'+left(col('locus'), ':')</value> 1272 </parameter> 1273 <parameter> 1274 <name>propertyMapping.fpkm_lo</name> 1275 <label>FPKM lo</label> 1276 <description>The lower bound of the 95% confidence interval on the FPKM.</description> 1277 <class>java.lang.String</class> 1278 <value>\FPKM_conf_lo\</value> 1279 </parameter> 1280 <parameter> 1281 <name>dataSplitterRegexp</name> 1282 <label>Data splitter</label> 1283 <description>A regular expression that splits each data line into individual columns. 
For example, split on tabs: \t</description> 1284 <class>java.lang.String</class> 1285 <value>\t</value> 1286 </parameter> 1287 <parameter> 1288 <name>decimalSeparator</name> 1289 <label>Decimal separator</label> 1290 <description>The decimal separator used in numeric values, if not specified dot is assumed.</description> 1291 <class>java.lang.String</class> 1292 <value>dot</value> 1293 </parameter> 1294 <parameter> 1295 <name>rawDataType</name> 1296 <label>Raw data type</label> 1297 <description>The type of raw data that this importer will import.</description> 1298 <class>java.lang.String</class> 1299 <value>cufflinks</value> 1300 </parameter> 1301 <parameter> 1302 <name>propertyMapping.coverage</name> 1303 <label>Coverage</label> 1304 <description>Estimate for the absolute depth of read coverage across the object.</description> 1305 <class>java.lang.String</class> 1306 <value>\coverage\</value> 1307 </parameter> 1308 <parameter> 1309 <name>propertyMapping.fpkm_hi</name> 1310 <label>FPKM hi</label> 1311 <description>The upper bound of the 95% confidence interval on the FPKM.</description> 1312 <class>java.lang.String</class> 1313 <value>\FPKM_conf_hi\</value> 1314 </parameter> 1315 <parameter> 1316 <name>reporterIdColumnMapping</name> 1317 <label>Reporter ID</label> 1318 <description>Mapping that picks the 'External ID' of the spot's reporter from the data columns. 
For example: \ID\</description> 1319 <class>java.lang.String</class> 1320 <value>=col('tracking_id')+'@'+left(col('locus'), ':')</value> 1321 </parameter> 1322 <parameter> 1323 <name>trimQuotes</name> 1324 <label>Remove quotes</label> 1325 <description>If true quotes (" or ') around data value will be removed.</description> 1326 <class>java.lang.Boolean</class> 1327 <value>true</value> 1328 </parameter> 1329 <parameter> 1330 <name>propertyMapping.fpkm</name> 1331 <label>FPKM</label> 1332 <description>Fragments Per Kilobase of exon per Million fragments mapped.</description> 1333 <class>java.lang.String</class> 1334 <value>\FPKM\</value> 1335 </parameter> 1336 </configuration> 1337 <configuration pluginClassName="net.sf.basedb.plugins.RawDataFlatFileImporter"> 1338 <configname>Cufflinks isoform FPKM (gene_id; no prefix)</configname> 1339 <description>A configuration that import isoforms.fpkm_tracking files and uses <gene_id> as reporter id and <transcript_id>@<seqname> as feature id.</description> 1340 <parameter> 1341 <name>dataHeaderRegexp</name> 1342 <label>Data header</label> 1343 <description>A regular expression that matches the header line just before the data begins. For example: Block\tRow\tColumn.*</description> 1344 <class>java.lang.String</class> 1345 <value>tracking_id\t.*FPKM.*</value> 1346 </parameter> 1347 <parameter> 1348 <name>complexExpressions</name> 1349 <label>Complex column mappings</label> 1350 <description>disallow = Only allow simple mappings that are constant value or pick the value from one column only, for example, '1.6' or '\Row\' 1351 allow = Allow expression and complex mappings, for example, '\Row\, \Column\' or '=2*col('radius')'</description> 1352 <class>java.lang.String</class> 1353 <value>allow</value> 1354 </parameter> 1355 <parameter> 1356 <name>propertyMapping.status</name> 1357 <label>Status</label> 1358 <description>Quantification status. 
Can be one of OK (deconvolution successful), LOWDATA (too complex or shallowly sequenced), HIDATA (too many fragments in locus), or FAIL, when an ill-conditioned covariance matrix or other numerical exception prevents deconvolution.</description> 1359 <class>java.lang.String</class> 1360 <value>\status\</value> 1361 </parameter> 1362 <parameter> 1363 <name>charset</name> 1364 <label>Character set</label> 1365 <description>The character set to use when reading the file. This setting overrides the character set specified by the file. If neither this parameter nor the file specifies a character set, the system default is used (ISO-8859-1).</description> 1366 <class>java.lang.String</class> 1367 <value>ISO-8859-1</value> 1368 </parameter> 1369 <parameter> 1370 <name>propertyMapping.fpkm_lo</name> 1371 <label>FPKM lo</label> 1372 <description>The lower bound of the 95% confidence interval on the FPKM.</description> 1373 <class>java.lang.String</class> 1374 <value>\FPKM_conf_lo\</value> 1375 </parameter> 1376 <parameter> 1377 <name>featureIdColumnMapping</name> 1378 <label>Feature ID</label> 1379 <description>Mapping that picks the spot's feature ID from the data columns. This column is only used when the raw data is connected to an array design which uses the FEATURE_ID method for identifying features. 
The value is not saved to the database.For example: \Feature ID\</description> 1380 <class>java.lang.String</class> 1381 <value>=col('tracking_id')+'@'+left(col('locus'), ':')</value> 1382 </parameter> 1383 <parameter> 1384 <name>decimalSeparator</name> 1385 <label>Decimal separator</label> 1386 <description>The decimal separator used in numeric values, if not specified dot is assumed.</description> 1387 <class>java.lang.String</class> 1388 <value>dot</value> 1389 </parameter> 1390 <parameter> 1391 <name>propertyMapping.coverage</name> 1392 <label>Coverage</label> 1393 <description>Estimate for the absolute depth of read coverage across the object.</description> 1394 <class>java.lang.String</class> 1395 <value>\coverage\</value> 1396 </parameter> 1397 <parameter> 1398 <name>dataSplitterRegexp</name> 1399 <label>Data splitter</label> 1400 <description>A regular expression that splits each data line into individual columns. For example, split on tabs: \t</description> 1401 <class>java.lang.String</class> 1402 <value>\t</value> 1403 </parameter> 1404 <parameter> 1405 <name>rawDataType</name> 1406 <label>Raw data type</label> 1407 <description>The type of raw data that this importer will import.</description> 1408 <class>java.lang.String</class> 1409 <value>cufflinks</value> 1410 </parameter> 1411 <parameter> 1412 <name>propertyMapping.fpkm_hi</name> 1413 <label>FPKM hi</label> 1414 <description>The upper bound of the 95% confidence interval on the FPKM.</description> 1415 <class>java.lang.String</class> 1416 <value>\FPKM_conf_hi\</value> 1417 </parameter> 1418 <parameter> 1419 <name>reporterIdColumnMapping</name> 1420 <label>Reporter ID</label> 1421 <description>Mapping that picks the 'External ID' of the spot's reporter from the data columns. 
For example: \ID\</description> 1422 <class>java.lang.String</class> 1423 <value>\<gene_id>\</value> 1424 </parameter> 1425 <parameter> 1426 <name>trimQuotes</name> 1427 <label>Remove quotes</label> 1428 <description>If true quotes (" or ') around data value will be removed.</description> 1429 <class>java.lang.Boolean</class> 1430 <value>true</value> 1431 </parameter> 1432 <parameter> 1433 <name>propertyMapping.fpkm</name> 1434 <label>FPKM</label> 1435 <description>Fragments Per Kilobase of exon per Million fragments mapped.</description> 1436 <class>java.lang.String</class> 1437 <value>\FPKM\</value> 1438 </parameter> 1439 </configuration> 1234 1440 </configfile> -
trunk/src/core/net/sf/basedb/core/DataFileType.java
r5764 r5773 157 157 */ 158 158 public static final String REF_SEQ_GTF = "refseq.gtf"; 159 160 /** 161 The external ID for the file type representing a FPKM 162 tracking file. 163 http://cufflinks.cbcb.umd.edu/manual.html#tracking_format 164 @since 3.0 165 */ 166 public static final String FPKM_TRACKING = "sequencing.fpkm_tracking"; 167 159 168 160 169 /** -
trunk/src/core/net/sf/basedb/core/Install.java
r5764 r5773 617 617 createMimeType("application/octet-stream", "Binary Sequence Alignment/Map", "bam", null, false); 618 618 createMimeType("text/plain", "Gene transfer format", "gtf", null, true); 619 createMimeType("text/plain", "FPKM tracking file", "fpkm_tracking", rawDataType, true); 619 620 620 621 // Plate geometries … … 884 885 "The default settings use <transcript_id> to identify genes.", 885 886 Item.ARRAYDESIGN, "gtf", reporterMapType); 887 DataFileTypeData fpkmTrackingFile = createDataFileType( 888 DataFileType.FPKM_TRACKING, "FPKM tracking file", 889 "Gene- or isoform expression levels in Fragments Per Kilobase of exon model per Million mapped fragments.", 890 Item.RAWBIOASSAY, "fpkm_tracking", rawDataType); 886 891 887 892 // Platforms and variants … … 907 912 "Variant for sequence data which has been pre-processed enough to make it possible " + 908 913 "for importing into the database. ", false, null, 0, 909 new PlatformFT(gtfDesignFile, true, false) 914 new PlatformFT(gtfDesignFile, true, false), 915 new PlatformFT(fpkmTrackingFile, true, false) 910 916 ); 911 917 -
trunk/src/core/net/sf/basedb/util/parser/JepMapper.java
r4515 r5773 27 27 28 28 import net.sf.basedb.util.jep.Jep; 29 import net.sf.basedb.util.jep.LeftFunction; 29 30 import net.sf.basedb.util.parser.FlatFileParser.Data; 30 31 … … 89 90 // Replace: \ColumnName\ with: col('ColumnName') 90 91 expression = expression.replaceAll("\\\\([^\\\\]*)\\\\", "col('$1')"); 91 this.parser = Jep.newJep(expression, colFunction, lineNoFunction, dataNoFunction );92 this.parser = Jep.newJep(expression, colFunction, lineNoFunction, dataNoFunction, new LeftFunction()); 92 93 } 93 94
Note: See TracChangeset for help on using the changeset viewer.