source: trunk/data/illumina.configurations.xml @ 5764

Last change on this file since 5764 was 5764, checked in by Nicklas Nordborg, 10 years ago

Fixes #1624: Create array design (feature) importer for GTF files

  • Created GtfReporterMapImporter and two configurations that use transcript_id/gene_id for reporter ids. The GTF importer builds on the ReporterMapFlatFileImporter but has fewer configuration options, e.g. it is locked to use FEATURE_ID for identifying features.
  • Added a new parameter to the regular ReporterMapFlatFileImporter that makes it possible to limit a configuration to certain platforms. This is used to hide genepix, illumina, etc. when using the sequencing platform.
  • Added SEQUENCING platform and SEQUENCING_EXPRESSION variant.
  • Created a GTF validator that hooks into the fileset validation extension point for array designs and extracts the number of unique transcript_id values from the GTF file.
File size: 22.9 KB
Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE configfile SYSTEM "plugin-configuration-file.dtd"><configfile>
3  <configuration pluginClassName="net.sf.basedb.plugins.ReporterFlatFileImporter">
4    <configname>Reporters from Illumina raw data file</configname>
5    <description>This configuration can import reporters from an Illumina raw data file. No annotation information can be imported since only the reporter ID is available in the file.</description>
6    <parameter>
7      <name>extendedColumnMapping.accession</name>
8      <label>Accession</label>
9      <description />
10      <class />
11      <value />
12    </parameter>
13    <parameter>
14      <name>minDataColumns</name>
15      <label>Min data columns</label>
16      <description>The minimum number of columns for a line to be counted as a data line.</description>
17      <class />
18      <value />
19    </parameter>
20    <parameter>
21      <name>dataFooterRegexp</name>
22      <label>Data footer</label>
23      <description>A regular expression that matches the first line of non-data after the data lines. For example: __END_OF_DATA__</description>
24      <class />
25      <value />
26    </parameter>
27    <parameter>
28      <name>extendedColumnMapping.cytoband</name>
29      <label>Cytoband</label>
30      <description>The cytoband from which the reporter is derived</description>
31      <class />
32      <value />
33    </parameter>
34    <parameter>
35      <name>extendedColumnMapping.markers</name>
36      <label>Markers</label>
37      <description />
38      <class />
39      <value />
40    </parameter>
41    <parameter>
42      <name>extendedColumnMapping.omim</name>
43      <label>OMIM</label>
44      <description />
45      <class />
46      <value />
47    </parameter>
48    <parameter>
49      <name>extendedColumnMapping.tissue</name>
50      <label>Tissue</label>
51      <description>The tissue from which the reporter is derived</description>
52      <class />
53      <value />
54    </parameter>
55    <parameter>
56      <name>ignoreRegexp</name>
57      <label>Ignore</label>
58      <description>A regular expression that matches any line that should be ignored. For example, ignore lines starting with #: ^#.*</description>
59      <class />
60      <value />
61    </parameter>
62    <parameter>
63      <name>descriptionColumnMapping</name>
64      <label>Description</label>
65      <description>Mapping that picks the reporter's description from the data columns. For example: \Description\</description>
66      <class />
67      <value />
68    </parameter>
69    <parameter>
70      <name>extendedColumnMapping.clusterId</name>
71      <label>Cluster ID</label>
72      <description>A unique identifier for a Unigene entry</description>
73      <class />
74      <value />
75    </parameter>
76    <parameter>
77      <name>decimalSeparator</name>
78      <label>Decimal separator</label>
79      <description>The decimal separator used in numeric values, if not specified dot is assumed.</description>
80      <class>java.lang.String</class>
81      <value>dot</value>
82    </parameter>
83    <parameter>
84      <name>trimQuotes</name>
85      <label>Remove quotes</label>
86      <description>If true, quotes (" or ') around data values will be removed.</description>
87      <class>java.lang.Boolean</class>
88      <value>true</value>
89    </parameter>
90    <parameter>
91      <name>extendedColumnMapping.locusLink</name>
92      <label>LocusLink</label>
93      <description />
94      <class />
95      <value />
96    </parameter>
97    <parameter>
98      <name>extendedColumnMapping.library</name>
99      <label>Library</label>
100      <description>The library from which the reporter is derived</description>
101      <class />
102      <value />
103    </parameter>
104    <parameter>
105      <name>maxDataColumns</name>
106      <label>Max data columns</label>
107      <description>The maximum number of columns for a line to be counted as a data line, or 0 to allow any number of columns.</description>
108      <class />
109      <value />
110    </parameter>
111    <parameter>
112      <name>extendedColumnMapping.chromosome</name>
113      <label>Chromosome</label>
114      <description>The chromosome from which the reporter is derived</description>
115      <class />
116      <value />
117    </parameter>
118    <parameter>
119      <name>symbolColumnMapping</name>
120      <label>Gene symbol</label>
121      <description>Mapping that picks the reporter's gene symbol from the data columns. For example: \Gene symbol\</description>
122      <class />
123      <value />
124    </parameter>
125    <parameter>
126      <name>scoreColumnMapping</name>
127      <label>Score</label>
128      <description>Mapping that picks the reporter's score in some context. This mapping is only used when importing to a reporter list.</description>
129      <class />
130      <value />
131    </parameter>
132    <parameter>
133      <name>headerRegexp</name>
134      <label>Header</label>
135      <description>A regular expression that matches a header line and extracts the name and value parts. For example, split on the equal sign: (.+)=(.*)</description>
136      <class />
137      <value />
138    </parameter>
139    <parameter>
140      <name>dataHeaderRegexp</name>
141      <label>Data header</label>
142      <description>A regular expression that matches the header line just before the data begins. For example: Block\tRow\tColumn.*</description>
143      <class>java.lang.String</class>
144      <value>TargetID,.*</value>
145    </parameter>
146    <parameter>
147      <name>reporterType</name>
148      <label>Reporter type</label>
149      <description>The reporter type assigned to the imported reporters</description>
150      <class />
151      <value />
152    </parameter>
153    <parameter>
154      <name>extendedColumnMapping.length</name>
155      <label>Length</label>
156      <description>The length of the sequence</description>
157      <class />
158      <value />
159    </parameter>
160    <parameter>
161      <name>complexExpressions</name>
162      <label>Complex column mappings</label>
163      <description>disallow = Only allow simple mappings that are constant value or pick the value from one column only, for example, '1.6' or '\Row\'
164allow = Allow expression and complex mappings, for example, '\Row\, \Column\' or '=2*col('radius')'</description>
165      <class>java.lang.String</class>
166      <value>disallow</value>
167    </parameter>
168    <parameter>
169      <name>reporterTypeColumnMapping</name>
170      <label>Reporter type</label>
171      <description>Mapping that picks the reporter's type from the data columns. This will override the reporter type parameter. For example: \Reporter type\</description>
172      <class />
173      <value />
174    </parameter>
175    <parameter>
176      <name>charset</name>
177      <label>Character set</label>
178      <description>The character set used in the file, if not specified the default character set is used (ISO-8859-1).</description>
179      <class>java.lang.String</class>
180      <value>ISO-8859-1</value>
181    </parameter>
182    <parameter>
183      <name>dataSplitterRegexp</name>
184      <label>Data splitter</label>
185      <description>A regular expression that splits each data line into individual columns. For example, split on tabs: \t</description>
186      <class>java.lang.String</class>
187      <value>,</value>
188    </parameter>
189    <parameter>
190      <name>extendedColumnMapping.antibiotics</name>
191      <label>Antibiotics</label>
192      <description />
193      <class />
194      <value />
195    </parameter>
196    <parameter>
197      <name>reporterIdColumnMapping</name>
198      <label>Reporter ID</label>
199      <description>Mapping that picks the reporter's ID from the data columns. For example: \ID\</description>
200      <class>java.lang.String</class>
201      <value>\TargetID\</value>
202    </parameter>
203    <parameter>
204      <name>extendedColumnMapping.species</name>
205      <label>Species</label>
206      <description>The organism from which the reporter is derived</description>
207      <class />
208      <value />
209    </parameter>
210    <parameter>
211      <name>extendedColumnMapping.sequence</name>
212      <label>Sequence</label>
213      <description>The nucleotide sequence of the reporter</description>
214      <class />
215      <value />
216    </parameter>
217    <parameter>
218      <name>nameColumnMapping</name>
219      <label>Name</label>
220      <description>Mapping that picks the reporter's name from the data columns. For example: \Name\</description>
221      <class>java.lang.String</class>
222      <value>\TargetID\</value>
223    </parameter>
224    <parameter>
225      <name>extendedColumnMapping.vector</name>
226      <label>Vector</label>
227      <description>The vector from which the reporter is derived</description>
228      <class />
229      <value />
230    </parameter>
231    <parameter>
232      <name>extendedColumnMapping.nid</name>
233      <label>NID</label>
234      <description />
235      <class />
236      <value />
237    </parameter>
238  </configuration>
239  <configuration pluginClassName="net.sf.basedb.plugins.ReporterFlatFileImporter">
240    <configname>Reporters from Illumina annotation file</configname>
241    <description>Import reporter annotations from an Illumina annotation file. This configuration matches as many annotations as possible to the standard BASE reporter annotations. Some annotations can't be mapped and it is recommended that a server admin extends the reporter table and re-configures this plug-in to also include the other annotations.</description>
242    <parameter>
243      <name>extendedColumnMapping.accession</name>
244      <label>Accession</label>
245      <description />
246      <class>java.lang.String</class>
247      <value>\Accession\</value>
248    </parameter>
249    <parameter>
250      <name>minDataColumns</name>
251      <label>Min data columns</label>
252      <description>The minimum number of columns for a line to be counted as a data line.</description>
253      <class />
254      <value />
255    </parameter>
256    <parameter>
257      <name>dataFooterRegexp</name>
258      <label>Data footer</label>
259      <description>A regular expression that matches the first line of non-data after the data lines. For example: __END_OF_DATA__</description>
260      <class />
261      <value />
262    </parameter>
263    <parameter>
264      <name>extendedColumnMapping.cytoband</name>
265      <label>Cytoband</label>
266      <description>The cytoband from which the reporter is derived</description>
267      <class />
268      <value />
269    </parameter>
270    <parameter>
271      <name>extendedColumnMapping.omim</name>
272      <label>OMIM</label>
273      <description />
274      <class />
275      <value />
276    </parameter>
277    <parameter>
278      <name>extendedColumnMapping.markers</name>
279      <label>Markers</label>
280      <description />
281      <class />
282      <value />
283    </parameter>
284    <parameter>
285      <name>extendedColumnMapping.tissue</name>
286      <label>Tissue</label>
287      <description>The tissue from which the reporter is derived</description>
288      <class />
289      <value />
290    </parameter>
291    <parameter>
292      <name>ignoreRegexp</name>
293      <label>Ignore</label>
294      <description>A regular expression that matches any line that should be ignored. For example, ignore lines starting with #: ^#.*</description>
295      <class />
296      <value />
297    </parameter>
298    <parameter>
299      <name>descriptionColumnMapping</name>
300      <label>Description</label>
301      <description>Mapping that picks the reporter's description from the data columns. For example: \Description\</description>
302      <class>java.lang.String</class>
303      <value>\Definition\</value>
304    </parameter>
305    <parameter>
306      <name>extendedColumnMapping.clusterId</name>
307      <label>Cluster ID</label>
308      <description>A unique identifier for a Unigene entry</description>
309      <class />
310      <value />
311    </parameter>
312    <parameter>
313      <name>decimalSeparator</name>
314      <label>Decimal separator</label>
315      <description>The decimal separator used in numeric values, if not specified dot is assumed.</description>
316      <class>java.lang.String</class>
317      <value>dot</value>
318    </parameter>
319    <parameter>
320      <name>trimQuotes</name>
321      <label>Remove quotes</label>
322      <description>If true, quotes (" or ') around data values will be removed.</description>
323      <class>java.lang.Boolean</class>
324      <value>true</value>
325    </parameter>
326    <parameter>
327      <name>extendedColumnMapping.locusLink</name>
328      <label>LocusLink</label>
329      <description />
330      <class />
331      <value />
332    </parameter>
333    <parameter>
334      <name>extendedColumnMapping.library</name>
335      <label>Library</label>
336      <description>The library from which the reporter is derived</description>
337      <class />
338      <value />
339    </parameter>
340    <parameter>
341      <name>maxDataColumns</name>
342      <label>Max data columns</label>
343      <description>The maximum number of columns for a line to be counted as a data line, or 0 to allow any number of columns.</description>
344      <class />
345      <value />
346    </parameter>
347    <parameter>
348      <name>extendedColumnMapping.chromosome</name>
349      <label>Chromosome</label>
350      <description>The chromosome from which the reporter is derived</description>
351      <class />
352      <value />
353    </parameter>
354    <parameter>
355      <name>symbolColumnMapping</name>
356      <label>Gene symbol</label>
357      <description>Mapping that picks the reporter's gene symbol from the data columns. For example: \Gene symbol\</description>
358      <class>java.lang.String</class>
359      <value>\Symbol\</value>
360    </parameter>
361    <parameter>
362      <name>headerRegexp</name>
363      <label>Header</label>
364      <description>A regular expression that matches a header line and extracts the name and value parts. For example, split on the equal sign: (.+)=(.*)</description>
365      <class />
366      <value />
367    </parameter>
368    <parameter>
369      <name>scoreColumnMapping</name>
370      <label>Score</label>
371      <description>Mapping that picks the reporter's score in some context. This mapping is only used when importing to a reporter list.</description>
372      <class />
373      <value />
374    </parameter>
375    <parameter>
376      <name>dataHeaderRegexp</name>
377      <label>Data header</label>
378      <description>A regular expression that matches the header line just before the data begins. For example: Block\tRow\tColumn.*</description>
379      <class>java.lang.String</class>
380      <value>Search_key,Target,.*</value>
381    </parameter>
382    <parameter>
383      <name>reporterType</name>
384      <label>Reporter type</label>
385      <description>The reporter type assigned to the imported reporters</description>
386      <class />
387      <value />
388    </parameter>
389    <parameter>
390      <name>extendedColumnMapping.length</name>
391      <label>Length</label>
392      <description>The length of the sequence</description>
393      <class />
394      <value />
395    </parameter>
396    <parameter>
397      <name>complexExpressions</name>
398      <label>Complex column mappings</label>
399      <description>disallow = Only allow simple mappings that are constant value or pick the value from one column only, for example, '1.6' or '\Row\'
400allow = Allow expression and complex mappings, for example, '\Row\, \Column\' or '=2*col('radius')'</description>
401      <class>java.lang.String</class>
402      <value>disallow</value>
403    </parameter>
404    <parameter>
405      <name>reporterTypeColumnMapping</name>
406      <label>Reporter type</label>
407      <description>Mapping that picks the reporter's type from the data columns. This will override the reporter type parameter. For example: \Reporter type\</description>
408      <class />
409      <value />
410    </parameter>
411    <parameter>
412      <name>charset</name>
413      <label>Character set</label>
414      <description>The character set used in the file, if not specified the default character set is used (ISO-8859-1).</description>
415      <class>java.lang.String</class>
416      <value>ISO-8859-1</value>
417    </parameter>
418    <parameter>
419      <name>dataSplitterRegexp</name>
420      <label>Data splitter</label>
421      <description>A regular expression that splits each data line into individual columns. For example, split on tabs: \t</description>
422      <class>java.lang.String</class>
423      <value>,(?=(?:[^"]*"[^"]*")*(?![^"]*"))</value>
424    </parameter>
425    <parameter>
426      <name>reporterIdColumnMapping</name>
427      <label>Reporter ID</label>
428      <description>Mapping that picks the reporter's ID from the data columns. For example: \ID\</description>
429      <class>java.lang.String</class>
430      <value>\Target\</value>
431    </parameter>
432    <parameter>
433      <name>extendedColumnMapping.antibiotics</name>
434      <label>Antibiotics</label>
435      <description />
436      <class />
437      <value />
438    </parameter>
439    <parameter>
440      <name>extendedColumnMapping.species</name>
441      <label>Species</label>
442      <description>The organism from which the reporter is derived</description>
443      <class />
444      <value />
445    </parameter>
446    <parameter>
447      <name>extendedColumnMapping.sequence</name>
448      <label>Sequence</label>
449      <description>The nucleotide sequence of the reporter</description>
450      <class>java.lang.String</class>
451      <value>\Probe_Sequence\</value>
452    </parameter>
453    <parameter>
454      <name>nameColumnMapping</name>
455      <label>Name</label>
456      <description>Mapping that picks the reporter's name from the data columns. For example: \Name\</description>
457      <class>java.lang.String</class>
458      <value>\Target\</value>
459    </parameter>
460    <parameter>
461      <name>extendedColumnMapping.vector</name>
462      <label>Vector</label>
463      <description>The vector from which the reporter is derived</description>
464      <class />
465      <value />
466    </parameter>
467    <parameter>
468      <name>extendedColumnMapping.nid</name>
469      <label>NID</label>
470      <description />
471      <class />
472      <value />
473    </parameter>
474  </configuration>
475  <configuration pluginClassName="net.sf.basedb.plugins.ReporterMapFlatFileImporter">
476    <configname>Features from Illumina raw data file</configname>
477    <description>Import array design features from an Illumina raw data file. Since the raw data file doesn't contain any coordinate information we fake it like this: block=1, column=1, row=line number in file&#xD;
478&#xD;
479The line number starts at 1 for the first data line (headers are ignored). This is the same as what the IlluminaRawDataImporter plug-in does, so it should be possible to connect the raw bioassays with an array design if needed.</description>
480    <parameter>
481      <name>platforms</name>
482      <label>Platforms/variants</label>
483      <description>Select all platforms/variants where this configuration can be used. If not selected, the configuration can be used on all except file-only platforms.</description>
484      <class>java.lang.String</class>
485      <value>P:generic</value>
486    </parameter>
487    <parameter>
488      <name>dataHeaderRegexp</name>
489      <label>Data header</label>
490      <description>A regular expression that matches the header line just before the data begins. For example: Block\tRow\tColumn.*</description>
491      <class>java.lang.String</class>
492      <value>TargetID,.*</value>
493    </parameter>
494    <parameter>
495      <name>minDataColumns</name>
496      <label>Min data columns</label>
497      <description>The minimum number of columns for a line to be counted as a data line.</description>
498      <class />
499      <value />
500    </parameter>
501    <parameter>
502      <name>dataFooterRegexp</name>
503      <label>Data footer</label>
504      <description>A regular expression that matches the first line of non-data after the data lines. For example: __END_OF_DATA__</description>
505      <class />
506      <value />
507    </parameter>
508    <parameter>
509      <name>complexExpressions</name>
510      <label>Complex column mappings</label>
511      <description>disallow = Only allow simple mappings that are constant value or pick the value from one column only, for example, '1.6' or '\Row\'
512allow = Allow expression and complex mappings, for example, '\Row\, \Column\' or '=2*col('radius')'</description>
513      <class>java.lang.String</class>
514      <value>allow</value>
515    </parameter>
516    <parameter>
517      <name>charset</name>
518      <label>Character set</label>
519      <description>The character set used in the file, if not specified the default character set is used (ISO-8859-1).</description>
520      <class>java.lang.String</class>
521      <value>ISO-8859-1</value>
522    </parameter>
523    <parameter>
524      <name>ignoreRegexp</name>
525      <label>Ignore</label>
526      <description>A regular expression that matches any line that should be ignored. For example, ignore lines starting with #: ^#.*</description>
527      <class />
528      <value />
529    </parameter>
530    <parameter>
531      <name>columnColumnMapping</name>
532      <label>Column</label>
533      <description>Mapping that picks the feature's column position in a block from the data columns. For example: \Column\</description>
534      <class>java.lang.String</class>
535      <value>1</value>
536    </parameter>
537    <parameter>
538      <name>dataSplitterRegexp</name>
539      <label>Data splitter</label>
540      <description>A regular expression that splits each data line into individual columns. For example, split on tabs: \t</description>
541      <class>java.lang.String</class>
542      <value>,</value>
543    </parameter>
544    <parameter>
545      <name>blockColumnMapping</name>
546      <label>Block</label>
547      <description>Mapping that picks the feature's block number from the data columns. You must specify either this mapping or mappings for the meta coordinates. Example: \Block\</description>
548      <class>java.lang.String</class>
549      <value>1</value>
550    </parameter>
551    <parameter>
552      <name>metaGridXColumnMapping</name>
553      <label>Meta grid X</label>
554      <description>Mapping that picks the feature's meta grid X coordinate from the data columns. Required if you don't specify a block mapping. Example: \Meta grid X\</description>
555      <class />
556      <value />
557    </parameter>
558    <parameter>
559      <name>reporterIdColumnMapping</name>
560      <label>Reporter ID</label>
561      <description>Mapping that picks the reporter's ID from the data columns. For example: \ID\</description>
562      <class>java.lang.String</class>
563      <value>\TargetID\</value>
564    </parameter>
565    <parameter>
566      <name>trimQuotes</name>
567      <label>Remove quotes</label>
568      <description>If true, quotes (" or ') around data values will be removed.</description>
569      <class>java.lang.Boolean</class>
570      <value>true</value>
571    </parameter>
572    <parameter>
573      <name>maxDataColumns</name>
574      <label>Max data columns</label>
575      <description>The maximum number of columns for a line to be counted as a data line, or 0 to allow any number of columns.</description>
576      <class />
577      <value />
578    </parameter>
579    <parameter>
580      <name>rowColumnMapping</name>
581      <label>Row</label>
582      <description>Mapping that picks the feature's row position in a block from the data columns. For example: \Row\</description>
583      <class>java.lang.String</class>
584      <value>=dataNo()</value>
585    </parameter>
586    <parameter>
587      <name>metaGridYColumnMapping</name>
588      <label>Meta grid Y</label>
589      <description>Mapping that picks the feature's meta grid Y coordinate from the data columns. Required if you don't specify a block mapping. Example: \Meta grid Y\</description>
590      <class />
591      <value />
592    </parameter>
593    <parameter>
594      <name>headerRegexp</name>
595      <label>Header</label>
596      <description>A regular expression that matches a header line and extracts the name and value parts. For example, split on the equal sign: (.+)=(.*)</description>
597      <class>java.lang.String</class>
598      <value>(.+)=(.*?),*</value>
599    </parameter>
600  </configuration>
601</configfile>
Note: See TracBrowser for help on using the repository browser.