Changeset 5759
- Timestamp:
- Sep 26, 2011, 12:44:15 PM (11 years ago)
- Location:
- trunk
- Files:
-
- 7 added
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/data/plugin_configfile.xml
r4509 r5759 925 925 </parameter> 926 926 </configuration> 927 <configuration pluginClassName="net.sf.basedb.plugins.gtf.GtfReporterImporter"> 928 <configname>gene_id (no prefix)</configname> 929 <description>A configuration that uses the gene_id (no prefix) instead of the transcript_id as reporter id.</description> 930 <parameter> 931 <name>trimQuotes</name> 932 <label>Remove quotes</label> 933 <description>If true quotes (" or ') around data value will be removed.</description> 934 <class>java.lang.Boolean</class> 935 <value>true</value> 936 </parameter> 937 <parameter> 938 <name>dataHeaderRegexp</name> 939 <label>Data header</label> 940 <description>A regular expression that matches the header line just before the data begins. For example: Block\tRow\tColumn.*</description> 941 <class>java.lang.String</class> 942 <value><seqname>\t.*<gene_id>.*</value> 943 </parameter> 944 <parameter> 945 <name>reporterIdColumnMapping</name> 946 <label>External ID</label> 947 <description>Mapping that picks the reporter's external ID from the data columns. 
For example: \ID\</description> 948 <class>java.lang.String</class> 949 <value>\gene_id\</value> 950 </parameter> 951 <parameter> 952 <name>minDataColumns</name> 953 <label>Min data columns</label> 954 <description>The minimum number of columns for a line to be counted as a data line.</description> 955 <class>java.lang.Integer</class> 956 <value>4</value> 957 </parameter> 958 <parameter> 959 <name>complexExpressions</name> 960 <label>Complex column mappings</label> 961 <description>disallow = Only allow simple mappings that are constant value or pick the value from one column only, for example, '1.6' or '\Row\' 962 allow = Allow expression and complex mappings, for example, '\Row\, \Column\' or '=2*col('radius')'</description> 963 <class>java.lang.String</class> 964 <value>disallow</value> 965 </parameter> 966 <parameter> 967 <name>charset</name> 968 <label>Character set</label> 969 <description>The character set to use when reading the file. This setting overrides the character set specified by the file. If neither this parameter nor the file specifies a character set, the system default is used (ISO-8859-1).</description> 970 <class>java.lang.String</class> 971 <value>ISO-8859-1</value> 972 </parameter> 973 <parameter> 974 <name>nameColumnMapping</name> 975 <label>Name</label> 976 <description>Mapping that picks the reporter's name from the data columns. For example: \Name\</description> 977 <class>java.lang.String</class> 978 <value>\gene_id\</value> 979 </parameter> 980 <parameter> 981 <name>dataSplitterRegexp</name> 982 <label>Data splitter</label> 983 <description>A regular expression that splits each data line into individual columns. 
For example, split on tabs: \t</description> 984 <class>java.lang.String</class> 985 <value>\t</value> 986 </parameter> 987 <parameter> 988 <name>decimalSeparator</name> 989 <label>Decimal separator</label> 990 <description>The decimal separator used in numeric values, if not specified dot is assumed.</description> 991 <class>java.lang.String</class> 992 <value>dot</value> 993 </parameter> 994 </configuration> 995 <configuration pluginClassName="net.sf.basedb.plugins.gtf.GtfReporterImporter"> 996 <configname>transcript_id (no prefix)</configname> 997 <description>A configuration that uses the transcript_id (no prefix) as reporter id.</description> 998 <parameter> 999 <name>trimQuotes</name> 1000 <label>Remove quotes</label> 1001 <description>If true quotes (" or ') around data value will be removed.</description> 1002 <class>java.lang.Boolean</class> 1003 <value>true</value> 1004 </parameter> 1005 <parameter> 1006 <name>dataHeaderRegexp</name> 1007 <label>Data header</label> 1008 <description>A regular expression that matches the header line just before the data begins. For example: Block\tRow\tColumn.*</description> 1009 <class>java.lang.String</class> 1010 <value><seqname>\t.*<transcript_id>.*</value> 1011 </parameter> 1012 <parameter> 1013 <name>reporterIdColumnMapping</name> 1014 <label>External ID</label> 1015 <description>Mapping that picks the reporter's external ID from the data columns. 
For example: \ID\</description> 1016 <class>java.lang.String</class> 1017 <value>\transcript_id\</value> 1018 </parameter> 1019 <parameter> 1020 <name>minDataColumns</name> 1021 <label>Min data columns</label> 1022 <description>The minimum number of columns for a line to be counted as a data line.</description> 1023 <class>java.lang.Integer</class> 1024 <value>4</value> 1025 </parameter> 1026 <parameter> 1027 <name>complexExpressions</name> 1028 <label>Complex column mappings</label> 1029 <description>disallow = Only allow simple mappings that are constant value or pick the value from one column only, for example, '1.6' or '\Row\' 1030 allow = Allow expression and complex mappings, for example, '\Row\, \Column\' or '=2*col('radius')'</description> 1031 <class>java.lang.String</class> 1032 <value>disallow</value> 1033 </parameter> 1034 <parameter> 1035 <name>charset</name> 1036 <label>Character set</label> 1037 <description>The character set to use when reading the file. This setting overrides the character set specified by the file. If neither this parameter nor the file specifies a character set, the system default is used (ISO-8859-1).</description> 1038 <class>java.lang.String</class> 1039 <value>ISO-8859-1</value> 1040 </parameter> 1041 <parameter> 1042 <name>nameColumnMapping</name> 1043 <label>Name</label> 1044 <description>Mapping that picks the reporter's name from the data columns. For example: \Name\</description> 1045 <class>java.lang.String</class> 1046 <value>\transcript_id\</value> 1047 </parameter> 1048 <parameter> 1049 <name>dataSplitterRegexp</name> 1050 <label>Data splitter</label> 1051 <description>A regular expression that splits each data line into individual columns. 
For example, split on tabs: \t</description> 1052 <class>java.lang.String</class> 1053 <value>\t</value> 1054 </parameter> 1055 <parameter> 1056 <name>decimalSeparator</name> 1057 <label>Decimal separator</label> 1058 <description>The decimal separator used in numeric values, if not specified dot is assumed.</description> 1059 <class>java.lang.String</class> 1060 <value>dot</value> 1061 </parameter> 1062 </configuration> 927 1063 </configfile> -
trunk/src/core/net/sf/basedb/core/Install.java
r5739 r5759 615 615 createMimeType("text/plain", "Sequence Alignment/Map", "sam", null, true); 616 616 createMimeType("application/octet-stream", "Binary Sequence Alignment/Map", "bam", null, false); 617 createMimeType("text/plain", "Gene transfer format", "gtf", null, true); 617 618 618 619 // Plate geometries -
trunk/src/plugins/core/core-plugins.xml
r5734 r5759 762 762 </plugin-definition> 763 763 764 <plugin-definition id="GtfReporterImporter"> 765 <about> 766 <name>GTF reporter importer</name> 767 <description> 768 Creates reporters and reporter lists from GTF (Gene transfer format) 769 files. The default configuration is to use the transcript_id value 770 as the reporter id and name. No other fields are used, but this can 771 be changed by user configurations. For example, to use the gene_id 772 instead or to add prefixes to the id values. The importer 773 builds on the regular reporter importer and performs on-the-fly 774 transformation of GTF attributes to a completely column-based format. 775 </description> 776 </about> 777 <plugin-class>net.sf.basedb.plugins.gtf.GtfReporterImporter</plugin-class> 778 <settings> 779 <property name="everyone-use">1</property> 780 </settings> 781 </plugin-definition> 782 764 783 <!-- 765 784 <plugin-definition id=""> -
trunk/src/plugins/core/net/sf/basedb/plugins/ReporterFlatFileImporter.java
r5689 r5759 733 733 if (reporterListContext) 734 734 { 735 allColumnMappings.add( reporterIdColumnMapping);736 allColumnMappings.add( scoreColumnMapping);735 allColumnMappings.add(cloneParameterWithDefaultValue(reporterIdColumnMapping)); 736 allColumnMappings.add(cloneParameterWithDefaultValue(scoreColumnMapping)); 737 737 } 738 738 else 739 739 { 740 allColumnMappings.add( nameColumnMapping);741 allColumnMappings.add( reporterIdColumnMapping);742 allColumnMappings.add( descriptionColumnMapping);743 allColumnMappings.add( symbolColumnMapping);744 allColumnMappings.add( reporterTypeColumnMapping);740 allColumnMappings.add(cloneParameterWithDefaultValue(nameColumnMapping)); 741 allColumnMappings.add(cloneParameterWithDefaultValue(reporterIdColumnMapping)); 742 allColumnMappings.add(cloneParameterWithDefaultValue(descriptionColumnMapping)); 743 allColumnMappings.add(cloneParameterWithDefaultValue(symbolColumnMapping)); 744 allColumnMappings.add(cloneParameterWithDefaultValue(reporterTypeColumnMapping)); 745 745 List<ExtendedProperty> extended = ExtendedProperties.getProperties("ReporterData"); 746 746 if (extended != null) … … 748 748 for (ExtendedProperty ep : extended) 749 749 { 750 String name = "extendedColumnMapping."+ep.getName(); 750 751 allColumnMappings.add( 751 752 new PluginParameter<String>( 752 "extendedColumnMapping."+ep.getName(),753 name, 753 754 ep.getTitle(), 754 755 ep.getDescription(), 756 (String)getJobOrConfigurationValue(name), 755 757 optionalColumnMapping 756 758 ) … … 900 902 // Parser regular expressions 901 903 parameters.add(parserSection); 902 parameters.add(headerRegexpParameter); 903 parameters.add(dataHeaderRegexpParameter); 904 parameters.add(dataSplitterRegexpParameter); 905 parameters.add(trimQuotesParameter); 906 parameters.add(ignoreRegexpParameter); 907 parameters.add(dataFooterRegexpParameter); 908 parameters.add(minDataColumnsParameter); 909 parameters.add(maxDataColumnsParameter); 910 
parameters.add(Parameters.charsetParameter(null, null, null)); 911 parameters.add(Parameters.decimalSeparatorParameter(null, null, null)); 904 parameters.add(cloneParameterWithDefaultValue(headerRegexpParameter)); 905 parameters.add(cloneParameterWithDefaultValue(dataHeaderRegexpParameter)); 906 parameters.add(cloneParameterWithDefaultValue(dataSplitterRegexpParameter)); 907 parameters.add(cloneParameterWithDefaultValue(trimQuotesParameter)); 908 parameters.add(cloneParameterWithDefaultValue(ignoreRegexpParameter)); 909 parameters.add(cloneParameterWithDefaultValue(dataFooterRegexpParameter)); 910 parameters.add(cloneParameterWithDefaultValue(minDataColumnsParameter)); 911 parameters.add(cloneParameterWithDefaultValue(maxDataColumnsParameter)); 912 parameters.add(Parameters.charsetParameter(null, null, 913 (String)getJobOrConfigurationValue(Parameters.CHARSET_PARAMETER))); 914 parameters.add(Parameters.decimalSeparatorParameter(null, null, 915 (String)getJobOrConfigurationValue(Parameters.DECIMAL_SEPARATOR_PARAMETER))); 912 916 913 917 // Column mappings -
trunk/src/test/TestAll.java
r5685 r5759 153 153 results.put("TestDirty", TestDirty.test_all()); 154 154 results.put("TestIlluminaImporter", TestIlluminaImporter.test_all()); 155 results.put("TestGtfInputStream", TestGtfInputStream.test_all()); 156 results.put("TestGtfReporterImporter", TestGtfReporterImporter.test_all()); 155 157 156 158 // Experiments -
trunk/src/test/TestFlatFileParser.java
r5689 r5759 95 95 { 96 96 InputStream in = FileUtil.getInputStream(new java.io.File(file)); 97 test_parse(ffp, file, in, mappings); 98 } 99 catch (Throwable ex) 100 { 101 write("--Parse FAILED (" + file + ")"); 102 ex.printStackTrace(); 103 ok = false; 104 } 105 } 106 107 108 public static void test_parse(FlatFileParser ffp, String file, InputStream in, String... mappings) 109 { 110 try 111 { 97 112 ffp.setInputStream(in, "ISO-8859-1"); 98 113 FlatFileParser.LineType last_line_type = ffp.parseHeaders(); … … 192 207 sb.append(j + "={" + data.get(j) + "}\t"); 193 208 } 209 if (max < data.columns()) sb.append((data.columns() - max) + " more..."); 194 210 if (mappings != null) 195 211 { 212 sb.append("\n"); 196 213 for (Mapper m : mappings) 197 214 { 198 sb.append( "M{" + m.getValue(data) + "}\t");215 sb.append(m+"={" + m.getValue(data) + "}\t"); 199 216 } 200 217 }
Note: See TracChangeset for help on using the changeset viewer.