Changeset 559


Ignore:
Timestamp:
Jan 29, 2008, 8:51:39 AM (13 years ago)
Author:
Nicklas Nordborg
Message:

References #94: Import reporter annotations from bgx files

The [Controls] section is now handled better. The configuration parameters are still hardcoded into the class.

Location:
trunk/net/sf/basedb/illumina
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/net/sf/basedb/illumina/META-INF/base-configurations.xml

    r546 r559  
    11<?xml version="1.0" encoding="UTF-8"?>
    2 <!DOCTYPE configfile SYSTEM "plugin-configuration-file.dtd">
    3 <configfile>
    4 <!-- None of the plugins has configurations-->
     2<!DOCTYPE configfile SYSTEM "plugin-configuration-file.dtd"><configfile>
     3  <configuration pluginClassName="net.sf.basedb.illumina.plugins.BgxReporterImporter">
     4    <configname>Default</configname>
     5    <description>A standard configuration that assumes that the default configuration for extended properties are used + the extra extended properties that are shipped with this plug-in package</description>
     6    <parameter>
     7      <name>extendedColumnMapping.accession</name>
     8      <label>Accession</label>
     9      <description />
     10      <class>java.lang.String</class>
     11      <value>\Accession\</value>
     12    </parameter>
     13    <parameter>
     14      <name>minDataColumns</name>
     15      <label>Min data columns</label>
     16      <description>The minimum number of columns for a line to be counted as a data line.</description>
     17      <class>java.lang.Integer</class>
     18      <value>10</value>
     19    </parameter>
     20    <parameter>
     21      <name>dataFooterRegexp</name>
     22      <label>Data footer</label>
     23      <description>A regular expression that matches the first line of non-data after the data lines. For example: __END_OF_DATA__</description>
     24      <class />
     25      <value />
     26    </parameter>
     27    <parameter>
     28      <name>extendedColumnMapping.cytoband</name>
     29      <label>Cytoband</label>
     30      <description>The cytoband from which the reporter is derived</description>
     31      <class />
     32      <value />
     33    </parameter>
     34    <parameter>
     35      <name>extendedColumnMapping.omim</name>
     36      <label>OMIM</label>
     37      <description />
     38      <class />
     39      <value />
     40    </parameter>
     41    <parameter>
     42      <name>extendedColumnMapping.markers</name>
     43      <label>Markers</label>
     44      <description />
     45      <class />
     46      <value />
     47    </parameter>
     48    <parameter>
     49      <name>extendedColumnMapping.tissue</name>
     50      <label>Tissue</label>
     51      <description>The tissue from which the reporter is derived</description>
     52      <class />
     53      <value />
     54    </parameter>
     55    <parameter>
     56      <name>ignoreRegexp</name>
     57      <label>Ignore</label>
     58      <description>A regular expression that matches any line that should be ignored. For example, ignore lines starting with #: ^#.*</description>
     59      <class />
     60      <value />
     61    </parameter>
     62    <parameter>
     63      <name>descriptionColumnMapping</name>
     64      <label>Description</label>
     65      <description>Mapping that picks the reporter's description from the data columns. For example: \Description\</description>
     66      <class>java.lang.String</class>
     67      <value>\Definition\</value>
     68    </parameter>
     69    <parameter>
     70      <name>extendedColumnMapping.clusterId</name>
     71      <label>Cluster ID</label>
     72      <description>A unique identifier for a Unigene entry</description>
     73      <class>java.lang.String</class>
     74      <value>\Unigene_ID\</value>
     75    </parameter>
     76    <parameter>
     77      <name>decimalSeparator</name>
     78      <label>Decimal separator</label>
     79      <description>The decimal separator used in numeric values, if not specified dot is assumed.</description>
     80      <class>java.lang.String</class>
     81      <value>dot</value>
     82    </parameter>
     83    <parameter>
     84      <name>trimQuotes</name>
     85      <label>Remove quotes</label>
     86      <description>If true quotes (" or ') around data value will be removed.</description>
     87      <class>java.lang.Boolean</class>
     88      <value>true</value>
     89    </parameter>
     90    <parameter>
     91      <name>extendedColumnMapping.locusLink</name>
     92      <label>LocusLink</label>
     93      <description />
     94      <class />
     95      <value />
     96    </parameter>
     97    <parameter>
     98      <name>extendedColumnMapping.library</name>
     99      <label>Library</label>
     100      <description>The library from which the reporter is derived</description>
     101      <class />
     102      <value />
     103    </parameter>
     104    <parameter>
     105      <name>maxDataColumns</name>
     106      <label>Max data columns</label>
     107      <description>The maximum number of columns for a line to be counted as a data line, or 0 to allow any number of columns.</description>
     108      <class />
     109      <value />
     110    </parameter>
     111    <parameter>
     112      <name>extendedColumnMapping.chromosome</name>
     113      <label>Chromosome</label>
     114      <description>The chromosome from which the reporter is derived</description>
     115      <class>java.lang.String</class>
     116      <value>\Chromosome\</value>
     117    </parameter>
     118    <parameter>
     119      <name>symbolColumnMapping</name>
     120      <label>Gene symbol</label>
     121      <description>Mapping that picks the reporter's gene symbol from the data columns. For example: \Gene symbol\</description>
     122      <class>java.lang.String</class>
     123      <value>\Symbol\</value>
     124    </parameter>
     125    <parameter>
     126      <name>headerRegexp</name>
     127      <label>Header</label>
     128      <description>A regular expression that matches a header line and extracts the name and a value parts. For example, split on equal symbol: (.+)=(.*)</description>
     129      <class />
     130      <value />
     131    </parameter>
     132    <parameter>
     133      <name>scoreColumnMapping</name>
     134      <label>Score</label>
     135      <description>Mapping that picks the reporter's score in some context. This mapping is only used when importing to a reporter list.</description>
     136      <class />
     137      <value />
     138    </parameter>
     139    <parameter>
     140      <name>dataHeaderRegexp</name>
     141      <label>Data header</label>
     142      <description>A regular expression that matches the header line just before the data begins. For example: Block\tRow\tColumn.*</description>
     143      <class>java.lang.String</class>
     144      <value>\QSpecies  Source  Search_Key  Transcript  ILMN_Gene Source_Reference_ID RefSeq_ID Unigene_ID  Entrez_Gene_ID  GI  Accession Symbol  Protein_Product Probe_Id  Array_Address_Id  Probe_Type  Probe_Start Probe_Sequence  Chromosome  Probe_Chr_Orientation Probe_Coordinates Definition  Ontology_Component  Ontology_Process  Ontology_Function Synonyms  Obsolete_Probe_Id\E</value>
     145    </parameter>
     146    <parameter>
     147      <name>reporterType</name>
     148      <label>Reporter type</label>
     149      <description>The reporter type assigned to the imported reporters</description>
     150      <class />
     151      <value />
     152    </parameter>
     153    <parameter>
     154      <name>extendedColumnMapping.length</name>
     155      <label>Length</label>
     156      <description>The length of the sequence</description>
     157      <class />
     158      <value />
     159    </parameter>
     160    <parameter>
     161      <name>complexExpressions</name>
     162      <label>Complex column mappings</label>
     163      <description>disallow = Only allow simple mappings that are constant value or pick the value from one column only, for example, '1.6' or '\Row\'
     164allow = Allow expression and complex mappings, for example, '\Row\, \Column\' or '=2*col('radius')'</description>
     165      <class>java.lang.String</class>
     166      <value>disallow</value>
     167    </parameter>
     168    <parameter>
     169      <name>reporterTypeColumnMapping</name>
     170      <label>Reporter type</label>
     171      <description>Mapping that pick the reporter's type from the data columns. This will overide the reporter type parameter. For example: \Reporter type\</description>
     172      <class />
     173      <value />
     174    </parameter>
     175    <parameter>
     176      <name>charset</name>
     177      <label>Character set</label>
     178      <description>The character set used in the file, if not specified the default character set is used (ISO-8859-1).</description>
     179      <class>java.lang.String</class>
     180      <value>ISO-8859-1</value>
     181    </parameter>
     182    <parameter>
     183      <name>dataSplitterRegexp</name>
     184      <label>Data splitter</label>
     185      <description>A regular expression that splits each data line into individual columns. For example, split on tabs: \t</description>
     186      <class>java.lang.String</class>
     187      <value>\t</value>
     188    </parameter>
     189    <parameter>
     190      <name>reporterIdColumnMapping</name>
     191      <label>Reporter ID</label>
     192      <description>Mapping that picks the reporter's ID from the data columns. For example: \ID\</description>
     193      <class>java.lang.String</class>
     194      <value>\Probe_Id\</value>
     195    </parameter>
     196    <parameter>
     197      <name>extendedColumnMapping.antibiotics</name>
     198      <label>Antibiotics</label>
     199      <description />
     200      <class />
     201      <value />
     202    </parameter>
     203    <parameter>
     204      <name>extendedColumnMapping.species</name>
     205      <label>Species</label>
     206      <description>The organism from which the reporter is derived</description>
     207      <class>java.lang.String</class>
     208      <value>\Species\</value>
     209    </parameter>
     210    <parameter>
     211      <name>extendedColumnMapping.sequence</name>
     212      <label>Sequence</label>
     213      <description>The nucleotide sequence of the reporter</description>
     214      <class>java.lang.String</class>
     215      <value>\Probe_Sequence\</value>
     216    </parameter>
     217    <parameter>
     218      <name>nameColumnMapping</name>
     219      <label>Name</label>
     220      <description>Mapping that picks the reporter's name from the data columns. For example: \Name\</description>
     221      <class>java.lang.String</class>
     222      <value>\Probe_Id\</value>
     223    </parameter>
     224    <parameter>
     225      <name>extendedColumnMapping.vector</name>
     226      <label>Vector</label>
     227      <description>The vector from which the reporter is derived</description>
     228      <class />
     229      <value />
     230    </parameter>
     231    <parameter>
     232      <name>extendedColumnMapping.nid</name>
     233      <label>NID</label>
     234      <description />
     235      <class />
     236      <value />
     237    </parameter>
     238  </configuration>
    5239</configfile>
  • trunk/net/sf/basedb/illumina/src/net/sf/basedb/illumina/plugins/BgxReporterImporter.java

    r556 r559  
    2929import java.io.PushbackInputStream;
    3030import java.util.HashMap;
     31import java.util.List;
    3132import java.util.Map;
    3233import java.util.regex.Pattern;
     
    3435
    3536import net.sf.basedb.core.BaseException;
     37import net.sf.basedb.core.InvalidDataException;
     38import net.sf.basedb.core.Job;
     39import net.sf.basedb.core.ParameterType;
     40import net.sf.basedb.core.PermissionDeniedException;
     41import net.sf.basedb.core.PluginConfiguration;
     42import net.sf.basedb.core.PluginDefinition;
     43import net.sf.basedb.core.SessionControl;
    3644import net.sf.basedb.core.plugin.About;
    3745import net.sf.basedb.core.plugin.AboutImpl;
    3846import net.sf.basedb.core.plugin.InteractivePlugin;
     47import net.sf.basedb.core.plugin.ParameterValues;
    3948import net.sf.basedb.illumina.Illumina;
    4049import net.sf.basedb.plugins.ReporterFlatFileImporter;
     
    4352import net.sf.basedb.util.parser.FlatFileParser;
    4453import net.sf.basedb.util.parser.WrappedConfigureByExample;
    45 import net.sf.basedb.util.parser.FlatFileParser.Data;
    4654import net.sf.basedb.util.parser.FlatFileParser.Line;
    4755
     
    5260  Most functionality is provided by the {@link ReporterFlatFileImporter}.
    5361  This subclass adds support for decompressing a compressed BGX file
    54   and the second section with control reporters.
     62  and for parsing the second section with control probes. Currently,
     63  only the annotations for the [Probes] section are configurable, the
     64  regular expressions and column mappings for the [Controls] section
     65  are hardcoded into this class as follows:
     66 
     67  <pre class="code">
     68dataHeaderRegexp               --&gt; Probe_Id\tArray_Address_Id.*
     69minDataColumns                 --&gt; 3
     70reporterIdColumnMapping        --&gt; \Probe_Id\
     71nameColumnMapping              --&gt; \Probe_Id\
     72extendedColumnMapping.sequence --&gt; \Probe_Sequence\
     73</pre>
    5574*/
    5675public class BgxReporterImporter
     
    85104  private Section currentSection;
    86105 
     106  private ParameterValuesProxy confProxy;
     107 
    87108  public BgxReporterImporter()
    88109  {}
     
    97118    return about;
    98119  }
     120  @Override
     121  public void init(SessionControl sc, ParameterValues configuration, ParameterValues job)
     122  {
     123    this.confProxy = new ParameterValuesProxy(configuration);
     124    super.init(sc, this.confProxy, job);
     125  }
    99126  // -------------------------------------------
    100127
    101128  /*
    102     From the InteractivePlugin interface
     129    From the WrappedConfigureByExample interface
    103130    -------------------------------------------
    104131  */
     
    123150    return in;
    124151  }
    125  
     152  // -------------------------------------------
     153 
     154  /*
     155    From the AbstractFlatFileImporter class
     156    -------------------------------------------
     157  */
    126158  /**
    127159    Get the number of bytes from the compressed file.
     
    147179  }
    148180
     181  /**
     182    When we get to the [Controls] section, we change configuration parameters
     183    for the parser and column mappings.
     184    @see ParameterValuesProxy
     185  */
    149186  @Override
    150187  protected void handleSection(Line line)
     
    152189  {
    153190    currentSection = Section.getByName(line.name());
    154     //System.out.println("new section: " + currentSection.name() + " ("+ line.line() + ")");
    155191    if (currentSection == Section.CONTROLS)
    156192    {
    157193      // Change regular expressions to match the data in this section
    158194      ffp.setDataHeaderRegexp(Pattern.compile("Probe_Id\\tArray_Address_Id.*"));
    159       ffp.setMinDataColumns(4);
     195      ffp.setMinDataColumns(3);
    160196      ffp.setIgnoreNonExistingColumns(true);
    161     }
    162   }
    163 
    164   @Override
    165   protected void beginData()
    166   {
    167     super.beginData();
    168   }
    169  
    170   @Override
    171   protected void handleData(Data data)
    172     throws BaseException
    173   {
    174     //System.out.println("data: " + data.dataLineNo());
    175     super.handleData(data);
    176    
    177   }
    178 
     197      Map<String, Object> override = new HashMap<String, Object>();
     198      override.put("reporterIdColumnMapping", "\\Probe_Id\\");
     199      override.put("nameColumnMapping", "\\Probe_Id\\");
     200      override.put("extendedColumnMapping.sequence", "\\Probe_Sequence\\");
     201      confProxy.setOverride(override);
     202    }
     203    else
     204    {
     205      confProxy.setOverride(null);
     206    }
     207  }
    179208  // -------------------------------------------
    180209
    181  
     210  /**
     211    Keep track of which section we are currently parsing. We only need to
     212    know if we are in the [Probes] or [Controls] section.
     213  */
    182214  private enum Section
    183215  {
     
    208240      return s;
    209241    }
    210    
    211   }
     242  }
     243 
     244  /**
     245    Proxy for the plugin configuration parameters. Forwards
     246    everything to the original parameters, except when the
     247    'override' is set and then only for 'ColumnMapping' parameters.
     248    This is needed so we can use different column mappings when parsing
     249    the [Controls] section of the BGX file. The regular configuration
     250    parameters only apply to the [Probes] section.
     251  */
     252  private static class ParameterValuesProxy
     253    implements ParameterValues
     254  {
     255   
     256    private ParameterValues params;
     257    private Map<String, Object> override;
     258   
     259    private ParameterValuesProxy(ParameterValues params)
     260    {
     261      this.params = params;
     262    }
     263   
     264    /*
     265      From the ParameterValues interface
     266      -------------------------------------------
     267    */
     268    @Override
     269    public int getId()
     270    {
     271      return params.getId();
     272    }
     273    @Override
     274    public Job getJob()
     275    {
     276      return params.getJob();
     277    }
     278    @Override
     279    public PluginConfiguration getPluginConfiguration()
     280    {
     281      return params.getPluginConfiguration();
     282    }
     283    @Override
     284    public PluginDefinition getPluginDefinition()
     285    {
     286      return params.getPluginDefinition();
     287    }
     288    @Override
     289    public Object getValue(String name)
     290      throws PermissionDeniedException, BaseException
     291    {
     292      return override != null && name.contains("ColumnMapping") ?
     293        override.get(name) : params.getValue(name);
     294    }
     295    @Override
     296    public List<?> getValues(String name)
     297      throws PermissionDeniedException, BaseException
     298    {
     299      return params.getValues(name);
     300    }
     301    @Override
     302    public <T> void setValue(String name, ParameterType<T> type, T value)
     303      throws PermissionDeniedException, InvalidDataException, BaseException
     304    {
     305      params.setValue(name, type, value);
     306    }
     307    @Override
     308    public <T> void setValues(String name, ParameterType<T> type, List<T> values)
     309      throws PermissionDeniedException, InvalidDataException, BaseException
     310    {
     311      params.setValues(name, type, values);
     312    }
     313    // -------------------------------------------
     314   
     315    private void setOverride(Map<String, Object> override)
     316    {
     317      this.override = override;
     318    }
     319   
     320  }
     321 
     322 
     323 
    212324}
    213325
Note: See TracChangeset for help on using the changeset viewer.