Changeset 5193


Ignore:
Timestamp:
Nov 27, 2009, 2:06:17 PM (14 years ago)
Author:
Nicklas Nordborg
Message:

References #1444: Implement generic BFS writers and readers/parsers

Implemented writers and parsers for metadata and annotation files. I am not 100% sure the event system for getting the data out is flexible enough.

Location:
trunk/src
Files:
9 added
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/core/net/sf/basedb/util/encode/TabCrLfEncoderDecoder.java

    r5188 r5193  
    2626  tab and backslash with \n, \r, \t and \\. This encoder is suitable for use with
    2727  tab-separated text files.
     28 
    2829  @author Nicklas
    2930  @since 2.15
     
    3334{
    3435
    35   public TabCrLfEncoderDecoder()
    36   {}
     36  private final boolean nullIsEmptyString;
     37 
     38  /**
     39    Creates a new encoder/decoder.
     40    @param nullIsEmptyString TRUE to encode null as empty string,
     41      FALSE to not encode null. NOTE! If TRUE, this encoder is no
     42      longer symmetrical
     43  */
     44  public TabCrLfEncoderDecoder(boolean nullIsEmptyString)
     45  {
     46    this.nullIsEmptyString = nullIsEmptyString;
     47  }
    3748 
    3849  /*
     
    4354  public boolean isSymmetrical()
    4455  {
    45     return true;
     56    return !nullIsEmptyString;
    4657  }
    4758
     
    5061  {
    5162    if (s == null) return null;
     63    if (s.length() == 0) return nullIsEmptyString ? null : s;
    5264    boolean neededDecode = false;
    5365    StringBuilder sb = new StringBuilder(s.length());
     
    97109  public String encode(String s)
    98110  {
    99     if (s == null) return null;
     111    if (s == null) return nullIsEmptyString ? "" : null;
    100112    boolean neededEncode = false;
    101113    StringBuilder sb = new StringBuilder(s.length());
  • trunk/src/core/net/sf/basedb/util/export/TableWriter.java

    r5188 r5193  
    169169      first = false;
    170170    }
    171     println();
     171    print("\n");
    172172  }
    173173 
  • trunk/src/core/net/sf/basedb/util/parser/FlatFileParser.java

    r5014 r5193  
    181181 
    182182  /**
     183    The regular expression for matching the beginning-of-file marker
     184  */
     185  private Pattern bofMarker;
     186 
     187  /**
    183188    The regular expression for matching a header line.
    184189  */
     
    263268 
    264269  /**
     270    The value that was captured by the bofMarker pattern.
     271  */
     272  private String bofType;
     273 
     274  /**
    265275    List of lines parsed by the {@link #parseHeaders()} method.
    266276  */
     
    335345  }
    336346
     347  /**
     348    Set a regular expression that matches a beginning-of-file
     349    marker. This property should be set before starting to parse
     350    the file (otherwise it is ignored). The first method call that
     351    causes the parsing to be started will invoke {@link #parseToBof()}
     352    (can also be invoked manually).
     353    <p>
     354    The regular expression may contain a single capturing group. The
     355    matched value is returned by {@link #getBofType()}.
     356   
     357    @param regexp A regular expression
     358    @since 2.15
     359  */
     360  public void setBofMarkerRegexp(Pattern regexp)
     361  {
     362    this.bofMarker = regexp;
     363  }
     364 
    337365  /**
    338366    Set a regular expression that can be matched against a header.
     
    537565  }
    538566
     567  /**
     568    Parse the file until the beginning-of-file marker is found. If no
     569    regular expression has been set with {@link #setBofMarkerRegexp(Pattern)}
     570    or if the parsing of the file has already started, this method call is
     571    ignored.
     572   
     573    @return TRUE if this call resulted in parsing and the BOF marker was found,
     574      FALSE otherwise
     575    @since 2.15
     576  */
     577  public boolean parseToBof()
     578    throws IOException
     579  {
     580    if (bofMarker == null || parsedLines > 0) return false;
     581   
     582    boolean done = false;
     583    boolean matched = false;
     584    while (!done)
     585    {
     586      ThreadSignalHandler.checkInterrupted();
     587      String line = reader.readLine();
     588      parsedLines++;
     589      if (line == null)
     590      {
     591        done = true;
     592      }
     593      else
     594      {
     595        parsedCharacters += line.length();
     596        Matcher m = bofMarker.matcher(line);
     597        if (m.matches())
     598        {
     599          bofType = m.groupCount() > 0 ? m.group(1) : m.group();
     600          matched = true;
     601          done = true;
     602        }
     603      }
     604    }
     605    return matched;
     606  }
     607 
     608  /**
     609    Get the value captured by the BOF marker regular expression. If
     610    no capturing groups was specified in the pattern this value is the
     611    string that matched the entire pattern.
     612   
     613    @return The matched value, or null if BOF matching has not been done
     614    @since 2.15
     615  */
     616  public String getBofType()
     617  {
     618    return bofType;
     619  }
     620 
    539621  /**
    540622    Start parsing the input stream. The parser will read a single
     
    583665    throws IOException
    584666  {
     667    if (parsedLines == 0) parseToBof();
    585668    lines = new ArrayList<Line>(maxUnknownLines);
    586669    headers = new HashMap<String,String>();
    587     if (dataSplitter == null) return LineType.UNKNOWN;
    588670
    589671    nextData = null;
     
    10311113    if (nextData == null && nextSection == null)
    10321114    {
     1115      if (parsedLines == 0) parseToBof();
    10331116      ignoredLines = 0;
    10341117      unknownLines = 0;
     
    12671350    if (nextSection == null)
    12681351    {
     1352      if (parsedLines == 0) parseToBof();
    12691353      boolean done = false;
    12701354      while (!done)
     
    15611645        nullIsNull && "NULL".equalsIgnoreCase(value) ? null : value;
    15621646    }
     1647   
     1648    /**
     1649      The data line as an array of strings.
     1650      @since 2.15
     1651    */
     1652    public String[] data()
     1653    {
     1654      return result;
     1655    }
    15631656  }
    15641657 
  • trunk/src/test/TestAll.java

    r5188 r5193  
    132132    results.put("TestJep", TestJep.test_all());
    133133    results.put("TestEncoderDecoder", TestEncoderDecoder.test_all());
     134    results.put("TestBfs", TestBfs.test_all());
    134135    results.put("TestDbInfo", TestDbInfo.test_all());
    135136    results.put("TestPresets", TestPresets.test_all());
  • trunk/src/test/TestEncoderDecoder.java

    r5188 r5193  
    4040  {
    4141    write("++Testing string encode/decode");
    42     EncoderDecoder tabCrLf = new TabCrLfEncoderDecoder();
     42    EncoderDecoder tabCrLf = new TabCrLfEncoderDecoder(false);
    4343    test_code(tabCrLf, "nothingtocode", "nothingtocode");
    4444    test_code(tabCrLf, "a\tb\nc\rd\\e", "a\\tb\\nc\\rd\\\\e");
Note: See TracChangeset for help on using the changeset viewer.