Changeset 5729


Ignore:
Timestamp:
Nov 18, 2019, 1:43:09 PM (3 years ago)
Author:
Nicklas Nordborg
Message:

References #1199: Implement Variant calling pipeline

Added support for parsing the data from the INFO column in VCF files. An InfoFactory implementation is needed that knows how to extract the data into a more usable format. A simple implementation that extracts everything into string key/value pairs has been provided.

Location:
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/vcf
Files:
3 added
2 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/vcf/SnpData.java

    r5320 r5729  
    2323  private String alt;
    2424 
    25 
     25  private InfoData info;
     26 
    2627  /**
    2728    Creates a new instance.
     
    107108  }
    108109 
     110  public void setInfo(InfoData info)
     111  {
     112    this.info = info;
     113  }
     114 
     115  public InfoData getInfo()
     116  {
     117    return info;
     118  }
     119
    109120  /**
    110121    Get all information as a JSON object. A new JSON object
     
    119130    json.put("ref", getRef());
    120131    json.put("alt", getAlt());
     132    if (info != null)
     133    {
     134      json.put("info", info.asJSONObject());
     135    }
    121136    return json;
    122137  }
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/vcf/VcfParser.java

    r5692 r5729  
    3030  private final Map<String, SnpData> snpDef;
    3131  private boolean useLineNoAsId;
     32  private InfoFactory infoFactory;
    3233 
    3334  /**
     
    4041 
    4142  /**
    42     Set this to TRUE to use line number as the id. Usefule if there
     43    Set this to TRUE to use line number as the id. Useful if there
    4344    are no ID values in the file, but note that this means that
    4445    different VCF files can't be compared.
     46    @since 4.24
    4547  */
    4648  public void setUseLineNoAsId(boolean useLineNoAsId)
    4749  {
    4850    this.useLineNoAsId = useLineNoAsId;
     51  }
     52 
     53  /**
     54    Set the factory to use for extracting information from
     55    the INFO column. If not set, the INFO column is not parsed.
     56    @since 4.24
     57  */
     58  public void setInfoFactory(InfoFactory infoFactory)
     59  {
     60    this.infoFactory = infoFactory;
    4961  }
    5062 
     
    7688      throw new IOException("File '" + fileName + "' line " + lineNo + ": Could not find header line starting with '#CHROM{tab}POS...'");
    7789    }
     90   
     91    if (infoFactory != null)
     92    {
     93      for (String name : ffp.getHeaderNames())
     94      {
     95        infoFactory.addInfoHeader(name, ffp.getHeader(name));
     96      }
     97    }
     98   
    7899    List<String> headers = ffp.getColumnHeaders();
    79100   
     
    110131        snp.setAlt(altMapper.getString(line));
    111132        snpDef.put(id, snp);
     133        if (infoFactory != null)
     134        {
     135          snp.setInfo(infoFactory.getInfo(infoMapper.getString(line), snp));
     136        }
    112137      }
    113138     
     
    146171    ffp.setDataHeaderRegexp(Pattern.compile("#CHROM\\tPOS.*"));
    147172    ffp.setDataSplitterRegexp(Pattern.compile("\\t"));
     173    ffp.setHeaderRegexp(Pattern.compile("##INFO\\=\\<ID\\=(\\w+),(.*)\\>"));
    148174    ffp.setIgnoreRegexp(Pattern.compile("##.*"));
    149175    return ffp;
Note: See TracChangeset for help on using the changeset viewer.