Changeset 5827


Ignore:
Timestamp:
Feb 19, 2020, 9:25:09 AM (3 years ago)
Author:
Nicklas Nordborg
Message:

References #1218: Implement MIPs alignment

Added Trimmomatic steps to the script. Logging and error handling are not fully implemented yet.

Location:
extensions/net.sf.basedb.reggie/trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.reggie/trunk/config/reggie-config.xml

    r5826 r5827  
    325325        <parallel-environment>smp 8-16</parallel-environment>
    326326       
     327        <!-- Options for Trimmomatic -->
     328        <trimmomatic>
     329          <!-- Optional path to Trimmomatic; if not specified, the default in the 'programs' section is used -->
     330          <path>/usr/local/packages/trimmomatic/0.39/trimmomatic.jar</path>
     331          <!-- The first step should filter Illumina adapters-->
     332          <step-1>ILLUMINACLIP:adapter.fa:3:12:7:1:true MINLEN:30</step-1>
     333          <!-- The second step is for all other filters -->
     334          <step-2>MAXINFO:30:0.25 MINLEN:30</step-2>
     335        </trimmomatic>
     336       
    327337      </align-mips>
    328338     
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/dao/Datafiletype.java

    r5706 r5827  
    22
    33import java.lang.reflect.Field;
     4import java.util.ArrayList;
     5import java.util.Collections;
    46import java.util.List;
    57
     
    266268    return member == null ? null : member.getFile();
    267269  }
     270 
     271  /**
     272    Get all files of this type that are attached to the given
     273    item.
     274    @since 4.26
     275  */
     276  public List<File> getAllFiles(DbControl dc, FileStoreEnabled item)
     277  {
     278    if (!item.hasFileSet()) return Collections.emptyList();
     279    ItemQuery<FileSetMember> query = item.getFileSet().getMembers(get(dc));
     280    List<File> files = new ArrayList<>();
     281    for (FileSetMember member : query.list(dc))
     282    {
     283      files.add(member.getFile());
     284    }
     285    return files;
     286  }
    268287
    269288  /**
  • extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/MipsAlignJobCreator.java

    r5826 r5827  
    33import java.util.ArrayList;
    44import java.util.List;
     5import java.util.Set;
     6import java.util.TreeSet;
    57
    68import org.slf4j.Logger;
     
    911import net.sf.basedb.core.DbControl;
    1012import net.sf.basedb.core.DerivedBioAssay;
     13import net.sf.basedb.core.File;
    1114import net.sf.basedb.core.ItemList;
    1215import net.sf.basedb.core.ItemNotFoundException;
     
    3033import net.sf.basedb.reggie.dao.Annotationtype;
    3134import net.sf.basedb.reggie.dao.BiomaterialList;
     35import net.sf.basedb.reggie.dao.Datafiletype;
    3236import net.sf.basedb.reggie.dao.DoNotUse;
    3337import net.sf.basedb.reggie.dao.Library;
     
    132136    // Paths to programs used (bowtie , hisat, picards, and more...)
    133137    String pipeline_scripts_path = cfg.getRequiredConfig("programs/pipeline-scripts/path", null);
     138    String java_path = cfg.getRequiredConfig("programs/java/path", alignParameterSet);
     139    String trimmomatic_path = cfg.getConfig("align-mips/trimmomatic/path", alignParameterSet, null);
     140    if (trimmomatic_path == null)
     141    {
     142      trimmomatic_path = cfg.getRequiredConfig("programs/trimmomatic/path", alignParameterSet);
     143    }
    134144
    135145    // Options for the programs
    136146    String align_parallelEnvironment = cfg.getConfig("align-mips/parallel-environment", alignParameterSet, "smp 8-");
     147    String align_trimmomaticOptions1 = cfg.getRequiredConfig("align-mips/trimmomatic/step-1", alignParameterSet);
     148    String align_trimmomaticOptions2 = cfg.getRequiredConfig("align-mips/trimmomatic/step-2", alignParameterSet);
    137149
    138150    // Load common items
     
    166178      Sample specimen =  (Sample)lib.findSingleParent(dc, Subtype.SPECIMEN);
    167179      String fastQFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, merged);
     180     
     181      // Get all FASTQ files
     182      List<File> fastqFiles = Datafiletype.FASTQ.getAllFiles(dc, merged);
     183      Set<String> fastqPrefix = new TreeSet<>();
     184      String R1_suffix = "_R1.fastq.gz";
     185      for (File f : fastqFiles)
     186      {
     187        String name = ScriptUtil.checkValidFilename(f.getName());
     188        if (name.endsWith(R1_suffix))
     189        {
     190          String prefix = name.substring(0, name.length()-R1_suffix.length());
     191          fastqPrefix.add(prefix);
     192        }
     193      }
    168194     
    169195      /*
     
    228254      script.comment("Setting up scripting environment and copying script to tmp folder");
    229255      script.cmd("ScriptDir=" + pipeline_scripts_path);
     256      script.cmd("JAVA="+java_path);
     257      script.cmd("TrimmomaticJAR="+trimmomatic_path);
    230258      script.cmd("ArchiveRoot="+archiveRoot);
    231259      script.cmd("FastqFolder=${ArchiveRoot}"+fastQFolder);
     260      script.cmd("FastqPrefix=( \\");
     261      script.cmd("  " + Values.getString(fastqPrefix, " \\\n  ", true) + " \\");
     262      script.cmd("  )");
     263     
    232264      script.newLine();
    233265 
     
    243275      script.comment("Move to the temporary working directory and copy the pipeline scripts");
    244276      script.cmd("cd ${TMPDIR}");
     277      script.cmd("mkdir fastq");
     278      script.cmd("mkdir trimmomatic.1");
     279      script.cmd("mkdir trimmomatic.2");
    245280      script.cmd("cp ${ScriptDir}/stdwrap.sh .");
    246       script.newLine();
    247 
     281      script.cmd("cp ${ScriptDir}/mips_adapters.sh .");
     282      script.newLine();
     283
     284      script.comment("Setup adapters in a separate script");
     285      script.cmd(". ./mips_adapters.sh");
     286      script.newLine();
     287     
    248288      script.comment("Copy FASTQ files to tmp folder");
    249289      script.progress(10, "Copying FASTQ files");
     
    252292      script.cmd("exit 1");
    253293      script.cmd("fi");
    254       script.cmd("mkdir fastq");
    255294      script.cmd("cp ${FastqFolder}/*.fastq.gz fastq");
    256295      script.newLine();
    257296
    258       /*
    259       script.comment("Find FASTQ files");
    260       script.cmd("FASTQ1=`find fastq -name \"*_R1.fastq.gz\" -print -quit 2> /dev/null`");
    261       script.cmd("FASTQ2=`find fastq -name \"*_R2.fastq.gz\" -print -quit 2> /dev/null`");
    262       script.newLine();
    263       */
    264 
    265 
     297      script.comment("Run Trimmomatic");
     298      script.cmd("for prefix in ${FastqPrefix[@]} ; do");
     299     
     300      String trimCmd1 = "./stdwrap.sh ${JAVA} -jar ${TrimmomaticJAR} PE";
     301      trimCmd1 += " -threads ${NumThreads}";
     302      trimCmd1 += " -phred33 -trimlog trimmomatic.1/${prefix}.log";
     303      trimCmd1 += " fastq/${prefix}_R1.fastq.gz";
     304      trimCmd1 += " fastq/${prefix}_R2.fastq.gz";
     305      trimCmd1 += " trimmomatic.1/${prefix}_R1.fastq";
     306      trimCmd1 += " trimmomatic.1/un_${prefix}_R1.fastq";
     307      trimCmd1 += " trimmomatic.1/${prefix}_R2.fastq";
     308      trimCmd1 += " trimmomatic.1/un_${prefix}_R2.fastq";
     309      if (align_trimmomaticOptions1 != null)
     310      {
     311        trimCmd1 += " " + align_trimmomaticOptions1;
     312      }
     313      trimCmd1 += " >> trimmomatic.1/trimmomatic.out";
     314     
     315      String trimCmd2 = "./stdwrap.sh ${JAVA} -jar ${TrimmomaticJAR} PE";
     316      trimCmd2 += " -threads ${NumThreads}";
     317      trimCmd2 += " -phred33 -trimlog trimmomatic.2/${prefix}.log";
     318      trimCmd2 += " trimmomatic.1/${prefix}_R1.fastq";
     319      trimCmd2 += " trimmomatic.1/${prefix}_R2.fastq";
     320      trimCmd2 += " trimmomatic.2/${prefix}_R1.fastq";
     321      trimCmd2 += " trimmomatic.2/un_${prefix}_R1.fastq";
     322      trimCmd2 += " trimmomatic.2/${prefix}_R2.fastq";
     323      trimCmd2 += " trimmomatic.2/un_${prefix}_R2.fastq";
     324      if (align_trimmomaticOptions2 != null)
     325      {
     326        trimCmd2 += " " + align_trimmomaticOptions2;
     327      }
     328      trimCmd2 += " >> trimmomatic.2/trimmomatic.out";
     329      script.cmd("   " + trimCmd1);
     330      script.cmd("   " + trimCmd2);   
     331      script.cmd("done");
     332     
    266333      /*
    267334      script.progress(95, "Copying result files to project archive");
Note: See TracChangeset for help on using the changeset viewer.