Changeset 5841
- Timestamp:
- Feb 25, 2020, 11:47:30 AM (3 years ago)
- Location:
- extensions/net.sf.basedb.reggie/trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
extensions/net.sf.basedb.reggie/trunk/config/reggie-config.xml
r5831 r5841 364 364 <genome-fasta>${ReferenceDir}/gatk_bundle/2.8/b37/human_g1k_v37_decoy.fasta</genome-fasta> 365 365 366 <mark-duplicate -options>-CREATE_INDEX true -CREATE_MD5_FILE true -MAX_FILE_HANDLES 20000 -ALLOW_MISSING_UMIS false -DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES -MAX_EDIT_DISTANCE_TO_JOIN 1 -REMOVE_DUPLICATES true</mark-duplicate-options>367 366 <mark-duplicates-options>-CREATE_INDEX true -CREATE_MD5_FILE true -MAX_FILE_HANDLES 20000 -ALLOW_MISSING_UMIS false -DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES -MAX_EDIT_DISTANCE_TO_JOIN 1 -REMOVE_DUPLICATES true</mark-duplicates-options> 367 <pcr-metrics-options>-METRIC_ACCUMULATION_LEVEL null -METRIC_ACCUMULATION_LEVEL ALL_READS -MINIMUM_MAPPING_QUALITY 1 -MINIMUM_BASE_QUALITY 20 -CLIP_OVERLAPPING_READS true -COVERAGE_CAP 500 -NEAR_DISTANCE 5</pcr-metrics-options> 368 368 </align-mips> 369 369 -
extensions/net.sf.basedb.reggie/trunk/src/net/sf/basedb/reggie/grid/MipsAlignJobCreator.java
r5839 r5841 9 9 import org.slf4j.LoggerFactory; 10 10 11 import net.sf.basedb.core.AnyToAny; 12 import net.sf.basedb.core.DataFileType; 11 13 import net.sf.basedb.core.DbControl; 12 14 import net.sf.basedb.core.DerivedBioAssay; 15 import net.sf.basedb.core.Directory; 13 16 import net.sf.basedb.core.File; 17 import net.sf.basedb.core.FileServer; 18 import net.sf.basedb.core.FileSetMember; 14 19 import net.sf.basedb.core.InvalidDataException; 15 20 import net.sf.basedb.core.ItemList; … … 17 22 import net.sf.basedb.core.ItemSubtype; 18 23 import net.sf.basedb.core.Job; 24 import net.sf.basedb.core.Path; 19 25 import net.sf.basedb.core.Protocol; 20 26 import net.sf.basedb.core.Sample; … … 32 38 import net.sf.basedb.reggie.Reggie; 33 39 import net.sf.basedb.reggie.XmlConfig; 40 import net.sf.basedb.reggie.dao.AlignedSequences; 34 41 import net.sf.basedb.reggie.dao.Annotationtype; 35 42 import net.sf.basedb.reggie.dao.BiomaterialList; … … 37 44 import net.sf.basedb.reggie.dao.Dna; 38 45 import net.sf.basedb.reggie.dao.DoNotUse; 46 import net.sf.basedb.reggie.dao.Fileserver; 39 47 import net.sf.basedb.reggie.dao.Library; 40 48 import net.sf.basedb.reggie.dao.MergedSequences; … … 159 167 String genome_dict = cfg.getRequiredConfig("align-mips/genome-dict", alignParameterSet); 160 168 String genome_fasta = cfg.getRequiredConfig("align-mips/genome-fasta", alignParameterSet); 161 String mark_duplicate_options = cfg.getRequiredConfig("align-mips/mark-duplicate-options", alignParameterSet); 169 String mark_duplicates_options = cfg.getRequiredConfig("align-mips/mark-duplicates-options", alignParameterSet); 170 String pcr_metrics_options = cfg.getRequiredConfig("align-mips/pcr-metrics-options", alignParameterSet); 162 171 163 172 // Load common items … … 294 303 script.cmd(" " + Values.getString(fastqPrefix, "\n ", true)); 295 304 script.cmd(")"); 305 script.cmd("AlignFolder=${ArchiveRoot}"+alignFolder); 296 306 script.cmd("TrimOptionsAdapter=\""+align_trimmomaticOptions1+"\""); 297 307 script.cmd("TrimOptionsQual=\""+align_trimmomaticOptions2+"\""); 298 308 script.cmd("NovoAlignOptions=\""+novoalign_options+"\""); 299 script.cmd("MarkDuplicatesOptions=\""+mark_duplicate_options+"\""); 309 script.cmd("MarkDuplicatesOptions=\""+mark_duplicates_options+"\""); 310 script.cmd("PcrMetricsOptions=\""+pcr_metrics_options+"\""); 300 311 301 312 script.newLine(); … … 364 375 script.comment("Run novoalign"); 365 376 script.progress(40, "Aligning with novoalign"); 366 script.cmd("min_insert=$(awk 'NR == 1 || $3 - $2 < min {min = $3 - $2}END{print min - 1}' \"${AmpliconsBed}\")"); 367 script.cmd("max_insert=$(awk 'NR == 1 || $3 - $2 > max {max = $3 - $2}END{print max + 1}' \"${AmpliconsBed}\")"); 377 script.cmd("get_min_max_insert"); 368 378 script.cmd("n=0"); 369 379 script.cmd("AlignedBams=''"); … … 377 387 script.newLine(); 378 388 379 script.comment("Re-header BAM file");389 script.comment("Re-header and split BAM file"); 380 390 script.progress(70, "Post-processing aligned BAM file"); 381 391 script.cmd("reheader_bam"); 382 script.newLine();383 384 script.comment("Split BAM file into concordant, discordant and unmapped");385 392 script.cmd("split_bam"); 386 393 script.newLine(); … … 392 399 script.newLine(); 393 400 394 /* 401 script.comment("Collect metrics"); 402 script.progress(90, "Collecting PCR and alignment metrics"); 403 script.cmd("collect_pcr_metrics"); 404 script.cmd("collect_alignment_metrics"); 405 script.newLine(); 406 395 407 script.progress(95, "Copying result files to project archive"); 396 script.cmd("mkdir -p ${HisatFolder}"); 397 script.cmd("rm -rf ${HisatFolder}/*"); 398 script.cmd("cp masked/*.out ${WD}"); 399 script.cmd("cp aligned/*.out ${WD}"); 400 script.cmd("cp aligned/alignment_picardmetrics.csv ${WD}"); 401 script.cmd("cp aligned/* ${HisatFolder}"); 408 script.cmd("mkdir -p ${AlignFolder}"); 409 script.cmd("rm -rf ${AlignFolder}/*"); 410 script.cmd("cp out/* ${AlignFolder}"); 411 // TODO - copy some files to $WD so that we can update some annotations like ALIGNED_PAIRS, etc. 402 412 if (externalGroup != null) 403 413 { 404 script.cmd("chgrp -R " + externalGroup + " ${HisatFolder} 2>> ${WD}/chgrp.out || echo [" + alignedName +"] >> ${WD}/chgrp.out"); 405 } 406 script.cmd("ls -1 ${HisatFolder}/* >> ${WD}/files.out"); 407 script.newLine(); 408 */ 414 script.cmd("chgrp -R " + externalGroup + " ${AlignFolder} 2>> ${WD}/chgrp.out || echo [" + alignedName +"] >> ${WD}/chgrp.out"); 415 } 416 script.cmd("ls -1 ${AlignFolder}/* >> ${WD}/files.out"); 417 script.newLine(); 409 418 410 419 script.progress(99, "Cleaning up temporary folders"); … … 435 444 public String jobCompleted(SessionControl sc, OpenGridSession session, Job job, JobStatus status) 436 445 { 446 String jobName = status.getName(); 447 String files = session.getJobFileAsString(jobName, "files.out", "UTF-8"); 448 449 Metrics metrics = parseAlignedOut(sc, job, files); 437 450 return null; 438 451 } 439 452 453 private Metrics parseAlignedOut(SessionControl sc, Job job, String filesOut) 454 { 455 Metrics metrics = new Metrics(); 456 457 DbControl dc = null; 458 try 459 { 460 dc = sc.newDbControl(); 461 462 AlignedSequences alignedSequences = AlignedSequences.getByJob(dc, job); 463 DerivedBioAssay aligned = alignedSequences.getItem(); 464 465 // Create file links 466 boolean useExternalProjectArchive = Reggie.isExternalItem(aligned.getName()); 467 FileServer fileArchive = useExternalProjectArchive ? Fileserver.EXTERNAL_ARCHIVE.load(dc) : Fileserver.PROJECT_ARCHIVE.load(dc); 468 String analysisDir = useExternalProjectArchive ? Reggie.EXTERNAL_ANALYSIS_DIR : Reggie.SECONDARY_ANALYSIS_DIR; 469 470 String dataFilesFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, aligned); 471 String baseFolder = Reggie.convertDataFilesFolderToBaseFolder(dataFilesFolder); 472 Directory localDataDir = Directory.getNew(dc, new Path(analysisDir+baseFolder, Path.Type.DIRECTORY)); 473 DataFileType bamData = Datafiletype.BAM.load(dc); 474 ItemSubtype bamType = bamData.getGenericType(); 475 476 int lineNo = 0; 477 for (String line : filesOut.split("\n")) 478 { 479 lineNo++; 480 481 File f = File.getFile(dc, localDataDir, line.substring(line.lastIndexOf("/")+1), true); 482 f.setFileServer(fileArchive); 483 String fileUrl = "sftp://" + fileArchive.getHost() + dataFilesFolder + "/" + f.getName(); 484 try 485 { 486 f.setUrl(fileUrl, true); 487 } 488 catch (RuntimeException ex) 489 { 490 f.setUrl(fileUrl, false); 491 } 492 493 if (!f.isInDatabase()) 494 { 495 dc.saveItem(f); 496 } 497 if (f.getName().equals("concordant.bam")) 498 { 499 //f.setDescription(metrics.numReadsAfterAlign + " ALIGNED PAIRS"); 500 f.setItemSubtype(bamType); 501 FileSetMember member = aligned.getFileSet().addMember(f, bamData); 502 } 503 else 504 { 505 AnyToAny link = AnyToAny.getNewOrExisting(dc, aligned, f.getName(), f, true); 506 if (!link.isInDatabase()) dc.saveItem(link); 507 } 508 } 509 510 dc.commit(); 511 } 512 finally 513 { 514 if (dc != null) dc.close(); 515 } 516 517 return metrics; 518 } 519 520 } 521 522 static class Metrics 523 { 440 524 } 441 525
Note: See TracChangeset
for help on using the changeset viewer.