From c96feefca6932ea266166c477fa8f9160281e884 Mon Sep 17 00:00:00 2001 From: Peter Ebert Date: Fri, 15 Sep 2017 19:26:16 +0200 Subject: [PATCH] ENH: final version of CHPv5, validates against XSD and together with AMD of test sample --- docs/quantification/chip-seq/CHPv5.xml | 117 +++++++++++++------------ 1 file changed, 61 insertions(+), 56 deletions(-) diff --git a/docs/quantification/chip-seq/CHPv5.xml b/docs/quantification/chip-seq/CHPv5.xml index 8f92919..942f1a2 100644 --- a/docs/quantification/chip-seq/CHPv5.xml +++ b/docs/quantification/chip-seq/CHPv5.xml @@ -79,27 +79,29 @@ - QcSummary - JSON + GALvX_QCSummary + JSON / txt collection The median insert size (field: insertSizeMedian) is extracted from the QC summary file. + Note that for compatibility with previous alignment processes, the QC summary files + may also have the old tabular / text-based format (field: PE_insertsize (mapq>0)) - - blacklist_regions - BED - single - Blacklist region - reference_genome 2bit single The reference genome file; see DCC/download/results/references/genomes + + blacklist_regions + BED + single + Blacklist region + chromosome_sizes TSV @@ -107,7 +109,7 @@ 2-column, tab-separated table of chromosome sizes for reference genome - autosomal_regions + autosome_regions BED single A file listing all autosomes as BED regions for filtering @@ -115,13 +117,13 @@ - DEEPID.PROC.DATE.raw.bamcov + DEEPID.PROC.DATE.ASSM.raw.bamcov bigwig collection Signal coverage track generated from raw BAM files - DEEPID.PROC.DATE.filt.bamcov + DEEPID.PROC.DATE.ASSM.filt.bamcov bigwig collection @@ -129,92 +131,91 @@ - DEEPID.PROC.DATE.ses-fc + DEEPID.PROC.DATE.ASSM.ses.log2-Input bigwig collection - SES normalized fold-change signal + SES normalized signal-over-Input track - DEEPID.PROC.DATE.cnt-fc + DEEPID.PROC.DATE.ASSM.cnt.log2-Input bigwig collection - Read-count normalized fold-change signal + Read-count normalized signal-over-Input track - DEEPID.PROC.DATE.gcfreq + DEEPID.PROC.DATE.ASSM.gcbias svg collection GC bias plot based on raw BAM files - DEEPID.PROC.DATE.gcfreq + DEEPID.PROC.DATE.ASSM.gcfreq txt collection Obs./exp. GC read frequencies based on raw BAM files - DEEPID.PROC.DATE.hhmm.emfit + DEEPID.PROC.DATE.ASSM.hhmm.emfit PDF collection histoneHMM output visualizing the EM fit. Check this before using the histoneHMM output - DEEPID.PROC.DATE.hhmm.out + DEEPID.PROC.DATE.ASSM.hhmm.out zip collection Zip archive containing other histoneHMM output files (raw data files not needed by most users) - DEEPID.PROC.DATE.macs.out + DEEPID.PROC.DATE.ASSM.macs.out zip collection Zip archive containing other MACS output files (raw data files not needed by most users) - DEEPID.PROC.DATE.hhmm.broad + DEEPID.PROC.DATE.ASSM.hhmm.broad BED / broadPeak collection Histone and Input enriched regions called by histoneHMM - DEEPID.PROC.DATE.macs.broad + DEEPID.PROC.DATE.ASSM.macs.broad BED / broadPeak collection Histone enriched regions called by MACS - DEEPID.PROC.DATE.macs.narrow + DEEPID.PROC.DATE.ASSM.macs.narrow BED / narrowPeak collection Histone enriched regions called by MACS - DEEPID.PROC.DATE.fgpr + DEEPID.PROC.DATE.ASSM.fgpr SVG single Fingerprint plots based on raw BAM files - DEEPID.PROC.DATE.qm-fgpr + DEEPID.PROC.DATE.ASSM.qm-fgpr txt single Fingerprint quality metrics based on raw BAM files - DEEPID.PROC.DATE.counts-fgpr + DEEPID.PROC.DATE.ASSM.counts-fgpr tsv single Fingerprint raw counts based on raw BAM files - - DEEPID.PROC.DATE.auto.counts-summ + DEEPID.PROC.DATE.ASSM.auto.counts-summ tsv single multiBamSummary raw counts based on filtered and autosome-restricted BAM files - DEEPID.PROC.DATE.auto.summ + DEEPID.PROC.DATE.ASSM.auto.summ npz single @@ -222,16 +223,26 @@ The format is a numpy compatible binary file. - + + DEEPID.PROC.DATE.ASSM.bamcorr + SVG + collection + Correlation heatmaps using Pearson and Spearman correlation measure + + + DEEPID.PROC.DATE.ASSM.corrmat + tsv + collection + Raw correlation matrices + - bamCoverage 2.5.3 @@ -245,7 +256,7 @@ GALvX_Histone, GALvX_Input @@ -257,7 +268,7 @@ @@ -272,10 +283,10 @@ 2.5.3 no looping @@ -321,7 +332,7 @@ 2.5.3 @@ -338,10 +349,10 @@ 2.5.3 no looping @@ -370,7 +381,7 @@ 3.0 GALvX_Histone @@ -398,7 +409,7 @@ DEEPID.hmm.bed && - mv DEEPID-zinba-emfit.pdf {DEEPID.PROC.DATE.hhmm.emfit} + mv DEEPID-zinba-emfit.pdf {DEEPID.PROC.DATE.ASSM.hhmm.emfit} ]]> DEEPID-regions.gff @@ -409,7 +420,7 @@ 3.0 GALvX_Histone @@ -418,8 +429,6 @@ -zinba-params-em.txt, .txt - - sambamba 0.6.6 @@ -432,7 +441,6 @@ DEEPID.tmp.filt.bam Count number of reads overlapping peak regions, later used for FRiP score - bedtools 2.26.0 @@ -444,7 +452,6 @@ peak_file Intersect all peak files with blacklist regions for flagging - bedtools 2.26.0 @@ -456,14 +463,13 @@ histoneHMM_peak_file Intersect all histoneHMM peak files with Input peaks for flagging - Python 2.7.13 {DEEPID.PROC.DATE.macs.narrow} {DEEPID.PROC.DATE.macs.broad} {DEEPID.PROC.DATE.hhmm.broad} + > {DEEPID.PROC.DATE.ASSM.macs.narrow} {DEEPID.PROC.DATE.ASSM.macs.broad} {DEEPID.PROC.DATE.ASSM.hhmm.broad} ]]> peak-file @@ -473,7 +479,6 @@ standard broadPeak/narrowPeak format specifications. - sambamba 0.6.6 @@ -493,9 +498,9 @@ 2.5.3 no looping @@ -506,10 +511,10 @@ 2.5.3 no looping