/// <summary>
/// Builds the shell commands that call indels with scalpel-discovery (single-sample mode)
/// and then strip SNVs from the resulting VCF with <see cref="VcfToolsWrapper"/>.
/// </summary>
/// <remarks>
/// Need to filter the VCF by FILTER = PASS; there are several reasons they don't accept calls that I trust.
/// There's an attribute "ZYG" for zygosity, either "het" or "homo" for heterozygous or homozygous.
/// Sets <c>IndelVcfPath</c> (outdir/variants.indel.vcf) and <c>FilteredIndelVcfPath</c> as a side effect.
/// </remarks>
/// <param name="spritzDirectory">Passed to the installation check, the change-directory command and the SNV-removal command.</param>
/// <param name="threads">Value given to scalpel's <c>--numprocs</c> option.</param>
/// <param name="genomeFastaP">Reference genome, given to scalpel's <c>--ref</c> option.</param>
/// <param name="bedPath">BED file given to scalpel's <c>--bed</c> option.</param>
/// <param name="bamPath">BAM file given to scalpel's <c>--bam</c> option.</param>
/// <param name="outdir">Output directory given to scalpel's <c>--dir</c> option; the indel VCF is expected here.</param>
/// <returns>The shell command lines, in the order they are meant to run.</returns>
public List<string> CallIndels(string spritzDirectory, int threads, string genomeFastaP, string bedPath, string bamPath, string outdir)
{
    CheckInstallation(spritzDirectory);
    var vcftools = new VcfToolsWrapper();
    IndelVcfPath = Path.Combine(outdir, "variants.indel.vcf");
    //IndelVcf1IndexedPath = Path.Combine(outdir, "variants.indel.1index.vcf");

    var commands = new List<string>();
    commands.Add(WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory));

    // Only run scalpel when the indel VCF is missing or empty.
    string indelVcf = WrapperUtility.ConvertWindowsPath(IndelVcfPath);
    commands.Add($"if [[ ! -f {indelVcf} || ! -s {indelVcf} ]]; then ");

    string scalpelDiscovery = " scalpel-" + ScalpelVersion + "/scalpel-discovery";
    commands.Add(
        $"{scalpelDiscovery} --single" +
        $" --bam {WrapperUtility.ConvertWindowsPath(bamPath)}" +
        $" --ref {WrapperUtility.ConvertWindowsPath(genomeFastaP)}" +
        $" --bed {WrapperUtility.ConvertWindowsPath(bedPath)}" +
        $" --numprocs {threads.ToString()}" +
        $" --dir {WrapperUtility.ConvertWindowsPath(outdir)}");

    // scalpel uses 0-indexing, where SnpEff uses 1-indexing, so change this output to match snpeff
    // (currently disabled):
    //" awk 'BEGIN{OFS=\"\t\"}{ if (substr($0, 1, 1) != \"#\") $2=++$2; print $0 }' " + WrapperUtility.ConvertWindowsPath(IndelVcfPath) + " > " + WrapperUtility.ConvertWindowsPath(IndelVcf1IndexedPath),
    commands.Add("fi");

    // vcf-concat doesn't keep all INFO header lines, so just dump the INFO from each variant
    commands.Add(vcftools.RemoveAllSnvs(spritzDirectory, IndelVcfPath, false, true));

    // Read only after RemoveAllSnvs has been called (presumably that call is what sets this path; confirm in VcfToolsWrapper).
    FilteredIndelVcfPath = vcftools.VcfWithoutSnvsPath;
    return commands;
}
/// <summary>
/// HaplotypeCaller for calling variants on each BAM file individually.
/// Builds the shell commands that index the references, run GATK HaplotypeCaller in GVCF mode,
/// genotype the GVCF with GenotypeGVCFs, and write an indel-free copy with SelectVariants.
/// Each GATK step is guarded so it only runs when its output is missing or empty, and each
/// IndexFeatureFile step only runs when the corresponding index file is missing.
/// </summary>
/// <remarks>
/// Sets <c>HaplotypeCallerGvcfPath</c> (.g.vcf.gz), <c>HaplotypeCallerVcfPath</c> (.g.gt.vcf) and
/// <c>FilteredHaplotypeCallerVcfPath</c> (.g.gt.NoIndels.vcf), all located next to <paramref name="splitTrimBam"/>.
/// </remarks>
/// <param name="spritzDirectory">Passed to the change-directory command.</param>
/// <param name="experimentType">When RNASequencing, <c>--dont-use-soft-clipped-bases true</c> is added to HaplotypeCaller.</param>
/// <param name="threads">Value given to HaplotypeCaller's <c>--native-pair-hmm-threads</c> option.</param>
/// <param name="genomeFasta">Reference genome FASTA (<c>-R</c>); index and dictionary commands are emitted for it.</param>
/// <param name="splitTrimBam">Input BAM (<c>-I</c>); output file names are derived from it.</param>
/// <param name="dbsnpReferenceVcfPath">Known-variants VCF given to <c>--dbsnp</c>.</param>
/// <returns>The shell command lines, in the order they are meant to run.</returns>
public List<string> VariantCalling(string spritzDirectory, ExperimentType experimentType, int threads, string genomeFasta, string splitTrimBam, string dbsnpReferenceVcfPath)
{
    // All outputs live beside the input BAM and share its base name.
    string bamDirectory = Path.GetDirectoryName(splitTrimBam);
    string bamBaseName = Path.GetFileNameWithoutExtension(splitTrimBam);
    HaplotypeCallerGvcfPath = Path.Combine(bamDirectory, bamBaseName + ".g.vcf.gz");
    HaplotypeCallerVcfPath = Path.Combine(bamDirectory, bamBaseName + ".g.gt.vcf");
    FilteredHaplotypeCallerVcfPath = Path.Combine(bamDirectory, bamBaseName + ".g.gt.NoIndels.vcf");

    // NOTE(review): this instance is not used below; confirm the constructor has no required side effects before removing it.
    var vcftools = new VcfToolsWrapper();

    var commands = new List<string>
    {
        WrapperUtility.ChangeToToolsDirectoryCommand(spritzDirectory),
        SamtoolsWrapper.GenomeFastaIndexCommand(genomeFasta),
        GenomeDictionaryIndexCommand(genomeFasta),
    };

    // Shell-side forms of the paths, converted once and reused in the commands below.
    string reference = WrapperUtility.ConvertWindowsPath(genomeFasta);
    string bam = WrapperUtility.ConvertWindowsPath(splitTrimBam);
    string dbsnp = WrapperUtility.ConvertWindowsPath(dbsnpReferenceVcfPath);
    string gvcf = WrapperUtility.ConvertWindowsPath(HaplotypeCallerGvcfPath);
    string gvcfIndex = WrapperUtility.ConvertWindowsPath(HaplotypeCallerGvcfPath + ".tbi");
    string vcf = WrapperUtility.ConvertWindowsPath(HaplotypeCallerVcfPath);
    string vcfIndex = WrapperUtility.ConvertWindowsPath(HaplotypeCallerVcfPath + ".idx");
    string noIndelVcf = WrapperUtility.ConvertWindowsPath(FilteredHaplotypeCallerVcfPath);
    string noIndelVcfIndex = WrapperUtility.ConvertWindowsPath(FilteredHaplotypeCallerVcfPath + ".idx");

    string threadCount = threads.ToString();
    string softClipOption = experimentType == ExperimentType.RNASequencing
        ? " --dont-use-soft-clipped-bases true"
        : "";

    // check that reference VCF is indexed
    commands.Add($"if [ ! -f {dbsnp}.idx ]; then {Gatk(Workers)} IndexFeatureFile -F {dbsnp}; fi");

    // call variants
    // NOTE(review): newer GATK releases may reject a non-zero --max-mnp-distance together with -ERC GVCF;
    // confirm against the GATK version this wrapper installs.
    commands.Add(
        $"if [ ! -f {gvcf} ] || [  ! -s {gvcf} ]; then {Gatk(Workers, 2)} HaplotypeCaller" +
        $" --native-pair-hmm-threads {threadCount}" +
        $" -R {reference}" +
        $" -I {bam}" +
        " --min-base-quality-score 20" +
        softClipOption +
        $" --dbsnp {dbsnp}" +
        $" -O {gvcf}" +
        " -ERC GVCF" + // this prompts phasing!
        " --max-mnp-distance 3" + // note: this can't be used for joint genotyping here, but this setting is available in mutect2 for doing tumor vs normal calls
        "; fi");

    // index compressed gvcf file
    commands.Add($"if [ ! -f {gvcfIndex} ]; then {Gatk(Workers)} IndexFeatureFile -F {gvcf}; fi");

    // genotype the gvcf file into a traditional vcf file
    commands.Add(
        $"if [ ! -f {vcf} ] || [  ! -s {vcf} ]; then {Gatk(Workers, 2)} GenotypeGVCFs" +
        $" -R {reference}" +
        $" -V {gvcf}" +
        $" -O {vcf}" +
        "; fi");
    commands.Add($"if [ ! -f {vcfIndex} ]; then {Gatk(Workers)} IndexFeatureFile -F {vcf}; fi");

    // filter out indels
    commands.Add(
        $"if [ ! -f {noIndelVcf} ] || [  ! -s {noIndelVcf} ]; then {Gatk(Workers, 2)} SelectVariants" +
        " --select-type-to-exclude INDEL" +
        $" -R {reference}" +
        $" -V {vcf}" +
        $" -O {noIndelVcf}" +
        "; fi");
    commands.Add($"if [ ! -f {noIndelVcfIndex} ]; then {Gatk(Workers)} IndexFeatureFile -F {noIndelVcf}; fi");

    // TODO: variant filtration is currently disabled. The previous (GATK3-style, RNA-Seq specific) step ran
    // VariantFiltration with "-window 35 -cluster 3" (filter out clusters of 3 SNPs within 35 bases,
    // https://software.broadinstitute.org/gatk/documentation/topic?name=methods) plus the filters
    // FS ("FS > 30.0") and QD ("QD < 2.0"). Need to check out recommendations before using with DNA-Seq.

    return commands;
}