/// <summary>
/// Processes a single chromosome: initializes the alignment arrays, loads observed alignments
/// from the BAM file, applies the optional filter file, screens the observed tags against the
/// possible alignments, and serializes the result to <c>parameters.outFile</c> with protobuf.
/// </summary>
/// <param name="parameters">Run parameters (reference, BAM, chromosome, coverage mode, filter and output paths).</param>
/// <param name="possibleAlignments">Per-chromosome possible-alignment bit arrays; populated by this call.</param>
/// <param name="observedAlignments">Per-chromosome observed-alignment hit arrays; populated by this call.</param>
/// <param name="fragmentLengths">Per-chromosome fragment length arrays; populated by this call.</param>
/// <returns>Always 0.</returns>
private static int BinOneGenomicInterval(CanvasBinParameters parameters,
    Dictionary<string, BitArray> possibleAlignments,
    Dictionary<string, HitArray> observedAlignments,
    Dictionary<string, Int16[]> fragmentLengths)
{
    InitializeAlignmentArrays(parameters.referenceFile, parameters.chromosome, parameters.coverageMode,
        possibleAlignments, observedAlignments, fragmentLengths);
    Console.WriteLine("{0} Initialized alignment arrays", DateTime.Now);

    LoadObservedAlignmentsBAM(parameters.bamFile, parameters.isPairedEnd, parameters.chromosome, parameters.coverageMode,
        observedAlignments[parameters.chromosome], fragmentLengths[parameters.chromosome]);
    Console.WriteLine("{0} Loaded observed alignments", DateTime.Now);

    // Filter on BED file.
    if (parameters.filterFile != null)
    {
        ExcludeTagsOverlappingFilterFile(parameters.filterFile, possibleAlignments);
    }

    // Make sure we don't have an 'impossible' observed alignment.
    ScreenObservedTags(observedAlignments, possibleAlignments);

    Console.WriteLine("{0} Serialize intermediate data", DateTime.Now);

    // Output binary intermediate file.
    IntermediateData data = new IntermediateData(possibleAlignments, observedAlignments, fragmentLengths, parameters.coverageMode);

    // Path.GetDirectoryName returns "" for a bare file name (and null for a root path);
    // Directory.CreateDirectory rejects both, so only create the folder when there is one to create.
    string outputDirectory = Path.GetDirectoryName(parameters.outFile);
    if (!string.IsNullOrEmpty(outputDirectory))
    {
        Directory.CreateDirectory(outputDirectory);
    }

    using (FileStream stream = new FileStream(parameters.outFile, FileMode.Create, FileAccess.Write))
    {
        ProtoBuf.Serializer.Serialize<IntermediateData>(stream, data);
    }
    Console.WriteLine("{0} Intermediate data serialized", DateTime.Now);
    return 0;
}
/// <summary>
/// Program entry point: logs and parses the command line, then dispatches to
/// <c>FragmentBinner</c> for fragment coverage mode or to <c>CanvasBin.Run</c> otherwise.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>1 when the command line could not be parsed; otherwise the exit code of the selected binner.</returns>
public static int Main(string[] args)
{
    CanvasCommon.Utilities.LogCommandLine(args);

    CanvasBinParameters parameters = CanvasBinParameters.ParseCommandLine(args);
    if (parameters == null)
    {
        return 1;
    }
    Console.WriteLine("{0} Parsed command-line", DateTime.Now);

    bool useFragmentBinner = parameters.coverageMode == CanvasCommon.CanvasCoverageMode.Fragment;
    return useFragmentBinner
        ? new FragmentBinner(parameters).Bin()
        : CanvasBin.Run(parameters);
}
/// <summary>
/// Verifies that <c>FragmentBinner.Bin</c> throws an <see cref="ApplicationException"/> with the
/// expected message when the predefined bins file names chromosomes that are absent from the BAM file.
/// </summary>
public void TestAllChromsInBedAreInBam()
{
    string dataFolder = Path.Combine(Isas.Shared.Utilities.GetAssemblyFolder(typeof(TestCanvasBin)), "Data");

    CanvasBinParameters parameters = new CanvasBinParameters
    {
        predefinedBinsFile = Path.Combine(dataFolder, "bins_chrU.bed"),
        bamFile = Path.Combine(dataFolder, "single-end.bam"),
        isPairedEnd = true
    };
    FragmentBinner fragmentBinner = new FragmentBinner(parameters);

    bool exceptionCaught = false;
    try
    {
        fragmentBinner.Bin();
    }
    catch (ApplicationException e)
    {
        // Only the specific "missing chromosomes" failure counts as a pass.
        string expectedMessage = String.Format("Not all chromosomes in {0} are found in {1}.",
            parameters.predefinedBinsFile, parameters.bamFile);
        if (e.Message.Contains(expectedMessage))
        {
            exceptionCaught = true;
        }
    }

    Assert.IsTrue(exceptionCaught);
}
/// <summary>
/// Parses the CanvasBin command line into a <see cref="CanvasBinParameters"/> instance and
/// validates the required arguments and input files.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>
/// The populated parameters, or null when help was requested, a required argument is missing,
/// an input file does not exist, or the counts-per-bin value is invalid. A message is printed
/// in each failing case.
/// </returns>
public static CanvasBinParameters ParseCommandLine(string[] args)
{
    CanvasBinParameters parameters = new CanvasBinParameters();

    // Should I display a help message?
    bool needHelp = false;

    OptionSet p = new OptionSet()
    {
        { "b|bam=", "bam file containing unique alignments", v => parameters.bamFile = v },
        { "r|reference=", "Canvas-ready reference fasta file", v => parameters.referenceFile = v },
        { "c|chr=", "for bam input, only work on this chromosome. Output intermediate binary data. Must follow-up with a single CanvasBin call passing all the intermediate binary data files (see -i option)", v => parameters.chromosome = v },
        { "i|infile=", "intermediate binary data file from individual chromosome. Pass this option multiple times, once for each chromosome", v => parameters.intermediatePaths.Add(v) },
        { "f|filter=", "bed file containing regions to ignore", v => parameters.filterFile = v },
        { "d|bindepth=", "median counts desired in each bin", v => parameters.countsPerBin = Convert.ToInt32(v) },
        { "z|binsize=", "bin size; optional", v => parameters.binSize = Convert.ToInt32(v) },
        { "o|outfile=", "text file to output containing computed bins, or if -c option was specified the intermediate binary data file to output", v => parameters.outFile = v },
        { "y|binsizeonly", "calcualte bin size and exit", v => parameters.binSizeOnly = v != null },
        { "h|help", "show this message and exit", v => needHelp = v != null },
        { "p|paired-end", "input .bam is a paired-end alignment (e.g. from Isaac)", v => parameters.isPairedEnd = v != null },
        { "m|mode=", "coverage measurement mode", v => parameters.coverageMode = CanvasCommon.Utilities.ParseCanvasCoverageMode(v) },
        { "t|manifest=", "Nextera manifest file", v => parameters.manifestFile = v },
        { "n|bins=", "bed file containing predefined bins", v => parameters.predefinedBinsFile = v },
    };

    // Assembly.GetEntryAssembly() can return null (e.g. when the managed code is hosted by an
    // unmanaged application), so fall back to the executing assembly instead of dereferencing null.
    System.Reflection.Assembly versionAssembly =
        System.Reflection.Assembly.GetEntryAssembly() ?? System.Reflection.Assembly.GetExecutingAssembly();
    Console.WriteLine("CanvasBin {0}", versionAssembly.GetName().Version.ToString());

    // The leftover (unrecognized) arguments returned by Parse are not used.
    p.Parse(args);

    // Check for required arguments. Display the help message if any of them are missing.
    if (string.IsNullOrEmpty(parameters.referenceFile))
    {
        Console.Error.WriteLine("Please specify the Canvas k-uniqueness reference file.");
        needHelp = true;
    }
    else if (string.IsNullOrEmpty(parameters.outFile))
    {
        Console.Error.WriteLine("Please specify an output file name.");
        needHelp = true;
    }
    else if (parameters.coverageMode != CanvasCommon.CanvasCoverageMode.Fragment && parameters.countsPerBin == -1)
    {
        Console.Error.WriteLine("Please specify counts per bin.");
        needHelp = true;
    }
    else if (parameters.coverageMode != CanvasCommon.CanvasCoverageMode.Fragment
             && string.IsNullOrEmpty(parameters.chromosome)
             && parameters.intermediatePaths.Count == 0)
    {
        Console.Error.WriteLine("Please specify chromsome to measure coverage for.");
        needHelp = true;
    }

    if (needHelp)
    {
        ShowHelp(p);
        return null;
    }

    // Does the reference file exist?
    if (!File.Exists(parameters.referenceFile))
    {
        Console.WriteLine("CanvasBin.exe: File {0} does not exist! Exiting.", parameters.referenceFile);
        return null;
    }

    // Does the bam file exist?
    if (parameters.bamFile == null || !File.Exists(parameters.bamFile))
    {
        Console.WriteLine("CanvasBin.exe: Alignment input does not exist! Exiting.");
        return null;
    }

    // Does the BED file exist?
    if ((parameters.filterFile != null) && (!File.Exists(parameters.filterFile)))
    {
        Console.WriteLine("CanvasBin.exe: File {0} does not exist! Exiting.", parameters.filterFile);
        return null;
    }

    // Outside fragment mode the counts-per-bin value must be strictly positive.
    if (parameters.coverageMode != CanvasCommon.CanvasCoverageMode.Fragment && parameters.countsPerBin < 1)
    {
        Console.WriteLine("CanvasBin.exe: Median counts must be strictly positive. Exiting.");
        return null;
    }

    return parameters;
}
/// <summary>
/// Builds a <see cref="CanvasBinParameters"/> from the command line, then checks that the
/// required arguments were given and that the referenced input files exist.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>
/// The populated parameters on success; null when help was requested or any validation step
/// fails (each failure prints its own message).
/// </returns>
public static CanvasBinParameters ParseCommandLine(string[] args)
{
    CanvasBinParameters parameters = new CanvasBinParameters();

    // Should I display a help message?
    bool needHelp = false;

    OptionSet p = new OptionSet()
    {
        { "b|bam=", "bam file containing unique alignments", v => parameters.bamFile = v },
        { "r|reference=", "Canvas-ready reference fasta file", v => parameters.referenceFile = v },
        { "c|chr=", "for bam input, only work on this chromosome. Output intermediate binary data. Must follow-up with a single CanvasBin call passing all the intermediate binary data files (see -i option)", v => parameters.chromosome = v },
        { "i|infile=", "intermediate binary data file from individual chromosome. Pass this option multiple times, once for each chromosome", v => parameters.intermediatePaths.Add(v) },
        { "f|filter=", "bed file containing regions to ignore", v => parameters.filterFile = v },
        { "d|bindepth=", "median counts desired in each bin", v => parameters.countsPerBin = Convert.ToInt32(v) },
        { "z|binsize=", "bin size; optional", v => parameters.binSize = Convert.ToInt32(v) },
        { "o|outfile=", "text file to output containing computed bins, or if -c option was specified the intermediate binary data file to output", v => parameters.outFile = v },
        { "y|binsizeonly", "calcualte bin size and exit", v => parameters.binSizeOnly = v != null },
        { "h|help", "show this message and exit", v => needHelp = v != null },
        { "p|paired-end", "input .bam is a paired-end alignment (e.g. from Isaac)", v => parameters.isPairedEnd = v != null },
        { "m|mode=", "coverage measurement mode", v => parameters.coverageMode = CanvasCommon.Utilities.ParseCanvasCoverageMode(v) },
        { "t|manifest=", "Nextera manifest file", v => parameters.manifestFile = v },
        { "n|bins=", "bed file containing predefined bins", v => parameters.predefinedBinsFile = v },
    };

    // Assembly.GetEntryAssembly() is allowed to return null (for instance when hosted from
    // unmanaged code); use the executing assembly in that case rather than crashing on the banner.
    System.Reflection.Assembly versionAssembly =
        System.Reflection.Assembly.GetEntryAssembly() ?? System.Reflection.Assembly.GetExecutingAssembly();
    Console.WriteLine("CanvasBin {0}", versionAssembly.GetName().Version.ToString());

    // Unrecognized arguments returned by Parse are intentionally ignored.
    p.Parse(args);

    // Check for required arguments. Display the help message if any of them are missing.
    if (string.IsNullOrEmpty(parameters.referenceFile))
    {
        Console.Error.WriteLine("Please specify the Canvas k-uniqueness reference file.");
        needHelp = true;
    }
    else if (string.IsNullOrEmpty(parameters.outFile))
    {
        Console.Error.WriteLine("Please specify an output file name.");
        needHelp = true;
    }
    else if (parameters.coverageMode != CanvasCommon.CanvasCoverageMode.Fragment && parameters.countsPerBin == -1)
    {
        Console.Error.WriteLine("Please specify counts per bin.");
        needHelp = true;
    }
    else if (parameters.coverageMode != CanvasCommon.CanvasCoverageMode.Fragment
             && string.IsNullOrEmpty(parameters.chromosome)
             && parameters.intermediatePaths.Count == 0)
    {
        Console.Error.WriteLine("Please specify chromsome to measure coverage for.");
        needHelp = true;
    }

    if (needHelp)
    {
        ShowHelp(p);
        return null;
    }

    // Does the reference file exist?
    if (!File.Exists(parameters.referenceFile))
    {
        Console.WriteLine("CanvasBin.exe: File {0} does not exist! Exiting.", parameters.referenceFile);
        return null;
    }

    // Does the bam file exist?
    if (parameters.bamFile == null || !File.Exists(parameters.bamFile))
    {
        Console.WriteLine("CanvasBin.exe: Alignment input does not exist! Exiting.");
        return null;
    }

    // Does the BED file exist?
    if ((parameters.filterFile != null) && (!File.Exists(parameters.filterFile)))
    {
        Console.WriteLine("CanvasBin.exe: File {0} does not exist! Exiting.", parameters.filterFile);
        return null;
    }

    // Outside fragment mode the counts-per-bin value must be strictly positive.
    if (parameters.coverageMode != CanvasCommon.CanvasCoverageMode.Fragment && parameters.countsPerBin < 1)
    {
        Console.WriteLine("CanvasBin.exe: Median counts must be strictly positive. Exiting.");
        return null;
    }

    return parameters;
}
/// <summary>
/// Creates a binner that operates on the supplied parameters.
/// </summary>
/// <param name="parameters">Run parameters stored on this instance for later use.</param>
public FragmentBinner(CanvasBinParameters parameters)
{
    this.parameters = parameters;
}
/// <summary>
/// Runs CanvasBin. With no intermediate files, a single chromosome is processed and written out
/// as intermediate data. Otherwise the intermediate files are deserialized, the bin size is
/// derived if it was not supplied, and the binned counts are written to the output file.
/// </summary>
/// <param name="parameters">Parsed command-line parameters.</param>
/// <returns>Always 0.</returns>
public static int Run(CanvasBinParameters parameters)
{
    // Per-chromosome data, keyed by chromosome name:
    //   possibleAlignments - one bit per position; 'true' marks a position whose mer is unique in the genome.
    //   observedAlignments - per-position count of observed alignments starting there.
    //   fragmentLengths    - per-position fragment length of the read starting there.
    var possibleAlignments = new Dictionary<string, BitArray>();
    var observedAlignments = new Dictionary<string, HitArray>();
    var fragmentLengths = new Dictionary<string, Int16[]>();

    // No intermediate inputs: this invocation handles one chromosome and stops.
    if (parameters.intermediatePaths.Count == 0)
    {
        BinOneGenomicInterval(parameters, possibleAlignments, observedAlignments, fragmentLengths);
        return 0;
    }

    // Load the intermediate data files.
    var pendingFiles = new List<string>(parameters.intermediatePaths);
    Object dataLock = new object(); // control access to possibleAlignments, observedAlignments, fragmentLengths

    // Limit # of deserialization threads to avoid (rare) protobuf issue.
    int maxWorkers = 1;
    var workers = new List<Thread>();

    Console.WriteLine("Start deserialization:");
    Console.Out.Flush();
    while (workers.Count > 0 || pendingFiles.Count > 0)
    {
        // Drop workers that have finished.
        workers.RemoveAll(worker => !worker.IsAlive);
        if (workers.Count == maxWorkers)
        {
            Thread.Sleep(1000);
            continue;
        }

        while (pendingFiles.Count > 0 && workers.Count < maxWorkers)
        {
            string path = pendingFiles[0];
            pendingFiles.RemoveAt(0);

            var worker = new Thread(() => DeserializeCanvasData(path, possibleAlignments, observedAlignments,
                fragmentLengths, dataLock, parameters.coverageMode));
            worker.Name = "CanvasBin " + path;
            workers.Add(worker);
            Console.WriteLine(worker.Name);
            worker.Start();
        }
    }
    Console.WriteLine("{0} Deserialization complete", DateTime.Now);
    Console.Out.Flush();

    NexteraManifest manifest = null;
    if (parameters.manifestFile != null)
    {
        manifest = new NexteraManifest(parameters.manifestFile, null, Console.WriteLine);
    }

    if (parameters.binSize == -1)
    {
        // Turn the desired # of alignments per bin into the number of possible alignments expected per bin.
        parameters.binSize = CalculateNumberOfPossibleAlignmentsPerBin(parameters.countsPerBin,
            possibleAlignments, observedAlignments, manifest: manifest);
    }

    if (parameters.binSizeOnly)
    {
        // Write bin size to file and stop.
        System.IO.File.WriteAllText(parameters.outFile + ".binsize", "" + parameters.binSize);
        return 0;
    }

    // Read predefined bins when a file was supplied.
    Dictionary<string, List<GenomicBin>> predefinedBins = parameters.predefinedBinsFile != null
        ? Utilities.LoadBedFile(parameters.predefinedBinsFile)
        : null;

    // Bin alignments.
    List<GenomicBin> bins = BinCounts(parameters.referenceFile, parameters.binSize, parameters.coverageMode, manifest,
        possibleAlignments, observedAlignments, fragmentLengths, predefinedBins, parameters.outFile);

    // Output!
    Console.WriteLine("{0} Output binned counts:", DateTime.Now);
    CanvasIO.WriteToTextFile(parameters.outFile, bins);
    Console.WriteLine("{0} Output complete", DateTime.Now);
    Console.Out.Flush();
    return 0;
}
/// <summary>
/// Runs CanvasBin. With no intermediate files, a single chromosome is processed and written out
/// as intermediate data. Otherwise the intermediate files are deserialized, the bin size is
/// derived if it was not supplied, and the binned counts are written to the output file.
/// </summary>
/// <param name="parameters">Parsed command-line parameters.</param>
/// <returns>Always 0.</returns>
public static int Run(CanvasBinParameters parameters)
{
    // Per-chromosome data, keyed by chromosome name:
    //   possibleAlignments - one bit per position; 'true' marks a position whose mer is unique in the genome.
    //   observedAlignments - per-position count of observed alignments starting there.
    //   fragmentLengths    - per-position fragment length of the read starting there.
    var possibleAlignments = new Dictionary<string, BitArray>();
    var observedAlignments = new Dictionary<string, HitArray>();
    var fragmentLengths = new Dictionary<string, Int16[]>();

    Console.WriteLine("{0} Parsed command-line", DateTime.Now);

    // No intermediate inputs: this invocation handles one chromosome and stops.
    if (parameters.intermediatePaths.Count == 0)
    {
        BinOneGenomicInterval(parameters, possibleAlignments, observedAlignments, fragmentLengths);
        return 0;
    }

    // Load the intermediate data files.
    var pendingFiles = new List<string>(parameters.intermediatePaths);
    Object dataLock = new object(); // control access to possibleAlignments, observedAlignments, fragmentLengths

    // Limit # of deserialization threads to avoid (rare) protobuf issue.
    int maxWorkers = 1;
    var workers = new List<Thread>();

    Console.WriteLine("Start deserialization:");
    Console.Out.Flush();
    while (workers.Count > 0 || pendingFiles.Count > 0)
    {
        // Drop workers that have finished.
        workers.RemoveAll(worker => !worker.IsAlive);
        if (workers.Count == maxWorkers)
        {
            Thread.Sleep(1000);
            continue;
        }

        while (pendingFiles.Count > 0 && workers.Count < maxWorkers)
        {
            string path = pendingFiles[0];
            pendingFiles.RemoveAt(0);

            var worker = new Thread(() => DeserializeCanvasData(path, possibleAlignments, observedAlignments,
                fragmentLengths, dataLock, parameters.coverageMode));
            worker.Name = "CanvasBin " + path;
            workers.Add(worker);
            Console.WriteLine(worker.Name);
            worker.Start();
        }
    }
    Console.WriteLine("{0} Deserialization complete", DateTime.Now);
    Console.Out.Flush();

    NexteraManifest manifest = null;
    if (parameters.manifestFile != null)
    {
        manifest = new NexteraManifest(parameters.manifestFile, null, Console.WriteLine);
    }

    if (parameters.binSize == -1)
    {
        // Turn the desired # of alignments per bin into the number of possible alignments expected per bin.
        parameters.binSize = CalculateNumberOfPossibleAlignmentsPerBin(parameters.countsPerBin,
            possibleAlignments, observedAlignments, manifest: manifest);
    }

    if (parameters.binSizeOnly)
    {
        // Write bin size to file and stop.
        System.IO.File.WriteAllText(parameters.outFile + ".binsize", "" + parameters.binSize);
        return 0;
    }

    // Read predefined bins when a file was supplied.
    Dictionary<string, List<GenomicBin>> predefinedBins = parameters.predefinedBinsFile != null
        ? ReadPredefinedBins(parameters.predefinedBinsFile)
        : null;

    // Bin alignments.
    List<GenomicBin> bins = BinCounts(parameters.referenceFile, parameters.binSize, parameters.coverageMode, manifest,
        possibleAlignments, observedAlignments, fragmentLengths, predefinedBins, parameters.outFile);

    // Output!
    Console.WriteLine("{0} Output binned counts:", DateTime.Now);
    CanvasIO.WriteToTextFile(parameters.outFile, bins);
    Console.WriteLine("{0} Output complete", DateTime.Now);
    Console.Out.Flush();
    return 0;
}
/// <summary>
/// Handles one chromosome end to end: sets up the alignment arrays, reads observed alignments
/// from the BAM file, applies the optional filter file, screens observed tags against possible
/// alignments, and writes the result to <c>parameters.outFile</c> as protobuf-serialized data.
/// </summary>
/// <param name="parameters">Run parameters (reference, BAM, chromosome, coverage mode, filter and output paths).</param>
/// <param name="possibleAlignments">Per-chromosome possible-alignment bit arrays; populated by this call.</param>
/// <param name="observedAlignments">Per-chromosome observed-alignment hit arrays; populated by this call.</param>
/// <param name="fragmentLengths">Per-chromosome fragment length arrays; populated by this call.</param>
/// <returns>Always 0.</returns>
private static int BinOneGenomicInterval(CanvasBinParameters parameters,
    Dictionary<string, BitArray> possibleAlignments,
    Dictionary<string, HitArray> observedAlignments,
    Dictionary<string, Int16[]> fragmentLengths)
{
    InitializeAlignmentArrays(parameters.referenceFile, parameters.chromosome, parameters.coverageMode,
        possibleAlignments, observedAlignments, fragmentLengths);
    Console.WriteLine("{0} Initialized alignment arrays", DateTime.Now);

    LoadObservedAlignmentsBAM(parameters.bamFile, parameters.isPairedEnd, parameters.chromosome, parameters.coverageMode,
        observedAlignments[parameters.chromosome], fragmentLengths[parameters.chromosome]);
    Console.WriteLine("{0} Loaded observed alignments", DateTime.Now);

    // Filter on BED file.
    if (parameters.filterFile != null)
    {
        ExcludeTagsOverlappingFilterFile(parameters.filterFile, possibleAlignments);
    }

    // Make sure we don't have an 'impossible' observed alignment.
    ScreenObservedTags(observedAlignments, possibleAlignments);

    Console.WriteLine("{0} Serialize intermediate data", DateTime.Now);

    // Output binary intermediate file.
    IntermediateData data = new IntermediateData(possibleAlignments, observedAlignments, fragmentLengths, parameters.coverageMode);

    // A bare file name has no directory component: Path.GetDirectoryName yields "" (null for a
    // root path) and Directory.CreateDirectory would throw, so skip creation in that case.
    string outputDirectory = Path.GetDirectoryName(parameters.outFile);
    if (!string.IsNullOrEmpty(outputDirectory))
    {
        Directory.CreateDirectory(outputDirectory);
    }

    using (FileStream stream = new FileStream(parameters.outFile, FileMode.Create, FileAccess.Write))
    {
        ProtoBuf.Serializer.Serialize<IntermediateData>(stream, data);
    }
    Console.WriteLine("{0} Intermediate data serialized", DateTime.Now);
    return 0;
}