Ejemplo n.º 1
0
        /// <summary>
        /// Processes the single chromosome named in <paramref name="parameters"/>: initializes the
        /// alignment arrays, loads the observed alignments from the BAM file, applies the optional
        /// BED filter, screens the observed tags, and serializes the result to
        /// <c>parameters.outFile</c> with protobuf.
        /// </summary>
        /// <param name="parameters">Run settings; this method reads referenceFile, chromosome, coverageMode, bamFile, isPairedEnd, filterFile and outFile.</param>
        /// <param name="possibleAlignments">Per-chromosome BitArrays, passed to InitializeAlignmentArrays and then filtered/screened.</param>
        /// <param name="observedAlignments">Per-chromosome HitArrays; the entry for parameters.chromosome receives the BAM alignments.</param>
        /// <param name="fragmentLengths">Per-chromosome Int16 arrays; the entry for parameters.chromosome is passed to the BAM loader.</param>
        /// <returns>Always 0.</returns>
        private static int BinOneGenomicInterval(CanvasBinParameters parameters,
                                                 Dictionary <string, BitArray> possibleAlignments,
                                                 Dictionary <string, HitArray> observedAlignments,
                                                 Dictionary <string, Int16[]> fragmentLengths)
        {
            InitializeAlignmentArrays(parameters.referenceFile, parameters.chromosome, parameters.coverageMode, possibleAlignments, observedAlignments, fragmentLengths);
            Console.WriteLine("{0} Initialized alignment arrays", DateTime.Now);
            LoadObservedAlignmentsBAM(parameters.bamFile, parameters.isPairedEnd, parameters.chromosome, parameters.coverageMode, observedAlignments[parameters.chromosome], fragmentLengths[parameters.chromosome]);
            Console.WriteLine("{0} Loaded observed alignments", DateTime.Now);

            // Filter on BED file.
            if (parameters.filterFile != null)
            {
                ExcludeTagsOverlappingFilterFile(parameters.filterFile, possibleAlignments);
            }

            // Make sure we don't have an 'impossible' observed alignment.
            ScreenObservedTags(observedAlignments, possibleAlignments);

            Console.WriteLine("{0} Serialize intermediate data", DateTime.Now);
            // Output binary intermediate file.
            IntermediateData data = new IntermediateData(possibleAlignments, observedAlignments, fragmentLengths, parameters.coverageMode);

            // Path.GetDirectoryName yields "" for a bare file name (and null for a root path);
            // Directory.CreateDirectory rejects both, so only create a directory when one is named.
            string outputDirectory = Path.GetDirectoryName(parameters.outFile);
            if (!string.IsNullOrEmpty(outputDirectory))
            {
                Directory.CreateDirectory(outputDirectory);
            }

            using (FileStream stream = new FileStream(parameters.outFile, FileMode.Create, FileAccess.Write))
            {
                ProtoBuf.Serializer.Serialize <IntermediateData>(stream, data);
            }
            Console.WriteLine("{0} Intermediate data serialized", DateTime.Now);
            return(0);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Program entry point. Logs the command line, parses it, and dispatches to the
        /// fragment binner or to <c>CanvasBin.Run</c> depending on the coverage mode.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <returns>1 when the command line could not be parsed; otherwise the exit code of the selected binner.</returns>
        public static int Main(string[] args)
        {
            CanvasCommon.Utilities.LogCommandLine(args);

            CanvasBinParameters parsed = CanvasBinParameters.ParseCommandLine(args);
            if (parsed == null)
            {
                // ParseCommandLine signals failure with null.
                return 1;
            }

            Console.WriteLine("{0} Parsed command-line", DateTime.Now);

            bool isFragmentMode = parsed.coverageMode == CanvasCommon.CanvasCoverageMode.Fragment;
            if (!isFragmentMode)
            {
                return CanvasBin.Run(parsed);
            }

            FragmentBinner binner = new FragmentBinner(parsed);
            return binner.Bin();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs the fragment binner with the bins_chrU.bed / single-end.bam test data and expects
        /// an ApplicationException whose message reports that not all chromosomes in the BED file
        /// are found in the BAM file.
        /// </summary>
        public void TestAllChromsInBedAreInBam()
        {
            CanvasBinParameters binParameters = new CanvasBinParameters();
            string testDataDir = Path.Combine(Isas.Shared.Utilities.GetAssemblyFolder(typeof(TestCanvasBin)), "Data");
            binParameters.predefinedBinsFile = Path.Combine(testDataDir, "bins_chrU.bed");
            binParameters.bamFile = Path.Combine(testDataDir, "single-end.bam");
            binParameters.isPairedEnd = true;

            FragmentBinner binner = new FragmentBinner(binParameters);

            // Stays false unless the expected ApplicationException is observed.
            bool sawExpectedError = false;
            try
            {
                binner.Bin();
            }
            catch (ApplicationException error)
            {
                string expectedFragment = String.Format("Not all chromosomes in {0} are found in {1}.", binParameters.predefinedBinsFile, binParameters.bamFile);
                sawExpectedError = error.Message.Contains(expectedFragment);
            }

            Assert.IsTrue(sawExpectedError);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Parses the CanvasBin command line into a <c>CanvasBinParameters</c> instance and
        /// validates it.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <returns>
        /// The populated parameters, or null when help was requested, a required option is
        /// missing, an input file does not exist, or the counts-per-bin value is below 1
        /// (outside Fragment mode).
        /// </returns>
        public static CanvasBinParameters ParseCommandLine(string[] args)
        {
            CanvasBinParameters parameters = new CanvasBinParameters();
            // Should I display a help message?
            bool needHelp = false;

            OptionSet p = new OptionSet()
            {
                { "b|bam=", "bam file containing unique alignments", v => parameters.bamFile = v },
                { "r|reference=", "Canvas-ready reference fasta file", v => parameters.referenceFile = v },
                { "c|chr=", "for bam input, only work on this chromosome. Output intermediate binary data. Must follow-up with a single CanvasBin call passing all the intermediate binary data files (see -i option)", v => parameters.chromosome = v },
                { "i|infile=", "intermediate binary data file from individual chromosome. Pass this option multiple times, once for each chromosome", v => parameters.intermediatePaths.Add(v) },
                { "f|filter=", "bed file containing regions to ignore", v => parameters.filterFile = v },
                { "d|bindepth=", "median counts desired in each bin", v => parameters.countsPerBin = Convert.ToInt32(v) },
                { "z|binsize=", "bin size; optional", v => parameters.binSize = Convert.ToInt32(v) },
                { "o|outfile=", "text file to output containing computed bins, or if -c option was specified the intermediate binary data file to output", v => parameters.outFile = v },
                { "y|binsizeonly", "calcualte bin size and exit", v => parameters.binSizeOnly = v != null },
                { "h|help", "show this message and exit", v => needHelp = v != null },
                { "p|paired-end", "input .bam is a paired-end alignment (e.g. from Isaac)", v => parameters.isPairedEnd = v != null },
                { "m|mode=", "coverage measurement mode", v => parameters.coverageMode = CanvasCommon.Utilities.ParseCanvasCoverageMode(v) },
                { "t|manifest=", "Nextera manifest file", v => parameters.manifestFile = v },
                { "n|bins=", "bed file containing predefined bins", v => parameters.predefinedBinsFile = v },
            };

            // GetEntryAssembly() returns null when there is no managed entry point (e.g. under a
            // unit-test runner or an unmanaged host); fall back to this assembly so the banner
            // never dereferences null.
            System.Reflection.Assembly versionSource = System.Reflection.Assembly.GetEntryAssembly()
                                                       ?? System.Reflection.Assembly.GetExecutingAssembly();
            Console.WriteLine("CanvasBin {0}", versionSource.GetName().Version.ToString());

            // Leftover (unrecognized) arguments are not used.
            p.Parse(args);

            // Check for required arguments. Display the help message if any of them are missing.
            // Only the first missing item is reported.
            if (string.IsNullOrEmpty(parameters.referenceFile))
            {
                Console.Error.WriteLine("Please specify the Canvas k-uniqueness reference file.");
                needHelp = true;
            }
            else if (string.IsNullOrEmpty(parameters.outFile))
            {
                Console.Error.WriteLine("Please specify an output file name.");
                needHelp = true;
            }
            else if (parameters.coverageMode != CanvasCommon.CanvasCoverageMode.Fragment && parameters.countsPerBin == -1)
            {
                Console.Error.WriteLine("Please specify counts per bin.");
                needHelp = true;
            }
            else if (parameters.coverageMode != CanvasCommon.CanvasCoverageMode.Fragment &&
                     string.IsNullOrEmpty(parameters.chromosome) && parameters.intermediatePaths.Count == 0)
            {
                Console.Error.WriteLine("Please specify chromsome to measure coverage for.");
                needHelp = true;
            }

            if (needHelp)
            {
                ShowHelp(p);
                return(null);
            }

            // Does the reference file exist?
            if (!File.Exists(parameters.referenceFile))
            {
                Console.WriteLine("CanvasBin.exe: File {0} does not exist! Exiting.", parameters.referenceFile);
                return(null);
            }

            // Does the bam file exist?
            if (parameters.bamFile == null || !File.Exists(parameters.bamFile))
            {
                Console.WriteLine("CanvasBin.exe: Alignment input does not exist! Exiting.");
                return(null);
            }

            // Does the BED file exist?
            if ((parameters.filterFile != null) && (!File.Exists(parameters.filterFile)))
            {
                Console.WriteLine("CanvasBin.exe: File {0} does not exist! Exiting.", parameters.filterFile);
                return(null);
            }

            // Did the user supply a strictly positive number of counts per bin?
            if (parameters.coverageMode != CanvasCommon.CanvasCoverageMode.Fragment && parameters.countsPerBin < 1)
            {
                Console.WriteLine("CanvasBin.exe: Median counts must be strictly positive. Exiting.");
                return(null);
            }

            return(parameters);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Parses the CanvasBin command line into a <c>CanvasBinParameters</c> instance and
        /// validates it.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <returns>
        /// The populated parameters, or null when help was requested, a required option is
        /// missing, an input file does not exist, or the counts-per-bin value is below 1
        /// (outside Fragment mode).
        /// </returns>
        public static CanvasBinParameters ParseCommandLine(string[] args)
        {
            CanvasBinParameters parameters = new CanvasBinParameters();
            // Should I display a help message?
            bool needHelp = false;

            OptionSet p = new OptionSet()
                {
                    { "b|bam=",           "bam file containing unique alignments", v => parameters.bamFile = v },
                    { "r|reference=",     "Canvas-ready reference fasta file", v => parameters.referenceFile = v },
                    { "c|chr=",           "for bam input, only work on this chromosome. Output intermediate binary data. Must follow-up with a single CanvasBin call passing all the intermediate binary data files (see -i option)", v => parameters.chromosome = v},
                    { "i|infile=",        "intermediate binary data file from individual chromosome. Pass this option multiple times, once for each chromosome", v => parameters.intermediatePaths.Add(v)},
                    { "f|filter=",        "bed file containing regions to ignore",             v => parameters.filterFile = v },
                    { "d|bindepth=",      "median counts desired in each bin",                v => parameters.countsPerBin = Convert.ToInt32(v) },
                    { "z|binsize=",       "bin size; optional",                               v => parameters.binSize = Convert.ToInt32(v) },
                    { "o|outfile=",       "text file to output containing computed bins, or if -c option was specified the intermediate binary data file to output",     v => parameters.outFile = v },
                    { "y|binsizeonly",    "calcualte bin size and exit",                      v => parameters.binSizeOnly = v != null },
                    { "h|help",           "show this message and exit",                       v => needHelp = v != null },
                    { "p|paired-end",     "input .bam is a paired-end alignment (e.g. from Isaac)", v => parameters.isPairedEnd = v != null},
                    { "m|mode=",          "coverage measurement mode",                       v => parameters.coverageMode = CanvasCommon.Utilities.ParseCanvasCoverageMode(v) },
                    { "t|manifest=",      "Nextera manifest file",                       v => parameters.manifestFile = v },
                    { "n|bins=",          "bed file containing predefined bins",              v => parameters.predefinedBinsFile = v },
                };

            // GetEntryAssembly() returns null when there is no managed entry point (e.g. under a
            // unit-test runner or an unmanaged host); fall back to this assembly so the banner
            // never dereferences null.
            System.Reflection.Assembly versionSource = System.Reflection.Assembly.GetEntryAssembly()
                ?? System.Reflection.Assembly.GetExecutingAssembly();
            Console.WriteLine("CanvasBin {0}", versionSource.GetName().Version.ToString());

            // Leftover (unrecognized) arguments are not used.
            p.Parse(args);

            // Check for required arguments. Display the help message if any of them are missing.
            // Only the first missing item is reported.
            if (string.IsNullOrEmpty(parameters.referenceFile))
            {
                Console.Error.WriteLine("Please specify the Canvas k-uniqueness reference file.");
                needHelp = true;
            }
            else if (string.IsNullOrEmpty(parameters.outFile))
            {
                Console.Error.WriteLine("Please specify an output file name.");
                needHelp = true;
            }
            else if (parameters.coverageMode != CanvasCommon.CanvasCoverageMode.Fragment && parameters.countsPerBin == -1)
            {
                Console.Error.WriteLine("Please specify counts per bin.");
                needHelp = true;
            }
            else if (parameters.coverageMode != CanvasCommon.CanvasCoverageMode.Fragment
                && string.IsNullOrEmpty(parameters.chromosome) && parameters.intermediatePaths.Count == 0)
            {
                Console.Error.WriteLine("Please specify chromsome to measure coverage for.");
                needHelp = true;
            }

            if (needHelp)
            {
                ShowHelp(p);
                return null;
            }

            // Does the reference file exist?
            if (!File.Exists(parameters.referenceFile))
            {
                Console.WriteLine("CanvasBin.exe: File {0} does not exist! Exiting.", parameters.referenceFile);
                return null;
            }

            // Does the bam file exist?
            if (parameters.bamFile == null || !File.Exists(parameters.bamFile))
            {
                Console.WriteLine("CanvasBin.exe: Alignment input does not exist! Exiting.");
                return null;
            }

            // Does the BED file exist?
            if ((parameters.filterFile != null) && (!File.Exists(parameters.filterFile)))
            {
                Console.WriteLine("CanvasBin.exe: File {0} does not exist! Exiting.", parameters.filterFile);
                return null;
            }

            // Did the user supply a strictly positive number of counts per bin?
            if (parameters.coverageMode != CanvasCommon.CanvasCoverageMode.Fragment && parameters.countsPerBin < 1)
            {
                Console.WriteLine("CanvasBin.exe: Median counts must be strictly positive. Exiting.");
                return null;
            }

            return parameters;
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Creates a binner that keeps a reference to the supplied settings.
 /// </summary>
 /// <param name="parameters">Settings stored on this instance; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="parameters"/> is null.</exception>
 public FragmentBinner(CanvasBinParameters parameters)
 {
     // Fail at construction rather than with a NullReferenceException on first use.
     if (parameters == null)
     {
         throw new System.ArgumentNullException("parameters");
     }

     this.parameters = parameters;
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Implements the Canvas binning algorithm.
        /// When no intermediate files are supplied, a single chromosome is processed via
        /// BinOneGenomicInterval. Otherwise the intermediate files are deserialized, the bin size is
        /// computed if it was not given (binSize == -1), and either the bin size alone or the binned
        /// counts are written out.
        /// </summary>
        /// <param name="parameters">Run settings; <c>binSize</c> is overwritten when it is -1 on entry.</param>
        /// <returns>Always 0.</returns>
        public static int Run(CanvasBinParameters parameters)
        {
            // Will hold a bunch of BitArrays, one for each chromosome.
            // Each one's length corresponds to the length of the chromosome it represents.
            // A position will be marked 'true' if the mer starting at that position is unique in the genome.
            Dictionary <string, BitArray> possibleAlignments = new Dictionary <string, BitArray>();

            // Will hold a bunch of HitArrays, one for each chromosome.
            // Each one's length corresponds to the length of the chromosome it represents.
            // A position will be marked with the number of times the mer starting at that position
            // is observed in the input alignments.
            Dictionary <string, HitArray> observedAlignments = new Dictionary <string, HitArray>();

            // Will hold a bunch of Int16 arrays, one for each chromosome.
            // Each one's length corresponds to the length of the chromosome it represents.
            // A value at a given index represents the fragment length of the read starting at that index.
            Dictionary <string, Int16[]> fragmentLengths = new Dictionary <string, Int16[]>();

            if (parameters.intermediatePaths.Count == 0)
            {
                BinOneGenomicInterval(parameters, possibleAlignments, observedAlignments, fragmentLengths);
                return(0);
            }

            // Load our intermediate data files.
            List <string> inputFiles = new List <string>(parameters.intermediatePaths);
            Object        semaphore  = new object(); // control access to possibleAlignments, observedAlignments, fragmentLengths
            // retrieve the number of processors
            //int processorCoreCount = Environment.ProcessorCount;
            int           processorCoreCount = 1; // Limit # of deserialization threads to avoid (rare) protobuf issue.
            List <Thread> threads            = new List <Thread>();

            Console.WriteLine("Start deserialization:");
            Console.Out.Flush();
            while (threads.Count > 0 || inputFiles.Count > 0)
            {
                // Remove defunct threads:
                threads.RemoveAll(t => !t.IsAlive);

                // Start workers until the cap is reached or no files remain.
                while (inputFiles.Count > 0 && threads.Count < processorCoreCount)
                {
                    string      inputFile      = inputFiles[0];
                    ThreadStart threadDelegate = new ThreadStart(() => DeserializeCanvasData(inputFile, possibleAlignments, observedAlignments, fragmentLengths, semaphore, parameters.coverageMode));
                    Thread      newThread      = new Thread(threadDelegate);
                    threads.Add(newThread);
                    newThread.Name = "CanvasBin " + inputFile;
                    Console.WriteLine(newThread.Name);
                    newThread.Start();
                    inputFiles.RemoveAt(0);
                }

                // Wait for the oldest worker instead of sleeping unconditionally: Join returns as
                // soon as that thread ends (or after 1 s), so a finished worker is noticed promptly
                // and the loop never spins without blocking while workers are still running.
                if (threads.Count > 0)
                {
                    threads[0].Join(1000);
                }
            }
            Console.WriteLine("{0} Deserialization complete", DateTime.Now);
            Console.Out.Flush();

            NexteraManifest manifest = parameters.manifestFile == null ? null : new NexteraManifest(parameters.manifestFile, null, Console.WriteLine);

            if (parameters.binSize == -1)
            {
                // Turn the desired # of alignments per bin into the number of possible alignments expected per bin.
                parameters.binSize = CalculateNumberOfPossibleAlignmentsPerBin(parameters.countsPerBin, possibleAlignments, observedAlignments,
                                                                               manifest: manifest);
            }

            if (parameters.binSizeOnly)
            {
                // Write bin size to file
                System.IO.File.WriteAllText(parameters.outFile + ".binsize", "" + parameters.binSize);
                return(0);
            }

            Dictionary <string, List <GenomicBin> > predefinedBins = null;

            if (parameters.predefinedBinsFile != null)
            {
                // Read predefined bins
                predefinedBins = Utilities.LoadBedFile(parameters.predefinedBinsFile);
            }

            // Bin alignments.
            List <GenomicBin> bins = BinCounts(parameters.referenceFile, parameters.binSize, parameters.coverageMode, manifest,
                                               possibleAlignments, observedAlignments, fragmentLengths, predefinedBins, parameters.outFile);

            // Output!
            Console.WriteLine("{0} Output binned counts:", DateTime.Now);
            CanvasIO.WriteToTextFile(parameters.outFile, bins);
            Console.WriteLine("{0} Output complete", DateTime.Now);
            Console.Out.Flush();
            return(0);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Implements the Canvas binning algorithm.
        /// When no intermediate files are supplied, a single chromosome is processed via
        /// BinOneGenomicInterval. Otherwise the intermediate files are deserialized, the bin size is
        /// computed if it was not given (binSize == -1), and either the bin size alone or the binned
        /// counts are written out.
        /// </summary>
        /// <param name="parameters">Run settings; <c>binSize</c> is overwritten when it is -1 on entry.</param>
        /// <returns>Always 0.</returns>
        public static int Run(CanvasBinParameters parameters)
        {
            // Will hold a bunch of BitArrays, one for each chromosome.
            // Each one's length corresponds to the length of the chromosome it represents.
            // A position will be marked 'true' if the mer starting at that position is unique in the genome.
            Dictionary<string, BitArray> possibleAlignments = new Dictionary<string, BitArray>();

            // Will hold a bunch of HitArrays, one for each chromosome.
            // Each one's length corresponds to the length of the chromosome it represents.
            // A position will be marked with the number of times the mer starting at that position
            // is observed in the input alignments.
            Dictionary<string, HitArray> observedAlignments = new Dictionary<string, HitArray>();

            // Will hold a bunch of Int16 arrays, one for each chromosome.
            // Each one's length corresponds to the length of the chromosome it represents.
            // A value at a given index represents the fragment length of the read starting at that index.
            Dictionary<string, Int16[]> fragmentLengths = new Dictionary<string, Int16[]>();

            Console.WriteLine("{0} Parsed command-line", DateTime.Now);

            if (parameters.intermediatePaths.Count == 0)
            {
                BinOneGenomicInterval(parameters, possibleAlignments, observedAlignments, fragmentLengths);
                return 0;
            }

            // Load our intermediate data files.
            List<string> inputFiles = new List<string>(parameters.intermediatePaths);
            Object semaphore = new object(); // control access to possibleAlignments, observedAlignments, fragmentLengths
            // retrieve the number of processors
            //int processorCoreCount = Environment.ProcessorCount;
            int processorCoreCount = 1; // Limit # of deserialization threads to avoid (rare) protobuf issue.
            List<Thread> threads = new List<Thread>();
            Console.WriteLine("Start deserialization:");
            Console.Out.Flush();
            while (threads.Count > 0 || inputFiles.Count > 0)
            {
                // Remove defunct threads:
                threads.RemoveAll(t => !t.IsAlive);

                // Start workers until the cap is reached or no files remain.
                while (inputFiles.Count > 0 && threads.Count < processorCoreCount)
                {
                    string inputFile = inputFiles[0];
                    ThreadStart threadDelegate = new ThreadStart(() => DeserializeCanvasData(inputFile, possibleAlignments, observedAlignments, fragmentLengths, semaphore, parameters.coverageMode));
                    Thread newThread = new Thread(threadDelegate);
                    threads.Add(newThread);
                    newThread.Name = "CanvasBin " + inputFile;
                    Console.WriteLine(newThread.Name);
                    newThread.Start();
                    inputFiles.RemoveAt(0);
                }

                // Wait for the oldest worker instead of sleeping unconditionally: Join returns as
                // soon as that thread ends (or after 1 s), so a finished worker is noticed promptly
                // and the loop never spins without blocking while workers are still running.
                if (threads.Count > 0)
                {
                    threads[0].Join(1000);
                }
            }
            Console.WriteLine("{0} Deserialization complete", DateTime.Now);
            Console.Out.Flush();

            NexteraManifest manifest = parameters.manifestFile == null ? null : new NexteraManifest(parameters.manifestFile, null, Console.WriteLine);

            if (parameters.binSize == -1)
            {
                // Turn the desired # of alignments per bin into the number of possible alignments expected per bin.
                parameters.binSize = CalculateNumberOfPossibleAlignmentsPerBin(parameters.countsPerBin, possibleAlignments, observedAlignments,
                    manifest: manifest);
            }

            if (parameters.binSizeOnly)
            {
                // Write bin size to file
                System.IO.File.WriteAllText(parameters.outFile + ".binsize", "" + parameters.binSize);
                return 0;
            }

            Dictionary<string, List<GenomicBin>> predefinedBins = null;
            if (parameters.predefinedBinsFile != null)
            {
                // Read predefined bins
                predefinedBins = ReadPredefinedBins(parameters.predefinedBinsFile);
            }

            // Bin alignments.
            List<GenomicBin> bins = BinCounts(parameters.referenceFile, parameters.binSize, parameters.coverageMode, manifest,
                possibleAlignments, observedAlignments, fragmentLengths, predefinedBins, parameters.outFile);
            // Output!
            Console.WriteLine("{0} Output binned counts:", DateTime.Now);
            CanvasIO.WriteToTextFile(parameters.outFile, bins);
            Console.WriteLine("{0} Output complete", DateTime.Now);
            Console.Out.Flush();
            return 0;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Processes the single chromosome named in <paramref name="parameters"/>: initializes the
        /// alignment arrays, loads the observed alignments from the BAM file, applies the optional
        /// BED filter, screens the observed tags, and serializes the result to
        /// <c>parameters.outFile</c> with protobuf.
        /// </summary>
        /// <param name="parameters">Run settings; this method reads referenceFile, chromosome, coverageMode, bamFile, isPairedEnd, filterFile and outFile.</param>
        /// <param name="possibleAlignments">Per-chromosome BitArrays, passed to InitializeAlignmentArrays and then filtered/screened.</param>
        /// <param name="observedAlignments">Per-chromosome HitArrays; the entry for parameters.chromosome receives the BAM alignments.</param>
        /// <param name="fragmentLengths">Per-chromosome Int16 arrays; the entry for parameters.chromosome is passed to the BAM loader.</param>
        /// <returns>Always 0.</returns>
        private static int BinOneGenomicInterval(CanvasBinParameters parameters,
            Dictionary<string, BitArray> possibleAlignments,
            Dictionary<string, HitArray> observedAlignments,
            Dictionary<string, Int16[]> fragmentLengths)
        {
            InitializeAlignmentArrays(parameters.referenceFile, parameters.chromosome, parameters.coverageMode, possibleAlignments, observedAlignments, fragmentLengths);
            Console.WriteLine("{0} Initialized alignment arrays", DateTime.Now);
            LoadObservedAlignmentsBAM(parameters.bamFile, parameters.isPairedEnd, parameters.chromosome, parameters.coverageMode, observedAlignments[parameters.chromosome], fragmentLengths[parameters.chromosome]);
            Console.WriteLine("{0} Loaded observed alignments", DateTime.Now);

            // Filter on BED file.
            if (parameters.filterFile != null)
            {
                ExcludeTagsOverlappingFilterFile(parameters.filterFile, possibleAlignments);
            }

            // Make sure we don't have an 'impossible' observed alignment.
            ScreenObservedTags(observedAlignments, possibleAlignments);

            Console.WriteLine("{0} Serialize intermediate data", DateTime.Now);
            // Output binary intermediate file.
            IntermediateData data = new IntermediateData(possibleAlignments, observedAlignments, fragmentLengths, parameters.coverageMode);

            // Path.GetDirectoryName yields "" for a bare file name (and null for a root path);
            // Directory.CreateDirectory rejects both, so only create a directory when one is named.
            string outputDirectory = Path.GetDirectoryName(parameters.outFile);
            if (!string.IsNullOrEmpty(outputDirectory))
            {
                Directory.CreateDirectory(outputDirectory);
            }

            using (FileStream stream = new FileStream(parameters.outFile, FileMode.Create, FileAccess.Write))
            {
                ProtoBuf.Serializer.Serialize<IntermediateData>(stream, data);
            }
            Console.WriteLine("{0} Intermediate data serialized", DateTime.Now);
            return 0;
        }
Ejemplo n.º 10
0
 /// <summary>
 /// Creates a binner that keeps a reference to the supplied settings.
 /// </summary>
 /// <param name="parameters">Settings stored on this instance; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="parameters"/> is null.</exception>
 public FragmentBinner(CanvasBinParameters parameters)
 {
     // Fail at construction rather than with a NullReferenceException on first use.
     if (parameters == null)
     {
         throw new System.ArgumentNullException("parameters");
     }

     this.parameters = parameters;
 }