/// <summary>
        /// Runs layout refinement over the delta alignments read from <c>FilePath[0]</c>,
        /// using the query FastA file in <c>FilePath[1]</c> to back the alignments.
        /// The refined deltas are handed to <c>WriteDelta</c> (targeting
        /// <c>UnsortedLayoutRefinmentOutputFilename</c>) and then to
        /// <c>WriteSortedDelta</c> (targeting <c>OutputFile</c>).
        /// When <c>Verbose</c> is set, file sizes and per-stage timings are reported.
        /// </summary>
        public void RefineLayout()
        {
            var stopwatch   = new Stopwatch();
            var computeTime = TimeSpan.Zero;

            // Stage 1: load the query FastA file and cache it for random access.
            // The file-size lookup is intentionally inside the timed region.
            stopwatch.Restart();
            string queryPath = this.FilePath[1];
            long   fileSize  = new FileInfo(queryPath).Length;

            FastASequencePositionParser queryParser;
            using (FileStream queryStream = File.OpenRead(queryPath))
            {
                queryParser = new FastASequencePositionParser(queryStream, true);
                queryParser.CacheSequencesForRandomAccess();
            }

            stopwatch.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed Query FastA file: {0}", Path.GetFullPath(queryPath));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time   : {0}", stopwatch.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size              : {0}", fileSize);
            }

            // Stage 2: open the delta alignment file. Only the open/construct part is timed here.
            string deltaPath = this.FilePath[0];
            fileSize = new FileInfo(deltaPath).Length;

            stopwatch.Restart();
            using (FileStream deltaStream = File.OpenRead(deltaPath))
            {
                using (var deltas = new DeltaAlignmentCollection(deltaStream, queryParser))
                {
                    stopwatch.Stop();

                    if (this.Verbose)
                    {
                        Output.WriteLine(OutputLevel.Verbose);
                        Output.WriteLine(OutputLevel.Verbose, "Processed DeltaAlignment file: {0}", Path.GetFullPath(deltaPath));
                        Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time      : {0}", stopwatch.Elapsed);
                        Output.WriteLine(OutputLevel.Verbose, "   File Size                 : {0}", fileSize);
                    }

                    // Stage 3: refine the layout and write the (unsorted) result; counted as compute time.
                    stopwatch.Restart();
                    IEnumerable<DeltaAlignment> refinedDeltas = LayoutRefiner.RefineLayout(deltas);
                    var sorter = new DeltaAlignmentSorter();
                    WriteDelta(refinedDeltas, sorter, UnsortedLayoutRefinmentOutputFilename);
                    stopwatch.Stop();
                    computeTime += stopwatch.Elapsed;

                    // Stage 4: produce the sorted output; this interval is reported as write time.
                    stopwatch.Restart();
                    WriteSortedDelta(sorter, UnsortedLayoutRefinmentOutputFilename, queryParser, this.OutputFile);
                    stopwatch.Stop();
                }
            }

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", computeTime);
                Output.WriteLine(OutputLevel.Verbose, "Write time: {0}", stopwatch.Elapsed);
            }
        }
Exemple #2
0
        /// <summary>
        /// Runs a comparative assembly from the configured inputs: parses the reference
        /// file (<c>FilePath[0]</c>) and the reads file (<c>FilePath[1]</c>), validates the
        /// reads, assembles them with a <c>ComparativeGenomeAssembler</c> and hands the
        /// result to <c>WriteContigs</c>. Timing details go to standard error when
        /// <c>Verbose</c> is set.
        /// Calls <c>Environment.Exit(-1)</c> unless exactly two input files were supplied.
        /// </summary>
        public virtual void AssembleSequences()
        {
            if (this.FilePath.Length != 2)
            {
                Console.Error.WriteLine("\nError: A reference file and 1 query file are required.");
                Environment.Exit(-1);
            }

            var stopwatch    = new Stopwatch();
            var assembleTime = TimeSpan.Zero;

            string referencePath = this.FilePath[0];
            long   fileSize      = new FileInfo(referencePath).Length;

            if (!string.IsNullOrEmpty(this.CloneLibraryName))
            {
                CloneLibrary.Instance.AddLibrary(this.CloneLibraryName, (float)this.MeanLengthOfInsert, (float)this.StandardDeviationOfInsert);
            }

            // Reference file.
            stopwatch.Restart();
            IEnumerable<ISequence> referenceSequences = new FastAParser(referencePath).Parse();
            stopwatch.Stop();

            if (this.Verbose)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("  Processed reference file: {0}", Path.GetFullPath(referencePath));
                Console.Error.WriteLine("            Read/Processing time: {0}", stopwatch.Elapsed);
                Console.Error.WriteLine("            File Size           : {0}", fileSize);
            }

            // Reads (query) file.
            string readsPath = this.FilePath[1];
            fileSize = new FileInfo(readsPath).Length;

            stopwatch.Restart();
            var queryParser = new FastASequencePositionParser(readsPath, true);
            queryParser.CacheSequencesForRandomAccess();
            IEnumerable<ISequence> reads = queryParser.Parse();
            stopwatch.Stop();

            if (this.Verbose)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("  Processed reads file: {0}", Path.GetFullPath(readsPath));
                Console.Error.WriteLine("            Read/Processing time: {0}", stopwatch.Elapsed);
                Console.Error.WriteLine("            File Size           : {0}", fileSize);
            }

            // Read validation.
            stopwatch.Restart();
            ValidateAmbiguousReads(reads);
            stopwatch.Stop();

            if (this.Verbose)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("  Time taken for Validating reads: {0}", stopwatch.Elapsed);
            }

            // Assembly. The handler is attached before the options are set, as in the original flow.
            stopwatch.Restart();
            var assembler = new ComparativeGenomeAssembler();
            assembler.StatusChanged += this.AssemblerStatusChanged;
            assembler.ScaffoldingEnabled = this.Scaffold;
            assembler.KmerLength = this.KmerLength;
            assembler.LengthOfMum = this.MumLength;
            IEnumerable<ISequence> assembled = assembler.Assemble(referenceSequences, queryParser);
            stopwatch.Stop();
            assembleTime += stopwatch.Elapsed;

            // Output.
            stopwatch.Restart();
            if (this.OutputFile != null)
            {
                // An output file was specified: no console writer is supplied.
                this.WriteContigs(assembled, null);
                Console.WriteLine(Resources.OutPutWrittenToFileSpecified);
            }
            else
            {
                // No output file: write to the console.
                this.WriteContigs(assembled, Console.Out);
            }

            stopwatch.Stop();

            if (this.Verbose)
            {
                Console.Error.WriteLine("  Assemble time: {0}", assembleTime);
                Console.Error.WriteLine("  Write() time: {0}", stopwatch.Elapsed);
            }
        }
Exemple #3
0
        /// <summary>
        /// Runs a comparative assembly from the configured inputs: parses the reference
        /// file (<c>FilePath[0]</c>) and the reads file (<c>FilePath[1]</c>), validates the
        /// reads, assembles them with a <c>ComparativeGenomeAssembler</c> and passes the
        /// result to <c>WriteContigs</c>. Timing details are reported through
        /// <c>Output</c> when <c>Verbose</c> is set.
        /// Reports an error and returns without doing any work unless exactly two
        /// input files were supplied.
        /// </summary>
        public virtual void AssembleSequences()
        {
            if (this.FilePath.Length != 2)
            {
                Output.WriteLine(OutputLevel.Error, "Error: A reference file and 1 query file are required.");
                return;
            }

            var stopwatch    = new Stopwatch();
            var assembleTime = TimeSpan.Zero;

            string referencePath = this.FilePath[0];
            long   fileSize      = new FileInfo(referencePath).Length;

            if (!string.IsNullOrEmpty(this.CloneLibraryName))
            {
                CloneLibrary.Instance.AddLibrary(this.CloneLibraryName, (float)this.MeanLengthOfInsert, (float)this.StandardDeviationOfInsert);
            }

            // Reference file.
            stopwatch.Restart();
            IEnumerable<ISequence> referenceSequences = ParseFile(referencePath);
            stopwatch.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed reference file: {0}", Path.GetFullPath(referencePath));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time : {0}", stopwatch.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size            : {0}", fileSize);
            }

            // Reads (query) file.
            string readsPath = this.FilePath[1];
            fileSize = new FileInfo(readsPath).Length;

            stopwatch.Restart();
            var queryParser = new FastASequencePositionParser(readsPath, true);
            queryParser.CacheSequencesForRandomAccess();
            IEnumerable<ISequence> reads = queryParser.Parse();
            stopwatch.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed reads file   : {0}", Path.GetFullPath(readsPath));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time: {0}", stopwatch.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size           : {0}", fileSize);
            }

            // Read validation.
            stopwatch.Restart();
            ValidateAmbiguousReads(reads);
            stopwatch.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", stopwatch.Elapsed);
                Output.WriteLine(OutputLevel.Verbose);
            }

            // Assembly. The handler is attached before the options are set, as in the original flow.
            stopwatch.Restart();
            var assembler = new ComparativeGenomeAssembler();
            assembler.StatusChanged += this.AssemblerStatusChanged;
            assembler.ScaffoldingEnabled = this.Scaffold;
            assembler.KmerLength = this.KmerLength;
            assembler.LengthOfMum = this.MumLength;
            IEnumerable<ISequence> assembled = assembler.Assemble(referenceSequences, queryParser);
            stopwatch.Stop();
            assembleTime += stopwatch.Elapsed;

            // Output.
            stopwatch.Restart();
            this.WriteContigs(assembled);
            stopwatch.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose, "Assemble time: {0}", assembleTime);
                Output.WriteLine(OutputLevel.Verbose, "Write time: {0}", stopwatch.Elapsed);
            }
        }
Exemple #4
0
        /// <summary>
        /// Assemble the input sequences into the largest possible contigs.
        /// </summary>
        /// <remarks>
        /// Runs the comparative assembly steps in order: read alignment, repeat resolution,
        /// layout refinement, consensus generation and, when <c>ScaffoldingEnabled</c> is set,
        /// scaffold generation. Delta alignments are passed between steps through temporary
        /// files, which are removed on a best-effort basis when the method exits
        /// (normally or by exception). The progress timer is stopped and disposed on exit.
        /// </remarks>
        /// <param name="referenceSequence">The sequence used as backbone for assembly.</param>
        /// <param name="queryParser">The parser to load the sequences to assemble.</param>
        /// <returns>
        /// The generated contigs, or the scaffolds built from those contigs when
        /// <c>ScaffoldingEnabled</c> is set.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="referenceSequence"/> or <paramref name="queryParser"/> is null.
        /// </exception>
        public IEnumerable <ISequence> Assemble(IEnumerable <ISequence> referenceSequence, FastASequencePositionParser queryParser)
        {
            // Validate first so an invalid call does not allocate a timer that is never released.
            if (referenceSequence == null)
            {
                throw new ArgumentNullException("referenceSequence");
            }

            if (queryParser == null)
            {
                throw new ArgumentNullException("queryParser");
            }

            this._progressTimer          = new Timer(ProgressTimerInterval);
            this._progressTimer.Elapsed += this.ProgressTimerElapsed;

            string readAlignmentOutputFilename            = null;
            string unsortedRepeatResolutionOutputFilename = null;
            string repeateResolutionOutputFilename        = null;
            string unsortedLayoutRefinmentOutputFilename  = null;
            string layoutRefinmentOutputFileName          = null;

            try
            {
                // Converting to list to avoid multiple parse of the reference file if its a yield return
                var refSequences = referenceSequence.ToList();

                // CacheSequencesForRandomAccess will ignore the call if called more than once.
                queryParser.CacheSequencesForRandomAccess();
                IEnumerable <ISequence> reads = queryParser.Parse();

                // Comparative Assembly Steps
                // 1) Read Alignment (Calling NUCmer for aligning reads to reference sequence)
                this.StatusEventStart(Properties.Resources.ReadAlignmentStarted);
                IEnumerable <DeltaAlignment> alignmentBetweenReferenceAndReads = this.ReadAlignment(refSequences,
                                                                                                    reads.Where(a => a.Count >= this.LengthOfMum));

                readAlignmentOutputFilename = Path.GetTempFileName();
                WriteDelta(alignmentBetweenReferenceAndReads, readAlignmentOutputFilename);
                this.StatusEventEnd(Properties.Resources.ReadAlignmentEnded);

                // 2) Repeat Resolution
                this.StatusEventStart(Properties.Resources.RepeatResolutionStarted);
                DeltaAlignmentSorter sorter;

                unsortedRepeatResolutionOutputFilename = Path.GetTempFileName();
                using (DeltaAlignmentCollection deltaAlignmentFromReadAlignment = new DeltaAlignmentCollection(readAlignmentOutputFilename, queryParser))
                {
                    IEnumerable <DeltaAlignment> repeatResolvedDeltas = RepeatResolution(deltaAlignmentFromReadAlignment);
                    sorter = new DeltaAlignmentSorter(refSequences[0].Count);
                    WriteUnsortedDelta(repeatResolvedDeltas, sorter, unsortedRepeatResolutionOutputFilename);
                }

                this.StatusEventEnd(Properties.Resources.RepeatResolutionEnded);
                this.StatusEventStart(Properties.Resources.SortingResolvedDeltasStarted);

                repeateResolutionOutputFilename = Path.GetTempFileName();
                WriteSortedDelta(sorter, unsortedRepeatResolutionOutputFilename, queryParser, repeateResolutionOutputFilename);
                this.StatusEventEnd(Properties.Resources.SortingResolvedDeltasEnded);

                // 3) Layout Refinement
                this.StatusEventStart(Properties.Resources.LayoutRefinementStarted);

                layoutRefinmentOutputFileName = Path.GetTempFileName();
                using (DeltaAlignmentCollection unsortedDeltaCollectionForLayoutRefinment = new DeltaAlignmentCollection(repeateResolutionOutputFilename, queryParser))
                {
                    unsortedLayoutRefinmentOutputFilename = Path.GetTempFileName();
                    IEnumerable <DeltaAlignment> layoutRefinedDeltas = LayoutRefinment(unsortedDeltaCollectionForLayoutRefinment);
                    sorter = new DeltaAlignmentSorter(refSequences[0].Count);
                    WriteUnsortedDelta(layoutRefinedDeltas, sorter, unsortedLayoutRefinmentOutputFilename);
                    WriteSortedDelta(sorter, unsortedLayoutRefinmentOutputFilename, queryParser, layoutRefinmentOutputFileName);
                }

                this.StatusEventEnd(Properties.Resources.LayoutRefinementEnded);

                // 4) Consensus Generation
                this.StatusEventStart(Properties.Resources.ConsensusGenerationStarted);
                IList <ISequence> contigs;
                using (var delta = new DeltaAlignmentCollection(layoutRefinmentOutputFileName, queryParser))
                {
                    // Materialize while the backing collection is still open.
                    contigs = this.ConsensusGenerator(delta).ToList();
                }
                this.StatusEventEnd(Properties.Resources.ConsensusGenerationEnded);

                if (this.ScaffoldingEnabled)
                {
                    // 5) Scaffold Generation
                    this.StatusEventStart(Properties.Resources.ScaffoldGenerationStarted);
                    IEnumerable <ISequence> scaffolds = this.ScaffoldsGenerator(contigs, reads);
                    this.StatusEventEnd(Properties.Resources.ScaffoldGenerationEnded);
                    return(scaffolds);
                }
                else
                {
                    return(contigs);
                }
            }
            finally
            {
                // A new timer is created on every call, so release this one here.
                this._progressTimer.Stop();
                this._progressTimer.Dispose();

                // Cleanup temp files. Each deletion is independent so one failure
                // neither leaks the remaining files nor hides an in-flight exception.
                TryDeleteTempFile(readAlignmentOutputFilename);
                TryDeleteTempFile(unsortedRepeatResolutionOutputFilename);
                TryDeleteTempFile(repeateResolutionOutputFilename);
                TryDeleteTempFile(unsortedLayoutRefinmentOutputFilename);
                TryDeleteTempFile(layoutRefinmentOutputFileName);
            }
        }

        /// <summary>
        /// Deletes a temporary file if a path was assigned, ignoring I/O and access failures.
        /// </summary>
        /// <param name="path">Path of the temporary file; null or empty means nothing to delete.</param>
        private static void TryDeleteTempFile(string path)
        {
            if (string.IsNullOrEmpty(path))
            {
                return;
            }

            try
            {
                File.Delete(path);
            }
            catch (IOException)
            {
                // Deliberately ignored: a leftover temp file must not fail the assembly
                // or replace the exception that is already propagating.
            }
            catch (UnauthorizedAccessException)
            {
                // Deliberately ignored for the same reason as above.
            }
        }
Exemple #5
0
        /// <summary>
        /// Assemble the input sequences into the largest possible contigs.
        /// </summary>
        /// <remarks>
        /// Runs the comparative assembly steps in order: read alignment, repeat resolution,
        /// layout refinement, consensus generation and, when <c>ScaffoldingEnabled</c> is set,
        /// scaffold generation. Delta alignments are passed between steps through temporary
        /// streams obtained from <c>PlatformManager.Services.CreateTempStream()</c>; every
        /// stream that was created is disposed when the method exits.
        /// </remarks>
        /// <param name="referenceSequence">The sequence used as backbone for assembly.</param>
        /// <param name="queryParser">The parser to load the sequences to assemble.</param>
        /// <returns>
        /// The generated contigs, or the scaffolds built from those contigs when
        /// <c>ScaffoldingEnabled</c> is set.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="referenceSequence"/> or <paramref name="queryParser"/> is null.
        /// </exception>
        public IEnumerable <ISequence> Assemble(IEnumerable <ISequence> referenceSequence, FastASequencePositionParser queryParser)
        {
            if (referenceSequence == null)
            {
                throw new ArgumentNullException("referenceSequence");
            }

            if (queryParser == null)
            {
                throw new ArgumentNullException("queryParser");
            }

            // Temporary streams, declared up front so the finally block can release
            // whichever ones were created before an exit.
            Stream alignedStream         = null;
            Stream unsortedResolvedStream = null;
            Stream resolvedStream        = null;
            Stream unsortedRefinedStream = null;
            Stream refinedStream         = null;

            try
            {
                // Materialize once so a lazily parsed reference is not parsed repeatedly.
                List<ISequence> references = referenceSequence.ToList();

                // Repeated calls to CacheSequencesForRandomAccess are ignored by the parser.
                queryParser.CacheSequencesForRandomAccess();
                IEnumerable<ISequence> reads = queryParser.Parse();

                // Step 1: read alignment (NUCmer) of the reads against the reference.
                this.StatusEventStart(Properties.Resource.ReadAlignmentStarted);
                IEnumerable<ISequence> eligibleReads = reads.Where(read => read.Count >= this.LengthOfMum);
                IEnumerable<DeltaAlignment> readAlignments = this.ReadAlignment(references, eligibleReads);

                alignedStream = PlatformManager.Services.CreateTempStream();
                WriteDelta(readAlignments, alignedStream);
                this.StatusEventEnd(Properties.Resource.ReadAlignmentEnded);

                // Step 2: repeat resolution, followed by sorting of the resolved deltas.
                this.StatusEventStart(Properties.Resource.RepeatResolutionStarted);
                DeltaAlignmentSorter sorter;

                unsortedResolvedStream = PlatformManager.Services.CreateTempStream();
                using (var alignedDeltas = new DeltaAlignmentCollection(alignedStream, queryParser))
                {
                    IEnumerable<DeltaAlignment> resolvedDeltas = RepeatResolution(alignedDeltas);
                    sorter = new DeltaAlignmentSorter(references[0].Count);
                    WriteUnsortedDelta(resolvedDeltas, sorter, unsortedResolvedStream);
                }

                this.StatusEventEnd(Properties.Resource.RepeatResolutionEnded);
                this.StatusEventStart(Properties.Resource.SortingResolvedDeltasStarted);

                resolvedStream = PlatformManager.Services.CreateTempStream();
                WriteSortedDelta(sorter, unsortedResolvedStream, queryParser, resolvedStream);
                this.StatusEventEnd(Properties.Resource.SortingResolvedDeltasEnded);

                // Step 3: layout refinement (unsorted write, then sorted write).
                this.StatusEventStart(Properties.Resource.LayoutRefinementStarted);

                refinedStream = PlatformManager.Services.CreateTempStream();
                using (var resolvedDeltaCollection = new DeltaAlignmentCollection(resolvedStream, queryParser))
                {
                    unsortedRefinedStream = PlatformManager.Services.CreateTempStream();
                    IEnumerable<DeltaAlignment> refinedDeltas = LayoutRefinment(resolvedDeltaCollection);
                    sorter = new DeltaAlignmentSorter(references[0].Count);
                    WriteUnsortedDelta(refinedDeltas, sorter, unsortedRefinedStream);
                    WriteSortedDelta(sorter, unsortedRefinedStream, queryParser, refinedStream);
                }

                this.StatusEventEnd(Properties.Resource.LayoutRefinementEnded);

                // Step 4: consensus generation.
                this.StatusEventStart(Properties.Resource.ConsensusGenerationStarted);
                IList<ISequence> contigs;
                using (var refinedDeltaCollection = new DeltaAlignmentCollection(refinedStream, queryParser))
                {
                    // Materialize while the backing collection is still open.
                    contigs = this.ConsensusGenerator(refinedDeltaCollection).ToList();
                }

                this.StatusEventEnd(Properties.Resource.ConsensusGenerationEnded);

                if (!this.ScaffoldingEnabled)
                {
                    return contigs;
                }

                // Step 5: scaffold generation.
                this.StatusEventStart(Properties.Resource.ScaffoldGenerationStarted);
                IEnumerable<ISequence> scaffolds = this.ScaffoldsGenerator(contigs, reads);
                this.StatusEventEnd(Properties.Resource.ScaffoldGenerationEnded);
                return scaffolds;
            }
            finally
            {
                // Release the temporary streams that were created, in a fixed order.
                Stream[] tempStreams =
                {
                    alignedStream,
                    unsortedResolvedStream,
                    resolvedStream,
                    unsortedRefinedStream,
                    refinedStream
                };

                foreach (Stream tempStream in tempStreams)
                {
                    if (tempStream != null)
                    {
                        tempStream.Dispose();
                    }
                }
            }
        }