예제 #1
0
        /// <summary>
        /// Assembles the supplied sequence reads into contigs by running the
        /// pipeline end to end: graph creation, threshold estimation,
        /// dangling-link removal, redundant-path removal, a second dangling-link
        /// pass, and contig building.
        /// </summary>
        /// <param name="inputSequences">Sequence reads to assemble. Must not be null.</param>
        /// <returns>A <see cref="PadenaAssembly"/> containing the contigs that were built.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="inputSequences"/> is null.
        /// </exception>
        public IDeNovoAssembly Assemble(IEnumerable <ISequence> inputSequences)
        {
            if (inputSequences == null)
            {
                throw new ArgumentNullException("inputSequences");
            }

            sequenceReads = inputSequences;

            // Drop ambiguous reads and prepare the assembler's fields.
            Initialize();

            // Steps 1 and 2: turn the reads into k-mers and build the de Bruijn graph.
            // Every step below is bracketed by its own *Started / *Ended hook.
            CreateGraphStarted();
            CreateGraph();
            CreateGraphEnded();

            // Work out default erosion and coverage thresholds.
            EstimateDefaultValuesStarted();
            EstimateDefaultThresholds();
            EstimateDefaultValuesEnded();

            // Step 3: strip dangling links from the graph.
            UndangleGraphStarted();
            UnDangleGraph();
            UndangleGraphEnded();

            // Step 4: strip redundant paths from the graph.
            RemoveRedundancyStarted();
            RemoveRedundancy();
            RemoveRedundancyEnded();

            // Second dangling-link pass: cleans up links left behind by the
            // redundant-path purge. This pass does not use the *Started / *Ended
            // hooks; the status message is set and raised inline instead.
            statusMessage = string.Format(CultureInfo.CurrentCulture, "\n UndangleGraph - Start time: {0}", DateTime.Now);
            RaiseStatusEvent();
            UnDangleGraph();
            statusMessage = string.Format(CultureInfo.CurrentCulture, "\n UndangleGraph - End time: {0}", DateTime.Now);
            RaiseStatusEvent();

            // Step 5: build the contigs.
            BuildContigsStarted();
            IEnumerable<ISequence> contigs = BuildContigs();
            BuildContigsEnded();

            // Wrap the contigs in the assembly object handed back to the caller.
            PadenaAssembly assembly = new PadenaAssembly();
            assembly.AddContigs(contigs);

            return assembly;
        }
예제 #2
0
        /// <summary>
        /// Assembles the supplied sequence reads and, when requested, follows up
        /// with the scaffold building step on the resulting contigs.
        /// </summary>
        /// <param name="inputSequences">Sequence reads to assemble.</param>
        /// <param name="includeScaffolds">
        /// True to run scaffold building after contig assembly; false to return
        /// the contig-only assembly.
        /// </param>
        /// <returns>
        /// The assembly produced by the single-argument overload, with scaffolds
        /// added when they were requested and scaffold building returned a
        /// non-null list.
        /// </returns>
        public IDeNovoAssembly Assemble(IEnumerable <ISequence> inputSequences, bool includeScaffolds)
        {
            // The single-argument overload does the contig assembly; its result is
            // cast to the concrete type so that scaffolds can be attached.
            PadenaAssembly assembly = (PadenaAssembly)this.Assemble(inputSequences);

            if (!includeScaffolds)
            {
                return assembly;
            }

            // Step 6: build scaffolds from the assembled contigs.
            IList<ISequence> builtScaffolds = this.BuildScaffolds(assembly.ContigSequences);

            // A null result means there is nothing to attach.
            if (builtScaffolds != null)
            {
                assembly.AddScaffolds(builtScaffolds);
            }

            return assembly;
        }
예제 #3
0
        /// <summary>
        /// Assembles the supplied sequence reads into contigs by running each
        /// pipeline step in order, without any progress reporting.
        /// </summary>
        /// <param name="inputSequences">Sequence reads to assemble. Must not be null.</param>
        /// <returns>A <see cref="PadenaAssembly"/> containing the contigs that were built.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="inputSequences"/> is null.
        /// </exception>
        public IDeNovoAssembly Assemble(IEnumerable <ISequence> inputSequences)
        {
            if (inputSequences == null)
            {
                throw new ArgumentNullException("inputSequences");
            }

            sequenceReads = inputSequences;

            // Drop ambiguous reads and prepare the assembler's fields.
            Initialize();

            // Steps 1 and 2: turn the reads into k-mers and build the de Bruijn graph.
            CreateGraph();

            // Work out default erosion and coverage thresholds.
            EstimateDefaultThresholds();

            // Step 3: strip dangling links from the graph.
            UnDangleGraph();

            // Step 4: strip redundant paths from the graph.
            RemoveRedundancy();

            // Second dangling-link pass: cleans up links left behind by the
            // redundant-path purge.
            UnDangleGraph();

            // Step 5: build the contigs and wrap them in the assembly object.
            IEnumerable<ISequence> contigs = BuildContigs();

            PadenaAssembly assembly = new PadenaAssembly();
            assembly.AddContigs(contigs);

            return assembly;
        }
예제 #4
0
        /// <summary>
        /// Assembles the supplied sequence reads into contigs, timing each
        /// pipeline step and reporting the elapsed time (and, after the graph
        /// changing steps, the node count) as it goes.
        /// </summary>
        /// <param name="inputSequences">Sequence reads to assemble. Must not be null.</param>
        /// <returns>A <see cref="PadenaAssembly"/> containing the contigs that were built.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="inputSequences"/> is null.
        /// </exception>
        public virtual IDeNovoAssembly Assemble(IEnumerable <ISequence> inputSequences)
        {
            if (inputSequences == null)
            {
                throw new ArgumentNullException("inputSequences");
            }

            _sequenceReads = inputSequences;

            // Drop ambiguous reads and prepare the assembler's fields.
            Initialize();

            // Steps 1 and 2: turn the reads into k-mers and build the de Bruijn graph.
            // The timer is stopped before the *Ended hook and the reports run, so
            // those calls are not part of the measured time.
            Stopwatch stepTimer = Stopwatch.StartNew();
            CreateGraphStarted();
            CreateGraph();
            stepTimer.Stop();

            CreateGraphEnded();
            TaskTimeSpanReport(stepTimer.Elapsed);
            NodeCountReport();

            // Work out default erosion and coverage thresholds.
            stepTimer = Stopwatch.StartNew();
            EstimateDefaultValuesStarted();
            EstimateDefaultThresholds();
            stepTimer.Stop();

            EstimateDefaultValuesEnded();
            TaskTimeSpanReport(stepTimer.Elapsed);

            // Step 3: strip dangling links from the graph.
            stepTimer = Stopwatch.StartNew();
            UndangleGraphStarted();
            UnDangleGraph();
            stepTimer.Stop();

            UndangleGraphEnded();
            TaskTimeSpanReport(stepTimer.Elapsed);
            NodeCountReport();

            // Step 4: strip redundant paths from the graph. The timer keeps
            // running through the second dangling-link pass below, so the time
            // reported for this step covers both.
            stepTimer = Stopwatch.StartNew();
            RemoveRedundancyStarted();
            RemoveRedundancy();
            NodeCountReport();

            // Second dangling-link pass: cleans up links left behind by the
            // redundant-path purge. Its start and end are announced through the
            // status message and event directly.
            _statusMessage = string.Format(CultureInfo.CurrentCulture, Properties.Resource.SecondaryUndangleGraphStarted, DateTime.Now);
            RaiseStatusEvent();
            UnDangleGraph();
            _statusMessage = string.Format(CultureInfo.CurrentCulture, Properties.Resource.SecondaryUndangleGraphEnded, DateTime.Now);
            RaiseStatusEvent();

            // Step 4 is reported as ended only once the second pass has finished.
            stepTimer.Stop();
            RemoveRedundancyEnded();
            TaskTimeSpanReport(stepTimer.Elapsed);
            NodeCountReport();

            // Step 5: build the contigs.
            stepTimer = Stopwatch.StartNew();
            BuildContigsStarted();
            IEnumerable<ISequence> contigs = BuildContigs();
            stepTimer.Stop();

            BuildContigsEnded();
            TaskTimeSpanReport(stepTimer.Elapsed);

            // Wrap the contigs in the assembly object handed back to the caller.
            PadenaAssembly assembly = new PadenaAssembly();
            assembly.AddContigs(contigs);

            return assembly;
        }
예제 #5
0
        /// <summary>
        /// Assembles the supplied sequence reads into contigs, timing each
        /// pipeline step and reporting the elapsed time (and, after the graph
        /// changing steps, the node count) as it goes.
        /// </summary>
        /// <remarks>
        /// A cancellation token is handed to <c>ReportIntermediateProgress</c>
        /// before the pipeline starts and is cancelled when the method exits,
        /// whether it returns normally or throws.
        /// </remarks>
        /// <param name="inputSequences">Sequence reads to assemble. Must not be null.</param>
        /// <returns>A <see cref="PadenaAssembly"/> containing the contigs that were built.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="inputSequences"/> is null.
        /// </exception>
        public virtual IDeNovoAssembly Assemble(IEnumerable<ISequence> inputSequences)
        {
            if (inputSequences == null)
            {
                throw new ArgumentNullException("inputSequences");
            }

            this.sequenceReads = inputSequences;

            // Presumably starts background progress reporting that watches this
            // token -- the helper is not visible here; confirm before relying on it.
            CancellationTokenSource progressCancellation = new CancellationTokenSource();
            ReportIntermediateProgress(progressCancellation.Token);

            try
            {
                // Drop ambiguous reads and prepare the assembler's fields.
                Initialize();

                // Steps 1 and 2: turn the reads into k-mers and build the de Bruijn graph.
                // The timer is stopped before the *Ended hook and the reports run,
                // so those calls are not part of the measured time.
                Stopwatch stepTimer = Stopwatch.StartNew();
                CreateGraphStarted();
                CreateGraph();
                stepTimer.Stop();

                CreateGraphEnded();
                TaskTimeSpanReport(stepTimer.Elapsed);
                NodeCountReport();

                // Work out default erosion and coverage thresholds.
                stepTimer.Restart();
                EstimateDefaultValuesStarted();
                EstimateDefaultThresholds();
                stepTimer.Stop();

                EstimateDefaultValuesEnded();
                TaskTimeSpanReport(stepTimer.Elapsed);

                // Step 3: strip dangling links from the graph.
                stepTimer.Restart();
                UndangleGraphStarted();
                UnDangleGraph();
                stepTimer.Stop();

                UndangleGraphEnded();
                TaskTimeSpanReport(stepTimer.Elapsed);
                NodeCountReport();

                // Step 4: strip redundant paths from the graph. The timer keeps
                // running through the second dangling-link pass below, so the
                // time reported for this step covers both.
                stepTimer.Restart();
                RemoveRedundancyStarted();
                RemoveRedundancy();
                NodeCountReport();

                // Second dangling-link pass: cleans up links left behind by the
                // redundant-path purge. Its start and end are announced by
                // assigning the StatusMessage property.
                StatusMessage = string.Format(CultureInfo.CurrentCulture, Properties.Resource.SecondaryUndangleGraphStarted, DateTime.Now);
                UnDangleGraph();
                StatusMessage = string.Format(CultureInfo.CurrentCulture, Properties.Resource.SecondaryUndangleGraphEnded, DateTime.Now);

                // Step 4 is reported as ended only once the second pass has finished.
                stepTimer.Stop();
                RemoveRedundancyEnded();
                TaskTimeSpanReport(stepTimer.Elapsed);
                NodeCountReport();

                // Step 5: build the contigs.
                stepTimer.Restart();
                BuildContigsStarted();
                IEnumerable<ISequence> contigs = BuildContigs();
                stepTimer.Stop();

                BuildContigsEnded();
                TaskTimeSpanReport(stepTimer.Elapsed);

                // Wrap the contigs in the assembly object handed back to the caller.
                PadenaAssembly assembly = new PadenaAssembly();
                assembly.AddContigs(contigs);

                return assembly;
            }
            finally
            {
                // Signal cancellation on every exit path, including exceptions.
                progressCancellation.Cancel();
            }
        }
예제 #6
0
        /// <summary>
        /// Validate ParallelDenovoAssembler class properties.
        /// Runs graph creation, dangling-link removal, redundant-path removal,
        /// contig building and scaffold building on the reads from the configured
        /// FASTA file, then checks that a <see cref="PadenaAssembly"/> populated
        /// with the results reports the same contig and scaffold counts, and that
        /// the clone library registered along the way can be read back.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ParallelDenovoAssemblyProperties(string nodeName)
        {
            // Get values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
            string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);

            // Get the input reads and build kmers
            using (FastAParser parser = new FastAParser(filePath))
            {
                IEnumerable<ISequence> sequenceReads = parser.Parse();

                // Build kmers from step1,graph in step2
                // Remove the dangling links from graph in step3
                // Remove bubbles from the graph in step4
                // Pass the graph and build contigs
                // Validate the contigs

                // Parse the k-mer length once; it is needed both for the assembler
                // property and for the redundant-paths purger below.
                int kmerLengthValue = int.Parse(kmerLength, (IFormatProvider)null);
                this.KmerLength = kmerLengthValue;
                this.SequenceReads.Clear();

                this.SetSequenceReads(sequenceReads.ToList());
                this.CreateGraph();
                this.UnDangleGraph();
                this.RedundantPathsPurger = new RedundantPathsPurger(kmerLengthValue + 1);
                this.RemoveRedundancy();
                this.ContigBuilder = new SimplePathContigBuilder();

                // Build contigs. Materialise them once: the sequence is consumed
                // several times below, and re-enumerating a deferred sequence
                // would repeat the work and could yield different instances.
                IList<ISequence> contigs = this.BuildContigs().ToList();

                // Parse the insert statistics once and reuse them for both the
                // expected object and the library registration.
                float meanInsertLength = float.Parse(mean, (IFormatProvider)null);
                float insertStdDeviation = float.Parse(stdDeviation, (IFormatProvider)null);

                CloneLibraryInformation cloneLibInfoObj = new CloneLibraryInformation();
                cloneLibInfoObj.LibraryName = library;
                cloneLibInfoObj.MeanLengthOfInsert = meanInsertLength;
                cloneLibInfoObj.StandardDeviationOfInsert = insertStdDeviation;

                // Build scaffolds.
                CloneLibrary.Instance.AddLibrary(library, meanInsertLength, insertStdDeviation);

                // Scaffold building gets its own copy of the contig list, as in
                // the original code, so any changes it makes to the list cannot
                // affect the contigs added to the assembly below.
                IEnumerable<ISequence> scaffolds = BuildScaffolds(contigs.ToList());
                PadenaAssembly denovoAssembly = new PadenaAssembly();

                denovoAssembly.AddContigs(contigs);
                denovoAssembly.AddScaffolds(scaffolds);

                // Expected value first, actual second, so that a failure message
                // labels the two numbers correctly.
                Assert.AreEqual(contigs.Count, denovoAssembly.ContigSequences.Count());
                Assert.AreEqual(scaffolds.Count(), denovoAssembly.Scaffolds.Count());
                Assert.IsNull(denovoAssembly.Documentation);

                // Validates the Clone Library for the existing clone
                CloneLibraryInformation actualObj = CloneLibrary.Instance.GetLibraryInformation(library);
                Assert.IsTrue(actualObj.Equals(cloneLibInfoObj));

                ApplicationLog.WriteLine("CloneLibraryInformation Equals() is successfully validated");
            }

            // Validate ParallelDenovoAssembler properties.
            ApplicationLog.WriteLine(
                @"Padena BVT : Validated ParallelDenovo Assembly properties");
        }