Exemple #1
0
        /// <summary>
        /// Validate scaffold sequence for a given input reads.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateScaffoldSequence(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string libraray = utilityObj.xmlUtil.GetTextValue(nodeName,Constants.LibraryName);
            string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName,Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName,Constants.Mean);
            string inputRedundancy = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.InputRedundancy);
            string expectedSeq = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScaffoldSeq);
            string[] scaffoldSeqNodes = expectedSeq.Split(',');

            // Get the input reads and build kmers
            using (FastAParser parser = new FastAParser(filePath))
            {
                IEnumerable<ISequence> sequenceReads = parser.Parse();

                // Build kmers from step1,graph in step2 
                // Remove the dangling links from graph in step3
                // Remove bubbles form the graph in step4 
                // Pass the graph and build contigs
                // Validate contig reads
                this.KmerLength = Int32.Parse(kmerLength, null);
                this.DanglingLinksThreshold = Int32.Parse(daglingThreshold, null);
                this.DanglingLinksPurger = new DanglingLinksPurger(Int32.Parse(daglingThreshold, null));
                this.RedundantPathsPurger = new RedundantPathsPurger(Int32.Parse(redundantThreshold, null));
                this.RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, null);
                this.SequenceReads.Clear();

                this.SetSequenceReads(sequenceReads.ToList());
                this.CreateGraph();
                this.UnDangleGraph();

                // Build contig.
                this.RemoveRedundancy();
                IEnumerable<ISequence> contigs = this.BuildContigs();

                // Find map paired reads.
                CloneLibrary.Instance.AddLibrary(libraray, float.Parse(mean, null),
                 float.Parse(stdDeviation, null));
                IEnumerable<ISequence> scaffoldSeq;

                using (GraphScaffoldBuilder scaffold = new GraphScaffoldBuilder())
                {
                    scaffoldSeq = scaffold.BuildScaffold(
                       sequenceReads, contigs.ToList(), this.KmerLength, redundancy: Int32.Parse(inputRedundancy, null));
                }

                AlignmentHelpers.CompareSequenceLists(new HashSet<string>(scaffoldSeqNodes), scaffoldSeq.ToList());
            }

            ApplicationLog.WriteLine("PADENA P1 : Scaffold sequence : validation for Padena step6:step8 completed successfully");
        }
 /// <summary>
 /// Build scaffolds from contigs and paired reads (uses Padena Step 6 for assembly).
 /// </summary>
 /// <param name="contigs">List of contigs.</param>
 /// <param name="reads">List of paired reads.</param>
 /// <returns>List of scaffold sequences.</returns>
 private IEnumerable<ISequence> ScaffoldsGenerator(IEnumerable<ISequence> contigs, IEnumerable<ISequence> reads)
 {
     using (GraphScaffoldBuilder scaffoldBuilder = new GraphScaffoldBuilder())
     {
         return scaffoldBuilder.BuildScaffold(reads, contigs.ToList(), this.KmerLength, this.Depth, this.ScaffoldRedundancy);
     }
 }
Exemple #3
0
        /// <summary>
        /// Refine layout in the delta alignments.
        /// </summary>
        public void GenerateScaffolds()
        {
            TimeSpan timeSpan = new TimeSpan();
            Stopwatch runAlgorithm = new Stopwatch();
            FileInfo inputFileinfo = new FileInfo(this.FilePath[0]);
            long inputFileLength = inputFileinfo.Length;
            inputFileinfo = null;

            if (!string.IsNullOrEmpty(this.CloneLibraryName))
            {
                CloneLibrary.Instance.AddLibrary(this.CloneLibraryName, (float)this.MeanLengthOfInsert, (float)this.StandardDeviationOfInsert);
            }

            runAlgorithm.Restart();
            FastAParser parser = new FastAParser();
            IEnumerable<ISequence> contigs = parser.Parse(this.FilePath[0]);
            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("  Processed Contig file: {0}", Path.GetFullPath(this.FilePath[0]));
                Console.Error.WriteLine("            Read/Processing time: {0}", runAlgorithm.Elapsed);
                Console.Error.WriteLine("            File Size           : {0}", inputFileLength);
            }

            inputFileinfo = new FileInfo(this.FilePath[1]);
            inputFileLength = inputFileinfo.Length;

            runAlgorithm.Restart();
            FastAParser readParser = new FastAParser();
            IEnumerable<ISequence> reads = readParser.Parse(this.FilePath[1]);
            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Console.Error.WriteLine();
                Console.Error.WriteLine("  Processed reads file: {0}", Path.GetFullPath(this.FilePath[1]));
                Console.Error.WriteLine("            Read/Processing time: {0}", runAlgorithm.Elapsed);
                Console.Error.WriteLine("            File Size           : {0}", inputFileLength);
            }

            runAlgorithm.Restart();
            IEnumerable<ISequence> scaffolds = null;
            using (GraphScaffoldBuilder scaffoldBuilder = new GraphScaffoldBuilder())
            {
                 scaffolds = scaffoldBuilder.BuildScaffold(reads, contigs.ToList(), this.KmerLength, this.Depth, this.Redundancy);
            } 
            runAlgorithm.Stop();
            timeSpan = timeSpan.Add(runAlgorithm.Elapsed);

            runAlgorithm.Restart();
            this.WriteSequences(scaffolds);
            runAlgorithm.Stop();


            if (this.Verbose)
            {
                Console.Error.WriteLine("  Compute time: {0}", timeSpan);
                Console.Error.WriteLine("  Write() time: {0}", runAlgorithm.Elapsed);
            }
        }
        /// <summary>
        /// Step 6: Build scaffolds from contig list and paired reads.
        /// </summary>
        /// <param name="contigs">List of contigs.</param>
        /// <returns>List of scaffold sequences.</returns>
        protected IList<ISequence> BuildScaffolds(IList<ISequence> contigs)
        {
            if (ScaffoldBuilder == null)
            {
                // Scaffold Builder is a required module for this method. Set this to default.
                ScaffoldBuilder = new GraphScaffoldBuilder();
            }

            return ScaffoldBuilder.BuildScaffold(SequenceReads, contigs, KmerLength, Depth, ScaffoldRedundancy);
        }
Exemple #5
0
        /// <summary>
        /// Generates the Scaffold.
        /// </summary>
        public void GenerateScaffold()
        {
            Output.WriteLine(OutputLevel.Information, Resources.ScaffoldStarting);

            if (this.FileNames.Length != 2)
            {
                Output.WriteLine(OutputLevel.Error, "\nError: A reference file and 1 query file are required.");
                Output.WriteLine(OutputLevel.Required, Resources.ScaffoldHelp);
                return;
            }

            if (!string.IsNullOrEmpty(this.CloneLibraryName))
            {
                CloneLibrary.Instance.AddLibrary(this.CloneLibraryName, (float)this.MeanLengthOfInsert, (float)this.StandardDeviationOfInsert);
            }

            TimeSpan algorithmSpan = new TimeSpan();
            Stopwatch runAlgorithm = new Stopwatch();
            FileInfo refFileinfo = null;

            using (GraphScaffoldBuilder scaffoldBuilder = new GraphScaffoldBuilder())
            {
                refFileinfo = new FileInfo(this.FileNames[0]);
                long refFileLength = refFileinfo.Length;

                runAlgorithm.Restart();
                IEnumerable<ISequence> contigs = AssembleArguments.ParseFile(this.FileNames[0]);
                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Processed contigs file : {0}", Path.GetFullPath(this.FileNames[0]));
                    Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time: {0}", runAlgorithm.Elapsed);
                    Output.WriteLine(OutputLevel.Verbose, "   File Size           : {0}", refFileLength);
                    Output.WriteLine(OutputLevel.Verbose, "   k-mer Length        : {0}", this.KmerLength);
                }

                refFileinfo = new FileInfo(this.FileNames[1]);
                refFileLength = refFileinfo.Length;

                runAlgorithm.Restart();
                IEnumerable<ISequence> reads = AssembleArguments.ParseFile(this.FileNames[1]);
                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Processed reads file   : {0}", Path.GetFullPath(this.FileNames[1]));
                    Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time: {0}", runAlgorithm.Elapsed);
                    Output.WriteLine(OutputLevel.Verbose, "   File Size           : {0}", refFileLength);
                }

                runAlgorithm.Restart();
                ValidateAmbiguousReads(reads);
                runAlgorithm.Stop();

                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", runAlgorithm.Elapsed);
                }

                runAlgorithm.Restart();
                IList<ISequence> scaffolds = scaffoldBuilder.BuildScaffold(reads, contigs.ToList(), this.KmerLength, this.Depth, this.Redundancy);
                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", runAlgorithm.Elapsed);
                }

                runAlgorithm.Restart();
                WriteContigs(scaffolds);
                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Write contigs time: {0}", runAlgorithm.Elapsed);
                    Output.WriteLine(OutputLevel.Verbose, "Total runtime: {0}", algorithmSpan);
                }
            }
        }