Пример #1
0
        public void TestPcaStep3DeletionInReferenceTwo()
        {
            // Original author's sketch of the reads against the reference
            // (original note: "TGCGCA is deleted from reference"):
            //   CTACGATCGGGG      reference
            //   CTACGTGC          read
            //      GCATCG         read
            //     AGCATC          (appears in the sketch only; not passed to the assembler)
            //           GGGG      read
            //       CATCG         read
            var reference = new Sequence(DnaAlphabet.Instance, "CTACGATCGGGG");
            var reads = new List<ISequence>
            {
                new Sequence(DnaAlphabet.Instance, "CTACGTGC"),
                new Sequence(DnaAlphabet.Instance, "GCATCG"),
                new Sequence(DnaAlphabet.Instance, "GGGG"),
                new Sequence(DnaAlphabet.Instance, "CATCG")
            };

            var assembler = new ComparativeGenomeAssembler();
            assembler.LengthOfMum = 3;

            var contigs = assembler.Assemble(new List<ISequence> { reference }, reads);

            // Only the first assembled sequence is checked; its symbols are cast to chars.
            char[] firstContig = contigs.ElementAt(0).Select(symbol => (char)symbol).ToArray();

            Assert.AreEqual("CTACGTGCATCGGGG", new string(firstContig));
        }
Пример #2
0
        public void TestPcaStep2WithTwoReads()
        {
            var assembler = new ComparativeGenomeAssembler();
            assembler.LengthOfMum = 9;
            assembler.KmerLength = 9;

            // Two reads whose IDs carry the ".F" / ".R" suffixes and the library name registered below.
            var forwardRead = new Sequence(DnaAlphabet.Instance, "AACCTTGGCC")
            {
                ID = ">read.F:TestPcaStep2WithTwoReads"
            };
            var reverseRead = new Sequence(DnaAlphabet.Instance, "GGGGGGGGGG")
            {
                ID = ">read.R:TestPcaStep2WithTwoReads"
            };

            CloneLibrary.Instance.AddLibrary("TestPcaStep2WithTwoReads", 61f, 1f);

            var reference = new List<Sequence>
            {
                new Sequence(DnaAlphabet.Instance, "AACCTTGGCCCCCACGATCGCGCTAGATCGCATCGATCCCCAACCTTGGCCGGGGGGGGGG", false)
            };
            var reads = new List<ISequence> { forwardRead, reverseRead };
            var expected = new List<string> { "AACCTTGGCC", "GGGGGGGGGG" };

            TestPcaAssemble(assembler, reference, reads, expected);
        }
Пример #3
0
        /// <summary>
        /// Runs the comparative assembler with the settings stored under the given XML node
        /// and compares the sorted, concatenated output with the expected sequence
        /// (case-insensitively, via upper-casing both sides).
        /// </summary>
        /// <param name="nodeName">Parent Node name in Xml</param>
        /// <param name="isEcOli">
        /// When true the expected sequence is fetched with GetFileTextValue,
        /// otherwise with GetTextValue.
        /// </param>
        public void ValidateComparativeAssembleMethod(string nodeName, bool isEcOli)
        {
            ComparativeGenomeAssembler assembler = new ComparativeGenomeAssembler();
            List<ISequence> references = new List<ISequence>();

            // Assembler settings are stored as text under the parent node.
            string mumLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);
            string kmerLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string fixedSeparationText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FixedSeparationNode);
            string minimumScoreText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MinimumScoreNode);
            string separationFactorText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SeparationFactorNode);
            string maximumSeparationText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MaximumSeparationNode);
            string breakLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BreakLengthNode);

            // Location of the FastA file holding the reference sequence(s).
            string referencePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);

            Assert.IsNotNull(referencePath);
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Comparative P1 : Successfully validated the File Path '{0}'.", referencePath));

            using (FastAParser referenceParser = new FastAParser(referencePath))
            {
                // Copy the parsed sequences into the list while the parser is still open.
                references.AddRange(referenceParser.Parse());
            }

            // Location of the reads file.
            string readsPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

            assembler.LengthOfMum = int.Parse(mumLengthText, CultureInfo.InvariantCulture);
            assembler.KmerLength = int.Parse(kmerLengthText, CultureInfo.InvariantCulture);
            assembler.FixedSeparation = int.Parse(fixedSeparationText, CultureInfo.InvariantCulture);
            assembler.MinimumScore = int.Parse(minimumScoreText, CultureInfo.InvariantCulture);
            assembler.SeparationFactor = float.Parse(separationFactorText, CultureInfo.InvariantCulture);
            assembler.MaximumSeparation = int.Parse(maximumSeparationText, CultureInfo.InvariantCulture);
            assembler.BreakLength = int.Parse(breakLengthText, CultureInfo.InvariantCulture);

            using (FastASequencePositionParser readsParser = new FastASequencePositionParser(readsPath))
            {
                IEnumerable<ISequence> assembled = assembler.Assemble(references, readsParser);

                string expectedSequence = isEcOli
                    ? utilityObj.xmlUtil.GetFileTextValue(nodeName, Constants.ExpectedSequenceNode)
                    : utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);

                // Sort the assembled sequences so the comparison does not depend on output order.
                List<string> contigs = new List<string>();
                foreach (ISequence contig in assembled)
                {
                    contigs.Add(contig.ConvertToString());
                }

                contigs.Sort();

                string expectedUpper = expectedSequence.ToUpperInvariant();
                string actualUpper = String.Join("", contigs).ToUpperInvariant();
                Assert.AreEqual(expectedUpper, actualUpper);
            }
        }
Пример #4
0
        /// <summary>
        /// Assembles the reads in the query file against the reference file and writes
        /// the resulting contigs through <c>WriteContigs</c>.
        /// </summary>
        /// <remarks>
        /// <c>FilePath</c> must hold exactly two entries: the reference file first and the
        /// reads file second. If it does not (or is null) an error is written to the output
        /// and the method returns without doing any work. When <c>Verbose</c> is set, the
        /// time spent in each phase is reported.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// Thrown when any read uses an alphabet that has ambiguity.
        /// </exception>
        public virtual void AssembleSequences()
        {
            // A null FilePath is reported the same way as a wrong number of files
            // rather than surfacing as a NullReferenceException.
            if (this.FilePath == null || this.FilePath.Length != 2)
            {
                Output.WriteLine(OutputLevel.Error, "Error: A reference file and 1 query file are required.");
                return;
            }

            Stopwatch runAlgorithm = new Stopwatch();
            long inputFileLength = new FileInfo(this.FilePath[0]).Length;

            if (!string.IsNullOrEmpty(this.CloneLibraryName))
            {
                CloneLibrary.Instance.AddLibrary(this.CloneLibraryName, (float)this.MeanLengthOfInsert, (float)this.StandardDeviationOfInsert);
            }

            // Phase 1: reference file.
            runAlgorithm.Restart();
            IEnumerable<ISequence> referenceSequences = ParseFile(this.FilePath[0]);
            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed reference file: {0}", Path.GetFullPath(this.FilePath[0]));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time : {0}", runAlgorithm.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size            : {0}", inputFileLength);
            }

            // Phase 2: reads file. Only opening and caching is timed here.
            inputFileLength = new FileInfo(this.FilePath[1]).Length;
            runAlgorithm.Restart();
            FastASequencePositionParser queryParser;
            using (var stream = File.OpenRead(this.FilePath[1]))
            {
                // Parse and cache the sequences.
                // NOTE(review): the parser is used after this stream is closed; this presumably
                // relies on CacheSequencesForRandomAccess having loaded the reads - confirm.
                queryParser = new FastASequencePositionParser(stream, true);
                queryParser.CacheSequencesForRandomAccess();
            }

            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed reads file   : {0}", Path.GetFullPath(this.FilePath[1]));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time: {0}", runAlgorithm.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size           : {0}", inputFileLength);
            }

            // Phase 3: validate the reads. The check runs inside the timed region so that the
            // "Validating reads" figure reported below reflects the work actually done.
            runAlgorithm.Restart();
            var reads = queryParser.Parse();
            if (reads.Any(s => s.Alphabet.HasAmbiguity))
            {
                throw new ArgumentException(Resources.AmbiguousReadsNotSupported);
            }

            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", runAlgorithm.Elapsed);
                Output.WriteLine(OutputLevel.Verbose);
            }

            // Phase 4: assembly.
            runAlgorithm.Restart();
            ComparativeGenomeAssembler assembler = new ComparativeGenomeAssembler();
            assembler.StatusChanged += this.AssemblerStatusChanged;
            assembler.ScaffoldingEnabled = this.Scaffold;
            assembler.KmerLength = this.KmerLength;
            assembler.LengthOfMum = this.MumLength;
            IEnumerable<ISequence> assemblerResult = assembler.Assemble(referenceSequences, queryParser);
            runAlgorithm.Stop();
            TimeSpan assembleTime = runAlgorithm.Elapsed;

            // Phase 5: write the contigs.
            runAlgorithm.Restart();
            this.WriteContigs(assemblerResult);
            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose, "Assemble time: {0}", assembleTime);
                Output.WriteLine(OutputLevel.Verbose, "Write time: {0}", runAlgorithm.Elapsed);
            }
        }
Пример #5
0
        /// <summary>
        /// Assembles the query sequences against the reference and asserts that the number
        /// of assembled sequences equals the number of expected strings and that every
        /// assembled sequence appears in the expected list.
        /// </summary>
        /// <param name="asm">Comparative Genome Assembler.</param>
        /// <param name="reference">Reference sequence.</param>
        /// <param name="query">Query sequence.</param>
        /// <param name="expected">Expected strings.</param>
        private static void TestPcaAssemble(ComparativeGenomeAssembler asm, IEnumerable<ISequence> reference, IEnumerable<ISequence> query, IList<string> expected)
        {
            // Materialize once: the result is typed as IEnumerable, so counting it and then
            // looping over it would otherwise enumerate the assembler output twice.
            var result = asm.Assemble(reference, query).ToList();

            // AreEqual reports both numbers on failure, unlike IsTrue(a == b).
            Assert.AreEqual(expected.Count, result.Count);

            foreach (var act in result)
            {
                string actualStr = new string(act.Select(a => (char)a).ToArray());
                Assert.IsTrue(expected.Contains(actualStr), "Unexpected assembled sequence: " + actualStr);
            }
        }
Пример #6
0
        public void TestPcaStep4WithOverlappingReads()
        {
            var assembler = new ComparativeGenomeAssembler();
            assembler.LengthOfMum = 3;

            var reference = new List<Sequence>
            {
                new Sequence(DnaAlphabet.Instance, "AGAAAAGTTTTCA", false)
            };

            // Two reads that share the "AAAA" run; a single merged sequence is expected.
            var reads = new List<ISequence>
            {
                new Sequence(DnaAlphabet.Instance, "AGAAAA", false),
                new Sequence(DnaAlphabet.Instance, "AAAAGTTTT", false)
            };

            var expected = new List<string> { "AGAAAAGTTTT" };

            TestPcaAssemble(assembler, reference, reads, expected);
        }
Пример #7
0
        public void TestPcaStep3RepeatTest()
        {
            Sequence r = new Sequence(DnaAlphabet.Instance, "AACCTTGGCCCCCACGATCGCGCTAGATCGCATCGATCCCCAACCTTGGCCGGGGGGGGGG");

            // Two reads whose IDs carry the ".F" / ".R" suffixes and the library name "abc" registered below.
            Sequence q = new Sequence(DnaAlphabet.Instance, "AACCTTGGCC");
            q.ID = ">read.F:abc";
            Sequence p = new Sequence(DnaAlphabet.Instance, "GGGGGGGGGG");
            p.ID = ">read.R:abc";
            CloneLibrary.Instance.AddLibrary("abc", (float)61, (float)1);

            ComparativeGenomeAssembler asm = new ComparativeGenomeAssembler() { LengthOfMum = 9 };
            var res = asm.Assemble(new List<ISequence> { r }, new List<ISequence> { q, p }).ToList();

            string[] expectedResult = { "AACCTTGGCC", "GGGGGGGGGG" };

            // Check the count first: without it the test passes vacuously when the assembler
            // returns too few sequences, and fails with an index error instead of an assertion
            // when it returns too many.
            Assert.AreEqual(expectedResult.Length, res.Count);

            for (int i = 0; i < expectedResult.Length; i++)
            {
                string actual = new string(res[i].Select(a => (char)a).ToArray());
                Assert.AreEqual(expectedResult[i], actual);
            }
        }
Пример #8
0
        public void TestPcaStep3InsertionInReference()
        {
            // The read matches the start ("AACCTTGGCCTA") and the end ("CCCACGATCG") of the
            // reference; the reference bases in between are absent from the read.
            var reference = new Sequence(DnaAlphabet.Instance, "AACCTTGGCCTAGTACGGATATTGCCCACGATCG");
            var read = new Sequence(DnaAlphabet.Instance, "AACCTTGGCCTACCCACGATCG");

            var assembler = new ComparativeGenomeAssembler();
            assembler.LengthOfMum = 9;

            var contigs = assembler.Assemble(new List<ISequence> { reference }, new List<ISequence> { read });

            // Only the first assembled sequence is checked; its symbols are cast to chars.
            char[] firstContig = contigs.ElementAt(0).Select(symbol => (char)symbol).ToArray();

            Assert.AreEqual("AACCTTGGCCTACCCACGATCG", new string(firstContig));
        }
Пример #9
0
        public void TestPcaStep3InsertionInReferenceThree()
        {
            Sequence refSeq = new Sequence(DnaAlphabet.Instance, "AAAACCCGGGGTTTTTTACGTGACTGCA");
            Sequence q = new Sequence(DnaAlphabet.Instance, "AAAAGGGG");
            Sequence r = new Sequence(DnaAlphabet.Instance, "ACGTTGCA");

            ComparativeGenomeAssembler asm = new ComparativeGenomeAssembler() { LengthOfMum = 4 };

            // Materialize once so that reading the first and second sequences does not
            // enumerate the assembler output twice.
            var output = asm.Assemble(new List<ISequence> { refSeq }, new List<ISequence> { q, r }).ToList();

            string res = new string(output[0].Select(a => (char)a).ToArray());
            Assert.AreEqual("AAAAGGGG", res);

            res = new string(output[1].Select(a => (char)a).ToArray());
            Assert.AreEqual("ACGTTGCA", res);
        }
Пример #10
0
        /// <summary>
        /// Runs the comparative assembler, with scaffolding enabled, using the settings stored
        /// under the given XML node and compares the ordered, concatenated output with the
        /// expected sequence (case-insensitively, via upper-casing both sides).
        /// </summary>
        /// <param name="nodeName">Parent Node name in Xml</param>
        public void ValidateComparativeAssembleMethod(string nodeName)
        {
            ComparativeGenomeAssembler assembler = new ComparativeGenomeAssembler();
            List<ISequence> references;

            // A null expected value is treated as empty text.
            string expectedText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode) ?? string.Empty;

            // Assembler settings are stored as text under the parent node.
            string mumLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);
            string kmerLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string fixedSeparationText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FixedSeparationNode);
            string minimumScoreText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MinimumScoreNode);
            string separationFactorText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SeparationFactorNode);
            string maximumSeparationText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MaximumSeparationNode);
            string breakLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BreakLengthNode);

            // Location of the FastA file holding the reference sequence(s).
            string referencePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);

            Assert.IsNotNull(referencePath);
            ApplicationLog.WriteLine(string.Format(null, "Comparative BVT : Successfully validated the File Path '{0}'.", referencePath));

            using (FastAParser referenceParser = new FastAParser(referencePath))
            {
                IEnumerable<ISequence> parsed = referenceParser.Parse();
                Assert.IsNotNull(parsed);

                // Copy the parsed sequences into a list while the parser is still open.
                references = new List<ISequence>(parsed);
            }

            // Location of the reads file.
            string readsPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

            assembler.LengthOfMum = int.Parse(mumLengthText, CultureInfo.InvariantCulture);
            assembler.KmerLength = int.Parse(kmerLengthText, CultureInfo.InvariantCulture);
            assembler.ScaffoldingEnabled = true;
            assembler.FixedSeparation = int.Parse(fixedSeparationText, CultureInfo.InvariantCulture);
            assembler.MinimumScore = int.Parse(minimumScoreText, CultureInfo.InvariantCulture);
            assembler.SeparationFactor = float.Parse(separationFactorText, CultureInfo.InvariantCulture);
            assembler.MaximumSeparation = int.Parse(maximumSeparationText, CultureInfo.InvariantCulture);
            assembler.BreakLength = int.Parse(breakLengthText, CultureInfo.InvariantCulture);

            using (var readsParser = new FastASequencePositionParser(readsPath))
            {
                IEnumerable<ISequence> assembled = assembler.Assemble(references, readsParser);

                // Order the assembled sequences so the comparison does not depend on output order.
                IEnumerable<string> orderedContigs = assembled
                    .Select(contig => contig.ConvertToString())
                    .OrderBy(text => text);
                string actualText = string.Join(string.Empty, orderedContigs);

                Assert.AreEqual(expectedText.ToUpperInvariant(), actualText.ToUpperInvariant());
            }
        }