Ejemplo n.º 1
0
        /// <summary>
        /// Validate Assembled paths for a given input reads.
        /// Builds contigs from the reads, maps mate pairs onto the contigs, traces
        /// scaffold paths through the contig graph, purges the paths and asserts that
        /// every sequence built from a remaining path is one of the expected sequences.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateAssembledPath(string nodeName)
        {
            // Read the test configuration for this node.
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string libraray = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
            string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
            string expectedDepth = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DepthNode);
            string expectedScaffoldPathCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScaffoldPathCount);
            string[] assembledPath = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SequencePathNode);

            // Get the input reads. Backslashes in the configured path are replaced by the
            // platform separator so the same test data resolves on every platform.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                // Materialize the reads before the parser is closed.
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Release the input file even when parsing throws.
                parser.Close();
            }

            // Parse the numeric settings once and reuse them below.
            int kmerLengthValue = Int32.Parse(kmerLength, null);
            int danglingThresholdValue = Int32.Parse(daglingThreshold, null);
            int redundantThresholdValue = Int32.Parse(redundantThreshold, null);

            // Build kmers from step1,graph in step2
            // Remove the dangling links from graph in step3
            // Remove bubbles form the graph in step4
            // Pass the graph and build contigs
            // Validate contig reads
            this.KmerLength = kmerLengthValue;
            this.DanglingLinksThreshold = danglingThresholdValue;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingThresholdValue);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantThresholdValue);
            this.RedundantPathLengthThreshold = redundantThresholdValue;
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            ContigGraph graph = new ContigGraph();
            this.UnDangleGraph();

            // Build contig.
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();
            IEnumerable<ISequence> contigs = this.BuildContigs();
            IList<ISequence> listContigs = contigs.ToList();

            IList<ISequence> sortedContigs = SortContigsData(listContigs);
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

            // Find map paired reads.
            CloneLibrary.Instance.AddLibrary(libraray, float.Parse(mean, null), float.Parse(stdDeviation, null));
            MatePairMapper mapPairedReads = new MatePairMapper();
            ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

            // Filter contigs based on the orientation.
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

            DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
            dist.CalculateDistance();

            // The contig graph is built from a second enumeration of the contigs, as before.
            graph.BuildContigGraph(contigs.ToList(), this.KmerLength);

            // Validate ScaffoldPath using BFS.
            TracePath trace = new TracePath();
            IList<ScaffoldPath> paths = trace.FindPaths(
                graph, contigpairedReads, kmerLengthValue, Int32.Parse(expectedDepth, null));

            Assert.AreEqual(expectedScaffoldPathCount, paths.Count.ToString((IFormatProvider)null));

            // Assemble paths.
            PathPurger pathsAssembler = new PathPurger();
            pathsAssembler.PurgePath(paths);

            // Get sequences from assembled path.
            IList<ISequence> seqList =
                paths.Select(path => path.BuildSequenceFromPath(graph, kmerLengthValue)).ToList();

            // Validate assembled sequence paths.
            foreach (ISequence assembled in seqList)
            {
                string sequence = assembled.ConvertToString();
                Assert.IsTrue(assembledPath.Contains(sequence), "Could not locate " + sequence);
            }

            ApplicationLog.WriteLine(
                "Padena BVT : AssemblePath() validation for Padena step6:step7 completed successfully");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Validate scaffold paths for a given input reads.
        /// Builds contigs from the reads, maps and filters mate pairs, traces scaffold
        /// paths through the contig graph and asserts that every node sequence of the
        /// first path (or its reverse complement) is one of the expected scaffold nodes.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateScaffoldPath(string nodeName)
        {
            // Read the test configuration for this node.
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string[] expectedScaffoldNodes = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ScaffoldNodes);
            string libraray = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
            string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
            string expectedDepth = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DepthNode);

            // Get the input reads. Backslashes in the configured path are replaced by the
            // platform separator, matching the other helpers that open test data files.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                // Materialize the reads before the parser is closed.
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Release the input file even when parsing throws.
                parser.Close();
            }

            // Parse the numeric settings once and reuse them below.
            int kmerLengthValue = Int32.Parse(kmerLength, null);
            int danglingThresholdValue = Int32.Parse(daglingThreshold, null);
            int redundantThresholdValue = Int32.Parse(redundantThreshold, null);

            // Build kmers from step1,graph in step2
            // Remove the dangling links from graph in step3
            // Remove bubbles form the graph in step4
            // Pass the graph and build contigs
            // Validate contig reads
            this.KmerLength = kmerLengthValue;
            this.DanglingLinksThreshold = danglingThresholdValue;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingThresholdValue);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantThresholdValue);
            this.RedundantPathLengthThreshold = redundantThresholdValue;
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            ContigGraph graph = new ContigGraph();
            this.UnDangleGraph();

            // Build contig.
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();
            IEnumerable<ISequence> contigs = this.BuildContigs();

            IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

            // Find map paired reads.
            CloneLibrary.Instance.AddLibrary(libraray, float.Parse(mean, null), float.Parse(stdDeviation, null));

            MatePairMapper mapPairedReads = new MatePairMapper();
            ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

            // Filter contigs based on the orientation.
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

            DistanceCalculator dist = new DistanceCalculator(contigpairedReads);
            dist.CalculateDistance();

            graph.BuildContigGraph(contigs.ToList(), this.KmerLength);

            // Validate ScaffoldPath using BFS.
            TracePath trace = new TracePath();
            IList<ScaffoldPath> paths = trace.FindPaths(
                graph, contigpairedReads, kmerLengthValue, Int32.Parse(expectedDepth, null));

            // Only the first traced path is checked.
            ScaffoldPath scaffold = paths.First();

            foreach (KeyValuePair<Node, Edge> kvp in scaffold)
            {
                ISequence seq = graph.GetNodeSequence(kvp.Key);
                string sequence = seq.ConvertToString();
                string reversedSequence = seq.GetReverseComplementedSequence().ConvertToString();

                // A node is accepted in either orientation.
                bool isExpected = expectedScaffoldNodes.Contains(sequence)
                    || expectedScaffoldNodes.Contains(reversedSequence);
                Assert.IsTrue(isExpected, "Failed to find " + sequence + ", or " + reversedSequence);
            }

            ApplicationLog.WriteLine("PADENA P1 : FindPaths() validation for Padena step6:step6 completed successfully");
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Validate Parallel Denovo Assembly Assembled sequences.
        /// Runs ParallelDeNovoAssembler.Assemble() on the input reads and asserts the
        /// number of assembled sequences and that each of them matches the expected data.
        /// </summary>
        /// <param name="nodeName">XML node used to validate different test scenarios</param>
        internal void ValidatePadenaAssembledSeqs(string nodeName)
        {
            // Get values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string libraray = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
            string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
            string assembledSequences = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequencePathNode);
            string assembledSeqCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AssembledSeqCountNode);
            string[] updatedAssembledSeqs = assembledSequences.Split(',');

            // Get the input reads. Backslashes in the configured path are replaced by the
            // platform separator, matching the other helpers that open test data files.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                // Materialize the reads before the parser is closed.
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Release the input file even when parsing throws.
                parser.Close();
            }

            // Create a ParallelDeNovoAssembler instance and make sure it is disposed.
            ParallelDeNovoAssembler denovoObj = new ParallelDeNovoAssembler();
            try
            {
                denovoObj.KmerLength = Int32.Parse(kmerLength, (IFormatProvider)null);
                denovoObj.DanglingLinksThreshold = Int32.Parse(daglingThreshold, (IFormatProvider)null);
                denovoObj.RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, (IFormatProvider)null);

                CloneLibrary.Instance.AddLibrary(
                    libraray,
                    float.Parse(mean, (IFormatProvider)null),
                    float.Parse(stdDeviation, (IFormatProvider)null));

                // Re-create every read as a DNA sequence, mapping each symbol through the
                // symbol value map of the first read's alphabet.
                byte[] symbols = sequenceReads.ElementAt(0).Alphabet.GetSymbolValueMap();
                IEnumerable<ISequence> dnaReads = sequenceReads.Select(
                    a => new Sequence(Alphabets.DNA, a.Select(b => symbols[b]).ToArray()) { ID = a.ID });

                IDeNovoAssembly assembly = denovoObj.Assemble(dnaReads, true);
                IList<ISequence> assembledSequenceList = assembly.AssembledSequences.ToList();

                // Validate assembled sequences.
                Assert.AreEqual(assembledSeqCount, assembledSequenceList.Count.ToString((IFormatProvider)null));

                foreach (ISequence assembled in assembledSequenceList)
                {
                    // NOTE(review): the forward check is a substring match against the whole
                    // comma-separated expectation, while the reverse-complement check is an
                    // exact match against its elements - confirm this asymmetry is intended.
                    Assert.IsTrue(
                        assembledSequences.Contains(
                            new string(assembled.Select(a => (char)a).ToArray()))
                        || updatedAssembledSeqs.Contains(
                            new string(assembled.GetReverseComplementedSequence().Select(a => (char)a).ToArray())));
                }
            }
            finally
            {
                denovoObj.Dispose();
            }

            ApplicationLog.WriteLine("Padena P1 : Assemble() validation for Padena step6:step7 completed successfully");
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Validate Add library information in existing libraries.
        /// Registers the library with CloneLibrary, maps the input reads to mate pairs
        /// and asserts the pair count, the forward/reverse reads and the library data
        /// reported by every pair.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        /// <param name="IsLibraryInfo">
        /// True to register the library through a CloneLibraryInformation object,
        /// false to register it through the name/mean/deviation overload.
        /// </param>
        internal void AddLibraryInformation(string nodeName, bool IsLibraryInfo)
        {
            // Read the test configuration for this node.
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string expectedPairedReadsCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.PairedReadsCountNode);
            string[] backwardReadsNode = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.BackwardReadsNode);
            string[] forwardReadsNode = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ForwardReadsNode);
            string expectedLibraray = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
            string expectedStdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);

            // Get the input reads. Backslashes in the configured path are replaced by the
            // platform separator, matching the other helpers that open test data files.
            IList<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                // Materialize the reads before the parser is closed.
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Release the input file even when parsing throws.
                parser.Close();
            }

            // Add a new library information.
            float meanValue = float.Parse(mean, (IFormatProvider)null);
            float stdDeviationValue = float.Parse(expectedStdDeviation, (IFormatProvider)null);
            if (IsLibraryInfo)
            {
                CloneLibraryInformation libraryInfo = new CloneLibraryInformation();
                libraryInfo.LibraryName = expectedLibraray;
                libraryInfo.MeanLengthOfInsert = meanValue;
                libraryInfo.StandardDeviationOfInsert = stdDeviationValue;
                CloneLibrary.Instance.AddLibrary(libraryInfo);
            }
            else
            {
                CloneLibrary.Instance.AddLibrary(expectedLibraray, meanValue, stdDeviationValue);
            }

            // Convert reads to map paired reads.
            MatePairMapper pair = new MatePairMapper();
            IList<MatePair> pairedreads = pair.Map(sequenceReads);

            // Validate Map paired reads.
            Assert.AreEqual(expectedPairedReadsCount, pairedreads.Count.ToString((IFormatProvider)null));

            foreach (MatePair matePair in pairedreads)
            {
                string forwardRead = new string(matePair.GetForwardRead(sequenceReads).Select(a => (char)a).ToArray());
                string reverseRead = new string(matePair.GetReverseRead(sequenceReads).Select(a => (char)a).ToArray());

                Assert.IsTrue(forwardReadsNode.Contains(forwardRead));
                Assert.IsTrue(backwardReadsNode.Contains(reverseRead));
                Assert.AreEqual(expectedStdDeviation,
                    matePair.StandardDeviationOfLibrary.ToString((IFormatProvider)null));
                Assert.AreEqual(expectedLibraray, matePair.Library.ToString((IFormatProvider)null));
                Assert.AreEqual(mean, matePair.MeanLengthOfLibrary.ToString((IFormatProvider)null));
            }

            ApplicationLog.WriteLine(@"Padena P1 : Map paired reads has been verified successfully");
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Validate Filter contig nodes.
        /// Builds contigs from the reads, maps mate pairs onto them, filters the pairs
        /// by orientation and asserts the number of filtered entries and the read start
        /// positions of the pairs linking the first two sorted contigs.
        /// </summary>
        /// <param name="nodeName">xml node name used for a different testcase.</param>
        /// <param name="isFirstContig">
        /// True to look up the pairs keyed by the first sorted contig (towards the second),
        /// false to look up the pairs keyed by the second sorted contig (towards the first).
        /// </param>
        internal void ValidateFilterPaired(string nodeName, bool isFirstContig)
        {
            // Read the test configuration for this node.
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string expectedContigPairedReadsCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ContigPairedReadsCount);
            string forwardReadStartPos = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ForwardReadStartPos);
            string reverseReadStartPos = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ReverseReadStartPos);
            string reverseComplementStartPos = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.RerverseReadReverseCompPos);
            string[] expectedForwardReadStartPos = forwardReadStartPos.Split(',');
            string[] expectedReverseReadStartPos = reverseReadStartPos.Split(',');
            string[] expectedReverseComplementStartPos = reverseComplementStartPos.Split(',');

            // Get the input reads. Backslashes in the configured path are replaced by the
            // platform separator, matching the other helpers that open test data files.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                // Materialize the reads before the parser is closed.
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Release the input file even when parsing throws.
                parser.Close();
            }

            // Parse the numeric settings once and reuse them below.
            int kmerLengthValue = Int32.Parse(kmerLength, (IFormatProvider)null);
            int danglingThresholdValue = Int32.Parse(daglingThreshold, (IFormatProvider)null);
            int redundantThresholdValue = Int32.Parse(redundantThreshold, (IFormatProvider)null);

            // Build kmers from step1,graph in step2
            // Remove the dangling links from graph in step3
            // Remove bubbles form the graph in step4
            // Pass the graph and build contigs
            // Validate contig reads
            this.KmerLength = kmerLengthValue;
            this.DanglingLinksThreshold = danglingThresholdValue;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingThresholdValue);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantThresholdValue);
            this.RedundantPathLengthThreshold = redundantThresholdValue;
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            this.UnDangleGraph();

            // Build contig.
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();
            IEnumerable<ISequence> contigs = this.BuildContigs();

            IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

            // Find map paired reads.
            MatePairMapper mapPairedReads = new MatePairMapper();
            ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

            // Filter contigs based on the orientation.
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

            Assert.AreEqual(expectedContigPairedReadsCount,
                contigpairedReads.Values.Count.ToString((IFormatProvider)null));

            // Validate Contig paired reads after filtering contig sequences.
            // The flag only decides which of the first two contigs is the outer key.
            ISequence firstSeq = sortedContigs[0];
            ISequence secondSeq = sortedContigs[1];
            ISequence outerKey = isFirstContig ? firstSeq : secondSeq;
            ISequence innerKey = isFirstContig ? secondSeq : firstSeq;

            Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads[outerKey];
            IList<ValidMatePair> valid = SortPairedReads(map[innerKey], sequenceReads);

            for (int index = 0; index < valid.Count; index++)
            {
                ValidMatePair matePair = valid[index];

                // The expected forward position may match either of the first two entries.
                Assert.IsTrue(
                    expectedForwardReadStartPos[index] ==
                        matePair.ForwardReadStartPosition[0].ToString((IFormatProvider)null)
                    || expectedForwardReadStartPos[index] ==
                        matePair.ForwardReadStartPosition[1].ToString((IFormatProvider)null));

                // NOTE(review): the "reverse read" expectations are compared with the
                // reverse-complement positions and vice versa below - confirm the test
                // data is laid out for this pairing.
                if (matePair.ReverseReadReverseComplementStartPosition.Count > 1)
                {
                    Assert.IsTrue(
                        expectedReverseReadStartPos[index] ==
                            matePair.ReverseReadReverseComplementStartPosition[0].ToString((IFormatProvider)null)
                        || expectedReverseReadStartPos[index] ==
                            matePair.ReverseReadReverseComplementStartPosition[1].ToString((IFormatProvider)null));
                }

                if (matePair.ReverseReadStartPosition.Count > 1)
                {
                    Assert.IsTrue(
                        expectedReverseComplementStartPos[index] ==
                            matePair.ReverseReadStartPosition[0].ToString((IFormatProvider)null)
                        || expectedReverseComplementStartPos[index] ==
                            matePair.ReverseReadStartPosition[1].ToString((IFormatProvider)null));
                }
            }

            ApplicationLog.WriteLine("PADENA P1 : FilterPairedReads() validation for Padena step6:step4 completed successfully");
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Creates RedundantPathPurger instance by passing pathlength and count. Detect
        /// redundant error nodes and remove these nodes from the graph. Validate the graph.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        /// <param name="isMicroOrganism">
        /// True to validate only the number of nodes left in the graph,
        /// false to validate the graph through ValidateGraph().
        /// </param>
        internal void ValidateRedundantPathPurgerCtor(string nodeName, bool isMicroOrganism)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string expectedNodesCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedNodesCountAfterDangling);

            // Get the input reads. Backslashes in the configured path are replaced by the
            // platform separator, matching the other helpers that open test data files.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                // Materialize the reads before the parser is closed.
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Release the input file even when parsing throws.
                parser.Close();
            }

            // Build kmers from step1,graph in step2
            // Remove the dangling links from graph in step3
            // Validate the graph
            int kmerLengthValue = int.Parse(kmerLength, (IFormatProvider)null);
            this.KmerLength = kmerLengthValue;
            this.SequenceReads.Clear();
            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            DeBruijnGraph graph = this.Graph;
            this.DanglingLinksPurger = new DanglingLinksPurger(this.KmerLength);
            this.UnDangleGraph();

            // Create RedundantPathPurger instance, detect redundant nodes and remove error nodes
            RedundantPathsPurger redundantPathPurger = new RedundantPathsPurger(kmerLengthValue + 1);
            DeBruijnPathList redundantnodelist = redundantPathPurger.DetectErroneousNodes(graph);
            redundantPathPurger.RemoveErroneousNodes(graph, redundantnodelist);

            if (isMicroOrganism)
            {
                // Compare numbers with numbers: the expected count is read as text, and a
                // string compared with an integer count is never equal.
                Assert.AreEqual(int.Parse(expectedNodesCount, (IFormatProvider)null), graph.GetNodes().Count());
            }
            else
            {
                ValidateGraph(graph, nodeName);
            }

            ApplicationLog.WriteLine(@"Padena P1 :RedundantPathsPurger ctor and methods validation for Padena step4 completed successfully");
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Validate the SimpleContigBuilder Build() method using step 4 graph
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        /// <param name="isChromosomeRC">
        /// True to validate only the number of contigs,
        /// false to validate that every contig is one of the expected contigs.
        /// </param>
        internal void ValidateSimpleContigBuilderBuild(string nodeName, bool isChromosomeRC)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string expectedContigsString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigsNode);
            string[] expectedContigs = expectedContigsString.Split(',');
            string expectedContigsCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigsCount);

            // Get the input reads. Backslashes in the configured path are replaced by the
            // platform separator, matching the other helpers that open test data files.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                // Materialize the reads before the parser is closed.
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Release the input file even when parsing throws.
                parser.Close();
            }

            // Build kmers from step1,graph in step2
            // Remove the dangling links from graph in step3
            // Remove bubbles from graph in step4
            this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
            this.SequenceReads.Clear();
            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            DeBruijnGraph graph = this.Graph;
            this.UnDangleGraph();
            this.RemoveRedundancy();

            // Validate the SimpleContigBuilder.Build() by passing graph
            SimplePathContigBuilder builder = new SimplePathContigBuilder();
            IList<ISequence> contigs = builder.Build(graph).ToList();

            if (isChromosomeRC)
            {
                Assert.AreEqual(expectedContigsCount, contigs.Count.ToString((IFormatProvider)null));
            }
            else
            {
                // Validate the contigs
                foreach (ISequence contig in contigs)
                {
                    string contigText = new string(contig.Select(a => (char)a).ToArray());
                    Assert.IsTrue(expectedContigs.Contains(contigText));
                }
            }

            ApplicationLog.WriteLine(@"Padena P1 :SimpleContigBuilder.BuildContigs() validation for Padena step5 completed successfully");
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Validate ParallelDeNovothis step1 Build kmers
        /// </summary>
        /// <param name="nodeName">xml node for test data</param>
        /// <param name="isSmallSize">
        /// True to validate only the number of kmer entries,
        /// false to validate the kmers through ValidateKmersList().
        /// </param>
        internal void ValidatePadenaBuildKmers(string nodeName, bool isSmallSize)
        {
            // Read all the input sequences from xml config file
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string expectedKmersCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedKmersCount);

            // Set kmerLength
            this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);

            // Get the input reads. Backslashes in the configured path are replaced by the
            // platform separator, matching the other helpers that open test data files.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                // Materialize the reads before the parser is closed.
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Release the input file even when parsing throws.
                parser.Close();
            }

            // Set all the input reads and execute build kmers
            this.SequenceReads.Clear();
            this.SetSequenceReads(sequenceReads.ToList());

            SequenceToKmerBuilder kmerBuilder = new SequenceToKmerBuilder();
            IEnumerable<KmersOfSequence> lstKmers = kmerBuilder.Build(this.SequenceReads, this.KmerLength);

            if (isSmallSize)
            {
                Assert.AreEqual(expectedKmersCount, lstKmers.Count().ToString((IFormatProvider)null));
            }
            else
            {
                ValidateKmersList(new List<KmersOfSequence>(lstKmers), sequenceReads.ToList(), nodeName);
            }

            ApplicationLog.WriteLine(@"Padena P1 : Validation of Build with all input reads using ParallelDeNovothis sequence completed successfully");
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Validate graph generated using ParallelDeNovothis.CreateGraph() with kmers
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        /// <param name="isLargeSizeReads">
        /// True to validate only the number of graph nodes,
        /// false to validate the graph through ValidateGraph().
        /// </param>
        internal void ValidatePadenaBuildGraph(string nodeName, bool isLargeSizeReads)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string expectedGraphsNodeCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GraphNodesCountNode);

            // Get the input reads. Backslashes in the configured path are replaced by the
            // platform separator, matching the other helpers that open test data files.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                // Materialize the reads before the parser is closed.
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Release the input file even when parsing throws.
                parser.Close();
            }

            // Build kmers and the graph from the reads.
            this.KmerLength = int.Parse(kmerLength, null);
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            DeBruijnGraph graph = this.Graph;

            ApplicationLog.WriteLine("Padena P1 : Step1,2 Completed Successfully");

            if (isLargeSizeReads)
            {
                Assert.AreEqual(Int32.Parse(expectedGraphsNodeCount, null), graph.GetNodes().Count());
            }
            else
            {
                ValidateGraph(graph, nodeName);
            }

            ApplicationLog.WriteLine(@"Padena P1 : ParallelDeNovothis CreateGraph() validation for Padena step2 completed successfully");
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Validate SequenceToKmerBuilder.Build() when it is given all input reads at once.
        /// </summary>
        /// <param name="nodeName">xml node name for test data</param>
        internal void ValidateKmerBuilderBuild(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.KmerLengthNode);

            // Get the input reads
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when Parse() throws.
                parser.Close();
            }

            // Pass all the input reads and kmerLength to generate kmers
            SequenceToKmerBuilder builder = new SequenceToKmerBuilder();
            IEnumerable<KmersOfSequence> lstKmers = builder.Build(sequenceReads,
              int.Parse(kmerLength, (IFormatProvider)null));

            // Validate kmers list
            ValidateKmersList(new List<KmersOfSequence>(lstKmers),
                new List<ISequence>(sequenceReads), nodeName);

            ApplicationLog.WriteLine(
                @"Padena BVT : Validation of Build with all input reads 
                    sequence completed successfully");
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Validate SequenceToKmerBuilder.Build() when it is called once per
        /// input read, collecting the kmers built for each read.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateKmerBuilderBuildWithSequence(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.KmerLengthNode);

            // Get the input reads.
            // Map '\' to the platform directory separator so the path also resolves on non-Windows hosts.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when Parse() throws.
                parser.Close();
            }

            // Pass each input read and kmerLength
            // Add all the generated kmers to kmer list
            SequenceToKmerBuilder builder = new SequenceToKmerBuilder();
            IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>();

            // The kmer length does not change between reads, so parse it once.
            int kmerSize = int.Parse(kmerLength, (IFormatProvider)null);
            foreach (ISequence sequence in sequenceReads)
            {
                lstKmers.Add(builder.Build(sequence, kmerSize));
            }

            // Validate all the kmers
            ValidateKmersList(lstKmers, sequenceReads.ToList(), nodeName);

            ApplicationLog.WriteLine(
                @"Padena BVT : Validation of Build with each input read sequence 
                    completed successfully");
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Validate the build-kmers step: set the reads and kmer length on this
        /// instance, build kmers with SequenceToKmerBuilder and validate them.
        /// </summary>
        /// <param name="nodeName">xml node for test data</param>
        internal void ValidateDe2AssemblerBuildKmers(string nodeName)
        {
            // Read all the input sequences from xml config file
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmerLengthNode);

            // set kmerLength
            this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);

            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when Parse() throws.
                parser.Close();
            }

            // set all the input reads and execute build kmers
            this.SequenceReads.Clear();
            this.SetSequenceReads(sequenceReads.ToList());
            IEnumerable<KmersOfSequence> lstKmers =
                (new SequenceToKmerBuilder()).Build(this.SequenceReads,
                this.KmerLength);

            ValidateKmersList(new List<KmersOfSequence>(lstKmers),
               new List<ISequence>(sequenceReads), nodeName);

            ApplicationLog.WriteLine(
                @"Padena BVT : Validation of Build with all input reads using 
                    ParallelDeNovoAssembler sequence completed successfully");
        }
Ejemplo n.º 13
0
 /// <summary>
 /// Validate that a ReadContigMap built from the small-chromosome reads
 /// contains a key for the ID of each of the first ten reads.
 /// </summary>
 public void ValidateReadContigMap()
 {
     // Read all the input sequences from xml config file
     string filePath = utilityObj.xmlUtil.GetTextValue(Constants.SmallChromosomeReadsNode,
       Constants.FilePathNode);

     IEnumerable<ISequence> sequenceReads;
     FastAParser parser = new FastAParser();
     parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
     try
     {
         sequenceReads = parser.Parse().ToList();
     }
     finally
     {
         // Close the parser even when Parse() throws.
         parser.Close();
     }

     ReadContigMap map = new ReadContigMap(sequenceReads);
     Assert.IsNotNull(map);

     // Check a different read on every iteration; the earlier loop looked up
     // read 0 ten times. Take() also copes with inputs of fewer than ten reads.
     foreach (ISequence read in sequenceReads.Take(10))
     {
         Assert.IsTrue(map.ContainsKey(read.ID));
     }
 }
Ejemplo n.º 14
0
        /// <summary>
        /// Validate Parallel Denovo Assembly Assembled sequences.
        /// </summary>
        /// <param name="nodeName">XML node used to validate different test scenarios</param>
        /// <param name="isScaffold">Passed through to Assemble() as its second argument.</param>
        /// <param name="enableLowerContigRemoval">
        /// True to turn on AllowLowCoverageContigRemoval and set ContigCoverageThreshold from the xml node.
        /// </param>
        /// <param name="allowErosion">
        /// True to turn on AllowErosion and set ErosionThreshold from the xml node.
        /// </param>
        internal void ValidatePadenaAssembledSeqs(string nodeName,
            bool isScaffold, bool enableLowerContigRemoval, bool allowErosion)
        {
            // Get values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
            string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
            string erosionThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ErosionNode);
            string lowCCThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LowCoverageContigNode);
            string expectedSequences = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequencePathNode);

            // Get the input reads
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when Parse() throws.
                parser.Close();
            }

            // Fractional values come from the test-data xml, so parse them
            // independently of the culture of the machine running the test.
            IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Create the assembler first and configure it inside the try block,
            // so that a bad configuration value still disposes the instance.
            ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler();
            try
            {
                assembler.KmerLength = Int32.Parse(kmerLength, null);
                assembler.DanglingLinksThreshold = Int32.Parse(daglingThreshold, null);
                assembler.RedundantPathLengthThreshold = Int32.Parse(redundantThreshold, null);

                if (enableLowerContigRemoval)
                {
                    assembler.AllowLowCoverageContigRemoval = enableLowerContigRemoval;
                    assembler.ContigCoverageThreshold = double.Parse(lowCCThreshold, invariant);
                }

                if (allowErosion)
                {
                    assembler.AllowErosion = true;
                    assembler.ErosionThreshold = Int32.Parse(erosionThreshold, null);
                }

                CloneLibrary.Instance.AddLibrary(library, float.Parse(mean, invariant),
                    float.Parse(stdDeviation, invariant));

                IDeNovoAssembly assembly = assembler.Assemble(sequenceReads.ToList(), isScaffold);
                IList<ISequence> assembledSequenceList = assembly.AssembledSequences.ToList();

                // Expected sequences are stored as one comma separated string.
                HashSet<string> expected = new HashSet<string>(expectedSequences.Split(',').Select(s => s.Trim()));
                AlignmentHelpers.CompareSequenceLists(expected, assembledSequenceList);

                ApplicationLog.WriteLine("Padena BVT : Assemble() validation for Padena step6:step7 completed successfully");
            }
            finally
            {
                assembler.Dispose();
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Validate the DeBruijnNode ctor by passing the kmer and validating 
        /// the node object: a node is created, one left and one right extension
        /// are attached, and its counters are compared with the expected values.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateDeBruijnNodeCtor(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmerLengthNode);
            string nodeExtensionsCount = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.NodeExtensionsCountNode);
            string kmersCount = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmersCountNode);
            string leftNodeExtensionCount = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.LeftNodeExtensionsCountNode);
            string rightNodeExtensionCount = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.RightNodeExtensionsCountNode);

            // Get the input reads.
            // Map '\' to the platform directory separator so the path also resolves on non-Windows hosts.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when Parse() throws.
                parser.Close();
            }

            // Build the kmers using this
            this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
            this.SequenceReads.Clear();
            this.SetSequenceReads(sequenceReads.ToList());
            IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
                (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

            // Validate the node creation
            // Create node and add left node.
            ISequence seq = this.SequenceReads.First();
            KmerData32 kmerData = new KmerData32();
            kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);

            DeBruijnNode node = new DeBruijnNode(kmerData, 1);

            // NOTE(review): the position is taken from lstKmers[1] but applied to the
            // first read (seq) - confirm this pairing is intended.
            kmerData = new KmerData32();
            kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);

            // Both extension nodes are built from the same kmer data.
            DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
            DeBruijnNode rightnode = new DeBruijnNode(kmerData, 1);

            node.SetExtensionNode(false, true, leftnode);
            node.SetExtensionNode(true, true, rightnode);

            // Validate DeBruijnNode class properties.
            Assert.AreEqual(nodeExtensionsCount, node.ExtensionsCount.ToString((IFormatProvider)null));
            Assert.AreEqual(kmersCount, node.KmerCount.ToString((IFormatProvider)null));
            Assert.AreEqual(leftNodeExtensionCount, node.LeftExtensionNodesCount.ToString((IFormatProvider)null));
            Assert.AreEqual(rightNodeExtensionCount, node.RightExtensionNodesCount.ToString((IFormatProvider)null));

            ApplicationLog.WriteLine("Padena P1 : DeBruijnNode ctor() validation for Padena step2 completed successfully");
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Validate RemoveRedundancy(): build the graph, run UnDangleGraph() and
        /// RemoveRedundancy(), then validate the resulting graph.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        /// <param name="defaultThreshold">
        /// True to derive the purger thresholds from the kmer length (kmer length for
        /// dangling links, kmer length + 1 for redundant paths); false to read both
        /// thresholds from the xml node.
        /// </param>
        /// <param name="isMicroorganism">
        /// True to compare only the node count with the expected count;
        /// false to run the full ValidateGraph() check.
        /// </param>
        internal void ValidatePadenaRemoveRedundancy(string nodeName, bool defaultThreshold, bool isMicroorganism)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmerLengthNode);
            string expectedNodesCount = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.ExpectedNodesCountRemoveRedundancy);

            string danglingThreshold = null;
            string pathlengthThreshold = null;
            if (!defaultThreshold)
            {
                danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName,
                  Constants.DanglingLinkThresholdNode);
                pathlengthThreshold = utilityObj.xmlUtil.GetTextValue(nodeName,
                  Constants.PathLengthThresholdNode);
            }

            // Get the input reads.
            // Map '\' to the platform directory separator so the path also resolves on non-Windows hosts.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when Parse() throws.
                parser.Close();
            }

            // The kmer length is needed in several places below; parse it once.
            int kmerSize = int.Parse(kmerLength, (IFormatProvider)null);

            // Build kmers from step1,graph in step2 
            // Remove the dangling links from graph in step3
            // Remove bubbles from graph in step4
            // Validate the graph
            if (!defaultThreshold)
            {
                this.DanglingLinksThreshold = int.Parse(danglingThreshold, (IFormatProvider)null);
                this.DanglingLinksPurger =
                  new DanglingLinksPurger(this.DanglingLinksThreshold);
                this.RedundantPathLengthThreshold = int.Parse(pathlengthThreshold, (IFormatProvider)null);
                this.RedundantPathsPurger =
                  new RedundantPathsPurger(this.RedundantPathLengthThreshold);
            }
            else
            {
                this.DanglingLinksPurger = new DanglingLinksPurger(kmerSize);
                this.RedundantPathsPurger = new RedundantPathsPurger(kmerSize + 1);
            }

            this.KmerLength = kmerSize;
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            DeBruijnGraph graph = this.Graph;

            ApplicationLog.WriteLine("Padena P1 : Step1,2 Completed Successfully");
            this.UnDangleGraph();

            ApplicationLog.WriteLine("Padena P1 : Step3 Completed Successfully");
            this.RemoveRedundancy();

            ApplicationLog.WriteLine("Padena P1 : Step4 Completed Successfully");
            if (isMicroorganism)
            {
                Assert.AreEqual(expectedNodesCount, graph.GetNodes().Count().ToString((IFormatProvider)null));
            }
            else
            {
                ValidateGraph(graph, nodeName);
            }

            ApplicationLog.WriteLine(@"Padena P1 :ParallelDeNovothis.RemoveRedundancy() validation for Padena step4 completed successfully");
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Validate that DanglingLinksPurger.RemoveErroneousNodes() removes the
        /// nodes reported by DetectErroneousNodes() from the graph.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidatePadenaRemoveErrorNodes(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmerLengthNode);

            // Get the input reads.
            // Map '\' to the platform directory separator so the path also resolves on non-Windows hosts.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when Parse() throws.
                parser.Close();
            }

            // Build kmers from step1,graph in step2 
            // and remove the dangling links from graph in step3
            // Validate the graph
            int kmerSize = int.Parse(kmerLength, (IFormatProvider)null);
            this.KmerLength = kmerSize;
            this.SequenceReads.Clear();
            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            DeBruijnGraph graph = this.Graph;

            // Find the dangling nodes and remove the dangling node
            DanglingLinksPurger danglingLinksPurger = new DanglingLinksPurger(kmerSize + 1);
            DeBruijnPathList danglingnodes = danglingLinksPurger.DetectErroneousNodes(graph);
            danglingLinksPurger.RemoveErroneousNodes(graph, danglingnodes);

            // Only the first node of the first detected path is checked.
            Assert.IsFalse(graph.GetNodes().Contains(danglingnodes.Paths[0].PathNodes[0]));

            ApplicationLog.WriteLine(@"Padena P1 :DeBruijnGraph.RemoveErrorNodes() validation for Padena step3 completed successfully");
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Validate UnDangleGraph(): build the graph, purge dangling links with a
        /// DanglingLinksPurger and validate the resulting graph.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        /// <param name="defaultThreshold">
        /// True to use kmer length + 1 as the dangling links threshold;
        /// false to read the threshold from the xml node.
        /// </param>
        /// <param name="smallSizeChromosome">
        /// True to compare only the node count with the expected count;
        /// false to run the full ValidateGraph() check.
        /// </param>
        internal void ValidatePadenaUnDangleGraph(string nodeName, bool defaultThreshold, bool smallSizeChromosome)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string expectedNodesCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodesCountAfterDanglingGraphNode);
            string danglingThreshold = null;

            if (!defaultThreshold)
            {
                danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            }

            // Get the input reads.
            // Map '\' to the platform directory separator so the path also resolves on non-Windows hosts.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when Parse() throws.
                parser.Close();
            }

            // Build kmers from step1,graph in step2 
            int kmerSize = int.Parse(kmerLength, null);
            this.KmerLength = kmerSize;
            if (!defaultThreshold)
            {
                this.DanglingLinksThreshold = int.Parse(danglingThreshold, null);
            }
            else
            {
                this.DanglingLinksThreshold = kmerSize + 1;
            }

            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            DeBruijnGraph graph = this.Graph;

            ApplicationLog.WriteLine("Padena P1 : Step1,2 Completed Successfully");
            this.DanglingLinksPurger = new DanglingLinksPurger(this.DanglingLinksThreshold);
            this.UnDangleGraph();

            ApplicationLog.WriteLine("Padena P1 : Step3 Completed Successfully");
            if (smallSizeChromosome)
            {
                Assert.AreEqual(expectedNodesCount, graph.GetNodes().Count().ToString((IFormatProvider)null));
            }
            else
            {
                ValidateGraph(graph, nodeName);
            }

            ApplicationLog.WriteLine(@"Padena P1 :ParallelDeNovothis.UndangleGraph() validation for Padena step3 completed successfully");
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Validate BuildContigs(): build the graph, purge dangling links and
        /// redundant paths, build contigs and compare them with the expected ones.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        /// <param name="isChromosomeRC">
        /// True to compare only the contig count with the expected count; false to
        /// require every contig (or its reverse complement) among the expected contigs.
        /// </param>
        internal void ValidateDe2thisBuildContigs(string nodeName, bool isChromosomeRC)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmerLengthNode);
            string expectedContigsString = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.ContigsNode);

            // The node holds either the comma separated contigs themselves or, when
            // it mentions "PADENATESTDATA", the name of a file that contains them.
            string[] expectedContigs;
            if (expectedContigsString.IndexOf("PADENATESTDATA", StringComparison.OrdinalIgnoreCase) < 0)
            {
                expectedContigs = expectedContigsString.Split(',');
            }
            else
            {
                // Strip both CR and LF so that files saved with LF-only line
                // endings yield the same contigs as CRLF files.
                expectedContigs = ReadStringFromFile(expectedContigsString)
                    .Replace("\r", "")
                    .Replace("\n", "")
                    .Split(',');
            }

            string expectedContigsCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigsCount);

            // Get the input reads.
            // Map '\' to the platform directory separator so the path also resolves on non-Windows hosts.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when Parse() throws.
                parser.Close();
            }

            // Build kmers from step1,graph in step2 
            // Remove the dangling links from graph in step3
            // Remove bubbles form the graph in step4 
            // Pass the graph and build contigs
            // Validate the contigs
            this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
            this.SequenceReads.Clear();
            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            this.DanglingLinksPurger = new DanglingLinksPurger(this.KmerLength);
            this.UnDangleGraph();
            this.RedundantPathsPurger = new RedundantPathsPurger(this.KmerLength + 1);
            this.RemoveRedundancy();
            this.ContigBuilder = new SimplePathContigBuilder();
            IList<ISequence> contigs = this.BuildContigs().ToList();

            if (isChromosomeRC)
            {
                // Validate contigs count only for Chromosome files. 
                Assert.AreEqual(expectedContigsCount, contigs.Count.ToString((IFormatProvider)null));
            }
            else
            {
                // validate all contigs of a sequence.
                // A set gives constant-time membership checks for each contig.
                HashSet<string> expectedContigSet = new HashSet<string>(expectedContigs);
                foreach (ISequence contig in contigs)
                {
                    Assert.IsTrue(
                        expectedContigSet.Contains(new string(contig.Select(a => (char)a).ToArray())) ||
                        expectedContigSet.Contains(new string(contig.GetReverseComplementedSequence().Select(a => (char)a).ToArray())));
                }
            }

            ApplicationLog.WriteLine(@"Padena P1 :ParallelDeNovothis.BuildContigs() validation for Padena step5 completed successfully");
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Validate the KmersOfSequence ctor by passing each base sequence read, the
        /// kmer length and the kmers built for that read, then validating the list.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        internal void ValidateKmersOfSequenceCtorWithBuildKmers(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

            // Get the input reads.
            // Map '\' to the platform directory separator so the path also resolves on non-Windows hosts.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when Parse() throws.
                parser.Close();
            }

            SequenceToKmerBuilder builder = new SequenceToKmerBuilder();
            IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>();

            // The kmer length does not change between reads, so parse it once.
            int kmerSize = int.Parse(kmerLength, (IFormatProvider)null);

            // Validate KmersOfSequence ctor using build kmers
            foreach (ISequence sequence in sequenceReads)
            {
                KmersOfSequence kmer = builder.Build(sequence, kmerSize);
                KmersOfSequence kmerSequence = new KmersOfSequence(sequence, kmerSize, kmer.Kmers);
                lstKmers.Add(kmerSequence);
            }

            ValidateKmersList(lstKmers, sequenceReads.ToList(), nodeName);

            ApplicationLog.WriteLine(@"Padena P1 : KmersOfSequence ctor with build kmers method validation completed successfully");
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Validate library information: map the input reads to mate pairs, check
        /// the pair count, then check the library details of the first pair.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void GetLibraryInformation(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.FilePathNode);
            string expectedPairedReadsCount = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.PairedReadsCountNode);
            string expectedLibraray = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.LibraryName);
            string expectedStdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.Mean);

            // Get the input reads.
            // Map '\' to the platform directory separator so the path also resolves on non-Windows hosts.
            IEnumerable<ISequence> sequences;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequences = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when Parse() throws.
                parser.Close();
            }

            // Convert reads to map paired reads.
            MatePairMapper pair = new MatePairMapper();
            IList<MatePair> pairedreads = pair.Map(new List<ISequence>(sequences));

            // Validate Map paired reads.
            Assert.AreEqual(expectedPairedReadsCount,
              pairedreads.Count.ToString((IFormatProvider)null));

            // Get library information and validate
            CloneLibraryInformation libraryInfo =
              CloneLibrary.Instance.GetLibraryInformation
              (pairedreads[0].Library);

            // Expected values come from the test-data xml, so format the numeric
            // values independently of the culture of the machine running the test.
            IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

            Assert.AreEqual(expectedStdDeviation, libraryInfo.StandardDeviationOfInsert.ToString(invariant));
            Assert.AreEqual(expectedLibraray, libraryInfo.LibraryName.ToString((IFormatProvider)null));
            Assert.AreEqual(mean, libraryInfo.MeanLengthOfInsert.ToString(invariant));

            ApplicationLog.WriteLine(@"Padena P1 : Map paired reads has been verified successfully");
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Validate the KmersOfSequence ctor by passing the first base sequence read,
        /// the kmer length and its built kmers, then validating the BaseSequence and
        /// Kmers properties.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        internal void ValidateKmersOfSequenceCtorProperties(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmerLengthNode);
            string expectedSeq = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.BaseSequenceNode);
            string expectedKmers = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmersCountNode);

            // Get the input reads.
            // Map '\' to the platform directory separator so the path also resolves on non-Windows hosts.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when Parse() throws.
                parser.Close();
            }

            SequenceToKmerBuilder builder = new SequenceToKmerBuilder();

            // Only the first read is used; fetch it once instead of copying the list per access.
            ISequence firstRead = sequenceReads.First();
            int kmerSize = int.Parse(kmerLength, (IFormatProvider)null);

            KmersOfSequence kmer = builder.Build(firstRead, kmerSize);
            KmersOfSequence kmerSequence = new KmersOfSequence(firstRead, kmerSize, kmer.Kmers);

            // Validate KmerOfSequence properties.
            Assert.AreEqual(expectedSeq, new string(kmerSequence.BaseSequence.Select(a => (char)a).ToArray()));
            Assert.AreEqual(expectedKmers, kmerSequence.Kmers.Count.ToString((IFormatProvider)null));

            ApplicationLog.WriteLine(@"Padena P1 : KmersOfSequence ctor with build kmers method validation completed successfully");
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Validate building map reads to contigs.
        /// Parses the FASTA file configured for the node, runs graph creation, dangling link
        /// removal, redundancy removal and contig building, then maps the reads onto the sorted
        /// contigs and asserts on the read maps of the first read.
        /// </summary>
        /// <param name="nodeName">xml node name used for a different testcases</param>
        /// <param name="isFullOverlap">
        /// True to validate length, contig start position, read start position and FullOverlap
        /// for every mapped contig; false to validate only that the first read map is a PartialOverlap.
        /// </param>
        internal void ValidateMapReadsToContig(string nodeName, bool isFullOverlap)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string readMapLengthString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ReadMapLength);
            string readStartPosString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ReadStartPos);
            string contigStartPosString = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigStartPos);

            // Expected values are stored as comma separated lists, one entry per contig.
            string[] expectedReadmapLength = readMapLengthString.Split(',');
            string[] expectedReadStartPos = readStartPosString.Split(',');
            string[] expectedContigStartPos = contigStartPosString.Split(',');

            // Get the input reads. The parser is closed in a finally block so the
            // input file is released even when parsing throws.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath);
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                parser.Close();
            }

            // Configure the assembler: each threshold is parsed once and reused.
            this.KmerLength = Int32.Parse(kmerLength, null);

            int danglingLimit = Int32.Parse(daglingThreshold, null);
            this.DanglingLinksThreshold = danglingLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);

            int redundantLimit = Int32.Parse(redundantThreshold, null);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
            this.RedundantPathLengthThreshold = redundantLimit;

            // Build the graph, remove dangling links and redundant paths, then build contigs.
            // A copy of the reads is handed to the assembler so the local list stays untouched.
            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            this.UnDangleGraph();
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();

            IEnumerable<ISequence> contigs = this.BuildContigs();

            // Map the reads onto the sorted contigs.
            IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

            // Expected value first, so a failure message reports the right roles.
            Assert.AreEqual(sequenceReads.Count(), maps.Count);

            Dictionary<ISequence, IList<ReadMap>> readMaps = maps[sequenceReads.First().ID];

            // Sort the mapped contigs once instead of on every loop iteration.
            IList<ISequence> sortedMappedContigs = SortContigsData(readMaps.Keys.ToList());

            for (int i = 0; i < sortedMappedContigs.Count; i++)
            {
                IList<ReadMap> readMap = readMaps[sortedMappedContigs[i]];

                if (!isFullOverlap)
                {
                    // Only the first mapped contig is checked for the partial overlap case.
                    Assert.AreEqual(ContigReadOverlapType.PartialOverlap, readMap[0].ReadOverlap);
                    break;
                }

                Assert.AreEqual(expectedReadmapLength[i], readMap[0].Length.ToString((IFormatProvider)null), "readMap failed for pos " + i);
                Assert.AreEqual(expectedContigStartPos[i], readMap[0].StartPositionOfContig.ToString((IFormatProvider)null), "contigStart failed for pos " + i);
                Assert.AreEqual(expectedReadStartPos[i], readMap[0].StartPositionOfRead.ToString((IFormatProvider)null), "readStart failed for pos " + i);
                Assert.AreEqual(ContigReadOverlapType.FullOverlap, readMap[0].ReadOverlap);
            }

            ApplicationLog.WriteLine("PADENA P1 :ReadContigMapper.Map() validation for Padena step6:step2 completed successfully");
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Validate KmersOfSequence ToSequences() method which returns kmers sequence
        /// using its positions.
        /// Builds kmers for every read of the configured FASTA file and compares each kmer
        /// sequence returned by KmersToSequences() with the expected kmer output file.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateKmersOfSequenceToSequences(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string kmerOutputFile = utilityObj.xmlUtil.GetTextValue(nodeName,
                  Constants.KmersOutputFileNode);

            // Get the input reads. The parser is closed in a finally block so the
            // input file is released even when parsing throws.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath);
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                parser.Close();
            }

            // Parse the kmer length once and reuse it for every read below.
            int kmerSize = int.Parse(kmerLength, (IFormatProvider)null);
            this.KmerLength = kmerSize;
            this.SequenceReads.Clear();

            // Build kmers for all reads.
            this.SetSequenceReads(sequenceReads.ToList());
            IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
                (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

            // Load the expected output: one line per read, kmers separated by commas.
            // The reader is only needed while loading, so it is disposed before validating.
            List<string[]> fileContent = new List<string[]>();
            using (StreamReader kmerFile = new StreamReader(kmerOutputFile))
            {
                string line;
                while (null != (line = kmerFile.ReadLine()))
                {
                    fileContent.Add(line.Split(','));
                }
            }

            // Validate the generated kmer sequences against the expected output.
            int index = 0;
            foreach (ISequence sequenceRead in sequenceReads)
            {
                KmersOfSequence kmerSequence = new KmersOfSequence(sequenceRead,
                    kmerSize, lstKmers[index].Kmers);

                int count = 0;
                foreach (ISequence sequence in kmerSequence.KmersToSequences())
                {
                    string actualKmer = new string(sequence.Select(a => (char)a).ToArray());
                    Assert.AreEqual(fileContent[index][count], actualKmer);
                    count++;
                }

                index++;
            }

            ApplicationLog.WriteLine(@"Padena P1 : KmersOfSequence ToSequences() method validation completed successfully");
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Validate DistanceCalculator.CalculateDistance() for the contigs built from the input reads.
        /// Builds contigs, maps reads to contigs, maps and orientation-filters the mate pairs,
        /// calculates the distances and asserts the paired read count plus the distances and
        /// standard deviations of the first valid mate pair.
        /// </summary>
        /// <param name="nodeName">xml node name used for a different testcase.</param>
        internal void ValidateContigDistance(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.RedundantThreshold);
            string expectedContigPairedReadsCount = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.ContigPairedReadsCount);
            string distanceBetweenFirstContigs = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.DistanceBetweenFirstContig);
            string distanceBetweenSecondContigs = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.DistanceBetweenSecondContig);
            string firstStandardDeviation = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.FirstContigStandardDeviation);
            string secondStandardDeviation = utilityObj.xmlUtil.GetTextValue(
              nodeName, Constants.SecondContigStandardDeviation);

            // Configuration values are machine-written, so they are parsed with the invariant
            // culture; parsing "4.5" with a comma-decimal current culture would give a wrong value.
            IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Get the input reads. The parser is closed in a finally block so the
            // input file is released even when parsing throws.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath);
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                parser.Close();
            }

            // Configure the assembler: each threshold is parsed once and reused.
            this.KmerLength = Int32.Parse(kmerLength, invariant);

            int danglingLimit = Int32.Parse(daglingThreshold, invariant);
            this.DanglingLinksThreshold = danglingLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);

            int redundantLimit = Int32.Parse(redundantThreshold, invariant);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
            this.RedundantPathLengthThreshold = redundantLimit;
            this.SequenceReads.Clear();

            // Build the graph and remove the dangling links.
            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            this.UnDangleGraph();

            // Build contig.
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();
            IEnumerable<ISequence> contigs = this.BuildContigs();

            IList<ISequence> sortedContigs = SortContigsData(contigs.ToList());
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

            // Find map paired reads.
            MatePairMapper mapPairedReads = new MatePairMapper();
            ContigMatePairs pairedReads = mapPairedReads.MapContigToMatePairs(sequenceReads, maps);

            // Filter contigs based on the orientation.
            OrientationBasedMatePairFilter filter = new OrientationBasedMatePairFilter();
            ContigMatePairs contigpairedReads = filter.FilterPairedReads(pairedReads, 0);

            // Calculate the distance between contigs.
            DistanceCalculator calc = new DistanceCalculator(contigpairedReads);
            calc.CalculateDistance();
            Assert.AreEqual(expectedContigPairedReadsCount,
                contigpairedReads.Values.Count.ToString((IFormatProvider)null));

            // The mate pairs may be keyed by either of the first two sorted contigs,
            // at both the outer and the inner level; fall back to the second contig.
            ISequence firstSeq = sortedContigs[0];
            ISequence secondSeq = sortedContigs[1];

            Dictionary<ISequence, IList<ValidMatePair>> map = contigpairedReads.ContainsKey(firstSeq)
                ? contigpairedReads[firstSeq]
                : contigpairedReads[secondSeq];

            IList<ValidMatePair> valid;
            if (!map.TryGetValue(firstSeq, out valid))
            {
                valid = map[secondSeq];
            }

            // Validate distance and standard deviation between contigs.
            ValidMatePair firstPair = valid.First();
            Assert.AreEqual(float.Parse(distanceBetweenFirstContigs, invariant),
              firstPair.DistanceBetweenContigs[0]);
            Assert.AreEqual(float.Parse(distanceBetweenSecondContigs, invariant),
              firstPair.DistanceBetweenContigs[1]);
            Assert.AreEqual(float.Parse(firstStandardDeviation, invariant),
              firstPair.StandardDeviation[0]);
            Assert.AreEqual(float.Parse(secondStandardDeviation, invariant),
              firstPair.StandardDeviation[1]);

            ApplicationLog.WriteLine("PADENA P1 : DistanceCalculator() validation for Padena step6:step5 completed successfully");
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Validate DeBruijnGraph properties.
        /// Creates the graph from the reads of the configured FASTA file and asserts that the
        /// number of graph nodes matches the expected node count from the xml node.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateDeBruijinGraphproperties(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmerLengthNode);
            string expectedNodesCount = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.GraphNodesCountNode);

            // Get the input reads. The parser is closed in a finally block so the
            // input file is released even when parsing throws.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath);
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                parser.Close();
            }

            // Build kmers and the graph from the reads.
            this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            DeBruijnGraph graph = this.Graph;

            ApplicationLog.WriteLine("Padena P1 : Step1,2 Completed Successfully");

            // Validate DeBruijnGraph Properties.
            Assert.AreEqual(expectedNodesCount, graph.GetNodes().Count().ToString((IFormatProvider)null));

            ApplicationLog.WriteLine(@"Padena P1 : ParallelDeNovothis CreateGraph() validation for Padena step2 completed successfully");
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Validate scaffold sequence for a given input reads.
        /// Builds contigs from the configured FASTA file, registers the clone library with the
        /// configured mean and standard deviation, builds scaffolds with GraphScaffoldBuilder
        /// and compares them with the expected scaffold sequences.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateScaffoldSequence(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
            string libraray = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
            string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
            string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
            string inputRedundancy = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.InputRedundancy);
            string expectedSeq = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScaffoldSeq);

            // Expected scaffolds are stored as one comma separated string.
            string[] scaffoldSeqNodes = expectedSeq.Split(',');

            // Configuration values are machine-written, so they are parsed with the invariant
            // culture; parsing "4.5" with a comma-decimal current culture would give a wrong value.
            IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Get the input reads. The parser is closed in a finally block so the
            // input file is released even when parsing throws.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath);
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                parser.Close();
            }

            // Configure the assembler: each threshold is parsed once and reused.
            this.KmerLength = Int32.Parse(kmerLength, invariant);

            int danglingLimit = Int32.Parse(daglingThreshold, invariant);
            this.DanglingLinksThreshold = danglingLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);

            int redundantLimit = Int32.Parse(redundantThreshold, invariant);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
            this.RedundantPathLengthThreshold = redundantLimit;
            this.SequenceReads.Clear();

            // Build the graph and remove the dangling links.
            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            this.UnDangleGraph();

            // Build contig.
            this.RemoveRedundancy();
            IEnumerable<ISequence> contigs = this.BuildContigs();

            // Register the library used to find map paired reads.
            CloneLibrary.Instance.AddLibrary(libraray, float.Parse(mean, invariant),
             float.Parse(stdDeviation, invariant));

            IEnumerable<ISequence> scaffoldSeq;
            using (GraphScaffoldBuilder scaffold = new GraphScaffoldBuilder())
            {
                scaffoldSeq = scaffold.BuildScaffold(
                   sequenceReads, contigs.ToList(), this.KmerLength, redundancy: Int32.Parse(inputRedundancy, invariant));
            }

            AlignmentHelpers.CompareSequenceLists(new HashSet<string>(scaffoldSeqNodes), scaffoldSeq.ToList());

            ApplicationLog.WriteLine("PADENA P1 : Scaffold sequence : validation for Padena step6:step8 completed successfully");
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Validate RemoveExtension() method of DeBruijnNode.
        /// Creates a node with one left and one right extension node, asserts the extension
        /// counts, removes both extensions with RemoveExtensionThreadSafe() and asserts that
        /// both counts drop to zero.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateDeBruijnNodeRemoveExtension(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmerLengthNode);

            // Get the input reads. The parser is closed in a finally block so the
            // input file is released even when parsing throws.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath);
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                parser.Close();
            }

            // Build kmers from step1
            this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
                (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

            // Create the node under test from the first kmer of the first read.
            ISequence seq = this.SequenceReads.First();
            KmerData32 kmerData = new KmerData32();
            kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);
            DeBruijnNode node = new DeBruijnNode(kmerData, 1);

            // Both extension nodes are created from the same kmer data
            // (first kmer of the second read, positioned on the first read).
            kmerData = new KmerData32();
            kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);
            DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
            DeBruijnNode rightnode = new DeBruijnNode(kmerData, 1);

            node.SetExtensionNode(false, true, leftnode);
            node.SetExtensionNode(true, true, rightnode);

            // Validates count before removing right and left extension nodes.
            Assert.AreEqual(lstKmers[1].Kmers.First().Count,
              node.RightExtensionNodesCount);
            Assert.AreEqual(1, node.RightExtensionNodesCount);
            Assert.AreEqual(1, node.LeftExtensionNodesCount);

            // Remove right and left extension nodes.
            node.RemoveExtensionThreadSafe(rightnode);
            node.RemoveExtensionThreadSafe(leftnode);

            // Validate node after removing right and left extensions.
            Assert.AreEqual(0, node.RightExtensionNodesCount);
            Assert.AreEqual(0, node.LeftExtensionNodesCount);

            // The message names the operation this test validates (it previously
            // said AddRightExtension(), copied from a sibling test).
            ApplicationLog.WriteLine(@"Padena P1 :DeBruijnNode RemoveExtension() validation for Padena step2 completed successfully");
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Runs the MUMmer aligner for the test case described by the given xml node and
        /// validates the score and the aligned sequences of the first alignment.
        /// </summary>
        /// <param name="nodeName">Node name to be read from xml</param>
        /// <param name="isFilePath">
        /// True to read the reference and query sequences from the FASTA files named in the
        /// xml node; false to build them from the sequence strings and alphabet names in the node.
        /// </param>
        /// <param name="isSeqList">
        /// True to append the reference to the query list and call Align() on that list;
        /// false to call AlignSimple() with the reference and the query list.
        /// </param>
        void ValidateMUMmerAlignGeneralTestCases(string nodeName, bool isFilePath, bool isSeqList)
        {
            ISequence referenceSeq;
            IList<ISequence> querySeqs;
            string referenceSequence;
            string querySequence;

            if (!isFilePath)
            {
                // Build the reference sequence from the string and alphabet in the configuration file
                referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
                string referenceAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
                referenceSeq = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceSequence);

                // Build the query sequence the same way, using the search sequence nodes
                querySequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
                string queryAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);
                ISequence singleQuery = new Sequence(Utility.GetAlphabet(queryAlphabetName), querySequence);
                querySeqs = new List<ISequence> { singleQuery };
            }
            else
            {
                // Gets the reference sequence file from the configuration file
                string referencePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
                Assert.IsNotNull(referencePath);
                ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the File Path '{0}'.", referencePath));

                // Only the first sequence of the reference file is used
                FastAParser referenceParser = new FastAParser();
                referenceSeq = referenceParser.Parse(referencePath).FirstOrDefault();
                Assert.IsNotNull(referenceSeq);
                referenceSequence = referenceSeq.ConvertToString();
                referenceParser.Close();

                // Gets the query sequence file from the configuration file
                string queryPath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
                Assert.IsNotNull(queryPath);
                ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryPath));

                // All query sequences are kept; the first one provides the logged text
                FastAParser queryParser = new FastAParser();
                querySeqs = queryParser.Parse(queryPath).ToList();
                ISequence firstQuery = querySeqs.FirstOrDefault();
                Assert.IsNotNull(firstQuery);
                querySequence = firstQuery.ConvertToString();
                queryParser.Close();
            }

            // Configure the aligner from the xml node
            string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);
            var aligner = new Bio.Algorithms.MUMmer.MUMmerAligner();
            aligner.LengthOfMUM = long.Parse(mumLength, null);
            aligner.GapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);

            // Run the alignment through the requested entry point
            IList<IPairwiseSequenceAlignment> align;
            if (!isSeqList)
            {
                align = aligner.AlignSimple(referenceSeq, querySeqs);
            }
            else
            {
                querySeqs.Add(referenceSeq);
                align = aligner.Align(querySeqs);
            }

            // Validate the score of the first aligned pair
            string expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
            Assert.AreEqual(expectedScore, align[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider)null));
            ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the score for the sequence '{0}' and '{1}'.", referenceSequence, querySequence));

            // Build the expected alignment from the configured sequences and compare
            string[] expectedSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

            var expectedPair = new PairwiseAlignedSequence
            {
                FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]),
                SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]),
                Score = Convert.ToInt32(expectedScore, null),
                FirstOffset = Int32.MinValue,
                SecondOffset = Int32.MinValue
            };

            IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
            expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

            IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
            expectedOutput.Add(expectedAlignment);

            Assert.IsTrue(CompareAlignment(align, expectedOutput));
            ApplicationLog.WriteLine("MUMmer BVT : Successfully validated the aligned sequences.");
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Validate building map reads to contigs.
        /// Parses the FASTA file configured for the node, runs graph creation, dangling link
        /// removal, redundancy removal and contig building, then maps the reads onto the sorted
        /// contigs and asserts on the read maps of the first read.
        /// </summary>
        /// <param name="nodeName">xml node name used for a different testcases</param>
        /// <param name="IsFullOverlap">
        /// True to validate length, contig start position, read start position and FullOverlap
        /// for every sorted contig; false to validate only that the first read map is a PartialOverlap.
        /// </param>
        /// <remarks>
        /// TODO: This test was originally written with hard coded assumptions about the direction of the
        /// returned reads, currently this test has a hack to "flip" some reads to match these hard coded
        /// assumptions.  This should be cleaned up.
        /// </remarks>
        internal void ValidateMapReadsToContig(string nodeName, bool IsFullOverlap)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.KmerLengthNode);
            string daglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.DanglingLinkThresholdNode);
            string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.RedundantThreshold);
            string readMapLengthString = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.ReadMapLength);
            string readStartPosString = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.ReadStartPos);
            string contigStartPosString = utilityObj.xmlUtil.GetTextValue(nodeName,
                Constants.ContigStartPos);

            // Expected values are stored as comma separated lists, one entry per contig.
            string[] expectedReadmapLength = readMapLengthString.Split(',');
            string[] expectedReadStartPos = readStartPosString.Split(',');
            string[] expectedContigStartPos = contigStartPosString.Split(',');

            // Get the input reads. Backslashes in the configured path are replaced by the
            // platform separator, and the parser is closed in a finally block so the input
            // file is released even when parsing throws.
            IEnumerable<ISequence> sequenceReads;
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            try
            {
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                parser.Close();
            }

            // Configure the assembler: each threshold is parsed once and reused.
            this.KmerLength = Int32.Parse(kmerLength, (IFormatProvider)null);

            int danglingLimit = Int32.Parse(daglingThreshold, (IFormatProvider)null);
            this.DanglingLinksThreshold = danglingLimit;
            this.DanglingLinksPurger = new DanglingLinksPurger(danglingLimit);

            int redundantLimit = Int32.Parse(redundantThreshold, (IFormatProvider)null);
            this.RedundantPathsPurger = new RedundantPathsPurger(redundantLimit);
            this.RedundantPathLengthThreshold = redundantLimit;
            this.SequenceReads.Clear();

            // Build the graph, remove dangling links and redundant paths, then build contigs.
            this.SetSequenceReads(sequenceReads.ToList());
            this.CreateGraph();
            this.UnDangleGraph();
            this.ContigBuilder = new SimplePathContigBuilder();
            this.RemoveRedundancy();

            IList<ISequence> listContigs = this.BuildContigs().ToList();

            // Hack to satisfy the assumptions of one test by flipping the read to its reverse complement.
            // A direct cast is used so an unexpected contig type fails with a clear
            // InvalidCastException instead of a NullReferenceException.
            if (nodeName == Constants.MapReadsToContigFullOverlapNode)
            {
                listContigs[0] = ((Sequence)listContigs[0]).GetReverseComplementedSequence();
            }

            // Map the reads onto the sorted contigs.
            IList<ISequence> sortedContigs = SortContigsData(listContigs);
            ReadContigMapper mapper = new ReadContigMapper();
            ReadContigMap maps = mapper.Map(sortedContigs, sequenceReads, this.KmerLength);

            // Expected value first, so a failure message reports the right roles.
            Assert.AreEqual(sequenceReads.Count(), maps.Count);

            Dictionary<ISequence, IList<ReadMap>> readMaps = maps[sequenceReads.ElementAt(0).ID];

            for (int i = 0; i < sortedContigs.Count; i++)
            {
                IList<ReadMap> readMap = readMaps[sortedContigs[i]];

                if (!IsFullOverlap)
                {
                    // Only the first contig is checked for the partial overlap case.
                    Assert.AreEqual(ContigReadOverlapType.PartialOverlap, readMap[0].ReadOverlap);
                    break;
                }

                Assert.AreEqual(expectedReadmapLength[i], readMap[0].Length.ToString((IFormatProvider)null), "readMap failed for pos " + i);
                Assert.AreEqual(expectedContigStartPos[i], readMap[0].StartPositionOfContig.ToString((IFormatProvider)null), "contigStart failed for pos " + i);
                Assert.AreEqual(expectedReadStartPos[i], readMap[0].StartPositionOfRead.ToString((IFormatProvider)null), "readStart failed for pos " + i);
                Assert.AreEqual(ContigReadOverlapType.FullOverlap, readMap[0].ReadOverlap);
            }

            ApplicationLog.WriteLine(
                "Padena BVT :ReadContigMapper.Map() validation for Padena step6:step2 completed successfully");
        }