/// <summary>
        /// Parses FASTA content from the supplied reader and converts every parsed
        /// sequence into its BioPatML counterpart.
        /// </summary>
        /// <param name="reader">Text reader that supplies the FASTA content to parse.</param>
        /// <returns>A <see cref="SequenceList"/> with one converted entry per parsed sequence.</returns>
        public override SequenceList Read(TextReader reader)
        {
            // Parsing is delegated to the FASTA parser through the parser interface.
            ISequenceParser parser = new FastaParser();
            var parsedSequences = parser.Parse(reader);

            SequenceList converted = new SequenceList();

            // Each parsed item is cast to Sequence before it is converted.
            foreach (Sequence parsed in parsedSequences)
            {
                converted.Add(ConvertToBioPatMLSeq(parsed));
            }

            return converted;
        }
Beispiel #2
0
        /// <summary>
        /// Round-trip check for the FASTA formatter: parses the configured FASTA file, writes every
        /// parsed sequence to a temp file with the formatter's Format() method, parses the temp file
        /// again and asserts that the sequence count, IDs and sequence strings are unchanged.
        /// </summary>
        public void FastaFormatterWithParseValidateFormat()
        {
            // Gets the expected sequence from the Xml
            string filePath = Utility._xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                            Constants.FilePathNode);

            Assert.IsTrue(File.Exists(filePath));

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format(null,
                                                   "FastA Formatter BVT: File Exists in the Path '{0}'.", filePath));

            FastaFormatter formatter = new FastaFormatter();

            // Read the original file
            FastaParser parser = new FastaParser();
            IList<ISequence> seqsOriginal = parser.Parse(filePath);
            Assert.IsNotNull(seqsOriginal);

            // Use the formatter to write the original sequences to a temp file
            ApplicationLog.WriteLine(string.Format(null,
                                                   "FastA Formatter BVT: Creating the Temp file '{0}'.",
                                                   Constants.FastaTempFileName));

            using (TextWriter writer = new StreamWriter(Constants.FastaTempFileName))
            {
                foreach (Sequence s in seqsOriginal)
                {
                    formatter.Format(s, writer);
                }
            }

            // Read the new file with a fresh parser instance, then compare the sequences
            parser = new FastaParser();
            IList<ISequence> seqsNew = parser.Parse(Constants.FastaTempFileName);
            Assert.IsNotNull(seqsNew);
            ApplicationLog.WriteLine(string.Format(null,
                                                   "FastA Formatter BVT: New Sequence is '{0}'.",
                                                   seqsNew[0].ToString()));

            // Now compare the sequences. The Count property of IList<T> is used directly
            // rather than enumerating through LINQ's Count().
            int countOriginal = seqsOriginal.Count;
            int countNew      = seqsNew.Count;

            Assert.AreEqual(countOriginal, countNew);
            ApplicationLog.WriteLine("The Number of sequences are matching.");

            for (int i = 0; i < countOriginal; i++)
            {
                Assert.AreEqual(seqsOriginal[i].ID, seqsNew[i].ID);
                string orgSeq = seqsOriginal[i].ToString();
                string newSeq = seqsNew[i].ToString();
                Assert.AreEqual(orgSeq, newSeq);
                Console.WriteLine(string.Format(null,
                                                "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method and is as expected.",
                                                seqsNew[i].ID));
                ApplicationLog.WriteLine(string.Format(null,
                                                       "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method.",
                                                       seqsNew[i].ID));
            }

            // Passed all the tests, delete the tmp file. If we failed an Assert,
            // the tmp file will still be there in case we need it for debugging.
            File.Delete(Constants.FastaTempFileName);
            ApplicationLog.WriteLine("Deleted the temp file created.");
        }
Beispiel #3
0
        /// <summary>
        /// Performance run for the Needleman-Wunsch aligner: aligns the first sequence of the
        /// configured reference and query FASTA files with AlignSimple(), measuring elapsed time
        /// and the change in GC-reported memory, then prints the details and the alignment score.
        /// </summary>
        public void PerformNeedlemanWunschPerf()
        {
            // Resolve the reference, query and similarity-matrix file paths from the XML configuration.
            string referenceFile = Utility._xmlUtil.GetTextValue(
                Constants.AlignmentAlgorithmNodeName, Constants.RefFilePathNode);
            string queryFile = Utility._xmlUtil.GetTextValue(
                Constants.AlignmentAlgorithmNodeName, Constants.QueryFilePathNode);
            string matrixFile = Utility._xmlUtil.GetTextValue(
                Constants.AlignmentAlgorithmNodeName, Constants.SMFilePathNode);

            // Input files that are passed on to the test-case header.
            List<string> inputFiles = new List<string> { referenceFile, queryFile };

            // Each file is parsed with its own parser instance.
            FastaParser fastaParser = new FastaParser();
            IList<ISequence> referenceSeqs = fastaParser.Parse(referenceFile);

            fastaParser = new FastaParser();
            IList<ISequence> querySeqs = fastaParser.Parse(queryFile);

            // Rebuild the first sequence of each file as a DNA sequence from its string form.
            IAlphabet dna = Alphabets.DNA;
            ISequence firstReference = referenceSeqs[0];
            ISequence firstQuery = querySeqs[0];

            ISequence aInput = new Sequence(dna, firstReference.ToString());
            ISequence bInput = new Sequence(dna, firstQuery.ToString());

            SimilarityMatrix matrix = new SimilarityMatrix(matrixFile);

            // Configure the aligner under test.
            nwObj = new NeedlemanWunschAligner();
            nwObj.GapOpenCost = -10;
            nwObj.GapExtensionCost = -10;
            nwObj.SimilarityMatrix = matrix;

            // Start timing, then sample the managed heap size before aligning.
            _watchObj = new Stopwatch();
            _watchObj.Reset();
            _watchObj.Start();
            long bytesBefore = GC.GetTotalMemory(false);

            // Align sequences using the Needleman-Wunsch algorithm.
            IList<IPairwiseSequenceAlignment> result = nwObj.AlignSimple(aInput, bInput);

            _watchObj.Stop();
            long bytesAfter = GC.GetTotalMemory(false);

            long bytesDelta = bytesAfter - bytesBefore;
            string memoryUsed = bytesDelta.ToString();

            // Display Needleman-Wunsch perf test case execution details.
            DisplayTestCaseHeader(inputFiles, _watchObj, memoryUsed, "NeedlemanWunsch");

            string scoreText = result[0].PairwiseAlignedSequences[0].Score.ToString();
            Console.WriteLine(string.Format(
                "Needleman Wunsch AlignSimple() method, Alignment Score is : {0}",
                scoreText));

            // Release the aligner reference.
            nwObj = null;
        }
Beispiel #4
0
        /// <summary>
        /// Validate formatted BAM file: parses the configured BAM file, writes it back out through
        /// the formatter overload selected by <paramref name="BAMParserPam"/>, re-parses the written
        /// file and checks its header records, aligned-sequence count and aligned sequences
        /// against the expected values.
        /// </summary>
        /// <param name="nodeName">Different xml nodes used for different test cases</param>
        /// <param name="BAMParserPam">BAM Format method parameters</param>
        void ValidateBAMFormatter(string nodeName,
                                  BAMParserParameters BAMParserPam)
        {
            // Get input and output values from xml node.
            string bamFilePath = _utilityObj._xmlUtil.GetTextValue(nodeName,
                                                                   Constants.FilePathNode);
            string expectedAlignedSeqFilePath = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequence);
            string alignedSeqCount = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.AlignedSeqCountNode);

            using (BAMParser bamParserObj = new BAMParser())
            {
                // Parse a BAM file.
                SequenceAlignmentMap seqAlignment = bamParserObj.Parse(bamFilePath);

                // Create a BAM formatter object.
                BAMFormatter formatterObj = new BAMFormatter();

                // Write/Format aligned sequences to BAM file.
                switch (BAMParserPam)
                {
                case BAMParserParameters.StreamWriter:
                    using (Stream stream = new FileStream(Constants.BAMTempFileName,
                                                          FileMode.Create, FileAccess.Write))
                    {
                        formatterObj.Format(seqAlignment, stream);
                    }
                    break;

                case BAMParserParameters.FileName:
                    formatterObj.Format(seqAlignment, Constants.BAMTempFileName);
                    break;

                case BAMParserParameters.IndexFile:
                    formatterObj.Format(seqAlignment, Constants.BAMTempFileName,
                                        Constants.BAMTempIndexFile);

                    // The result of File.Exists was previously discarded, so a missing
                    // index file went unnoticed; assert it explicitly.
                    Assert.IsTrue(File.Exists(Constants.BAMTempIndexFile));
                    break;

                default:
                    break;
                }

                // Parse formatted BAM file and validate aligned sequences.
                SequenceAlignmentMap expectedSeqAlignmentMap = bamParserObj.Parse(
                    Constants.BAMTempFileName);

                // Validate Parsed BAM file Header record fields.
                ValidateBAMHeaderRecords(nodeName, expectedSeqAlignmentMap);

                IList<SAMAlignedSequence> alignedSeqs = expectedSeqAlignmentMap.QuerySequences;

                Assert.AreEqual(alignedSeqCount, alignedSeqs.Count.ToString((IFormatProvider)null));

                // Get expected sequences
                using (FastaParser parserObj = new FastaParser())
                {
                    IList<ISequence> expectedSequences = parserObj.Parse(expectedAlignedSeqFilePath);

                    // Validate aligned sequences from BAM file.
                    for (int index = 0; index < alignedSeqs.Count; index++)
                    {
                        Assert.AreEqual(expectedSequences[index].ToString(),
                                        alignedSeqs[index].QuerySequence.ToString());

                        // Log to NUNIT GUI.
                        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                               "BAM Formatter BVT : Validated Aligned sequence :{0} successfully",
                                                               alignedSeqs[index].QuerySequence.ToString()));
                        Console.WriteLine(string.Format((IFormatProvider)null,
                                                        "BAM Formatter BVT : Validated the aligned sequence :{0} successfully",
                                                        alignedSeqs[index].QuerySequence.ToString()));
                    }
                }
            }

            // Remove the temp files once validation has passed.
            File.Delete(Constants.BAMTempFileName);
            File.Delete(Constants.BAMTempIndexFile);
        }
Beispiel #5
0
        /// <summary>
        /// Runs the PAMSAM multiple sequence aligner on the BOX246 benchmark file and prints the
        /// aligned sequences and alignment scores of every stage to the console.
        /// The method makes no assertions; it only reports results.
        /// </summary>
        public void TestMuscleMultipleSequenceAlignmentRunningTime()
        {
            // The using block disposes the parser even when parsing or alignment throws;
            // previously Dispose() was only reached on the success path.
            using (FastaParser fastaParser = new FastaParser())
            {
                // Benchmark dataset; the sequences are handled as protein (see mt below).
                ISequenceParser parser   = fastaParser;
                string          filepath = @"TestUtils\FASTA\RunningTime\BOX246.xml.afa";

                MoleculeType mt = MoleculeType.Protein;

                IList <ISequence> orgSequences = parser.Parse(filepath);

                // Un-aligned copies of the benchmark sequences are the aligner input.
                List <ISequence> sequences = MsaUtils.UnAlign(orgSequences);

                int numberOfSequences = orgSequences.Count;

                Console.WriteLine("Original sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(sequences[i].ToString());
                }

                Console.WriteLine("Benchmark sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(orgSequences[i].ToString());
                }

                PAMSAMMultipleSequenceAligner.FasterVersion = true;
                PAMSAMMultipleSequenceAligner.UseWeights    = false;
                PAMSAMMultipleSequenceAligner.UseStageB     = false;
                PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

                int gapOpenPenalty   = -13;
                int gapExtendPenalty = -5;
                int kmerLength       = 2;

                int numberOfDegrees    = 2;  //Environment.ProcessorCount;
                int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

                DistanceFunctionTypes      distanceFunctionName             = DistanceFunctionTypes.EuclideanDistance;
                UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
                ProfileAlignerNames        profileAlignerName         = ProfileAlignerNames.NeedlemanWunschProfileAligner;
                ProfileScoreFunctionNames  profileProfileFunctionName = ProfileScoreFunctionNames.InnerProductFast;

                SimilarityMatrix similarityMatrix = null;

                // Pick the standard similarity matrix that matches the molecule type.
                switch (mt)
                {
                case (MoleculeType.DNA):
                    similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
                    break;

                case (MoleculeType.RNA):
                    similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
                    break;

                case (MoleculeType.Protein):
                    similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
                    break;

                default:
                    throw new Exception("Invalid molecular type");
                }

                PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                                                        (sequences, mt, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                        profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                                                        numberOfPartitions, numberOfDegrees);

                Console.WriteLine("The number of partitions is: {0}", numberOfPartitions);
                Console.WriteLine("The number of degrees is: {0}", numberOfDegrees);
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));

                Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));

                // Stage 1 results.
                Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
                for (int i = 0; i < msa.AlignedSequencesA.Count; ++i)
                {
                    Console.WriteLine(msa.AlignedSequencesA[i].ToString());
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

                // Stage 2 results.
                Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
                for (int i = 0; i < msa.AlignedSequencesB.Count; ++i)
                {
                    Console.WriteLine(msa.AlignedSequencesB[i].ToString());
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

                // Stage 3 results.
                Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
                for (int i = 0; i < msa.AlignedSequencesC.Count; ++i)
                {
                    Console.WriteLine(msa.AlignedSequencesC[i].ToString());
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

                // Final results.
                Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
                for (int i = 0; i < msa.AlignedSequences.Count; ++i)
                {
                    Console.WriteLine(msa.AlignedSequences[i].ToString());
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
            }
        }
Beispiel #6
0
        /// <summary>
        /// Exercises the progressive aligner in three scenarios: three short DNA sequences whose
        /// aligned output is asserted, ten short DNA sequences whose guide tree, weights and
        /// alignment are printed, and the BB11001 protein FASTA file whose guide tree and
        /// alignment are printed.
        /// </summary>
        public void TestProgressiveAligner()
        {
            MsaUtils.SetProfileItemSets(MoleculeType.DNA);

            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpenPenalty   = -8;
            int gapExtendPenalty = -1;
            int kmerLength       = 4;

            PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions {
                MaxDegreeOfParallelism = 2
            };

            // Scenario 1: three DNA sequences with asserted alignment output.
            ISequence        seqA      = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
            ISequence        seqB      = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");
            ISequence        seqC      = new Sequence(Alphabets.DNA, "GGGACAAAATCAG");
            List <ISequence> sequences = new List <ISequence>();

            sequences.Add(seqA);
            sequences.Add(seqB);
            sequences.Add(seqC);

            KmerDistanceMatrixGenerator kmerDistanceMatrixGenerator =
                new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);

            kmerDistanceMatrixGenerator.GenerateDistanceMatrix(sequences);

            IHierarchicalClustering hierarchicalClustering = new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix);

            BinaryGuideTree tree = new BinaryGuideTree(hierarchicalClustering);

            IProgressiveAligner progressiveAligner = new ProgressiveAligner(ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

            progressiveAligner.Align(sequences, tree);

            ISequence expectedSeqA = new Sequence(Alphabets.DNA, "GGGA---AAAATCAGATT");
            ISequence expectedSeqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---");
            ISequence expectedSeqC = new Sequence(Alphabets.DNA, "GGGA--CAAAATCAG---");

            Assert.AreEqual(expectedSeqA.ToString(), progressiveAligner.AlignedSequences[0].ToString());
            Assert.AreEqual(expectedSeqB.ToString(), progressiveAligner.AlignedSequences[1].ToString());
            Assert.AreEqual(expectedSeqC.ToString(), progressiveAligner.AlignedSequences[2].ToString());

            // Scenario 2: ten DNA sequences; tree, weights and alignment are only printed.
            sequences = new List <ISequence>();
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAAATCG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGACAAAATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGACAAAATCAG"));

            kmerDistanceMatrixGenerator.GenerateDistanceMatrix(sequences);

            hierarchicalClustering = new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix);

            tree = new BinaryGuideTree(hierarchicalClustering);

            for (int i = 0; i < tree.NumberOfNodes; ++i)
            {
                Console.WriteLine("Node {0} ID: {1}", i, tree.Nodes[i].ID);
            }
            for (int i = 0; i < tree.NumberOfEdges; ++i)
            {
                Console.WriteLine("Edge {0} ID: {1}, length: {2}", i, tree.Edges[i].ID, tree.Edges[i].Length);
            }

            SequenceWeighting sw = new SequenceWeighting(tree);

            for (int i = 0; i < sw.Weights.Length; ++i)
            {
                Console.WriteLine("weights {0} is {1}", i, sw.Weights[i]);
            }

            progressiveAligner = new ProgressiveAligner(ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
            progressiveAligner.Align(sequences, tree);
            for (int i = 0; i < progressiveAligner.AlignedSequences.Count; ++i)
            {
                Console.WriteLine(progressiveAligner.AlignedSequences[i].ToString());
            }

            // Scenario 3: protein sequences read from a FASTA file; output is only printed.
            MsaUtils.SetProfileItemSets(MoleculeType.Protein);

            // The using block disposes the parser even when a later step throws;
            // previously Dispose() was only reached on the success path.
            using (FastaParser fastaParser = new FastaParser())
            {
                ISequenceParser   parser       = fastaParser;
                string            filepath     = @"TestUtils\FASTA\Protein\BB11001.tfa";
                IList <ISequence> orgSequences = parser.Parse(filepath);

                sequences = MsaUtils.UnAlign(orgSequences);

                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
                kmerLength       = 4;

                gapOpenPenalty   = -13;
                gapExtendPenalty = -5;

                // NOTE(review): the generator is built with MoleculeType.DNA although the profile
                // item sets and similarity matrix in this scenario are protein - confirm whether
                // MoleculeType.Protein was intended.
                kmerDistanceMatrixGenerator =
                    new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);

                kmerDistanceMatrixGenerator.GenerateDistanceMatrix(sequences);

                hierarchicalClustering = new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix);

                tree = new BinaryGuideTree(hierarchicalClustering);

                // Only nodes from index NumberOfLeaves onwards are printed with their children.
                for (int i = tree.NumberOfLeaves; i < tree.Nodes.Count; ++i)
                {
                    Console.WriteLine("Node {0}: leftchildren-{1}, rightChildren-{2}", i, tree.Nodes[i].LeftChildren.ID, tree.Nodes[i].RightChildren.ID);
                }
                progressiveAligner = new ProgressiveAligner(ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
                progressiveAligner.Align(sequences, tree);
                for (int i = 0; i < progressiveAligner.AlignedSequences.Count; ++i)
                {
                    Console.WriteLine(progressiveAligner.AlignedSequences[i].ToString());
                }
            }
        }
Beispiel #7
0
        /// <summary>
        /// Exercises the editing members of Sequence (Add, Remove, RemoveAt, RemoveRange, Replace,
        /// ReplaceRange, Insert, InsertRange, Contains) on every sequence parsed from
        /// 5_sequences.fasta with data virtualization enforced. Each edit is mirrored on a plain
        /// string and the two are compared after every step.
        /// </summary>
        public void AllEditableScenarios()
        {
            string filepathOriginal = @"TestUtils\Fasta\5_sequences.fasta";

            Assert.IsTrue(File.Exists(filepathOriginal));

            // Expected content of the file, one entry per sequence; mutated in step with the edits below.
            string[] expectedSequences = new string[] {
                "KRIPKSQNLRSIHSIFPFLEDKLSHLN",
                "LNIPSLITLNKSIYVFSKRKKRLSGFLHN",
                "HEAGAWGHEEHEAGAWGHEEHEAGAWGHEE",
                "PAWHEAEPAWHEAEPAWHEAEPAWHEAEPAWHEAE",
                "CGGUCCCGCGGUCCCGCGGUCCCGCGGUCCCG"
            };

            // The using block replaces the manual try/finally dispose.
            using (FastaParser fastaParser = new FastaParser())
            {
                fastaParser.EnforceDataVirtualization = true;

                IList <ISequence> sequences = fastaParser.Parse(filepathOriginal, true);
                int sequenceCount = sequences.Count;

                for (int i = 0; i < sequenceCount; i++)
                {
                    // Direct cast: an unexpected element type fails here with an
                    // InvalidCastException rather than a later NullReferenceException.
                    Sequence actualSequence = (Sequence)sequences[i];
                    actualSequence.IsReadOnly = false;
                    ISequenceItem item = actualSequence[1];

                    // Append the item.
                    actualSequence.Add(item);
                    expectedSequences[i] += item.Symbol;
                    Assert.AreEqual(expectedSequences[i], actualSequence.ToString());

                    // Remove the item; mirrored by removing the first occurrence of its symbol.
                    actualSequence.Remove(item);
                    int indexOfItem = expectedSequences[i].IndexOf(item.Symbol);
                    expectedSequences[i] = expectedSequences[i].Remove(indexOfItem, 1);
                    Assert.AreEqual(expectedSequences[i], actualSequence.ToString());

                    actualSequence.RemoveAt(0);
                    expectedSequences[i] = expectedSequences[i].Remove(0, 1);
                    Assert.AreEqual(expectedSequences[i], actualSequence.ToString());

                    actualSequence.RemoveRange(2, 5);
                    expectedSequences[i] = expectedSequences[i].Remove(2, 5);
                    Assert.AreEqual(expectedSequences[i], actualSequence.ToString());

                    actualSequence.Replace(0, 'C');
                    expectedSequences[i] = expectedSequences[i].Remove(0, 1);
                    expectedSequences[i] = expectedSequences[i].Insert(0, "C");
                    Assert.AreEqual(expectedSequences[i], actualSequence.ToString());

                    actualSequence.ReplaceRange(3, "GG");
                    expectedSequences[i] = expectedSequences[i].Remove(3, 2);
                    expectedSequences[i] = expectedSequences[i].Insert(3, "GG");
                    Assert.AreEqual(expectedSequences[i], actualSequence.ToString());

                    actualSequence.Insert(3, item);
                    expectedSequences[i] = expectedSequences[i].Insert(3, item.Symbol.ToString());
                    Assert.AreEqual(expectedSequences[i], actualSequence.ToString());

                    actualSequence.InsertRange(2, "CC");
                    expectedSequences[i] = expectedSequences[i].Insert(2, "CC");
                    Assert.AreEqual(expectedSequences[i], actualSequence.ToString());

                    bool actualContainsValue   = actualSequence.Contains(actualSequence[3]);
                    bool expectedContainsValue = expectedSequences[i].Contains(actualSequence[3].Symbol.ToString());

                    // Expected value first, so a failure message reports the right roles.
                    Assert.AreEqual(expectedContainsValue, actualContainsValue);
                }
            }
        }
Beispiel #8
0
        /// <summary>
        /// Cross-checks the FASTA parser against a simple line-by-line reading of
        /// uniprot-dutpase.fasta: both must yield 2015 records, and for each record the parsed
        /// sequence string, its byte length and its ID must match the manually read values.
        /// </summary>
        public void FastaForUniprotDutpase()
        {
            int    expectedSequenceCount = 2015;
            string filepath = @"TestUtils\FASTA\uniprot-dutpase.fasta";

            Assert.IsTrue(File.Exists(filepath));

            List <string> headers   = new List <string>();
            List <string> sequences = new List <string>();

            // Reference reading: lines starting with '>' are headers, and the lines that
            // follow are concatenated into one sequence string until the next header.
            using (StreamReader reader = File.OpenText(filepath))
            {
                string        line = null;
                StringBuilder s    = null;
                while ((line = reader.ReadLine()) != null)
                {
                    // The '>' marker is a format token, so compare ordinally rather than by culture.
                    if (line.StartsWith(">", StringComparison.Ordinal))
                    {
                        if (s != null)
                        {
                            sequences.Add(s.ToString());
                            s = null;
                        }
                        headers.Add(line);
                    }
                    else
                    {
                        if (s == null)
                        {
                            s = new StringBuilder();
                        }
                        s.Append(line);
                    }
                }

                // Flush the last sequence, which has no following header.
                if (s != null)
                {
                    sequences.Add(s.ToString());
                }
            }
            Assert.AreEqual(expectedSequenceCount, headers.Count);
            Assert.AreEqual(expectedSequenceCount, sequences.Count);

            // The using block disposes the parser even when an assertion below fails;
            // previously Dispose() was only reached on the success path.
            using (FastaParser parser = new FastaParser())
            {
                IList <ISequence> seqs = null;

                using (StreamReader reader = File.OpenText(filepath))
                {
                    seqs = parser.Parse(reader);
                }
                Assert.IsNotNull(seqs);
                Assert.AreEqual(expectedSequenceCount, seqs.Count);

                for (int i = 0; i < expectedSequenceCount; i++)
                {
                    Sequence seq = (Sequence)seqs[i];
                    Assert.IsNotNull(seq);
                    Assert.AreEqual(sequences[i], seq.ToString());

                    // Copy the sequence out through its IList<byte> view and compare lengths.
                    byte[] tmpEncodedSeq = new byte[seq.Count];
                    ((IList <byte>)seq).CopyTo(tmpEncodedSeq, 0);
                    Assert.AreEqual(sequences[i].Length, tmpEncodedSeq.Length);

                    // The ID is expected to be the header line without its leading '>'.
                    Assert.AreEqual(headers[i].Substring(1), seq.ID);
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// General method to validate SAM parser method: parses the configured SAM file through
        /// the Parse overload selected by <paramref name="method"/> and compares every sequence
        /// of every query alignment against the expected FASTA sequences.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="method">enum type to execute different overload</param>
        void ValidateSAMParserSeqAlign(
            string nodeName,
            ParseOrFormatTypes method)
        {
            // Gets the expected sequence from the Xml
            string filePath = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string expectedSequenceFile = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequence);

            using (SAMParser parser = new SAMParser())
            {
                SequenceAlignmentMap alignments = null;

                // Parse SAM File
                switch (method)
                {
                case ParseOrFormatTypes.ParseOrFormatText:
                    using (TextReader reader = new StreamReader(filePath))
                    {
                        alignments = parser.Parse(reader);
                    }
                    break;

                case ParseOrFormatTypes.ParseOrFormatTextWithFlag:
                    using (TextReader reader = new StreamReader(filePath))
                    {
                        alignments = parser.Parse(reader, true);
                    }
                    break;

                case ParseOrFormatTypes.ParseOrFormatFileName:
                    alignments = parser.Parse(filePath);
                    break;

                case ParseOrFormatTypes.ParseOrFormatFileNameWithFlag:
                    alignments = parser.Parse(filePath, true);
                    break;
                }

                // An enum value not handled above leaves alignments null; fail with a clear
                // assertion here instead of a NullReferenceException in the loop below.
                Assert.IsNotNull(alignments,
                                 "No SAM Parse overload is mapped to the requested ParseOrFormatTypes value.");

                // Get expected sequences
                using (FastaParser parserObj = new FastaParser())
                {
                    IList <ISequence> expectedSequences =
                        parserObj.Parse(expectedSequenceFile);

                    // Validate parsed output with expected output. Every sequence of the
                    // alignment at a given index is compared with the expected sequence
                    // at that same index.
                    for (int index = 0;
                         index < alignments.QuerySequences.Count;
                         index++)
                    {
                        for (int count = 0;
                             count < alignments.QuerySequences[index].Sequences.Count;
                             count++)
                        {
                            Assert.AreEqual(expectedSequences[index].ToString(),
                                            alignments.QuerySequences[index].Sequences[count].ToString());
                        }
                    }
                }
            }
        }
Beispiel #10
0
        /// <summary>
        /// Round-trips a SAM file through SAMFormatter: the file is parsed, written to
        /// Constants.SAMTempFileName with the Format overload selected by
        /// <paramref name="formatTypes"/>, parsed again from that temp file and then
        /// compared, sequence by sequence, with the expected FASTA file.
        /// </summary>
        /// <param name="nodeName">xml node name holding the SAM file path and the expected sequence file</param>
        /// <param name="formatTypes">enum value choosing the Format overload to execute</param>
        void ValidateSAMFormatter(string nodeName,
                                  ParseOrFormatTypes formatTypes)
        {
            // Look up the input file and the expected-output file in the test xml
            string samFile = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string expectedFastaFile = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequence);
            ISequenceAlignmentParser parser = new SAMParser();

            try
            {
                IList<ISequenceAlignment> parsed = parser.Parse(samFile);
                SAMFormatter formatter = new SAMFormatter();

                // Write the alignments back out through the requested overload.
                // Any other enum value writes nothing, exactly as before.
                if (formatTypes == ParseOrFormatTypes.ParseOrFormatText)
                {
                    using (TextWriter writer =
                               new StreamWriter(Constants.SAMTempFileName))
                    {
                        formatter.Format(parsed[0], writer);
                    }
                }
                else if (formatTypes == ParseOrFormatTypes.ParseOrFormatFileName)
                {
                    formatter.Format(parsed[0], Constants.SAMTempFileName);
                }
                else if (formatTypes == ParseOrFormatTypes.ParseOrFormatFileNameWithFlag)
                {
                    formatter.Format(parsed, Constants.SAMTempFileName);
                }

                // Read the formatted file back in
                IList<ISequenceAlignment> roundTripped = parser.Parse(Constants.SAMTempFileName);

                using (FastaParser fastaParser = new FastaParser())
                {
                    IList<ISequence> expectedSequences = fastaParser.Parse(expectedSequenceFile: expectedFastaFile == null ? null : expectedFastaFile);

                    // Expected sequences are consumed in the same order in which the
                    // alignments, their aligned sequences and their sequences are visited.
                    int expectedIndex = 0;
                    foreach (ISequenceAlignment alignment in roundTripped)
                    {
                        foreach (var aligned in alignment.AlignedSequences)
                        {
                            foreach (ISequence sequence in aligned.Sequences)
                            {
                                Assert.AreEqual(expectedSequences[expectedIndex].ToString(),
                                                sequence.ToString());
                                expectedIndex++;
                            }
                        }
                    }
                }
            }
            finally
            {
                (parser as SAMParser).Dispose();
            }
        }
Beispiel #11
0
        /// <summary>
        /// Parses a single alignment from a SAM file with the ParseOne overload selected
        /// by <paramref name="parseTypes"/> (file path or TextReader, with or without the
        /// boolean flag) and compares every parsed sequence with the expected FASTA file.
        /// </summary>
        /// <param name="nodeName">xml node name holding the SAM file path and the expected sequence file</param>
        /// <param name="parseTypes">enum value choosing the ParseOne overload to execute</param>
        void ValidateSAMParserWithParseOne(string nodeName,
                                           ParseOrFormatTypes parseTypes)
        {
            // Look up the input file and the expected-output file in the test xml
            string samFile = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string expectedFastaFile = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequence);
            ISequenceAlignmentParser parser = new SAMParser();

            try
            {
                ISequenceAlignment parsedAlignment = null;

                // Exercise the requested ParseOne overload
                switch (parseTypes)
                {
                case ParseOrFormatTypes.ParseOrFormatFileName:
                    parsedAlignment = parser.ParseOne(samFile);
                    break;

                case ParseOrFormatTypes.ParseOrFormatFileNameWithFlag:
                    parsedAlignment = parser.ParseOne(samFile, true);
                    break;

                case ParseOrFormatTypes.ParseOrFormatText:
                    using (TextReader samReader = new StreamReader(samFile))
                    {
                        parsedAlignment = parser.ParseOne(samReader);
                    }
                    break;

                case ParseOrFormatTypes.ParseOrFormatTextWithFlag:
                    using (TextReader samReader = new StreamReader(samFile))
                    {
                        parsedAlignment = parser.ParseOne(samReader, true);
                    }
                    break;
                }

                using (FastaParser fastaParser = new FastaParser())
                {
                    IList<ISequence> expectedSequences = fastaParser.Parse(expectedFastaFile);

                    // Expected sequences are consumed in visiting order
                    int expectedIndex = 0;
                    foreach (var aligned in parsedAlignment.AlignedSequences)
                    {
                        foreach (ISequence sequence in aligned.Sequences)
                        {
                            Assert.AreEqual(expectedSequences[expectedIndex].ToString(),
                                            sequence.ToString());
                            expectedIndex++;
                        }
                    }
                }
            }
            finally
            {
                (parser as SAMParser).Dispose();
            }
        }
Beispiel #12
0
        /// <summary>
        /// Runs every sequence of a FASTA file through the BLAST service and stores each
        /// result as "&lt;position&gt;.xml" in the project's xml folder.
        /// Sequences whose result file already exists and parses are skipped; all others
        /// are queued, submitted into free slots of a BlastQueue, polled until the service
        /// reports a status, and re-queued when the job errors out, is cancelled or
        /// returns a result that cannot be parsed.
        /// </summary>
        /// <param name="filename">Path of the FASTA file containing the query sequences.</param>
        /// <exception cref="InvalidOperationException">
        /// Submitting the next sequence after a finished job threw; the original
        /// exception is attached as the inner exception.
        /// </exception>
        private void DoBLAST(string filename)
        {
            // Load protein sequences
            FastaParser parser = new FastaParser();
            IList<ISequence> queryList = parser.Parse(filename).ToList();

            // Initialize and populate queue of query sequences
            Queue<QueueSequence> queryQueue = new Queue<QueueSequence>();
            int position = 0; // Input order of the sequence; also names its result file
            int progValue = 0;
            int currentProgress = 0;
            foreach (ISequence protein in queryList)
            {
                QueueSequence qp = new QueueSequence();
                qp.Sequence = protein;
                qp.Position = position;
                string cachedPath = Up.ProjectDir + "\\xml\\" + position.ToString() + ".xml";
                position++;
                if (File.Exists(cachedPath))
                {
                    BlastXmlParser cachedParser = new BlastXmlParser();
                    try
                    {
                        // Only the fact that the file parses matters here; the records are not used.
                        cachedParser.Parse(cachedPath);

                        // Count this sequence before computing the percentage so the bar
                        // includes it (matches how finished jobs are counted below).
                        currentProgress++;
                        progValue = Convert.ToInt32(Math.Round((double)currentProgress / queryList.Count * 100, 0));
                        UpdateProgressBar(progValue, "Validating BLAST results.");
                    }
                    catch
                    {
                        // Existing result is unusable: BLAST this sequence again.
                        queryQueue.Enqueue(qp);
                    }
                }
                else
                {
                    queryQueue.Enqueue(qp);
                }
            }

            // Initialize BLAST queue positions to having no jobs (EMPTY)
            BlastQueue blastQueue = new BlastQueue();
            // While there are proteins left to submit to BLAST, or there are 
            // busy jobs still on the queue
            UpdateProgressBar(progValue, "Starting up BLAST service, please wait.");
            while (queryQueue.Count > 0 || blastQueue.isBlastQueueBusy())
            {
                // Iterate over blastQueue
                for (int i = 0; i < BlastQueue.Length; i++)
                {
                    // Get blastJob from array and update status
                    BlastJob blastJob = blastQueue[i];

                    // Rebuild the queue entry of the job in this slot so it can be re-queued.
                    QueueSequence qp = new QueueSequence();
                    qp.Sequence = blastJob.Query;
                    qp.Position = blastJob.Position;
                    // if queue position is AVAILABLE
                    if (blastJob.JobStatus == BlastJob.AVAILABLE)
                    {
                        if (queryQueue.Count > 0)
                        {
                            QueueSequence qp2 = queryQueue.Dequeue();
                            // try to submit job, enqueue back the protein if submission failed.
                            try
                            {
                                blastQueue[i] = submit(qp2);
                                if (blastQueue[i].JobStatus == BlastJob.FAILED)
                                {
                                    blastQueue[i].JobStatus = BlastJob.AVAILABLE;
                                    queryQueue.Enqueue(qp2);
                                }
                                else
                                {
                                    UpdateProgressBar(progValue, "Submitting sequences to NCBI BLAST");
                                }
                            }
                            catch (Exception ex)
                            {
                                // NOTE(review): qp2 is not re-queued on this path, so the
                                // sequence gets no result file - confirm this is intended.
                                MessageBox.Show(ex.Message);
                            }
                        }
                    }
                    else
                    {
                        string jobId = blastJob.JobId;
                        NCBIBlastHandler blastService = blastJob.BlastService;
                        ServiceRequestInformation info = blastService.GetRequestStatus(jobId);
                        Thread.Sleep(BlastQueue.RequestDelay);
                        switch (info.Status)
                        {
                            case ServiceRequestStatus.Error:
                                blastQueue[i].JobStatus = BlastJob.AVAILABLE;
                                queryQueue.Enqueue(qp);
                                break;
                            case ServiceRequestStatus.Canceled:
                                blastQueue[i].JobStatus = BlastJob.AVAILABLE;
                                queryQueue.Enqueue(qp);
                                break;
                            case ServiceRequestStatus.Ready:
                                string result = blastService.GetResult(jobId, blastJob.SearchParams);
                                string name = blastJob.Position.ToString();
                                string resultPath = Up.ProjectDir + "\\xml\\" + name + ".xml";

                                // 'using' releases the file handle even if the write fails.
                                using (TextWriter tw = new StreamWriter(resultPath))
                                {
                                    tw.Write(result);
                                }
                                Debug.WriteLine("BLAST JOB: " + jobId + " , " + name + " , " + info.StatusInformation);

                                // Added by VF on Jan, 22, 2013. Catches invalid BLAST records
                                BlastXmlParser resultParser = new BlastXmlParser();
                                bool parsePassed = false;
                                int fetchAttempts = 0;
                                while (!parsePassed && fetchAttempts < 3)
                                {
                                    try
                                    {
                                        resultParser.Parse(resultPath);
                                        parsePassed = true;
                                        Debug.WriteLine("FETCH OK  JobId: " + jobId + " InputOrder: " + name + ". This is attempt:" + fetchAttempts.ToString());
                                    }
                                    catch (Exception)
                                    {
                                        // Saved result does not parse: download it again and overwrite the file.
                                        Debug.WriteLine("Trying to fetch JobId: " + jobId + " InputOrder: " + name + ". This is attempt: " + fetchAttempts.ToString());
                                        parsePassed = false;
                                        result = blastService.GetResult(jobId, blastJob.SearchParams);
                                        using (TextWriter tw2 = new StreamWriter(resultPath))
                                        {
                                            tw2.Write(result);
                                        }
                                        fetchAttempts += 1;
                                    }
                                    Thread.Sleep(1000);
                                }

                                // Final check. This also covers the file written by the last
                                // re-fetch above, which the retry loop itself never parsed.
                                try
                                {
                                    resultParser.Parse(resultPath);
                                }
                                catch (Exception)
                                {
                                    blastQueue[i].JobStatus = BlastJob.AVAILABLE;
                                    queryQueue.Enqueue(qp);
                                    Debug.WriteLine("REQUEUE of JobId: " + " " + jobId + " InputOrder: " + name + " because max fetch is " + fetchAttempts.ToString());
                                    break;
                                }

                                currentProgress += 1;
                                progValue = Convert.ToInt32(Math.Round((double)currentProgress / queryList.Count * 100, 0));
                                UpdateProgressBar(progValue, "Saving");
                                blastQueue[i].JobStatus = BlastJob.AVAILABLE;

                                // Reuse the freed slot immediately for the next waiting sequence.
                                if (queryQueue.Count > 0)
                                {
                                    QueueSequence qp3 = queryQueue.Dequeue();
                                    try
                                    {
                                        blastQueue[i] = submit(qp3);

                                        if (blastQueue[i].JobStatus == BlastJob.FAILED)
                                        {
                                            blastQueue[i].JobStatus = BlastJob.AVAILABLE;
                                            queryQueue.Enqueue(qp3);
                                        }
                                        else
                                        {
                                            UpdateProgressBar(progValue, "Submitting sequences to NCBI BLAST");
                                        }
                                    }
                                    catch (Exception ex)
                                    {
                                        MessageBox.Show(ex.Message);
                                        MessageBox.Show("Error creating a jobId for sequence " + qp3.Position);
                                        // Keep the original failure attached as the inner exception.
                                        throw new InvalidOperationException("Error creating a jobId for sequence" + qp3.Position, ex);
                                    }
                                }
                                break;
                            case ServiceRequestStatus.Queued:
                                break;
                            case ServiceRequestStatus.Waiting:
                                break;
                            default:
                                MessageBox.Show("BLAST error " + info.Status + " " + blastJob.JobStatus + " for " + qp.Position);
                                break;
                        }
                    }
                }
            }
        }
Beispiel #13
0
 /// <summary>
 /// Executes the work attached to a wizard step. The step is identified by the
 /// name of its user control; names without an associated task do nothing.
 /// </summary>
 /// <param name="task">Name of the user control whose step is being left (e.g. "UserControl2").</param>
 private void RunTask(string task)
 {
     switch (task)
     {
         case "UserControl0":
         case "UserControl3":
         case "UserControl7":
             // No work is attached to these steps.
             break;

         case "UserControl1":
             Util.SetupDirectories(Up.ProjectDir);
             break;

         case "UserControl2":
             // Import the selected FASTA file when the project has none yet,
             // or whenever a FASTA file has been selected.
             if (!File.Exists(Up.ProjectDir + "\\genes.fasta") || Up.FastaFile != "")
             {
                 FastaParser fastaParser = new FastaParser();
                 try
                 {
                     Up.QuerySequences = fastaParser.Parse(Up.FastaFile).ToList();
                 }
                 catch
                 {
                     FatalErrorDialog("Error parsing FASTA file. Please confirm that the input file is in FASTA format. If the problem persists file a bug report at blip.codeplex.com.  The application will now be closed.");
                 }
                 File.Copy(Up.FastaFile, Up.ProjectDir + "\\genes.fasta", true);
             }
             break;

         case "UserControl4":
             UserControl4 blastStep = CurrentControl as UserControl4;
             string blastProgram = ((ComboBoxItem)blastStep.BlastProgram.SelectedItem).Content.ToString();
             string blastDatabase = blastStep.BlastDatabase.SelectedItem.ToString();
             string blastAlgorithm = blastStep.BlastAlgorithm.SelectedItem.ToString();
             BlastUtil.RecordBlastThresholds(Up, blastStep, blastProgram, blastDatabase, blastAlgorithm);
             LaunchBlastPipeline(Up.ProjectDir + "\\genes.fasta");
             break;

         case "UserControl5":
             LogMessage("Set Pivot parameters.\r\n");
             break;

         case "UserControl6":
             UserControl6 pivotStep = CurrentControl as UserControl6;
             // The collection name is fixed to "blip"; only the title comes from the UI.
             Pivot.RecordPivotParameters(Up, "blip", pivotStep.CollectionTitleBox.Text);
             break;

         case "UserControl8":
             UserControl8 previewStep = CurrentControl as UserControl8;
             previewStep.SaveImagePreviewState();
             WriteCollection();
             break;

         case "UserControl9":
             UserControl9 projectChoice = CurrentControl as UserControl9;
             if (projectChoice.createProject.IsChecked == true)
             {
                 // Rebuild the step list for a new project, in display order.
                 // UserControl3 is not part of this sequence.
                 Debug.WriteLine("NEW");
                 UserControls.Clear();
                 UserControls.Add(new UserControl0(Up));
                 UserControls.Add(new UserControl9(Up));
                 UserControls.Add(new UserControl1(Up));
                 UserControls.Add(new UserControl2(Up));
                 UserControls.Add(new UserControl6(Up));
                 UserControls.Add(new UserControl4(Up));
                 UserControls.Add(new UserControl5(Up));
                 UserControls.Add(new UserControl8(Up));
                 UserControls.Add(new UserControl7(Up));
             }
             if (projectChoice.loadProject.IsChecked == true)
             {
                 // Rebuild the step list for loading an existing project.
                 UserControls.Clear();
                 UserControls.Add(new UserControl0(Up));
                 UserControls.Add(new UserControl9(Up));
                 UserControls.Add(new UserControl10(Up));
                 UserControls.Add(new UserControl11(Up));
             }
             break;

         case "UserControl10":
             Debug.WriteLine("UC10");

             // Start the web server on a separate task; "BLiP_WS" is passed as the task state.
             Action<object> startServer = (object state) =>
             {
                 StartWebServer("/", Up.CxmlDir, Up.WebServerPort);
             };
             Task webServerTask = new Task(startServer, "BLiP_WS");
             webServerTask.Start();

             Previous_Button.IsEnabled = false;
             Next_Button.IsEnabled = false;
             Finish_Button.IsEnabled = true;
             break;
     }
 }
Beispiel #14
0
        /// <summary>
        /// Handles a click on the Next button: runs the task of the step being left,
        /// advances to the following step and shows its user control in the main grid.
        /// </summary>
        /// <param name="sender">Source of the click event</param>
        /// <param name="e">Event data of the click</param>
        private void Next_Click(object sender, RoutedEventArgs e)
        {
            // After moving forward there is always a step to go back to.
            Previous_Button.IsEnabled = true;

            // Run the task of the step being left, then take its control off the grid.
            RunTask(CurrentControl.Name);
            MainGrid.Children.Remove(CurrentControl);

            // Advance unless already on the last step. On the last step the
            // Next/Finish buttons are left as they are.
            int lastStep = UserControls.Count() - 1;
            if (CurrentStep < lastStep)
            {
                CurrentStep += 1;
            }
            if (CurrentStep != lastStep)
            {
                Next_Button.IsEnabled = true;
                Finish_Button.IsEnabled = false;
            }

            // Show the control that belongs to the new current step.
            UserControl uc = UserControls[CurrentStep];
            uc.VerticalAlignment = VerticalAlignment.Top;
            MainGrid.Children.Add(uc);
            Grid.SetRow(uc, 0);
            Grid.SetColumn(uc, 1);
            CurrentControl = uc;
            LogText.Text = Up.Log; // Refresh the log view

            // Special case for the FASTA upload step: Next stays disabled until a
            // file is available, either already in the project folder or selected by the user.
            if (uc.Name == "UserControl2")
            {
                Next_Button.IsEnabled = false;

                if (!File.Exists(Up.ProjectDir + "\\genes.fasta"))
                {
                    // No FASTA file yet: swap in a fresh upload control for this step.
                    MainGrid.Children.Remove(CurrentControl);
                    UserControls[CurrentStep] = new UserControl2(Up);
                    uc = UserControls[CurrentStep];
                    uc.VerticalAlignment = VerticalAlignment.Top;
                    MainGrid.Children.Add(uc);
                    Grid.SetRow(uc, 0);
                    Grid.SetColumn(uc, 1);
                    CurrentControl = uc;
                }
                else
                {
                    // The project already has a FASTA file: replace the upload UI with
                    // a notice and load the sequences from the existing file.
                    UserControl2 fastaStep = CurrentControl as UserControl2;
                    fastaStep.LoadFastaFileGrid.Children.Clear();

                    FastaParser fastaParser = new FastaParser();
                    try
                    {
                        Up.QuerySequences = fastaParser.Parse(Up.ProjectDir + "\\genes.fasta").ToList();
                    }
                    catch
                    {
                        FatalErrorDialog("Error parsing FASTA file. Please confirm that the input file is in FASTA format. If the problem persists file a bug report at blip.codeplex.com.  The application will now be closed.");
                    }

                    TextBlock notice = new TextBlock
                    {
                        Text = ("A file with " + Up.QuerySequences.Count() + " gene sequences exists in this folder.\n\nIf you want to load a new file select an empty project folder instead.\r\n"),
                        Margin = new Thickness(55)
                    };
                    fastaStep.LoadFastaFileGrid.Children.Add(notice);
                    Next_Button.IsEnabled = true;
                }

                // Re-enable navigation once the upload control reports completion.
                (uc as UserControl2).RunCompleted += (source, args) =>
                {
                    Next_Button.IsEnabled = true;
                    Previous_Button.IsEnabled = true;
                };
            }

            // The collection URL step displays the URL stored in the project settings.
            if (uc.Name == "UserControl7")
            {
                (CurrentControl as UserControl7).CollectionUrlBox.Text = Up.CollectionUrl;
            }
        }
Beispiel #15
0
        /// <summary>
        /// Benchmark run of PAMSAMMultipleSequenceAligner over the FASTA files found in
        /// the sub-directories of TestUtils\FASTA\RNA\k10. Each file is parsed, un-aligned,
        /// re-aligned, and the Q and TC scores of the new alignment against the original
        /// sequences are printed and accumulated; averages are printed every 1000 datasets
        /// and once more at the end.
        /// </summary>
        public void TestMsaBenchMarkOnBralibase()
        {
            List <float> allQ  = new List <float>();
            List <float> allTC = new List <float>();

            string        fileDirectory = @"TestUtils\FASTA\RNA\k10";
            DirectoryInfo iD            = new DirectoryInfo(fileDirectory);

            PAMSAMMultipleSequenceAligner.FasterVersion = false;
            PAMSAMMultipleSequenceAligner.UseWeights    = false;
            PAMSAMMultipleSequenceAligner.UseStageB     = false;
            PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

            MoleculeType     mt = MoleculeType.RNA;
            SimilarityMatrix similarityMatrix;
            int gapOpenPenalty   = -20;
            int gapExtendPenalty = -5;
            int kmerLength       = 4;

            int numberOfDegrees    = 2;  //Environment.ProcessorCount;
            int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

            DistanceFunctionTypes      distanceFunctionName             = DistanceFunctionTypes.EuclideanDistance;
            UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
            ProfileAlignerNames        profileAlignerName         = ProfileAlignerNames.NeedlemanWunschProfileAligner;
            ProfileScoreFunctionNames  profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProductCached;

            // Pick the similarity matrix that matches the molecule type under test
            switch (mt)
            {
            case (MoleculeType.DNA):
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
                break;

            case (MoleculeType.RNA):
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
                break;

            case (MoleculeType.Protein):
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
                break;

            default:
                throw new InvalidDataException("Invalid molecular type");
            }


            foreach (DirectoryInfo fi in iD.GetDirectories())
            {
                foreach (FileInfo fiii in fi.GetFiles())
                {
                    String filePath = fiii.FullName;
                    Console.WriteLine(filePath);
                    ISequenceParser parser = new FastaParser();

                    // Dispose the parser in 'finally' so it is released even when
                    // parsing, alignment or scoring of this file throws.
                    try
                    {
                        IList <ISequence> orgSequences = parser.Parse(filePath);

                        List <ISequence> sequences = MsaUtils.UnAlign(orgSequences);

                        int numberOfSequences = orgSequences.Count;

                        Console.WriteLine("The number of sequences is: {0}", numberOfSequences);
                        Console.WriteLine("Original unaligned sequences are:");

                        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                                                                (sequences, mt, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                                profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                                                                numberOfPartitions, numberOfDegrees);

                        Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

                        // Score the new alignment against the original sequences
                        float scoreQ  = MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences);
                        float scoreTC = MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences);
                        allQ.Add(scoreQ);
                        allTC.Add(scoreTC);
                        Console.WriteLine("Alignment score Q is: {0}", scoreQ);
                        Console.WriteLine("Alignment score TC is: {0}", scoreTC);

                        // Intermediate averages every 1000 datasets
                        if (allQ.Count % 1000 == 0)
                        {
                            Console.WriteLine(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
                            Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
                            Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
                        }
                    }
                    finally
                    {
                        ((FastaParser)parser).Dispose();
                    }
                }
            }
            Console.WriteLine("number of datasets is: {0}", allQ.Count);
            Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
            Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
        }
Beispiel #16
0
        /// <summary>
        /// General test cases for the FastaParser ParseOne() and Parse() overloads
        /// that read from a TextReader. Parses the file configured for the given
        /// node and validates the resulting sequence, its alphabet and its ID.
        /// </summary>
        /// <param name="nodeName">Xml node name used to look up the FASTA file path</param>
        /// <param name="addParam">Additional parameter selecting the parser overload to exercise</param>
        static void ParseReaderGeneralTestCases(string nodeName,
                                                AdditionalParameters addParam)
        {
            // Gets the FASTA file path from the Xml
            string filePath = Utility._xmlUtil.GetTextValue(nodeName,
                                                            Constants.FilePathNode);

            Assert.IsTrue(File.Exists(filePath));
            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format(null,
                                                   "FastA Parser BVT: File Exists in the Path '{0}'.", filePath));

            ISequence   seqsObj   = null;
            FastaParser parserObj = new FastaParser();

            switch (addParam)
            {
            case AdditionalParameters.ParseOne:
                using (TextReader reader = new StreamReader(filePath))
                {
                    seqsObj = parserObj.ParseOne(reader);
                }
                break;

            case AdditionalParameters.ParseOneReadOnly:
                using (TextReader reader = new StreamReader(filePath))
                {
                    seqsObj = parserObj.ParseOne(reader, true);
                }
                break;

            case AdditionalParameters.ParseReader:
                using (TextReader reader = new StreamReader(filePath))
                {
                    // Only the first parsed sequence is validated.
                    seqsObj = (Sequence)parserObj.Parse(reader)[0];
                }
                break;

            case AdditionalParameters.ParseReaderReadOnly:
                using (TextReader reader = new StreamReader(filePath))
                {
                    seqsObj = (Sequence)parserObj.Parse(reader, true)[0];
                }
                break;

            default:
                break;
            }

            // An unhandled addParam leaves seqsObj null and fails the test here.
            Assert.IsNotNull(seqsObj);
            ApplicationLog.WriteLine(string.Format(null,
                                                   "FastA Parser BVT: Number of Sequences found are '{0}'.",
                                                   seqsObj.Count.ToString((IFormatProvider)null)));

            // NOTE(review): the expected values below are always read from
            // SimpleFastaDnaNodeName rather than from nodeName - confirm this is intended.
            string expectedSequence = Utility._xmlUtil.GetTextValue(
                Constants.SimpleFastaDnaNodeName,
                Constants.ExpectedSequenceNode);

            Assert.AreEqual(expectedSequence, seqsObj.ToString());
            string sequenceMessage = string.Format(null,
                                                   "FastA Parser BVT: The FASTA sequence '{0}' validation after Parse() is found to be as expected.",
                                                   seqsObj.ToString());
            ApplicationLog.WriteLine(sequenceMessage);
            // Logs to the NUnit GUI (Console.Out) window
            Console.WriteLine(sequenceMessage);

            // Alphabet names are compared case-insensitively; expected value goes first
            // so that a failure message reports expected/actual the right way round.
            Assert.IsNotNull(seqsObj.Alphabet);
            string expectedAlphabet = Utility._xmlUtil.GetTextValue(Constants.SimpleFastaDnaNodeName,
                                                                    Constants.AlphabetNameNode);
            Assert.AreEqual(
                expectedAlphabet.ToLower(CultureInfo.CurrentCulture),
                seqsObj.Alphabet.Name.ToLower(CultureInfo.CurrentCulture));
            ApplicationLog.WriteLine(string.Format(null,
                                                   "FastA Parser BVT: The Sequence Alphabet is '{0}' and is as expected.",
                                                   seqsObj.Alphabet.Name));

            Assert.AreEqual(Utility._xmlUtil.GetTextValue(
                                Constants.SimpleFastaDnaNodeName,
                                Constants.SequenceIdNode), seqsObj.ID);
            string idMessage = string.Format(null,
                                             "FastA Parser BVT: The Sequence ID is '{0}' and is as expected.",
                                             seqsObj.ID);
            ApplicationLog.WriteLine(idMessage);
            // Logs to the NUnit GUI (Console.Out) window
            Console.WriteLine(idMessage);
        }
Beispiel #17
0
        /// <summary>
        /// Runs the PAMSAM multiple sequence aligner on a protein benchmark alignment
        /// (BOX032 under Protein\Balibase\RV913): the benchmark is un-aligned, written
        /// to a temporary FASTA file, re-parsed, aligned, and the scores of each stage
        /// are printed against the benchmark alignment.
        /// </summary>
        public void TestMsaBenchMarkLargeDataset()
        {
            ISequenceParser parser         = new FastaParser();
            String          outputFilePath = @"tempBOX032.xml.afa";

            try
            {
                // Test on protein benchmark dataset
                string           filepath     = @"TestUtils\FASTA\Protein\Balibase\RV913\BOX032.xml.afa";
                IList<ISequence> orgSequences = parser.Parse(filepath);

                IList<ISequence> sequences = MsaUtils.UnAlign(orgSequences);
                int numberOfSequences      = orgSequences.Count;

                // Overwrite rather than append: a file left behind by an earlier failed run
                // would otherwise end up holding duplicated sequences.
                using (StreamWriter writer = new StreamWriter(outputFilePath, false))
                {
                    foreach (ISequence sequence in sequences)
                    {
                        writer.WriteLine(">" + sequence.ID);
                        // write sequence in lines of at most 60 symbols
                        BasicDerivedSequence derivedSeq = new BasicDerivedSequence(sequence, false, false, 0, 0);
                        for (int lineStart = 0; lineStart < sequence.Count; lineStart += 60)
                        {
                            derivedSeq.RangeStart  = lineStart;
                            derivedSeq.RangeLength = Math.Min(60, sequence.Count - lineStart);
                            writer.WriteLine(derivedSeq.ToString());
                        }
                    }
                }

                // Re-read the un-aligned sequences from the temp file
                sequences = parser.Parse(outputFilePath);

                Console.WriteLine("Original sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(sequences[i].ToString());
                }

                Console.WriteLine("Benchmark sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(orgSequences[i].ToString());
                }

                PAMSAMMultipleSequenceAligner.FasterVersion = false;
                PAMSAMMultipleSequenceAligner.UseWeights    = false;
                PAMSAMMultipleSequenceAligner.UseStageB     = true;
                PAMSAMMultipleSequenceAligner.NumberOfCores = 2;
                int gapOpenPenalty   = -13;
                int gapExtendPenalty = -5;
                int kmerLength       = 3;

                int numberOfDegrees    = 2;  //Environment.ProcessorCount;
                int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

                SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);

                DistanceFunctionTypes      distanceFunctionName             = DistanceFunctionTypes.EuclideanDistance;
                UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
                ProfileAlignerNames        profileAlignerName         = ProfileAlignerNames.NeedlemanWunschProfileAligner;
                ProfileScoreFunctionNames  profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProduct;

                PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                                                        (sequences, MoleculeType.Protein, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                        profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                                                        numberOfPartitions, numberOfDegrees);

                Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));
                Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
                for (int i = 0; i < msa.AlignedSequencesA.Count; ++i)
                {
                    Console.WriteLine(msa.AlignedSequencesA[i].ToString());
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));
                Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
                for (int i = 0; i < msa.AlignedSequencesB.Count; ++i)
                {
                    Console.WriteLine(msa.AlignedSequencesB[i].ToString());
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));
                Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
                for (int i = 0; i < msa.AlignedSequencesC.Count; ++i)
                {
                    Console.WriteLine(msa.AlignedSequencesC[i].ToString());
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));
                Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

                for (int i = 0; i < msa.AlignedSequences.Count; ++i)
                {
                    Console.WriteLine(msa.AlignedSequences[i].ToString());
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
            }
            finally
            {
                // Clean up even when the alignment throws, so neither the temp file
                // nor the parser outlives the test.
                try
                {
                    if (File.Exists(outputFilePath))
                    {
                        File.Delete(outputFilePath);
                    }
                }
                finally
                {
                    ((FastaParser)parser).Dispose();
                }
            }
        }
        /// <summary>
        /// Handles the Submit button: converts the submitted text into the selected
        /// counterpart (reverse complement, reverse or complement) and shows the result
        /// in the output box. When the input parses into FASTA entries each entry's
        /// sequence is converted; otherwise the whole input is converted as one sequence.
        /// </summary>
        /// <param name="sender">Event source</param>
        /// <param name="e">Event arguments</param>
        private void btn_Submit_Click(object sender, EventArgs e)
        {
            this.OutputSequences = "";
            if (string.IsNullOrEmpty(this.InputSequences))
            {
                MessageBox.Show("Please do not submit empty content. ");
                return;
            }
            FastaParser      parser       = new FastaParser(this.InputSequences);
            List<FastaEntry> fastaEntries = parser.ParseFasta();

            // Resolve the conversion once, so that a missing selection is reported a
            // single time instead of once per FASTA entry.
            Func<string, string> convert = null;
            switch (this.Countpart)
            {
            case CountpartType.ReverseComplement:
                convert = s => SequenceGenerator.ReverseComplementSequence(s);
                break;

            case CountpartType.Reverse:
                convert = s => SequenceGenerator.ReverseSequence(s);
                break;

            case CountpartType.Complement:
                convert = s => SequenceGenerator.ComplementSequence(s);
                break;

            default:
                MessageBox.Show("Please choose a counterpart. ");
                break;
            }

            if (fastaEntries == null)
            {
                // No FASTA entries were returned: treat the raw input as a single sequence.
                if (convert != null)
                {
                    this.OutputSequences = convert(this.InputSequences);
                }
            }
            else
            {
                // Without a selected counterpart the entries are echoed unchanged.
                StringBuilder output = new StringBuilder();
                foreach (FastaEntry entry in fastaEntries)
                {
                    if (convert != null)
                    {
                        entry.Sequence = new StringBuilder(convert(entry.Sequence.ToString()));
                    }
                    output.Append(entry.ToString());
                }
                this.OutputSequences = output.ToString();
            }
            this.rtx_OutputSequences.Text = this.OutputSequences;
        }
Beispiel #19
0
        /// <summary>
        /// Round-trip test: parses a FASTA file, writes the sequences to a temp file
        /// with the FastaFormatter, parses that file again and checks that IDs and
        /// sequence strings are unchanged.
        /// </summary>
        public void FastaFormatter()
        {
            // Test with FASTA file from Simon

            string filepathOriginal = @"TestUtils\FASTA\NC_005213.ffn";

            Assert.IsTrue(File.Exists(filepathOriginal));

            // Both parsers stay alive until the comparison is finished and are
            // disposed together in the finally block.
            FastaParser parserOriginal = null;
            FastaParser parserNew      = null;

            try
            {
                FastaFormatter formatter = new FastaFormatter();

                // Read the original file
                parserOriginal = new FastaParser();
                IList<ISequence> seqsOriginal = parserOriginal.Parse(filepathOriginal);
                Assert.IsNotNull(seqsOriginal);

                // Use the formatter to write the original sequences to a temp file
                string filepathTmp = Path.GetTempFileName();
                using (TextWriter writer = new StreamWriter(filepathTmp))
                {
                    foreach (Sequence s in seqsOriginal)
                    {
                        formatter.Format(s, writer);
                    }
                }

                // Read the new file, then compare the sequences
                parserNew = new FastaParser();
                IList<ISequence> seqsNew = parserNew.Parse(filepathTmp);
                Assert.IsNotNull(seqsNew);

                // Now compare the sequences.
                int countOriginal = seqsOriginal.Count;
                int countNew      = seqsNew.Count;
                Assert.AreEqual(countOriginal, countNew);

                for (int i = 0; i < countOriginal; i++)
                {
                    Assert.AreEqual(seqsOriginal[i].ID, seqsNew[i].ID);
                    string orgSeq = seqsOriginal[i].ToString();
                    string newSeq = seqsNew[i].ToString();
                    Assert.AreEqual(orgSeq, newSeq);
                }
                // Passed all the tests, delete the tmp file. If we failed an Assert,
                // the tmp file will still be there in case we need it for debugging.
                File.Delete(filepathTmp);
            }
            finally
            {
                if (parserNew != null)
                {
                    parserNew.Dispose();
                }
                if (parserOriginal != null)
                {
                    parserOriginal.Dispose();
                }
            }
        }
Beispiel #20
0
        /// <summary>
        /// Parses uniprot-dutpase.fasta with the FastaParser and checks every sequence
        /// against headers and residues collected by a plain line-by-line read of the
        /// same file.
        /// </summary>
        public void FastaForUniprotDutpase()
        {
            int    expectedSequenceCount = 2015;
            string filepath = @"testdata\FASTA\uniprot-dutpase.fasta";

            Assert.IsTrue(File.Exists(filepath));

            List<string> headers   = new List<string>();
            List<string> sequences = new List<string>();

            // Reference pass: collect header lines and the concatenated residue lines
            // that follow each of them.
            using (StreamReader plainReader = File.OpenText(filepath))
            {
                StringBuilder pending = null;
                string        current;
                while ((current = plainReader.ReadLine()) != null)
                {
                    if (!current.StartsWith(">"))
                    {
                        // Residue line: append it to the sequence being collected.
                        if (pending == null)
                        {
                            pending = new StringBuilder();
                        }
                        pending.Append(current);
                        continue;
                    }

                    // Header line: close the sequence collected so far, if any.
                    if (pending != null)
                    {
                        sequences.Add(pending.ToString());
                        pending = null;
                    }
                    headers.Add(current);
                }

                // The last sequence has no header after it to close it.
                if (pending != null)
                {
                    sequences.Add(pending.ToString());
                }
            }
            Assert.AreEqual(expectedSequenceCount, headers.Count);
            Assert.AreEqual(expectedSequenceCount, sequences.Count);

            // Parser pass over the same file.
            IList<ISequence> parsed      = null;
            FastaParser      fastaParser = new FastaParser();

            using (StreamReader parserInput = File.OpenText(filepath))
            {
                parsed = fastaParser.Parse(parserInput);
            }
            Assert.IsNotNull(parsed);
            Assert.AreEqual(expectedSequenceCount, parsed.Count);

            for (int index = 0; index < expectedSequenceCount; index++)
            {
                Sequence actual        = (Sequence)parsed[index];
                string   expectedText  = sequences[index];
                string   expectedTitle = headers[index].Substring(1); // drop the leading '>'

                Assert.IsNotNull(actual);
                Assert.AreEqual(expectedText, actual.ToString());
                Assert.AreEqual(expectedText.Length, actual.EncodedValues.Length);
                Assert.AreEqual(expectedTitle, actual.ID);
            }
        }
Beispiel #21
0
        /// <summary>
        /// Parses 186972391.fasta through Parse(reader), ParseOne(reader) and
        /// ParseOne(filename) and checks that each route yields the same protein
        /// sequence, alphabet and ID.
        /// </summary>
        public void FastaFor186972391()
        {
            string expectedSequence =
                "IFYEPVEILGYDNKSSLVLVKRLITRMYQQKSLISSLNDSNQNEFWGHKNSFSSHFSSQMVSEGFGVILE" +
                "IPFSSRLVSSLEEKRIPKSQNLRSIHSIFPFLEDKLSHLNYVSDLLIPHPIHLEILVQILQCWIKDVPSL" +
                "HLLRLFFHEYHNLNSLITLNKSIYVFSKRKKRFFGFLHNSYVYECEYLFLFIRKKSSYLRSISSGVFLER" +
                "THFYGKIKYLLVVCCNSFQRILWFLKDTFIHYVRYQGKAIMASKGTLILMKKWKFHLVNFWQSYFHFWFQ" +
                "PYRINIKQLPNYSFSFLGYFSSVRKNPLVVRNQMLENSFLINTLTQKLDTIVPAISLIGSLSKAQFCTVL" +
                "GHPISKPIWTDLSDSDILDRFCRICRNLCRYHSGSSKKQVLYRIKYIFRLSCARTLARKHKSTVRTFMRR" +
                "LGSGFLEEFFLEEE";

            string filepath = @"TestUtils\FASTA\186972391.fasta";

            Assert.IsTrue(File.Exists(filepath));

            IList<ISequence> seqs   = null;
            FastaParser      parser = null;

            try
            {
                parser = new FastaParser();
                using (StreamReader reader = File.OpenText(filepath))
                {
                    seqs = parser.Parse(reader);
                }
                Assert.IsNotNull(seqs);
                Assert.AreEqual(1, seqs.Count);
                AssertIsSequence186972391((Sequence)seqs[0], expectedSequence);

                // Try it again with ParseOne, from reader and from filename
                Sequence seq;
                using (StreamReader reader = File.OpenText(filepath))
                {
                    seq = (Sequence)parser.ParseOne(reader);
                }
                AssertIsSequence186972391(seq, expectedSequence);

                seq = (Sequence)parser.ParseOne(filepath);
                AssertIsSequence186972391(seq, expectedSequence);
            }
            finally
            {
                if (parser != null)
                {
                    parser.Dispose();
                }
            }
        }

        /// <summary>
        /// Asserts that a parsed sequence has the expected residues, can be copied out
        /// as bytes, uses the "Protein" alphabet and carries the expected ID.
        /// </summary>
        /// <param name="seq">Sequence produced by the parser</param>
        /// <param name="expectedSequence">Expected residue string</param>
        private static void AssertIsSequence186972391(Sequence seq, string expectedSequence)
        {
            Assert.IsNotNull(seq);
            Assert.AreEqual(expectedSequence, seq.ToString());

            // Direct cast instead of 'as': a failed conversion should surface as a
            // cast error, not as a NullReferenceException on CopyTo.
            byte[] tmpEncodedSeq = new byte[seq.Count];
            ((IList<byte>)seq).CopyTo(tmpEncodedSeq, 0);
            Assert.AreEqual(expectedSequence.Length, tmpEncodedSeq.Length);

            Assert.IsNotNull(seq.Alphabet);
            Assert.AreEqual("Protein", seq.Alphabet.Name);

            Assert.AreEqual("gi|186972391|gb|ACC99454.1| maturase K [Scaphosepalum rapax]", seq.ID);
        }
Beispiel #22
0
        /// <summary>
        /// Runs the activity: parses one sequence from InputFile, submits it to the
        /// NCBI BLAST handler (blastn against nr), polls the job status, flattens the
        /// parsed result into one BlastResultCollator per HSP and stores the serialized
        /// output in the BlastResult property.
        /// </summary>
        /// <param name="executionContext">Execution context supplied by the workflow runtime</param>
        /// <returns>ActivityExecutionStatus.Closed</returns>
        protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
        {
            // Read the query sequence from the input file.
            FastaParser queryParser = new FastaParser();
            ISequence   query       = queryParser.ParseOne(InputFile, true);

            // Set up the BLAST handler.
            NCBIBlastHandler blastService = new NCBIBlastHandler();
            ConfigParameters serviceConfig = new ConfigParameters();
            serviceConfig.UseBrowserProxy = true;
            blastService.Configuration    = serviceConfig;

            // fill in the BLAST settings:
            BlastParameters blastSettings = new BlastParameters();
            blastSettings.Add("Program", "blastn");
            blastSettings.Add("Database", "nr");
            // higher Expect will return more results
            blastSettings.Add("Expect", "1e-10");
            blastSettings.Add("CompositionBasedStatistics", "0");

            // create the request and query its initial status
            string requestId = blastService.SubmitRequest(query, blastSettings);
            ServiceRequestInformation requestInfo = blastService.GetRequestStatus(requestId);

            if (!(requestInfo.Status == ServiceRequestStatus.Waiting ||
                  requestInfo.Status == ServiceRequestStatus.Ready))
            {
                // TODO: Add error handling here
            }

            // get async results, poll until ready (or failed, or out of attempts)
            const int maxAttempts = 10;
            for (int attempt = 1;
                 attempt <= maxAttempts &&
                 requestInfo.Status != ServiceRequestStatus.Error &&
                 requestInfo.Status != ServiceRequestStatus.Ready;)
            {
                ++attempt;
                requestInfo = blastService.GetRequestStatus(requestId);

                // Back off longer on every attempt while the job is still pending.
                bool stillPending = requestInfo.Status == ServiceRequestStatus.Waiting ||
                                    requestInfo.Status == ServiceRequestStatus.Queued;
                Thread.Sleep(stillPending ? 20000 * attempt : 0);
            }

            // Get blast result.
            BlastXmlParser     resultParser  = new BlastXmlParser();
            string             resultXml     = blastService.GetResult(requestId, blastSettings);
            IList<BlastResult> parsedResults = resultParser.Parse(new StringReader(resultXml));

            // Convert blast result to BlastCollator: one entry per HSP.
            List<BlastResultCollator> collated = new List<BlastResultCollator>();

            foreach (BlastResult parsedResult in parsedResults)
            {
                foreach (BlastSearchRecord record in parsedResult.Records)
                {
                    if (null == record.Hits || record.Hits.Count <= 0)
                    {
                        continue;
                    }

                    foreach (Hit hit in record.Hits)
                    {
                        if (null == hit.Hsps || hit.Hsps.Count <= 0)
                        {
                            continue;
                        }

                        foreach (Hsp hsp in hit.Hsps)
                        {
                            collated.Add(new BlastResultCollator
                            {
                                Alignment     = hsp.AlignmentLength,
                                Bit           = hsp.BitScore,
                                EValue        = hsp.EValue,
                                Identity      = hsp.IdentitiesCount,
                                Length        = hit.Length,
                                QEnd          = hsp.QueryEnd,
                                QStart        = hsp.QueryStart,
                                QueryId       = record.IterationQueryId,
                                SEnd          = hsp.HitEnd,
                                SStart        = hsp.HitStart,
                                SubjectId     = hit.Id,
                                Positives     = hsp.PositivesCount,
                                QueryString   = hsp.QuerySequence,
                                SubjectString = hsp.HitSequence,
                                Accession     = hit.Accession,
                                Description   = hit.Def
                            });
                        }
                    }
                }
            }

            // Serialize the collated hits and set result to the output property.
            BlastXmlSerializer outputSerializer = new BlastXmlSerializer();
            Stream             serialized       = outputSerializer.SerializeBlastOutput(collated);
            BlastResult = GetSerializedData(serialized);

            return ActivityExecutionStatus.Closed;
        }
Beispiel #23
0
        /// <summary>
        /// Exercises NeedlemanWunschProfileAlignerSerial: first on two short DNA
        /// sequences, then pairwise on the un-aligned sequences of RV11_BBS_all.afa,
        /// printing e-strings, aligned sequences and alignment scores.
        /// </summary>
        public void TestNeedlemanWunschProfileAligner()
        {
            Console.WriteLine("Number of logical processors: {0}", Environment.ProcessorCount);

            // Register the item set used by the profiles: every symbol of the template
            // sequence is mapped to its position.
            ISequence templateSequence = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");
            Dictionary<ISequenceItem, int> itemSet = new Dictionary<ISequenceItem, int>();

            for (int i = 0; i < templateSequence.Count; ++i)
            {
                itemSet.Add(templateSequence[i], i);
            }
            Profiles.ItemSet = itemSet;

            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpenPenalty   = -3;
            int gapExtendPenalty = -1;

            IProfileAligner profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                                     gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);

            ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
            ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");

            List<ISequence> sequences = new List<ISequence>();

            sequences.Add(seqA);
            sequences.Add(seqB);

            IProfileAlignment profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[0]);
            IProfileAlignment profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[1]);

            profileAligner.Align(profileAlignmentA, profileAlignmentB);

            List<int> eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
            List<int> eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

            List<ISequence> alignedSequences = new List<ISequence>();

            ISequence seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[0]);

            alignedSequences.Add(seq);
            seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[1]);
            alignedSequences.Add(seq);

            float profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

            Console.WriteLine("alignment score is: {0}", profileScore);

            Console.WriteLine("the aligned sequences are:");
            for (int i = 0; i < alignedSequences.Count; ++i)
            {
                Console.WriteLine(alignedSequences[i].ToString());
            }

            // NOTE(review): these expected alignments are constructed but never compared
            // with alignedSequences - confirm whether assertions are missing here.
            ISequence expectedSeqA = new Sequence(Alphabets.DNA, "GGGAA---AAATCAGATT");
            ISequence expectedSeqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---");

            // Test on case 3: 36 sequences
            ISequenceParser parser = new FastaParser();
            try
            {
                string           filepath     = @"TestUtils\FASTA\RV11_BBS_all.afa";
                IList<ISequence> orgSequences = parser.Parse(filepath);

                sequences = MsaUtils.UnAlign(orgSequences);

                int numberOfSequences = orgSequences.Count;

                Console.WriteLine("Original unaligned sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(">");
                    Console.WriteLine(sequences[i].ToString());
                }

                // Align every pair (i, j) with i < j.
                // NOTE(review): the outer loop starts at 1, so sequence 0 is never
                // part of a pair - confirm whether that is intended.
                for (int i = 1; i < numberOfSequences - 1; ++i)
                {
                    for (int j = i + 1; j < numberOfSequences; ++j)
                    {
                        profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                        profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[j]);

                        profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                                 gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);
                        profileAligner.Align(profileAlignmentA, profileAlignmentB);

                        eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
                        eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

                        Console.WriteLine("Sequences lengths are: {0}-{1}", sequences[i].Count, sequences[j].Count);
                        Console.WriteLine("estring 1:");
                        for (int k = 0; k < eStringSubtree.Count; ++k)
                        {
                            Console.Write("{0}\t", eStringSubtree[k]);
                        }
                        Console.WriteLine("\nestring 2:");
                        for (int k = 0; k < eStringSubtreeB.Count; ++k)
                        {
                            Console.Write("{0}\t", eStringSubtreeB[k]);
                        }

                        alignedSequences = new List<ISequence>();

                        seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[i]);
                        alignedSequences.Add(seq);
                        seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[j]);
                        alignedSequences.Add(seq);

                        profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

                        Console.WriteLine("\nalignment score is: {0}", profileScore);

                        Console.WriteLine("the aligned sequences are:");
                        for (int k = 0; k < alignedSequences.Count; ++k)
                        {
                            Console.WriteLine(alignedSequences[k].ToString());
                        }
                    }
                }
            }
            finally
            {
                // Dispose exactly once, after all pairs are processed (previously this ran
                // inside the outer loop, i.e. repeatedly and while the sequences were still
                // in use, and not at all for fewer than three sequences).
                ((FastaParser)parser).Dispose();
            }
        }
Beispiel #24
0
        /// <summary>
        /// Validates most of the find matches suffix tree test cases with varying parameters.
        /// </summary>
        /// <param name="nodeName">Node name which needs to be read for execution.</param>
        /// <param name="isFilePath">Is File Path?</param>
        /// <param name="LISActionType">LIS action type enum</param>
        static void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath,
                                                            LISParameters LISActionType)
        {
            ISequence referenceSeq      = null;
            ISequence querySeq          = null;
            string    referenceSequence = string.Empty;
            string    querySequence     = string.Empty;

            if (isFilePath)
            {
                // Gets the reference sequence from the configurtion file
                string filePath = Utility._xmlUtil.GetTextValue(nodeName,
                                                                Constants.FilePathNode);

                Assert.IsNotNull(filePath);
                ApplicationLog.WriteLine(string.Format(null,
                                                       "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

                FastaParser       parser        = new FastaParser();
                IList <ISequence> referenceSeqs = parser.Parse(filePath);
                referenceSeq      = referenceSeqs[0];
                referenceSequence = referenceSeq.ToString();

                // Gets the reference sequence from the configurtion file
                string queryFilePath = Utility._xmlUtil.GetTextValue(nodeName,
                                                                     Constants.SearchSequenceFilePathNode);

                Assert.IsNotNull(queryFilePath);
                ApplicationLog.WriteLine(string.Format(null,
                                                       "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

                FastaParser       queryParser = new FastaParser();
                IList <ISequence> querySeqs   = queryParser.Parse(queryFilePath);
                querySeq      = querySeqs[0];
                querySequence = querySeq.ToString();
            }
            else
            {
                // Gets the reference sequence from the configurtion file
                referenceSequence = Utility._xmlUtil.GetTextValue(nodeName,
                                                                  Constants.SequenceNode);

                string seqAlp = Utility._xmlUtil.GetTextValue(nodeName,
                                                              Constants.AlphabetNameNode);

                referenceSeq = new Sequence(Utility.GetAlphabet(seqAlp),
                                            referenceSequence);

                querySequence = Utility._xmlUtil.GetTextValue(nodeName,
                                                              Constants.SearchSequenceNode);

                seqAlp = Utility._xmlUtil.GetTextValue(nodeName,
                                                       Constants.SearchSequenceAlphabetNode);

                querySeq = new Sequence(Utility.GetAlphabet(seqAlp),
                                        querySequence);
            }

            string mumLength = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

            // Builds the suffix for the reference sequence passed.
            ISuffixTreeBuilder suffixTreeBuilder = new KurtzSuffixTreeBuilder();
            SequenceSuffixTree suffixTree        = suffixTreeBuilder.BuildSuffixTree(referenceSeq);

            IList <MaxUniqueMatch> matches = suffixTreeBuilder.FindMatches(suffixTree, querySeq,
                                                                           long.Parse(mumLength, null));

            switch (LISActionType)
            {
            case LISParameters.FindUniqueMatches:
                // Validates the Unique Matches.
                ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches");
                Assert.IsTrue(ValidateUniqueMatches(matches, nodeName, LISActionType));
                break;

            case LISParameters.PerformLIS:
                // Validates the Unique Matches.
                ApplicationLog.WriteLine("MUMmer BVT : Validating the Unique Matches using LIS");
                LongestIncreasingSubsequence lisObj     = new LongestIncreasingSubsequence();
                IList <MaxUniqueMatch>       lisMatches = lisObj.GetLongestSequence(matches);
                Assert.IsTrue(ValidateUniqueMatches(lisMatches, nodeName, LISActionType));
                break;

            default:
                break;
            }

            Console.WriteLine(string.Format(null,
                                            "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.",
                                            referenceSequence, querySequence));
            ApplicationLog.WriteLine(string.Format(null,
                                                   "MUMmer BVT : Successfully validated the all the unique matches for the sequence '{0}' and '{1}'.",
                                                   referenceSequence, querySequence));
        }
Beispiel #25
0
        /// <summary>
        /// Parse BAM and validate parsed aligned sequences and its properties.
        /// The BAM file is parsed through the entry point selected by
        /// <paramref name="BAMParserPam"/>; the aligned sequence count and every aligned
        /// query sequence are then compared with the expected values read from the xml node.
        /// </summary>
        /// <param name="nodeName">Xml node holding the BAM file path, the expected sequence
        /// file path, the reference/start/end indices and the expected aligned sequence count</param>
        /// <param name="BAMParserPam">Selects which Parse / ParseRange overload is exercised</param>
        /// <param name="IsEncoding">When true the parameterless BAMParser constructor is used;
        /// when false the parser is created with Encodings.IupacNA.
        /// NOTE(review): the parameter name suggests the opposite mapping - confirm against callers.</param>
        /// <param name="IsReferenceIndex">When true the BAM header record validation is skipped</param>
        void ValidateBAMParser(string nodeName,
                               BAMParserParameters BAMParserPam, bool IsEncoding,
                               bool IsReferenceIndex)
        {
            // Get input and output values from xml node.
            string bamFilePath = _utilityObj._xmlUtil.GetTextValue(nodeName,
                                                                   Constants.FilePathNode);
            string expectedAlignedSeqFilePath = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequence);
            string refIndexValue = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.RefIndexNode);
            string startIndexValue = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.StartIndexNode);
            string endIndexValue = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.EndIndexNode);
            string alignedSeqCount = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.AlignedSeqCountNode);

            SequenceAlignmentMap seqAlignment = null;
            BAMParser bamParser = null;

            try
            {
                bamParser = IsEncoding
                    ? new BAMParser()
                    : new BAMParser(Encodings.IupacNA);

                // Parse a BAM file with different parameters.
                switch (BAMParserPam)
                {
                    case BAMParserParameters.StreamReader:
                        using (Stream stream = new FileStream(bamFilePath, FileMode.Open,
                                                              FileAccess.Read))
                        {
                            seqAlignment = bamParser.Parse(stream);
                        }
                        break;

                    case BAMParserParameters.StreamReaderWithReadOnly:
                        using (Stream stream = new FileStream(bamFilePath, FileMode.Open,
                                                              FileAccess.Read))
                        {
                            seqAlignment = bamParser.Parse(stream, false);
                        }
                        break;

                    case BAMParserParameters.FileName:
                        seqAlignment = bamParser.Parse(bamFilePath);
                        break;

                    case BAMParserParameters.FileNameWithReadOnly:
                        seqAlignment = bamParser.Parse(bamFilePath, false);
                        break;

                    case BAMParserParameters.ParseRangeFileName:
                        seqAlignment = bamParser.ParseRange(bamFilePath,
                            Convert.ToInt32(refIndexValue, (IFormatProvider)null));
                        break;

                    case BAMParserParameters.ParseRangeFileNameWithReadOnly:
                        seqAlignment = bamParser.ParseRange(bamFilePath,
                            Convert.ToInt32(refIndexValue, (IFormatProvider)null), false);
                        break;

                    case BAMParserParameters.ParseRangeWithIndex:
                        seqAlignment = bamParser.ParseRange(bamFilePath,
                            Convert.ToInt32(refIndexValue, (IFormatProvider)null),
                            Convert.ToInt32(startIndexValue, (IFormatProvider)null),
                            Convert.ToInt32(endIndexValue, (IFormatProvider)null), false);
                        break;
                }

                // Report a readable failure instead of a NullReferenceException when no
                // case above matched or the parser returned nothing.
                Assert.IsNotNull(seqAlignment,
                    "BAM Parser BVT : No alignment was produced for the requested parse method.");

                // Validate BAM Header record fileds.
                if (!IsReferenceIndex)
                {
                    ValidateBAMHeaderRecords(nodeName, seqAlignment);
                }

                IList<SAMAlignedSequence> alignedSeqs = seqAlignment.QuerySequences;

                Assert.AreEqual(alignedSeqCount, alignedSeqs.Count.ToString((IFormatProvider)null));

                // Get expected sequences
                using (FastaParser parserObj = new FastaParser())
                {
                    IList<ISequence> expectedSequences = parserObj.Parse(expectedAlignedSeqFilePath);

                    // Validate aligned sequences from BAM file.
                    for (int index = 0; index < alignedSeqs.Count; index++)
                    {
                        // Computed once per iteration; used for the comparison and both log lines.
                        string actualSequence = alignedSeqs[index].QuerySequence.ToString();

                        Assert.AreEqual(expectedSequences[index].ToString(), actualSequence);

                        // Log to NUNIT GUI.
                        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                            "BAM Parser BVT : Validated Aligned sequence :{0} successfully",
                            actualSequence));
                        Console.WriteLine(string.Format((IFormatProvider)null,
                            "BAM Parser BVT : Validated the aligned sequence :{0} successfully",
                            actualSequence));
                    }
                }
            }
            finally
            {
                // bamParser is still null when its constructor threw; guarding here keeps
                // the original exception from being replaced by a NullReferenceException.
                if (bamParser != null)
                {
                    bamParser.Dispose();
                }
            }
        }
Beispiel #26
0
        /// <summary>
        /// Runs a MUMmer3 alignment for the test case described by the given xml node and
        /// checks the resulting score and aligned sequences against the expected values
        /// stored in that node.
        /// </summary>
        /// <param name="nodeName">Node name to be read from xml</param>
        /// <param name="isFilePath">True to load the reference and query sequences from the
        /// FastA files named in the node; false to build them from the sequence text in the node</param>
        /// <param name="isSeqList">True to append the reference to the query list and call Align;
        /// false to call AlignSimple with the reference and the query list</param>
        static void ValidateMUMmerAlignGeneralTestCases(string nodeName, bool isFilePath,
                                                        bool isSeqList)
        {
            ISequence reference;
            ISequence firstQuery;
            IList<ISequence> queries;
            string referenceText;
            string queryText;

            if (!isFilePath)
            {
                // Reference sequence built from the text and alphabet stored in the node.
                referenceText = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
                string referenceAlphabetName = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.AlphabetNameNode);
                reference = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceText);

                // Query sequence built the same way from the search-sequence entries.
                queryText = Utility._xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
                string queryAlphabetName = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.SearchSequenceAlphabetNode);
                firstQuery = new Sequence(Utility.GetAlphabet(queryAlphabetName), queryText);

                queries = new List<ISequence>();
                queries.Add(firstQuery);
            }
            else
            {
                // Reference: first sequence of the FastA file named in the node.
                string referenceFile = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.FilePathNode);
                Assert.IsNotNull(referenceFile);
                ApplicationLog.WriteLine(string.Format(null,
                    "MUMmer BVT : Successfully validated the File Path '{0}'.", referenceFile));

                FastaParser referenceParser = new FastaParser();
                IList<ISequence> parsedReference = referenceParser.Parse(referenceFile);
                reference = parsedReference[0];
                referenceText = reference.ToString();

                // Query: every sequence of the search FastA file; the first one is used for logging.
                string queryFile = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.SearchSequenceFilePathNode);
                Assert.IsNotNull(queryFile);
                ApplicationLog.WriteLine(string.Format(null,
                    "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFile));

                FastaParser searchParser = new FastaParser();
                queries = searchParser.Parse(queryFile);
                firstQuery = queries[0];
                queryText = firstQuery.ToString();
            }

            // Configure the aligner from the node.
            string mumLengthText = Utility._xmlUtil.GetTextValue(nodeName,
                Constants.MUMAlignLengthNode);

            MUMmer aligner = new MUMmer3();
            aligner.LengthOfMUM = long.Parse(mumLengthText, null);
            aligner.PairWiseAlgorithm = new NeedlemanWunschAligner();

            string gapOpenCostText = Utility._xmlUtil.GetTextValue(nodeName,
                Constants.GapOpenCostNode);
            aligner.GapOpenCost = int.Parse(gapOpenCostText, (IFormatProvider)null);

            IList<IPairwiseSequenceAlignment> actualAlignments = null;
            if (!isSeqList)
            {
                actualAlignments = aligner.AlignSimple(reference, queries);
            }
            else
            {
                queries.Add(reference);
                actualAlignments = aligner.Align(queries);
            }

            // Score check.
            string expectedScore = Utility._xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
            string actualScore =
                actualAlignments[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider)null);
            Assert.AreEqual(expectedScore, actualScore);

            string scoreMessage = string.Format(null,
                "MUMmer BVT : Successfully validated the score for the sequence '{0}' and '{1}'.",
                referenceText, queryText);
            Console.WriteLine(scoreMessage);
            ApplicationLog.WriteLine(scoreMessage);

            // Aligned-sequence check against a hand-built expected alignment.
            string[] expectedTexts = Utility._xmlUtil.GetTextValues(nodeName,
                Constants.ExpectedSequencesNode);

            PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence();
            expectedPair.FirstSequence = new Sequence(reference.Alphabet, expectedTexts[0]);
            expectedPair.SecondSequence = new Sequence(reference.Alphabet, expectedTexts[1]);
            expectedPair.Score = int.Parse(expectedScore);

            IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
            expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

            IList<IPairwiseSequenceAlignment> expectedAlignments =
                new List<IPairwiseSequenceAlignment>();
            expectedAlignments.Add(expectedAlignment);

            Assert.IsTrue(CompareAlignment(actualAlignments, expectedAlignments));

            const string alignedMessage = "MUMmer BVT : Successfully validated the aligned sequences.";
            Console.WriteLine(alignedMessage);
            ApplicationLog.WriteLine(alignedMessage);
        }
Beispiel #27
0
        /// <summary>
        /// Measures the elapsed time and the change in managed memory of constructing a
        /// PAMSAMMultipleSequenceAligner over the un-aligned sequences of the reference file,
        /// then writes the test case header, the alignment score and every aligned sequence
        /// to the console.
        /// </summary>
        public void PerformPAMSAMPerf()
        {
            // Get input values from XML.
            string refPath =
                Utility._xmlUtil.GetTextValue(Constants.PamsamNode,
                                              Constants.RefFilePathNode);
            string queryPath =
                Utility._xmlUtil.GetTextValue(Constants.PamsamNode,
                                              Constants.QueryFilePathNode);

            // Create a List for input files.
            List<string> lstInputFiles = new List<string>();

            lstInputFiles.Add(refPath);
            lstInputFiles.Add(queryPath);

            // Parse the reference sequence file. The query file is only reported in the
            // header below; its sequences were never passed to the aligner, so it is not parsed.
            ISequenceParser parser = new FastaParser();
            IList<ISequence> orgSequences = parser.Parse(refPath);

            // Execute UnAlign method to verify that it does not contains gap
            List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

            // Set static properties
            PAMSAMMultipleSequenceAligner.FasterVersion = true;
            PAMSAMMultipleSequenceAligner.UseWeights    = false;
            PAMSAMMultipleSequenceAligner.UseStageB     = false;
            PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

            // Set Alignment parameters.
            int gapOpenPenalty     = -13;
            int gapExtendPenalty   = -5;
            int kmerLength         = 2;
            int numberOfDegrees    = 2;
            int numberOfPartitions = 4;

            // Profile Distance function name
            DistanceFunctionTypes distanceFunctionName =
                DistanceFunctionTypes.EuclideanDistance;

            // Set Hierarchical clustering.
            UpdateDistanceMethodsTypes hierarchicalClusteringMethodName =
                UpdateDistanceMethodsTypes.Average;

            // Set NeedlemanWunschProfileAligner
            ProfileAlignerNames profileAlignerName =
                ProfileAlignerNames.NeedlemanWunschProfileAligner;
            ProfileScoreFunctionNames profileProfileFunctionName =
                ProfileScoreFunctionNames.InnerProduct;

            // Create similarity matrix instance.
            SimilarityMatrix similarityMatrix =
                new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);

            // Take the memory baseline BEFORE starting the timer: GetTotalMemory(true)
            // forces a full collection, which must not be counted as alignment time.
            long memoryStart = GC.GetTotalMemory(true);
            Stopwatch watchObj = Stopwatch.StartNew();

            // Parallel Option will only get set if the PAMSAMMultipleSequenceAligner is getting called
            // To test separately distance matrix, binary tree etc..
            // Set the parallel option using below ctor.
            msa = new PAMSAMMultipleSequenceAligner
                      (sequences, MoleculeType.DNA, kmerLength, distanceFunctionName,
                      hierarchicalClusteringMethodName, profileAlignerName,
                      profileProfileFunctionName, similarityMatrix, gapOpenPenalty,
                      gapExtendPenalty, numberOfPartitions, numberOfDegrees);

            // Stop watchclock before the second forced collection.
            watchObj.Stop();
            long memoryEnd = GC.GetTotalMemory(true);

            string memoryUsed = (memoryEnd - memoryStart).ToString();

            // Display all aligned sequence, performance and memory optimization nos.
            DisplayTestCaseHeader(lstInputFiles, watchObj,
                                  memoryUsed, "PAMSAM");

            Console.WriteLine(string.Format(
                                  "PAMSAM SequenceAligner method, Alignment Score is : {0}",
                                  msa.AlignmentScore.ToString()));
            int index = 0;

            foreach (ISequence seq in msa.AlignedSequences)
            {
                Console.WriteLine(string.Format(
                                      "PAMSAM Aligned Seq {0}:{1}", index, seq.ToString()));
                index++;
            }
        }
Beispiel #28
0
        /// <summary>
        /// Computes the MUMs between a reference sequence and the query sequences of the
        /// test case described by the given xml node, and validates them through ValidateMums.
        /// </summary>
        /// <param name="nodeName">Name of the XML node to be read.</param>
        /// <param name="isFilePath">True to load the sequences from the FastA files named in the
        /// node; false to build them from the sequence text in the node</param>
        /// <param name="isAfterLIS">Value forwarded to the three-argument GetMUMs overload;
        /// only used when <paramref name="isLIS"/> is true</param>
        /// <param name="isLIS">True to call the GetMUMs overload that takes
        /// <paramref name="isAfterLIS"/>; false to call the two-argument overload</param>
        static void ValidateMUMsGeneralTestCases(string nodeName, bool isFilePath,
                                                 bool isAfterLIS, bool isLIS)
        {
            ISequence reference;
            ISequence firstQuery;
            IList<ISequence> queries;
            string referenceText;
            string queryText;

            if (!isFilePath)
            {
                // Reference sequence built from the text and alphabet stored in the node.
                referenceText = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
                string referenceAlphabetName = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.AlphabetNameNode);
                reference = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceText);

                // Query sequence built the same way from the search-sequence entries.
                queryText = Utility._xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
                string queryAlphabetName = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.SearchSequenceAlphabetNode);
                firstQuery = new Sequence(Utility.GetAlphabet(queryAlphabetName), queryText);

                queries = new List<ISequence>();
                queries.Add(firstQuery);
            }
            else
            {
                // Reference: first sequence of the FastA file named in the node.
                string referenceFile = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.FilePathNode);
                Assert.IsNotNull(referenceFile);
                ApplicationLog.WriteLine(string.Format(null,
                    "MUMmer BVT : Successfully validated the File Path '{0}'.", referenceFile));

                FastaParser referenceParser = new FastaParser();
                IList<ISequence> parsedReference = referenceParser.Parse(referenceFile);
                reference = parsedReference[0];
                referenceText = reference.ToString();

                // Query: every sequence of the search FastA file; the first one is validated below.
                string queryFile = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.SearchSequenceFilePathNode);
                Assert.IsNotNull(queryFile);
                ApplicationLog.WriteLine(string.Format(null,
                    "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFile));

                FastaParser searchParser = new FastaParser();
                queries = searchParser.Parse(queryFile);
                firstQuery = queries[0];
                queryText = firstQuery.ToString();
            }

            string mumLengthText = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

            MUMmer mummer = new MUMmer3();
            mummer.LengthOfMUM = long.Parse(mumLengthText, null);

            // Pick the GetMUMs overload requested by the caller.
            IDictionary<ISequence, IList<MaxUniqueMatch>> mums = null;
            if (isLIS)
            {
                mums = mummer.GetMUMs(reference, queries, isAfterLIS);
            }
            else
            {
                mums = mummer.GetMUMs(reference, queries);
            }

            // Validate MUMs output.
            Assert.IsTrue(ValidateMums(nodeName, mums, firstQuery));

            Console.WriteLine("MUMmer BVT : Successfully validated the Mumms");
            ApplicationLog.WriteLine("MUMmer BVT : Successfully validated the Mumms.");
        }
Beispiel #29
0
        /// <summary>
        /// Get perf and cpu utilization nos for sequence operations
        /// Seq ctor,Insert,IndexOf,Add,Clone,Remove.
        /// Each operation is timed with a stopwatch and the result is written to the
        /// console together with the next sample of the CPU counter.
        /// </summary>
        /// <param name="sequence">Sequence whose text seeds the DNA sequence under test and
        /// whose items drive the Insert, IndexOf, Add and Remove loops</param>
        private void GetSequencePerfNumber(ISequence sequence)
        {
            Stopwatch watchObj = new Stopwatch();

            // Calculating Constructor Time and CPU utilization
            watchObj.Reset();
            watchObj.Start();
            Sequence seq = new Sequence(
                Alphabets.DNA,
                sequence.ToString());

            watchObj.Stop();
            WriteSequencePerfResult("Constructor", watchObj);

            // The mutating operations below require a writable sequence.
            seq.IsReadOnly = false;

            // Calculating Insert method Time and CPU utilization
            watchObj.Reset();
            watchObj.Start();
            foreach (ISequenceItem item in sequence)
            {
                seq.Insert(1, item);
            }
            watchObj.Stop();
            WriteSequencePerfResult("Insert", watchObj);

            // Calculating IndexOf method Time and CPU utilization
            watchObj.Reset();
            watchObj.Start();
            foreach (ISequenceItem item in sequence)
            {
                seq.IndexOf(item);
            }
            watchObj.Stop();
            WriteSequencePerfResult("IndexOf", watchObj);

            // Calculating Add method Time and CPU utilization
            watchObj.Reset();
            watchObj.Start();
            foreach (ISequenceItem item in sequence)
            {
                seq.Add(item);
            }
            watchObj.Stop();
            WriteSequencePerfResult("Add", watchObj);

            // Calculating Clone method Time and CPU utilization
            // (only the call is measured; the clone itself is not needed afterwards)
            watchObj.Reset();
            watchObj.Start();
            seq.Clone();
            watchObj.Stop();
            WriteSequencePerfResult("Clone", watchObj);

            // Calculating Remove method Time and CPU utilization
            watchObj.Reset();
            watchObj.Start();
            foreach (ISequenceItem item in sequence)
            {
                seq.Remove(item);
            }
            watchObj.Stop();
            WriteSequencePerfResult("Remove", watchObj);
        }

        /// <summary>
        /// Writes the elapsed time of the given stopwatch (in seconds, derived from whole
        /// milliseconds) and the next CPU counter sample for the named operation to the console.
        /// </summary>
        /// <param name="operationName">Operation name placed in front of "() method" in the output</param>
        /// <param name="watchObj">Stopwatch that timed the operation</param>
        private void WriteSequencePerfResult(string operationName, Stopwatch watchObj)
        {
            Console.WriteLine(string.Format("{0}() method Perf Time : {1} Secs",
                                            operationName,
                                            TimeSpan.FromMilliseconds(
                                                watchObj.ElapsedMilliseconds).TotalSeconds.ToString()));
            Console.WriteLine(string.Format("{0}() method CPU Utilization : {1}",
                                            operationName,
                                            _cpuCounterObj.NextValue().ToString()));
        }
Beispiel #30
0
        /// <summary>
        /// Builds a Kurtz suffix tree over the reference sequences of the test case, finds the
        /// matches of the search sequences in it and validates them through ValidateUniqueMatches.
        /// </summary>
        /// <param name="nodeName">Node name which needs to be read for execution.</param>
        /// <param name="isFilePath">True to load both sequence sets from the FastA files named in
        /// the node; false to build them from the sequence texts in the node</param>
        /// <param name="additionalParam">Selects the validation performed on the matches
        /// (FindUniqueMatches or PerformClusterBuilder); any other value skips validation</param>
        static void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath,
                                                            AdditionalParameters additionalParam)
        {
            ISequence reference = null;
            ISequence search = null;

            if (!isFilePath)
            {
                // Reference and search sequence texts, plus their shared alphabet, come from the node.
                string[] referenceTexts = Utility._xmlUtil.GetTextValues(nodeName,
                    Constants.ReferenceSequencesNode);
                string[] searchTexts = Utility._xmlUtil.GetTextValues(nodeName,
                    Constants.SearchSequencesNode);

                string alphabetName = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.AlphabetNameNode);
                IAlphabet alphabet = Utility.GetAlphabet(alphabetName);

                List<ISequence> referenceItems = new List<ISequence>();
                List<ISequence> searchItems = new List<ISequence>();

                foreach (string text in referenceTexts)
                {
                    ISequence item = new Sequence(alphabet, text);
                    referenceItems.Add(item);
                }
                reference = new SegmentedSequence(referenceItems);

                foreach (string text in searchTexts)
                {
                    ISequence item = new Sequence(alphabet, text);
                    searchItems.Add(item);
                }
                search = new SegmentedSequence(searchItems);
            }
            else
            {
                // Reference sequences from the FastA file named in the node.
                string referenceFile = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.FilePathNode);
                Assert.IsNotNull(referenceFile);
                ApplicationLog.WriteLine(string.Format(null,
                    "NUCmer BVT : Successfully validated the File Path '{0}'.", referenceFile));

                FastaParser referenceParser = new FastaParser();
                IList<ISequence> parsedReference = referenceParser.Parse(referenceFile);
                reference = new SegmentedSequence(parsedReference);

                // Search sequences from the search FastA file named in the node.
                string searchFile = Utility._xmlUtil.GetTextValue(nodeName,
                    Constants.SearchSequenceFilePathNode);
                Assert.IsNotNull(searchFile);
                ApplicationLog.WriteLine(string.Format(null,
                    "NUCmer BVT : Successfully validated the File Path '{0}'.", searchFile));

                FastaParser searchParser = new FastaParser();
                IList<ISequence> parsedSearch = searchParser.Parse(searchFile);
                search = new SegmentedSequence(parsedSearch);
            }

            string mumLengthText = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

            // Build the suffix tree for the reference and look up the search sequences in it.
            ISuffixTreeBuilder treeBuilder = new KurtzSuffixTreeBuilder();
            SequenceSuffixTree tree = treeBuilder.BuildSuffixTree(reference);
            long minimumLength = long.Parse(mumLengthText, null);

            IList<MaxUniqueMatch> foundMatches = treeBuilder.FindMatches(tree, search, minimumLength);

            if (additionalParam == AdditionalParameters.FindUniqueMatches)
            {
                // Validates the Unique Matches.
                ApplicationLog.WriteLine("NUCmer BVT : Validating the Unique Matches");
                Assert.IsTrue(ValidateUniqueMatches(foundMatches, nodeName, additionalParam, isFilePath));
                Console.WriteLine(
                    "NUCmer BVT : Successfully validated the all the unique matches for the sequences.");
            }
            else if (additionalParam == AdditionalParameters.PerformClusterBuilder)
            {
                // Validates the matches for the cluster builder scenario.
                ApplicationLog.WriteLine(
                    "NUCmer BVT : Validating the Unique Matches using Cluster Builder");
                Assert.IsTrue(ValidateUniqueMatches(foundMatches, nodeName, additionalParam, isFilePath));
                Console.WriteLine(
                    "NUCmer BVT : Successfully validated the all the cluster builder matches for the sequences.");
            }

            // Written for every parameter value, including those that skipped validation above.
            ApplicationLog.WriteLine(
                "NUCmer BVT : Successfully validated the all the unique matches for the sequences.");
        }
Beispiel #31
0
        /// <summary>
        /// Parse General test cases for Data Virtualization: parses a FastA file with
        /// data virtualization enforced on the parser and validates the parsed
        /// sequences against the expected values configured in the Xml.
        /// </summary>
        /// <param name="nodeName">Xml node name</param>
        /// <param name="addParam">Additional parameter selecting which parser call is
        /// exercised (Parse / ParseOne and their two-argument overloads) or whether
        /// only the sequence properties are validated</param>
        static void ParseGeneralTestCases(string nodeName,
                                          AdditionalParameters addParam)
        {
            // Gets the FastA file path from the Xml
            string filePath = Utility._xmlUtil.GetTextValue(nodeName,
                                                            Constants.FilePathNode);

            Assert.IsTrue(File.Exists(filePath));

            // Logs information to the log file and to the console
            string fileExistsMessage = string.Format(null,
                                                     "FastA Parser BVT: File Exists in the Path '{0}'.", filePath);
            ApplicationLog.WriteLine(fileExistsMessage);
            Console.WriteLine(fileExistsMessage);

            IList <ISequence> seqsList  = null;
            FastaParser       parserObj = new FastaParser();

            parserObj.EnforceDataVirtualization = true;
            Sequence parseOneSeq = null;

            // The expected sequences are stored as one comma separated value
            string expectedSequence = Utility._xmlUtil.GetTextValue(nodeName,
                                                                    Constants.ExpectedSequenceNode);

            string[] expectedSequences = expectedSequence.Split(',');

            // Parses the file using the parser call selected by the parameter.
            switch (addParam)
            {
            case AdditionalParameters.Parse:
            case AdditionalParameters.Properties:
                seqsList = parserObj.Parse(filePath);
                break;

            case AdditionalParameters.ParseOne:
                parseOneSeq = (Sequence)parserObj.ParseOne(filePath);
                break;

            case AdditionalParameters.ParseReadOnly:
                seqsList = parserObj.Parse(filePath,
                                           false);
                break;

            case AdditionalParameters.ParseOneReadOnly:
                parseOneSeq = (Sequence)parserObj.ParseOne(filePath,
                                                           false);
                break;

            default:
                // Fail with a clear message instead of letting a null sequence
                // reach the validation loop below.
                Assert.Fail(string.Format(null,
                                          "FastA Parser BVT: Unsupported additional parameter '{0}'.",
                                          addParam));
                break;
            }

            // Check if ParseOne or Parse was used for parsing
            if (null == seqsList)
            {
                // No list was produced, so a single parsed sequence is required;
                // a null here would otherwise surface as a NullReferenceException.
                Assert.IsNotNull(parseOneSeq);
                seqsList = new List <ISequence>();
                seqsList.Add(parseOneSeq);
            }
            else
            {
                Assert.AreEqual(2, seqsList.Count);
            }

            string countMessage = string.Format(null,
                                                "FastA Parser BVT: Number of Sequences found are '{0}'.",
                                                seqsList.Count.ToString((IFormatProvider)null));
            ApplicationLog.WriteLine(countMessage);
            Console.WriteLine(countMessage);

            // Validating by setting the MaxNumberOfBlocks
            int seqNumber = 0;

            foreach (Sequence seq in seqsList)
            {
                //seq.BlockSize = 5;
                seq.MaxNumberOfBlocks = 5;

                // Rebuild the sequence text symbol by symbol
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < seq.Count; i++)
                {
                    sb.Append(seq[i].Symbol.ToString());
                }

                if (addParam == AdditionalParameters.Properties)
                {
                    //Assert.AreEqual(5, seq.BlockSize);
                    Assert.AreEqual(5, seq.MaxNumberOfBlocks);
                    ApplicationLog.WriteLine("FastA Parser BVT: The Properties are as expected.");
                    Console.WriteLine("FastA Parser BVT: The Properties are as expected.");
                }
                else
                {
                    // Sequence content
                    Assert.AreEqual(expectedSequences[seqNumber], sb.ToString());

                    string sequenceMessage = string.Format(null,
                                                           "FastA Parser BVT: The FASTA sequence '{0}' validation after Parse() is found to be as expected.",
                                                           expectedSequences[seqNumber]);
                    ApplicationLog.WriteLine(sequenceMessage);
                    // Logs to the NUnit GUI (Console.Out) window
                    Console.WriteLine(sequenceMessage);

                    // Sequence length
                    Assert.AreEqual(expectedSequences[seqNumber].Length, seq.Count);
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "FastA Parser BVT: The FASTA Length sequence '{0}' is as expected.",
                                                           expectedSequences[seqNumber].Length));

                    // Alphabet: expected names are a comma separated, lower-cased list
                    string[] alphabets = Utility._xmlUtil.GetTextValue(nodeName,
                                                                       Constants.AlphabetNameNode).ToLower(CultureInfo.CurrentCulture).Split(',');
                    Assert.IsNotNull(seq.Alphabet);
                    Assert.AreEqual(alphabets[seqNumber],
                                    seq.Alphabet.Name.ToLower(CultureInfo.CurrentCulture));
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "FastA Parser BVT: The Sequence Alphabet is '{0}' and is as expected.",
                                                           seq.Alphabet.Name));

                    // Sequence ID: expected IDs are a '/' separated list
                    string[] seqIDs = Utility._xmlUtil.GetTextValue(nodeName,
                                                                    Constants.SequenceIdNode).ToLower(CultureInfo.CurrentCulture).Split('/');
                    Assert.AreEqual(seqIDs[seqNumber].ToLower(CultureInfo.CurrentCulture)
                                    , seq.ID.ToLower(CultureInfo.CurrentCulture));

                    string idMessage = string.Format(null,
                                                     "FastA Parser BVT: The Sequence ID is '{0}' and is as expected.",
                                                     seq.ID);
                    ApplicationLog.WriteLine(idMessage);
                    // Logs to the NUnit GUI (Console.Out) window
                    Console.WriteLine(idMessage);
                }

                seqNumber++;
            }
        }
Beispiel #32
0
        /// <summary>
        /// Validates the NUCmer align method for several test cases for the parameters passed.
        /// The reference and search sequences are aligned and the aligned sequences,
        /// joined with commas, are compared with the expected value from the xml.
        /// </summary>
        /// <param name="nodeName">Node name to be read from xml</param>
        /// <param name="isFilePath">Is Sequence saved in File; when false the sequences
        /// are read directly from the xml node</param>
        static void ValidateNUCmerAlignGeneralTestCases(string nodeName, bool isFilePath)
        {
            IList <ISequence> refSeqList    = new List <ISequence>();
            IList <ISequence> searchSeqList = new List <ISequence>();

            if (isFilePath)
            {
                // Gets the reference sequence from the FastA file
                string filePath = Utility._xmlUtil.GetTextValue(nodeName,
                                                                Constants.FilePathNode);

                Assert.IsNotNull(filePath);
                ApplicationLog.WriteLine(string.Format(null,
                                                       "NUCmer BVT : Successfully validated the File Path '{0}'.", filePath));

                FastaParser parser = new FastaParser();
                refSeqList = parser.Parse(filePath);

                // Gets the query sequence from the FastA file
                string queryFilePath = Utility._xmlUtil.GetTextValue(nodeName,
                                                                     Constants.SearchSequenceFilePathNode);

                Assert.IsNotNull(queryFilePath);
                ApplicationLog.WriteLine(string.Format(null,
                                                       "NUCmer BVT : Successfully validated the File Path '{0}'.", queryFilePath));

                FastaParser queryParser = new FastaParser();
                searchSeqList = queryParser.Parse(queryFilePath);
            }
            else
            {
                // Gets the reference & search sequences from the configuration file
                string[] referenceSequences = Utility._xmlUtil.GetTextValues(nodeName,
                                                                             Constants.ReferenceSequencesNode);
                string[] searchSequences = Utility._xmlUtil.GetTextValues(nodeName,
                                                                          Constants.SearchSequencesNode);

                IAlphabet seqAlphabet = Utility.GetAlphabet(Utility._xmlUtil.GetTextValue(nodeName,
                                                                                          Constants.AlphabetNameNode));

                foreach (string referenceText in referenceSequences)
                {
                    refSeqList.Add(new Sequence(seqAlphabet, referenceText));
                }

                foreach (string searchText in searchSequences)
                {
                    searchSeqList.Add(new Sequence(seqAlphabet, searchText));
                }
            }

            string mumLength = Utility._xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

            // Configure the aligner with the fixed settings used by this test
            NUCmer nucmerObj = new NUCmer3();

            nucmerObj.MaximumSeparation = 0;
            nucmerObj.MinimumScore      = 2;
            nucmerObj.SeparationFactor  = 0.12f;
            nucmerObj.BreakLength       = 2;
            nucmerObj.LengthOfMUM       = long.Parse(mumLength, null);

            IList <IPairwiseSequenceAlignment> align = nucmerObj.Align(refSeqList, searchSeqList);

            // The expected value is read with a different helper depending on the input mode
            string expectedSequences = isFilePath
                ? Utility._xmlUtil.GetFileTextValue(nodeName, Constants.ExpectedSequencesNode)
                : Utility._xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequencesNode);

            // Gets all the aligned sequences in comma separated format.
            // Collecting the parts and joining them avoids trimming a trailing comma,
            // which threw ArgumentOutOfRangeException when nothing was aligned.
            List <string> alignedParts = new List <string>();
            foreach (IPairwiseSequenceAlignment seqAlignment in align)
            {
                foreach (PairwiseAlignedSequence alignedSeq in seqAlignment)
                {
                    alignedParts.Add(alignedSeq.FirstSequence.ToString());
                    alignedParts.Add(alignedSeq.SecondSequence.ToString());
                }
            }

            string actualSequences = string.Join(",", alignedParts.ToArray());

            Assert.AreEqual(expectedSequences, actualSequences);

            Console.WriteLine("NUCmer BVT : Successfully validated all the aligned sequences.");
            ApplicationLog.WriteLine("NUCmer BVT : Successfully validated all the aligned sequences.");
        }
Beispiel #33
0
        /// <summary>
        /// Submits a job built from multiple input sequences, polls the request
        /// status and validates the result returned by FetchResultsAsync().
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        void ValidateFetchResultAsync(string nodeName)
        {
            // Test inputs configured for this node
            string fastaPath     = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string email         = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.EmailIDNode);
            string clusterChoice = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.ClusterOptionNode);
            string alignAction   = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.ActionAlignNode);

            // Service handler under test
            ConfigParameters serviceConfig = new ConfigParameters();
            ClustalWParser   resultParser  = new ClustalWParser();

            serviceConfig.UseBrowserProxy = true;
            TestIClustalWServiceHandler handler =
                new TestIClustalWServiceHandler(resultParser, serviceConfig);

            // Request parameters
            ClustalWParameters requestParameters = new ClustalWParameters();

            requestParameters.Values[ClustalWParameters.Email]         = email;
            requestParameters.Values[ClustalWParameters.ClusterOption] = clusterChoice;
            requestParameters.Values[ClustalWParameters.ActionAlign]   = alignAction;

            // Input sequences are read from the FastA file
            IList <ISequence> inputSequences = null;

            using (FastaParser fastaParser = new FastaParser())
            {
                inputSequences = fastaParser.Parse(fastaPath);
            }

            // Submit the job and check the returned job id and parameters.
            // NOTE(review): these asserts require the job id and every parameter
            // value to be null or empty - confirm that this is what the test
            // handler is expected to return.
            ServiceParameters jobParameters = handler.SubmitRequest(inputSequences, requestParameters);

            Assert.IsTrue(string.IsNullOrEmpty(jobParameters.JobId));
            Console.WriteLine("JobId:" + jobParameters.JobId);
            foreach (string key in jobParameters.Parameters.Keys)
            {
                Assert.IsTrue(string.IsNullOrEmpty(jobParameters.Parameters[key].ToString()));
                Console.WriteLine(string.Format((IFormatProvider)null, "{0}:{1}",
                                                key, jobParameters.Parameters[key].ToString()));
            }

            // Poll the request status up to 10 times, then fetch the result if ready.
            ClustalWResult            alignmentResult = null;
            ServiceRequestInformation requestInfo;
            int                       attempt = 0;

            while (true)
            {
                requestInfo = handler.GetRequestStatus(jobParameters);
                if (requestInfo.Status == ServiceRequestStatus.Ready)
                {
                    break;
                }

                // Only back off while the job is waiting or queued; the delay grows
                // with the attempt number (so the first retry does not wait).
                int delay = 0;
                if (requestInfo.Status == ServiceRequestStatus.Waiting ||
                    requestInfo.Status == ServiceRequestStatus.Queued)
                {
                    delay = Constants.ClusterRetryInterval * attempt;
                }

                Thread.Sleep(delay);

                attempt++;
                if (attempt >= 10)
                {
                    break;
                }
            }

            if (requestInfo.Status == ServiceRequestStatus.Ready)
            {
                alignmentResult = handler.FetchResultsAsync(jobParameters);
            }

            // The result and its alignment must be available
            Assert.IsNotNull(alignmentResult);
            Assert.IsNotNull(alignmentResult.SequenceAlignment);

            // Dump every aligned sequence to the console and the log
            foreach (IAlignedSequence aligned in alignmentResult.SequenceAlignment.AlignedSequences)
            {
                Console.WriteLine("Aligned Sequence Sequences : ");
                ApplicationLog.WriteLine("Aligned Sequence Sequences : ");
                foreach (ISequence alignedItem in aligned.Sequences)
                {
                    string sequenceLine = string.Concat("Sequence:", alignedItem.ToString());
                    Console.WriteLine(sequenceLine);
                    ApplicationLog.WriteLine(sequenceLine);
                }
            }

            Console.WriteLine(@"ClustalWServiceHandler BVT : Submit job and Get Results is 
      successfully completed using FetchResultAsync()");
            ApplicationLog.WriteLine(@"ClustalWServiceHandler BVT : Submit job and Get Results 
      is successfully completed using FetchResultAsync()");
        }
Beispiel #34
0
        /// <summary>
        /// Exercises HierarchicalClusteringParallel on a hand-built distance matrix,
        /// on k-mer distances of a small set of DNA sequences, and on k-mer distances
        /// of the sequences read from a FastA test file.
        /// </summary>
        public void TestHierarchicalClusteringSerial()
        {
            // Hand-built symmetric matrix: the distance between i and j is i + j.
            int             leafCount         = 4;
            IDistanceMatrix pairwiseDistances = new SymmetricDistanceMatrix(leafCount);

            for (int row = 0; row < pairwiseDistances.Dimension - 1; ++row)
            {
                for (int col = row + 1; col < pairwiseDistances.Dimension; ++col)
                {
                    pairwiseDistances[row, col] = row + col;
                    pairwiseDistances[col, row] = row + col;
                }
            }

            PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions { MaxDegreeOfParallelism = 2 };

            IHierarchicalClustering clustering = new HierarchicalClusteringParallel(pairwiseDistances);

            // 2 * leafCount - 1 nodes are expected, with IDs equal to their index.
            Assert.AreEqual(7, clustering.Nodes.Count);
            int expectedNodeCount = leafCount * 2 - 1;
            for (int nodeIndex = 0; nodeIndex < expectedNodeCount; ++nodeIndex)
            {
                Assert.AreEqual(nodeIndex, clustering.Nodes[nodeIndex].ID);
            }

            // Print the children of the nodes that follow the first leafCount nodes.
            for (int nodeIndex = leafCount; nodeIndex < clustering.Nodes.Count; ++nodeIndex)
            {
                var node = clustering.Nodes[nodeIndex];
                Console.WriteLine(node.LeftChildren.ID);
                Console.WriteLine(node.RightChildren.ID);
            }

            // Test on sequences
            List <ISequence> sequences = new List <ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAAATCG"),
                new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG")
            };

            int kmerLength = 4;
            KmerDistanceMatrixGenerator matrixGenerator =
                new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);

            matrixGenerator.GenerateDistanceMatrix(sequences);

            // Print the upper triangle of the generated distance matrix.
            for (int row = 0; row < matrixGenerator.DistanceMatrix.Dimension - 1; ++row)
            {
                for (int col = row + 1; col < matrixGenerator.DistanceMatrix.Dimension; ++col)
                {
                    Console.WriteLine("{0}-{1}: {2}", row, col, matrixGenerator.DistanceMatrix[row, col]);
                }
            }

            // Every clustering node is expected to be flagged for re-alignment ...
            clustering = new HierarchicalClusteringParallel(matrixGenerator.DistanceMatrix);
            for (int nodeIndex = 0; nodeIndex < clustering.Nodes.Count; ++nodeIndex)
            {
                Assert.AreEqual(true, clustering.Nodes[nodeIndex].NeedReAlignment);
            }

            // ... and so is every node of the guide tree built from that clustering.
            BinaryGuideTree guideTree = new BinaryGuideTree(clustering);

            for (int nodeIndex = 0; nodeIndex < guideTree.Nodes.Count; ++nodeIndex)
            {
                Assert.AreEqual(true, guideTree.Nodes[nodeIndex].NeedReAlignment);
            }

            // Constructed but not used further in this test.
            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            //Assert.AreEqual(0, hierarchicalClustering.Nodes[4].LeftChildren.ID);
            //Assert.AreEqual(1, hierarchicalClustering.Nodes[4].RightChildren.ID);
            //Assert.AreEqual(2, hierarchicalClustering.Nodes[5].LeftChildren.ID);
            //Assert.AreEqual(4, hierarchicalClustering.Nodes[5].RightChildren.ID);
            //Assert.AreEqual(3, hierarchicalClustering.Nodes[6].LeftChildren.ID);
            //Assert.AreEqual(5, hierarchicalClustering.Nodes[6].RightChildren.ID);

            // Test on larger dataset
            ISequenceParser   fastaParser     = new FastaParser();
            string            alignedFilePath = @"testdata\FASTA\RV11_BBS_all.afa";
            IList <ISequence> parsedSequences = fastaParser.Parse(alignedFilePath);

            sequences = MsaUtils.UnAlign(parsedSequences);

            int numberOfSequences = parsedSequences.Count;

            matrixGenerator =
                new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);

            matrixGenerator.GenerateDistanceMatrix(sequences);

            clustering = new HierarchicalClusteringParallel(matrixGenerator.DistanceMatrix);

            // Print the children of the nodes that follow the first sequences.Count nodes.
            for (int nodeIndex = sequences.Count; nodeIndex < clustering.Nodes.Count; ++nodeIndex)
            {
                var node = clustering.Nodes[nodeIndex];
                Console.WriteLine("Node {0}: leftchildren-{1}, rightChildren-{2}", nodeIndex, node.LeftChildren.ID, node.RightChildren.ID);
            }
        }