Ejemplo n.º 1
0
        /// <summary>
        /// Parses the FASTA file named in the test-configuration XML, validates the first
        /// parsed sequence (text, length, ID and alphabet name) against the expected values,
        /// and then checks that a BasicDerivedSequence built over it renders the same text.
        /// </summary>
        public void ValidateBasicDerivedSequenceWithFastaFormat()
        {
            // Expected sequence text and the input file path come from the XML.
            string expectedSequence = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);
            string fastAFilePath = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleFastaNodeName, Constants.FilePathNode);

            Assert.IsTrue(File.Exists(fastAFilePath));

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Concat(
                "Sequence BVT: The File exist in the Path ", fastAFilePath));

            IList<ISequence> sequence = null;
            using (FastaParser parser = new FastaParser())
            {
                // Parse the FASTA file; only the first parsed sequence is validated below.
                sequence = parser.Parse(fastAFilePath);
            }

            Assert.IsNotNull(sequence);
            Sequence fastASequence = (Sequence)sequence[0];

            Assert.IsNotNull(fastASequence);
            Assert.AreEqual(expectedSequence, fastASequence.ToString());
            ApplicationLog.WriteLine("Sequence BVT: The Sequence is as expected.");

            // Direct cast instead of 'as': a failed conversion reports an InvalidCastException
            // at this line rather than a NullReferenceException on the CopyTo call.
            byte[] tmpEncodedSeq = new byte[fastASequence.Count];
            ((IList<byte>)fastASequence).CopyTo(tmpEncodedSeq, 0);
            Assert.AreEqual(expectedSequence.Length, tmpEncodedSeq.Length);
            ApplicationLog.WriteLine("Sequence BVT: The Sequence Length is as expected.");

            // NOTE(review): the expected ID is read from SimpleProteinAlphabetNode while every
            // other expected value in this test comes from SimpleFastaNodeName - confirm this
            // is intentional.
            Assert.AreEqual(
                _utilityObj._xmlUtil.GetTextValue(Constants.SimpleProteinAlphabetNode, Constants.SequenceIdNode),
                fastASequence.ID);
            ApplicationLog.WriteLine("Sequence BVT: SequenceID is as expected.");

            // Expected value first, actual second, so failure messages read correctly.
            Assert.AreEqual(
                _utilityObj._xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.AlphabetNameNode),
                fastASequence.Alphabet.Name);
            ApplicationLog.WriteLine("Sequence BVT: The Sequence Alphabet is as expected.");

            // Create a derived sequence over the parsed FASTA sequence.
            BasicDerivedSequence fastADerivedSeq =
                new BasicDerivedSequence(fastASequence, false, false, -1, -1);

            // Validate the derived sequence against both the expected text and its source.
            Assert.IsNotNull(fastADerivedSeq);
            Assert.AreEqual(expectedSequence, fastADerivedSeq.ToString());
            Assert.AreEqual(fastASequence.ToString(), fastADerivedSeq.ToString());
            ApplicationLog.WriteLine("Sequence BVT: The BasicDerived Sequence is as expected.");

            // Logs to Nunit GUI.
            Console.WriteLine(
                "Sequence BVT: Validation of FastaA file Sequence is completed successfully.");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds a Sequence from the DNA text and alphabet named in the test-configuration
        /// XML, validates it, and then validates that a BasicDerivedSequence created over it
        /// reports the same text and alphabet.
        /// </summary>
        public void ValidateBasicDerivedSequence()
        {
            // The alphabet name and the expected sequence text both come from the XML.
            string alphabetName = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleDnaAlphabetNode, Constants.AlphabetNameNode);
            string expectedSequence = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleDnaAlphabetNode, Constants.ExpectedNormalString);

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Sequence BVT: Sequence '{0}' and Alphabet '{1}'.", expectedSequence, alphabetName));

            Sequence createSequence = new Sequence(
                Utility.GetAlphabet(alphabetName), expectedSequence);

            Assert.IsNotNull(createSequence);

            // Validate the created sequence (expected value first, actual second).
            Assert.AreEqual(expectedSequence, createSequence.ToString());
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Sequence BVT: Sequence {0} is as expected.", createSequence.ToString()));

            // Logs to the NUnit GUI (Console.Out) window
            Console.WriteLine(string.Format((IFormatProvider)null,
                "Sequence BVT: Sequence {0} is as expected.", createSequence.ToString()));

            Assert.AreEqual(Utility.GetAlphabet(alphabetName), createSequence.Alphabet);
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Sequence BVT: Sequence Alphabet is '{0}' and is as expected.",
                createSequence.Alphabet.Name));

            // Create a BasicDerivedSequence over the created sequence.
            BasicDerivedSequence derivedSequence = new BasicDerivedSequence(
                createSequence, false, false, -1, -1);

            // Null check comes before the first dereference of derivedSequence.
            Assert.IsNotNull(derivedSequence);

            // Validate the derived sequence against its source and the expected text.
            Assert.AreEqual(createSequence.ToString(), derivedSequence.ToString());
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Sequence BVT: Sequence {0} is as expected.", derivedSequence.ToString()));

            Assert.AreEqual(expectedSequence, derivedSequence.ToString());
            Assert.AreEqual(Utility.GetAlphabet(alphabetName), derivedSequence.Alphabet);
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Sequence BVT: Sequence Alphabet is '{0}' and is as expected.",
                derivedSequence.Alphabet.Name));

            // Final status goes to the application log.
            ApplicationLog.WriteLine(
                "Sequence BVT: The BasicDerived Sequence validation is completed successfully.");
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Serializes a BasicDerivedSequence to the file "BasicDerivedSequence.data" with
        /// BinaryFormatter, reads it back from the same stream, and checks that the
        /// deserialized instance is a distinct object whose properties match the original.
        /// </summary>
        /// <remarks>
        /// Exceptions are deliberately not caught here: an unexpected exception or a failed
        /// assertion reaches the test runner with its own message and stack trace instead of
        /// being replaced by a bare Assert.Fail(). The data file is left on disk afterwards.
        /// </remarks>
        public void TestBasicDerivedSequenceWithBinaryFormatter()
        {
            // 'using' releases the file handle whether the assertions pass or fail.
            using (Stream stream = File.Open("BasicDerivedSequence.data", FileMode.Create))
            {
                BinaryFormatter formatter = new BinaryFormatter();

                // Source sequence with identifying metadata so the round trip can be compared.
                string id = Guid.NewGuid().ToString();
                Sequence seq1 = new Sequence(Alphabets.RNA, "ACUGCA");
                seq1.ID = id;
                seq1.DisplayID = "displayid";
                seq1.Documentation = "document";
                BasicDerivedSequence seq = new BasicDerivedSequence(seq1, true, true, -1, -1);

                // Write, rewind to the start of the stream, and read the object back.
                formatter.Serialize(stream, seq);
                stream.Seek(0, SeekOrigin.Begin);
                BasicDerivedSequence deserializedSeq = (BasicDerivedSequence)formatter.Deserialize(stream);

                Assert.AreNotSame(seq, deserializedSeq);
                Assert.AreSame(seq.Alphabet, deserializedSeq.Alphabet);
                Assert.AreEqual(seq.Complement.ToString(), deserializedSeq.Complement.ToString());
                Assert.AreEqual(seq.Complemented, deserializedSeq.Complemented);
                Assert.AreEqual(seq.Count, deserializedSeq.Count);
                Assert.AreEqual(seq.DisplayID, deserializedSeq.DisplayID);
                Assert.AreEqual(seq.Documentation, deserializedSeq.Documentation);
                Assert.AreEqual(seq.ID, deserializedSeq.ID);
                Assert.AreEqual(seq.IsReadOnly, deserializedSeq.IsReadOnly);
                Assert.AreEqual(seq.MoleculeType, deserializedSeq.MoleculeType);
                Assert.AreEqual(seq.RangeLength, deserializedSeq.RangeLength);
                Assert.AreEqual(seq.RangeStart, deserializedSeq.RangeStart);
                Assert.AreEqual(seq.Reverse.ToString(), deserializedSeq.Reverse.ToString());
                Assert.AreEqual(seq.ReverseComplement.ToString(), deserializedSeq.ReverseComplement.ToString());
                Assert.AreEqual(seq.Reversed, deserializedSeq.Reversed);
                Assert.AreEqual(seq.Source.ToString(), deserializedSeq.Source.ToString());
                Assert.AreEqual(seq.ToString(), deserializedSeq.ToString());
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds a Sequence from the DNA text and alphabet named in the test-configuration
        /// XML, wraps it in a BasicDerivedSequence, and validates that the wrapper's Reverse
        /// has the expected length and text.
        /// </summary>
        public void ValidateReverse()
        {
            // Inputs and expected values all come from the XML.
            string alphabetName = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleDnaAlphabetNode, Constants.AlphabetNameNode);
            string expectedSequence = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleDnaAlphabetNode, Constants.ExpectedNormalString);
            string expectedRevSequence = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleDnaAlphabetNode, Constants.ExpectedReverseSequence);
            string expectedDnaNormalSequenceCount = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleDnaAlphabetNode, Constants.EncodedDnaNormalSequenceCount);

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Sequence BVT: Sequence '{0}' and Alphabet '{1}'.", expectedSequence, alphabetName));

            Sequence createSequence = new Sequence(Utility.GetAlphabet(alphabetName), expectedSequence);

            Assert.IsNotNull(createSequence);

            // Validate the created sequence (expected value first, actual second).
            Assert.AreEqual(expectedSequence, createSequence.ToString());
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Sequence BVT: Sequence {0} is as expected.", createSequence.ToString()));

            // Logs to the NUnit GUI (Console.Out) window
            Console.WriteLine(string.Format((IFormatProvider)null,
                "Sequence BVT: Sequence {0} is as expected.", createSequence.ToString()));

            Assert.AreEqual(Utility.GetAlphabet(alphabetName), createSequence.Alphabet);
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Sequence Alphabet is '{0}' and is as expected.", createSequence.Alphabet.Name));

            // Create a BasicDerivedSequence over the created sequence.
            BasicDerivedSequence derivedSequence = new BasicDerivedSequence(
                createSequence, false, false, -1, -1);

            // Validate the Reverse of the derived sequence. The expected count is stored as
            // text in the XML, so the actual length is converted to a string for comparison.
            string revDerSeq = derivedSequence.Reverse.ToString();
            Assert.AreEqual(expectedDnaNormalSequenceCount,
                            revDerSeq.Length.ToString((IFormatProvider)null));
            Assert.AreEqual(expectedRevSequence, revDerSeq);
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "Sequence BVT: Reverse sequence {0} is expected", revDerSeq));

            // Final status goes to the application log.
            ApplicationLog.WriteLine(
                "Sequence BVT: The Reverse of the sequence is validated successfully.");
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Parses the GenBank file named in the test-configuration XML, validates the text
        /// and length of the first parsed sequence against the expected value, and then
        /// checks that a BasicDerivedSequence built over it renders the same text.
        /// </summary>
        public void ValidateBasicDerivedSequenceWithGenBankFormat()
        {
            // Expected sequence text and the input file path come from the XML.
            string expectedSequence = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGeneBankNodeName, Constants.ExpectedSequenceNode);
            string geneBankFilePath = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGeneBankNodeName, Constants.FilePathNode);

            Assert.IsTrue(File.Exists(geneBankFilePath));

            // Logs information to the log file (the "Sequence BVT: " prefix appears once,
            // matching the other messages in this test).
            ApplicationLog.WriteLine(string.Concat(
                "Sequence BVT: The File exist in the Path ", geneBankFilePath));

            // Parse the GenBank file; only the first parsed sequence is validated below.
            ISequenceParser parser = new GenBankParser();

            IList<ISequence> sequence = parser.Parse(geneBankFilePath);

            Assert.IsNotNull(sequence);
            Sequence geneBankSeq = (Sequence)sequence[0];

            Assert.IsNotNull(geneBankSeq);
            Assert.AreEqual(expectedSequence, geneBankSeq.ToString());
            ApplicationLog.WriteLine("Sequence BVT: The GenBank Sequence is as expected.");

            // Direct cast instead of 'as': a failed conversion reports an InvalidCastException
            // at this line rather than a NullReferenceException on the CopyTo call.
            byte[] tmpEncodedSeq = new byte[geneBankSeq.Count];
            ((IList<byte>)geneBankSeq).CopyTo(tmpEncodedSeq, 0);
            Assert.AreEqual(expectedSequence.Length, tmpEncodedSeq.Length);
            ApplicationLog.WriteLine("Sequence BVT: The GenBank Sequence Length is as expected.");

            // Create a derived sequence over the parsed GenBank sequence.
            BasicDerivedSequence genebankDerivedSeq =
                new BasicDerivedSequence(geneBankSeq, false, false, -1, -1);

            // Validate the derived sequence against both the expected text and its source.
            Assert.IsNotNull(genebankDerivedSeq);
            Assert.AreEqual(expectedSequence, genebankDerivedSeq.ToString());
            Assert.AreEqual(geneBankSeq.ToString(), genebankDerivedSeq.ToString());
            ApplicationLog.WriteLine("Sequence BVT: The BasicDerived Sequence is as expected.");

            // Logs to Nunit GUI.
            Console.WriteLine(
                "Sequence BVT: Validation of GenBank file Sequence is completed successfully.");
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Searches a BasicDerivedSequence built over the DNA sequence "ACCGGTT" for the
        /// pattern "ACCG" and asserts that exactly one pattern entry is returned and that
        /// its first match position is 0.
        /// </summary>
        public void ValidateDerivedSequenceFindMatches()
        {
            const string searchPattern = "ACCG";
            const string successMessage = "Derived Sequence : Successfully validated FindMatches() method.";

            ISequence sourceSequence = new Sequence(Alphabets.DNA, "ACCGGTT");
            BasicDerivedSequence derivedSequence = new BasicDerivedSequence(
                sourceSequence, false, false, 0, sourceSequence.Count - 1);

            // Single-pattern search starting at offset 0.
            IList<string> searchPatterns = new List<string> { searchPattern };
            IDictionary<string, IList<int>> matches =
                derivedSequence.FindMatches(searchPatterns, 0, false);

            Assert.AreEqual(1, matches.Count);
            Assert.AreEqual(0, matches[searchPattern][0]);

            // Report success on the console and in the application log.
            Console.WriteLine(successMessage);
            ApplicationLog.WriteLine(successMessage);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Clones a BasicDerivedSequence built over a fixed DNA sequence and asserts that the
        /// clone renders the same text as the original, logging both along the way.
        /// </summary>
        public void BasicDerivedSequenceClone()
        {
            ApplicationLog.WriteLine("BasicDerivedSequenceClone test started");

            // Fixed DNA input wrapped in a derived sequence.
            Sequence sourceSequence = new Sequence(Alphabets.DNA, "GCCAACGAACCGGAAACCCGGGACCG");
            BasicDerivedSequence original = new BasicDerivedSequence(sourceSequence, false, false, 0, 0);
            ApplicationLog.WriteLine(string.Concat("Original Sequence: ", original.ToString()));

            // The clone must render the same text as the instance it was cloned from.
            BasicDerivedSequence clone = original.Clone();
            Assert.AreEqual(original.ToString(), clone.ToString());
            ApplicationLog.WriteLine(string.Concat("Cloned Sequence: ", clone.ToString()));

            ApplicationLog.WriteLine("BasicDerivedSequenceClone test completed");
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Writes an ISequence in FASTA layout to the supplied writer: a header line made of
        /// ">" followed by the sequence ID, then the sequence text split into lines of at
        /// most _maxLineLength symbols. The writer is flushed but not closed.
        /// </summary>
        /// <param name="sequence">The sequence to format.</param>
        /// <param name="writer">The TextWriter used to write the formatted sequence text.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="sequence"/> or <paramref name="writer"/> is null.
        /// </exception>
        public override void Format(ISequence sequence, TextWriter writer)
        {
            // Fail fast with a named argument instead of a NullReferenceException part-way
            // through the output.
            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (writer == null)
            {
                throw new ArgumentNullException("writer");
            }

            // write header
            writer.WriteLine(">" + sequence.ID);

            // write sequence: one derived-sequence window is re-ranged over each output line
            BasicDerivedSequence derivedSeq = new BasicDerivedSequence(sequence, false, false, 0, 0);
            int symbolCount = sequence.Count;

            for (int lineStart = 0; lineStart < symbolCount; lineStart += _maxLineLength)
            {
                derivedSeq.RangeStart = lineStart;

                // The last line may be shorter than _maxLineLength.
                derivedSeq.RangeLength = Math.Min(_maxLineLength, symbolCount - lineStart);
                writer.WriteLine(derivedSeq.ToString());
            }

            writer.Flush();
        }
Ejemplo n.º 9
0
        // Writes the data section for one sequence to the specified writer: an opening line
        // carrying the type label and DisplayID, the sequence text lower-cased and split into
        // lines of at most _maxSequenceSymbolsPerLine symbols, and a closing "end-<type>"
        // line. Nothing is written for an empty sequence.
        private void WriteSeqData(ISequence sequence, string type, TextWriter writer)
        {
            if (sequence.Count <= 0)
            {
                return;
            }

            // The incoming 'type' value is discarded; the label is always derived from the
            // sequence's molecule type.
            type = GetGenericTypeString(sequence.MoleculeType);
            WriteHeaderLine(writer, type, sequence.DisplayID);

            // One derived-sequence window is re-ranged over each output line.
            BasicDerivedSequence lineWindow = new BasicDerivedSequence(sequence, false, false, 0, 0);
            int offset = 0;
            while (offset < sequence.Count)
            {
                lineWindow.RangeStart = offset;
                lineWindow.RangeLength = Math.Min(_maxSequenceSymbolsPerLine, sequence.Count - offset);

                string lineText = lineWindow.ToString().ToLower(CultureInfo.InvariantCulture);
                WriteHeaderLine(writer, lineText);

                offset += _maxSequenceSymbolsPerLine;
            }

            WriteHeaderLine(writer, "end-" + type);
        }
Ejemplo n.º 10
0
        //
        // Builds a new list holding the ReverseComplement of every sequence in the input
        // list, in the same order. When a reverse complement is a BasicDerivedSequence, its
        // DisplayID gets " Reverse" appended.
        //
        // Open questions from the original author:
        //   Should there be an easily accessed indicator that this is a reversed sequence?
        //   And should we be able to get the 'base' version even if it is a sub-sequence?
        //
        static IList<ISequence> ReverseComplementSequenceList(IList<ISequence> sequenceList)
        {
            List<ISequence> reverseComplements = new List<ISequence>();

            foreach (ISequence original in sequenceList)
            {
                ISequence reverseComplement = original.ReverseComplement;

                // Only BasicDerivedSequence results are tagged; other types are added unchanged.
                BasicDerivedSequence taggable = reverseComplement as BasicDerivedSequence;
                if (taggable != null)
                {
                    taggable.DisplayID += " Reverse";
                }

                reverseComplements.Add(reverseComplement);
            }

            return reverseComplements;
        }
Ejemplo n.º 11
0
        //
        // Builds a new list in which each original sequence is immediately followed by its
        // ReverseComplement. When a reverse complement is a BasicDerivedSequence, its
        // DisplayID gets " Reverse" appended; its ID is left untouched.
        //
        // Open question from the original author:
        //   Should there be an easily accessed indicator that this is a reversed sequence?
        //
        static IList<ISequence> AddReverseComplementsToSequenceList(IList<ISequence> sequenceList)
        {
            List<ISequence> interleaved = new List<ISequence>();

            foreach (ISequence original in sequenceList)
            {
                ISequence reverseComplement = original.ReverseComplement;

                // Only BasicDerivedSequence results are tagged; other types are added unchanged.
                BasicDerivedSequence taggable = reverseComplement as BasicDerivedSequence;
                if (taggable != null)
                {
                    taggable.DisplayID += " Reverse";
                }

                // Original first, then its reverse complement.
                interleaved.Add(original);
                interleaved.Add(reverseComplement);
            }

            return interleaved;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Runs the PAMSAM multiple sequence aligner on a protein benchmark alignment file:
        /// the benchmark sequences are un-aligned, written to a temporary FASTA file,
        /// re-parsed and aligned, and the aligned sequences and scores of every stage are
        /// printed to the console. The test contains no assertions; it fails only if
        /// something throws.
        /// </summary>
        public void TestMsaBenchMarkLargeDataset()
        {
            // Test on a protein benchmark dataset
            ISequenceParser parser = new FastaParser();
            string filepath = @"testdata\FASTA\Protein\Balibase\RV913\BOX032.xml.afa";
            IList<ISequence> orgSequences = parser.Parse(filepath);

            IList<ISequence> sequences = MsaUtils.UnAlign(orgSequences);
            int numberOfSequences = orgSequences.Count;

            String outputFilePath = @"tempBOX032.xml.afa";

            // Open in overwrite mode (append: false) so that output left behind by an earlier
            // aborted run is not mixed into this run's input. 'using' closes and flushes the
            // file even when a write throws.
            using (StreamWriter writer = new StreamWriter(outputFilePath, false))
            {
                foreach (ISequence sequence in sequences)
                {
                    writer.WriteLine(">" + sequence.ID);

                    // write sequence in lines of at most 60 symbols
                    BasicDerivedSequence derivedSeq = new BasicDerivedSequence(sequence, false, false, 0, 0);
                    for (int lineStart = 0; lineStart < sequence.Count; lineStart += 60)
                    {
                        derivedSeq.RangeStart = lineStart;
                        derivedSeq.RangeLength = Math.Min(60, sequence.Count - lineStart);
                        writer.WriteLine(derivedSeq.ToString());
                    }
                }
            }

            // Re-read the un-aligned sequences from the temporary file.
            sequences.Clear();
            sequences = parser.Parse(outputFilePath);

            Console.WriteLine("Original sequences are:");
            for (int i = 0; i < numberOfSequences; ++i)
            {
                Console.WriteLine(sequences[i].ToString());
            }

            Console.WriteLine("Benchmark sequences are:");
            for (int i = 0; i < numberOfSequences; ++i)
            {
                Console.WriteLine(orgSequences[i].ToString());
            }

            // Aligner configuration (static settings on the aligner type).
            PAMSAMMultipleSequenceAligner.FasterVersion = false;
            PAMSAMMultipleSequenceAligner.UseWeights = false;
            PAMSAMMultipleSequenceAligner.UseStageB = true;
            PAMSAMMultipleSequenceAligner.NumberOfCores = 2;
            int gapOpenPenalty = -13;
            int gapExtendPenalty = -5;
            int kmerLength = 3;

            int numberOfDegrees = 2;     //Environment.ProcessorCount;
            int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);

            DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
            UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
            ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
            ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProduct;

            PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                                                    (sequences, MoleculeType.Protein, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                    profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                                                    numberOfPartitions, numberOfDegrees);

            // Report the benchmark score, then the sequences and Q/TC scores of each stage.
            Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));
            Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
            for (int i = 0; i < msa.AlignedSequencesA.Count; ++i)
            {
                Console.WriteLine(msa.AlignedSequencesA[i].ToString());
            }
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));
            Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
            for (int i = 0; i < msa.AlignedSequencesB.Count; ++i)
            {
                Console.WriteLine(msa.AlignedSequencesB[i].ToString());
            }
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));
            Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
            for (int i = 0; i < msa.AlignedSequencesC.Count; ++i)
            {
                Console.WriteLine(msa.AlignedSequencesC[i].ToString());
            }
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));
            Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

            for (int i = 0; i < msa.AlignedSequences.Count; ++i)
            {
                Console.WriteLine(msa.AlignedSequences[i].ToString());
            }
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));

            // Remove the temporary FASTA file.
            if (File.Exists(outputFilePath))
            {
                File.Delete(outputFilePath);
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Creates one instance of each sequence type over the DNA symbols "ACT" and passes
        /// it to TestListOfByteMethods. Where both variants are set up below, a type is
        /// exercised once in its plain form and once with UseEncoding switched on.
        /// </summary>
        public void TestListOfByteOnISequence()
        {
            // Sequence: writable, then encoded.
            Sequence sequence = new Sequence(DnaAlphabet.Instance, "ACT") { IsReadOnly = false };
            TestListOfByteMethods(sequence);

            sequence = new Sequence(DnaAlphabet.Instance, "ACT") { UseEncoding = true };
            TestListOfByteMethods(sequence);

            // BasicDerivedSequence: plain, then encoded. The range length is taken from the
            // current Count of the Sequence instance tested just above.
            BasicDerivedSequence basicDerived = new BasicDerivedSequence(
                new Sequence(DnaAlphabet.Instance, "ACT"), false, false, 0, sequence.Count);
            TestListOfByteMethods(basicDerived);

            basicDerived = new BasicDerivedSequence(
                new Sequence(DnaAlphabet.Instance, "ACT"), false, false, 0, sequence.Count)
            {
                UseEncoding = true
            };
            TestListOfByteMethods(basicDerived);

            // DerivedSequence: plain, then encoded.
            DerivedSequence derived = new DerivedSequence(new Sequence(DnaAlphabet.Instance, "ACT"));
            TestListOfByteMethods(derived);

            derived = new DerivedSequence(new Sequence(DnaAlphabet.Instance, "ACT")) { UseEncoding = true };
            TestListOfByteMethods(derived);

            // QualitativeSequence: writable, then encoded.
            QualitativeSequence qualitative =
                new QualitativeSequence(DnaAlphabet.Instance, FastQFormatType.Illumina, "ACT")
                {
                    IsReadOnly = false
                };
            TestListOfByteMethods(qualitative);

            qualitative = new QualitativeSequence(DnaAlphabet.Instance, FastQFormatType.Illumina, "ACT")
            {
                UseEncoding = true
            };
            TestListOfByteMethods(qualitative);

            // SegmentedSequence: two writable segments, "A" followed by "CT".
            Sequence firstSegment = new Sequence(DnaAlphabet.Instance, "A");
            firstSegment.IsReadOnly = false;
            Sequence secondSegment = new Sequence(DnaAlphabet.Instance, "CT");
            secondSegment.IsReadOnly = false;

            List<ISequence> segments = new List<ISequence>();
            segments.Add(firstSegment);
            segments.Add(secondSegment);

            SegmentedSequence segmented = new SegmentedSequence(segments);
            TestListOfByteMethods(segmented);

            // SparseSequence: writable, populated with the items A, C, T starting at index 0.
            List<ISequenceItem> sparseItems = new List<ISequenceItem>();
            sparseItems.Add(DnaAlphabet.Instance.A);
            sparseItems.Add(DnaAlphabet.Instance.C);
            sparseItems.Add(DnaAlphabet.Instance.T);

            SparseSequence sparse = new SparseSequence(DnaAlphabet.Instance, 0, sparseItems)
            {
                IsReadOnly = false
            };
            TestListOfByteMethods(sparse);

            // No test code for VirtualSequence
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Builds the sequence described by a location, taking bases from the supplied
        /// sequence. When the location (or one of its sub-locations) carries an accession,
        /// the referred sequences are passed along to the sub-location and range lookups so
        /// that the matching sequence can be used instead.
        ///
        /// For example, for the location "join(100..200, J00089.1:10..50, J00090.2:30..40)"
        /// bases 100 to 200 come from the sequence parameter, while J00089.1 and J00090.2 are
        /// looked up in referredSequences. If a referred sequence is not found there, an
        /// exception will occur.
        ///
        /// Handling per operator:
        /// Complement - complements the single sub-location (or the location's own range)
        /// and returns the result as a new Sequence;
        /// Order - returns a SegmentedSequence of the sub-location sequences (or of the
        /// location's own range);
        /// Join - returns one Sequence with the sub-location sequences concatenated in
        /// order (or the location's own range);
        /// any other operator - returns the location's own range and does not allow
        /// sub-locations.
        /// </summary>
        /// <param name="location">Location instance.</param>
        /// <param name="sequence">Sequence instance from which the sub sequence has to be returned.</param>
        /// <param name="referredSequences">A dictionary containing Accession numbers as keys and Sequences as values, this will be used when
        /// the location or sublocations contains accession.</param>
        /// <returns>The sequence built for the location.</returns>
        /// <exception cref="ArgumentNullException">location or sequence is null.</exception>
        /// <exception cref="ArgumentException">A Complement location has more than one
        /// sub-location, or a location with none of the handled operators has sub-locations.</exception>
        public ISequence GetSubSequence(ILocation location, ISequence sequence, Dictionary<string, ISequence> referredSequences)
        {
            if (location == null)
            {
                throw new ArgumentNullException(Resource.ParameterNameLocation);
            }

            if (sequence == null)
            {
                throw new ArgumentNullException(Resource.ParameterNameSequence);
            }

            // Scratch wrapper used by the Complement and Order branches to render a source
            // sequence as text.
            BasicDerivedSequence wrapper = new BasicDerivedSequence(sequence, false, false, -1, -1);

            if (location.Operator == LocationOperator.Complement)
            {
                // A complement may wrap at most one sub-location.
                if (location.SubLocations.Count > 1)
                {
                    throw new ArgumentException(Resource.ComplementWithMorethanOneSubLocs);
                }

                ISequence complementSource;
                if (location.SubLocations.Count > 0)
                {
                    complementSource = location.SubLocations[0].GetSubSequence(sequence, referredSequences);
                }
                else
                {
                    complementSource = GetSubSequence(location.Start, location.End, location.Accession, location.Separator, sequence, referredSequences);
                }

                wrapper.Source = complementSource;
                wrapper.Complemented = true;
                return new Sequence(sequence.Alphabet, wrapper.ToString());
            }
            else if (location.Operator == LocationOperator.Order)
            {
                List<ISequence> orderedParts = new List<ISequence>();
                if (location.SubLocations.Count > 0)
                {
                    foreach (ILocation subLocation in location.SubLocations)
                    {
                        orderedParts.Add(subLocation.GetSubSequence(sequence, referredSequences));
                    }
                }
                else
                {
                    // No sub-locations: the location's own range is the only segment.
                    wrapper.Source = GetSubSequence(location.Start, location.End, location.Accession, location.Separator, sequence, referredSequences);
                    orderedParts.Add(new Sequence(sequence.Alphabet, wrapper.ToString()));
                }

                return new SegmentedSequence(orderedParts);
            }
            else if (location.Operator == LocationOperator.Join)
            {
                if (location.SubLocations.Count <= 0)
                {
                    return GetSubSequence(location.Start, location.End, location.Accession, location.Separator, sequence, referredSequences);
                }

                // Resolve every sub-location first, then concatenate the results in order.
                List<ISequence> joinedParts = new List<ISequence>();
                foreach (ILocation subLocation in location.SubLocations)
                {
                    joinedParts.Add(subLocation.GetSubSequence(sequence, referredSequences));
                }

                Sequence joined = new Sequence(sequence.Alphabet);
                foreach (ISequence part in joinedParts)
                {
                    joined.InsertRange(joined.Count, part.ToString());
                }

                return joined;
            }

            // Any other operator: sub-locations are not allowed, only a plain range.
            if (location.SubLocations.Count > 0)
            {
                throw new ArgumentException(Resource.NoneWithSubLocs);
            }

            return GetSubSequence(location.Start, location.End, location.Accession, location.Separator, sequence, referredSequences);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Writes the file-level headers for all sequences. The headers for all sequences
        /// go at the top of the file before any features.
        /// </summary>
        /// <remarks>
        /// Header lines are emitted through WriteHeaderLine in this order:
        /// "gff-version"; "source-version" (only when every sequence carries matching
        /// "source" and "version" metadata strings); "date"; "type" (one line when all
        /// sequences share a generic type, otherwise one line per qualifying sequence);
        /// the sequence data blocks (only when ShouldWriteSequenceData is set); and
        /// "sequence-region" for each sequence that has "start" and "end" metadata.
        /// All text that ends up in the file is produced culture-invariantly so the
        /// output does not depend on the regional settings of the current thread.
        /// </remarks>
        /// <param name="sequenceList">The sequences whose headers are written.</param>
        /// <param name="writer">The writer handed to WriteHeaderLine for every header line.</param>
        private void WriteHeaders(ICollection <ISequence> sequenceList, TextWriter writer)
        {
            // look for file-scope data that is common to all sequences; null signifies no match
            string source   = null;
            string version  = null;
            string type     = null;
            bool   firstSeq = true;

            foreach (ISequence sequence in sequenceList)
            {
                if (firstSeq)
                {
                    // source and version go together; can't output one without the other
                    if (sequence.Metadata.ContainsKey("source") && sequence.Metadata.ContainsKey("version"))
                    {
                        source  = sequence.Metadata["source"] as string;
                        version = sequence.Metadata["version"] as string;
                    }

                    // map to generic string; e.g. mRNA, tRNA -> RNA
                    type = GetGenericTypeString(sequence.MoleculeType);

                    firstSeq = false;
                }
                else
                {
                    // source and version go together; can't output one without the other
                    if (source != null)
                    {
                        bool sourceAndVersionMatchOthers =
                            sequence.Metadata.ContainsKey("source") &&
                            sequence.Metadata.ContainsKey("version") &&
                            source == sequence.Metadata["source"] as string &&
                            version == sequence.Metadata["version"] as string;

                        // set both to null if this seq source and version don't match previous ones
                        if (!sourceAndVersionMatchOthers)
                        {
                            source  = null;
                            version = null;
                        }
                    }

                    // set type to null if this seq type doesn't match previous types
                    if (type != null && type != GetGenericTypeString(sequence.MoleculeType))
                    {
                        type = null;
                    }
                }
            }

            // formatting using gff version 2
            WriteHeaderLine(writer, "gff-version", "2");

            // only output source if they all match
            if (source != null)
            {
                WriteHeaderLine(writer, "source-version", source, version);
            }

            // today's date; formatted with the invariant culture so the header always uses
            // the Gregorian calendar and ASCII digits regardless of the current culture
            WriteHeaderLine(
                writer,
                "date",
                DateTime.Today.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture));

            // DNA is the default type, so it never needs an explicit type header
            string dnaType = MoleculeType.DNA.ToString();

            // type header
            if (type == null)
            {
                foreach (ISequence sequence in sequenceList)
                {
                    type = GetGenericTypeString(sequence.MoleculeType);

                    // only output seq-specific type header if this seq won't have its type
                    // output as part of a sequence data header; don't need to output if DNA,
                    // as DNA is default
                    if (type != dnaType &&
                        (!ShouldWriteSequenceData || sequence.Count == 0))
                    {
                        WriteHeaderLine(writer, "type", type, sequence.DisplayID);
                    }
                }
            }
            else
            {
                // output that the types all match; don't need to output if DNA, as DNA is default
                if (type != dnaType)
                {
                    WriteHeaderLine(writer, "type", type);
                }
            }

            // sequence data
            if (ShouldWriteSequenceData)
            {
                foreach (ISequence sequence in sequenceList)
                {
                    if (sequence.Count > 0)
                    {
                        type = GetGenericTypeString(sequence.MoleculeType);

                        // opening line of the data block: the type followed by the sequence id
                        WriteHeaderLine(writer, type, sequence.DisplayID);

                        // one derived view is reused as a sliding window over the sequence;
                        // only its range is moved for each output line
                        BasicDerivedSequence derivedSeq = new BasicDerivedSequence(sequence, false, false, 0, 0);
                        for (int lineStart = 0; lineStart < sequence.Count; lineStart += _maxSequenceSymbolsPerLine)
                        {
                            derivedSeq.RangeStart  = lineStart;
                            derivedSeq.RangeLength = Math.Min(_maxSequenceSymbolsPerLine, sequence.Count - lineStart);

                            // invariant lower-casing: culture-sensitive ToLower() would turn
                            // 'I' into dotless 'ı' under Turkish/Azeri cultures and corrupt
                            // the sequence symbols written to the file
                            WriteHeaderLine(writer, derivedSeq.ToString().ToLowerInvariant());
                        }

                        WriteHeaderLine(writer, "end-" + type);
                    }
                }
            }

            // sequence-region header
            foreach (ISequence sequence in sequenceList)
            {
                if (sequence.Metadata.ContainsKey("start") && sequence.Metadata.ContainsKey("end"))
                {
                    WriteHeaderLine(writer, "sequence-region", sequence.DisplayID,
                                    sequence.Metadata["start"] as string, sequence.Metadata["end"] as string);
                }
            }
        }