/// <summary>
/// Parses the FastA file configured under the simple FastA node, validates the first
/// parsed sequence (text, length, ID and alphabet name) against the values stored in
/// the XML configuration, and then checks that a BasicDerivedSequence created over
/// that sequence renders the same text.
/// </summary>
public void ValidateBasicDerivedSequenceWithFastaFormat()
{
    // Gets the expected sequence and the input file path from the Xml
    string expectedSequence = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);
    string fastAFilePath = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName, Constants.FilePathNode);

    Assert.IsTrue(File.Exists(fastAFilePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Concat(
        "Sequence BVT: The File exist in the Path ", fastAFilePath));

    IList<ISequence> sequence = null;
    using (FastaParser parser = new FastaParser())
    {
        // Parse a FastA file using the Parse method and convert the same to sequence.
        sequence = parser.Parse(fastAFilePath);
    }

    Assert.IsNotNull(sequence);
    Sequence fastASequence = (Sequence)sequence[0];
    Assert.IsNotNull(fastASequence);
    Assert.AreEqual(expectedSequence, fastASequence.ToString());
    ApplicationLog.WriteLine("Sequence BVT: The Sequence is as expected.");

    // A direct cast is used instead of 'as' so that a type mismatch surfaces as an
    // InvalidCastException at the cast rather than a NullReferenceException on CopyTo.
    byte[] tmpEncodedSeq = new byte[fastASequence.Count];
    ((IList<byte>)fastASequence).CopyTo(tmpEncodedSeq, 0);
    Assert.AreEqual(expectedSequence.Length, tmpEncodedSeq.Length);
    ApplicationLog.WriteLine("Sequence BVT: The Sequence Length is as expected.");

    // NOTE(review): the expected ID is read from the SimpleProteinAlphabetNode rather
    // than the SimpleFastaNodeName node used everywhere else here - confirm intended.
    Assert.AreEqual(
        _utilityObj._xmlUtil.GetTextValue(
            Constants.SimpleProteinAlphabetNode, Constants.SequenceIdNode),
        fastASequence.ID);
    ApplicationLog.WriteLine("Sequence BVT: SequenceID is as expected.");

    // Expected value goes first so that a failure message reports expected/actual correctly.
    Assert.AreEqual(
        _utilityObj._xmlUtil.GetTextValue(
            Constants.SimpleFastaNodeName, Constants.AlphabetNameNode),
        fastASequence.Alphabet.Name);
    ApplicationLog.WriteLine("Sequence BVT: The Sequence Alphabet is as expected.");

    // Create a derived sequence for the FastA file sequence.
    BasicDerivedSequence fastADerivedSeq =
        new BasicDerivedSequence(fastASequence, false, false, -1, -1);

    // Validate the derived sequence against the original sequence.
    Assert.IsNotNull(fastADerivedSeq);
    Assert.AreEqual(expectedSequence, fastADerivedSeq.ToString());
    Assert.AreEqual(fastASequence.ToString(), fastADerivedSeq.ToString());
    ApplicationLog.WriteLine("Sequence BVT: The BasicDerived Sequence is as expected.");

    // Logs to Nunit GUI.
    Console.WriteLine(
        "Sequence BVT: Validation of FastaA file Sequence is completed successfully.");
}
/// <summary>
/// Creates a Sequence from the alphabet name and sequence text stored in the XML
/// configuration, validates it, then wraps it in a BasicDerivedSequence and checks
/// that the derived sequence exposes the same text and alphabet.
/// </summary>
public void ValidateBasicDerivedSequence()
{
    const string sequenceAndAlphabetFormat = "Sequence BVT: Sequence '{0}' and Alphabet '{1}'.";
    const string sequenceExpectedFormat = "Sequence BVT: Sequence {0} is as expected.";
    const string alphabetExpectedFormat =
        "Sequence BVT: Sequence Alphabet is '{0}' and is as expected.";
    IFormatProvider noProvider = null;

    // Read the alphabet name and the sequence text from the Xml
    string alphabetName = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleDnaAlphabetNode, Constants.AlphabetNameNode);
    string sequenceText = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleDnaAlphabetNode, Constants.ExpectedNormalString);

    // Record the inputs in the log file
    string inputsMessage = string.Format(
        noProvider, sequenceAndAlphabetFormat, sequenceText, alphabetName);
    ApplicationLog.WriteLine(inputsMessage);

    Sequence baseSequence = new Sequence(Utility.GetAlphabet(alphabetName), sequenceText);
    Assert.IsNotNull(baseSequence);

    // The freshly created sequence must render the configured text
    Assert.AreEqual(baseSequence.ToString(), sequenceText);
    string baseLogMessage = string.Format(
        noProvider, sequenceExpectedFormat, baseSequence.ToString());
    ApplicationLog.WriteLine(baseLogMessage);

    // Same message again for the NUnit GUI (Console.Out) window
    string baseConsoleMessage = string.Format(
        noProvider, sequenceExpectedFormat, baseSequence.ToString());
    Console.WriteLine(baseConsoleMessage);

    Assert.AreEqual(Utility.GetAlphabet(alphabetName), baseSequence.Alphabet);
    string baseAlphabetMessage = string.Format(
        noProvider, alphabetExpectedFormat, baseSequence.Alphabet.Name);
    ApplicationLog.WriteLine(baseAlphabetMessage);

    // Wrap the sequence in a BasicDerivedSequence
    BasicDerivedSequence wrapped =
        new BasicDerivedSequence(baseSequence, false, false, -1, -1);

    // The derived sequence must match both the source sequence and the configured text
    Assert.AreEqual(baseSequence.ToString(), wrapped.ToString());
    string wrappedMessage = string.Format(
        noProvider, sequenceExpectedFormat, wrapped.ToString());
    ApplicationLog.WriteLine(wrappedMessage);

    Assert.IsNotNull(wrapped);
    Assert.AreEqual(wrapped.ToString(), sequenceText);
    Assert.AreEqual(Utility.GetAlphabet(alphabetName), wrapped.Alphabet);
    string wrappedAlphabetMessage = string.Format(
        noProvider, alphabetExpectedFormat, wrapped.Alphabet.Name);
    ApplicationLog.WriteLine(wrappedAlphabetMessage);

    // Final completion message goes to the log file
    ApplicationLog.WriteLine(
        "Sequence BVT: The BasicDerived Sequence validation is completed successfully.");
}
/// <summary>
/// Serializes a reversed, complemented BasicDerivedSequence to the file
/// "BasicDerivedSequence.data" with a BinaryFormatter, reads it back, and checks that
/// the deserialized instance is a distinct object whose properties match the original.
/// </summary>
public void TestBasicDerivedSequenceWithBinaryFormatter()
{
    Sequence seq1 = new Sequence(Alphabets.RNA, "ACUGCA");
    seq1.ID = Guid.NewGuid().ToString();
    seq1.DisplayID = "displayid";
    seq1.Documentation = "document";

    BasicDerivedSequence seq = new BasicDerivedSequence(seq1, true, true, -1, -1);
    BasicDerivedSequence deserializedSeq;

    // 'using' closes the stream on every path. No catch block: any exception (including
    // a failed assertion) propagates unchanged so the test report shows the real cause
    // instead of a message-less Assert.Fail().
    using (Stream stream = File.Open("BasicDerivedSequence.data", FileMode.Create))
    {
        BinaryFormatter formatter = new BinaryFormatter();
        formatter.Serialize(stream, seq);

        // Rewind so the formatter reads back what was just written.
        stream.Seek(0, SeekOrigin.Begin);
        deserializedSeq = (BasicDerivedSequence)formatter.Deserialize(stream);
    }

    Assert.AreNotSame(seq, deserializedSeq);
    Assert.AreSame(seq.Alphabet, deserializedSeq.Alphabet);
    Assert.AreEqual(seq.Complement.ToString(), deserializedSeq.Complement.ToString());
    Assert.AreEqual(seq.Complemented, deserializedSeq.Complemented);
    Assert.AreEqual(seq.Count, deserializedSeq.Count);
    Assert.AreEqual(seq.DisplayID, deserializedSeq.DisplayID);
    Assert.AreEqual(seq.Documentation, deserializedSeq.Documentation);
    Assert.AreEqual(seq.ID, deserializedSeq.ID);
    Assert.AreEqual(seq.IsReadOnly, deserializedSeq.IsReadOnly);
    Assert.AreEqual(seq.MoleculeType, deserializedSeq.MoleculeType);
    Assert.AreEqual(seq.RangeLength, deserializedSeq.RangeLength);
    Assert.AreEqual(seq.RangeStart, deserializedSeq.RangeStart);
    Assert.AreEqual(seq.Reverse.ToString(), deserializedSeq.Reverse.ToString());
    Assert.AreEqual(
        seq.ReverseComplement.ToString(), deserializedSeq.ReverseComplement.ToString());
    Assert.AreEqual(seq.Reversed, deserializedSeq.Reversed);
    Assert.AreEqual(seq.Source.ToString(), deserializedSeq.Source.ToString());
    Assert.AreEqual(seq.ToString(), deserializedSeq.ToString());
}
/// <summary>
/// Creates a Sequence from the XML-configured alphabet and text, wraps it in a
/// BasicDerivedSequence, and checks that the Reverse of the derived sequence has the
/// configured length and text.
/// </summary>
public void ValidateReverse()
{
    const string sequenceExpectedFormat = "Sequence BVT: Sequence {0} is as expected.";
    IFormatProvider noProvider = null;

    // Read the inputs and the expected results from the Xml
    string alphabetName = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleDnaAlphabetNode, Constants.AlphabetNameNode);
    string sequenceText = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleDnaAlphabetNode, Constants.ExpectedNormalString);
    string expectedReversed = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleDnaAlphabetNode, Constants.ExpectedReverseSequence);
    string expectedCountText = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleDnaAlphabetNode, Constants.EncodedDnaNormalSequenceCount);

    // Record the inputs in the log file
    string inputsMessage = string.Format(
        noProvider,
        "Sequence BVT: Sequence '{0}' and Alphabet '{1}'.",
        sequenceText,
        alphabetName);
    ApplicationLog.WriteLine(inputsMessage);

    Sequence baseSequence = new Sequence(Utility.GetAlphabet(alphabetName), sequenceText);
    Assert.IsNotNull(baseSequence);

    // The freshly created sequence must render the configured text
    Assert.AreEqual(baseSequence.ToString(), sequenceText);
    string logMessage = string.Format(
        noProvider, sequenceExpectedFormat, baseSequence.ToString());
    ApplicationLog.WriteLine(logMessage);

    // Same message again for the NUnit GUI (Console.Out) window
    string consoleMessage = string.Format(
        noProvider, sequenceExpectedFormat, baseSequence.ToString());
    Console.WriteLine(consoleMessage);

    Assert.AreEqual(Utility.GetAlphabet(alphabetName), baseSequence.Alphabet);
    string alphabetMessage = string.Format(
        noProvider,
        "Sequence Alphabet is '{0}' and is as expected.",
        baseSequence.Alphabet.Name);
    ApplicationLog.WriteLine(alphabetMessage);

    // Wrap the sequence in a BasicDerivedSequence
    BasicDerivedSequence wrapped =
        new BasicDerivedSequence(baseSequence, false, false, -1, -1);

    // Check the reversed text: the length is compared as text because the expected
    // count is read from the Xml as a string.
    string reversedText = wrapped.Reverse.ToString();
    string reversedLengthText = reversedText.Length.ToString(noProvider);
    Assert.AreEqual(reversedLengthText, expectedCountText);
    Assert.AreEqual(reversedText, expectedReversed);

    string reverseMessage = string.Format(
        noProvider, "Sequence BVT: Reverse sequence {0} is expected", reversedText);
    ApplicationLog.WriteLine(reverseMessage);

    // Final completion message goes to the log file
    ApplicationLog.WriteLine(
        "Sequence BVT: The Reverse of the sequence is validated successfully.");
}
/// <summary>
/// Parses the GenBank file configured under the simple GenBank node, validates the
/// first parsed sequence (text and length) against the expected value from the XML
/// configuration, and then checks that a BasicDerivedSequence created over that
/// sequence renders the same text.
/// </summary>
public void ValidateBasicDerivedSequenceWithGenBankFormat()
{
    // Gets the expected sequence and the input file path from the Xml
    string expectedSequence = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleGeneBankNodeName, Constants.ExpectedSequenceNode);
    string geneBankFilePath = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleGeneBankNodeName, Constants.FilePathNode);

    Assert.IsTrue(File.Exists(geneBankFilePath));

    // Logs information to the log file (the duplicated "Sequence BVT: " prefix of the
    // original message has been removed).
    ApplicationLog.WriteLine(string.Concat(
        "Sequence BVT: The File exist in the Path ", geneBankFilePath));

    // Parse a GenBank file using the Parse method and convert the same to sequence.
    ISequenceParser parser = new GenBankParser();
    IList<ISequence> sequence = parser.Parse(geneBankFilePath);

    Assert.IsNotNull(sequence);
    Sequence geneBankSeq = (Sequence)sequence[0];
    Assert.IsNotNull(geneBankSeq);
    Assert.AreEqual(expectedSequence, geneBankSeq.ToString());
    ApplicationLog.WriteLine("Sequence BVT: The GenBank Sequence is as expected.");

    // A direct cast is used instead of 'as' so that a type mismatch surfaces as an
    // InvalidCastException at the cast rather than a NullReferenceException on CopyTo.
    byte[] tmpEncodedSeq = new byte[geneBankSeq.Count];
    ((IList<byte>)geneBankSeq).CopyTo(tmpEncodedSeq, 0);
    Assert.AreEqual(expectedSequence.Length, tmpEncodedSeq.Length);
    ApplicationLog.WriteLine("Sequence BVT: The GenBank Sequence Length is as expected.");

    // Create a derived sequence for the GenBank file sequence.
    BasicDerivedSequence genebankDerivedSeq =
        new BasicDerivedSequence(geneBankSeq, false, false, -1, -1);

    // Validate the derived sequence against the original sequence.
    Assert.IsNotNull(genebankDerivedSeq);
    Assert.AreEqual(expectedSequence, genebankDerivedSeq.ToString());
    Assert.AreEqual(geneBankSeq.ToString(), genebankDerivedSeq.ToString());
    ApplicationLog.WriteLine("Sequence BVT: The BasicDerived Sequence is as expected.");

    // Logs to Nunit GUI.
    Console.WriteLine(
        "Sequence BVT: Validation of GenBank file Sequence is completed successfully.");
}
/// <summary>
/// Checks that FindMatches on a BasicDerivedSequence over the DNA sequence "ACCGGTT"
/// reports the pattern "ACCG" exactly once, at index 0.
/// </summary>
public void ValidateDerivedSequenceFindMatches()
{
    const string pattern = "ACCG";
    const string successMessage =
        "Derived Sequence : Successfully validated FindMatches() method.";

    ISequence dnaSequence = new Sequence(Alphabets.DNA, "ACCGGTT");

    // NOTE(review): the last constructor argument is Count - 1; if it is a range
    // length this leaves out the final symbol - confirm that is intended.
    int rangeArgument = dnaSequence.Count - 1;
    BasicDerivedSequence derived =
        new BasicDerivedSequence(dnaSequence, false, false, 0, rangeArgument);

    IList<string> searchPatterns = new List<string> { pattern };
    IDictionary<string, IList<int>> matches = derived.FindMatches(searchPatterns, 0, false);

    // One pattern was searched, so one entry is expected, with its first hit at 0.
    Assert.AreEqual(1, matches.Count);
    IList<int> positions = matches[pattern];
    Assert.AreEqual(0, positions[0]);

    Console.WriteLine(successMessage);
    ApplicationLog.WriteLine(successMessage);
}
/// <summary>
/// Checks that Clone() on a BasicDerivedSequence yields an object whose string form
/// equals that of the instance it was cloned from, logging both along the way.
/// </summary>
public void BasicDerivedSequenceClone()
{
    ApplicationLog.WriteLine("BasicDerivedSequenceClone test started");

    Sequence sourceSequence = new Sequence(Alphabets.DNA, "GCCAACGAACCGGAAACCCGGGACCG");
    BasicDerivedSequence original =
        new BasicDerivedSequence(sourceSequence, false, false, 0, 0);

    string originalText = original.ToString();
    ApplicationLog.WriteLine(string.Concat("Original Sequence: ", originalText));

    BasicDerivedSequence copy = original.Clone();

    // Compare the textual form of the clone with that of the original.
    Assert.AreEqual(original.ToString(), copy.ToString());

    string copyText = copy.ToString();
    ApplicationLog.WriteLine(string.Concat("Cloned Sequence: ", copyText));
    ApplicationLog.WriteLine("BasicDerivedSequenceClone test completed");
}
/// <summary>
/// Writes an ISequence in FASTA format to the given writer: a header line made of
/// '&gt;' followed by the sequence ID, then the sequence text split into lines of at
/// most _maxLineLength symbols. The writer is flushed before returning.
/// </summary>
/// <param name="sequence">The sequence to format.</param>
/// <param name="writer">The TextWriter used to write the formatted sequence text.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="sequence"/> or <paramref name="writer"/> is null.
/// </exception>
public override void Format(ISequence sequence, TextWriter writer)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // raised part-way through the method.
    if (sequence == null)
    {
        throw new ArgumentNullException("sequence");
    }

    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    // write header
    writer.WriteLine(">" + sequence.ID);

    // write sequence: one derived sequence is reused as a sliding window, moved and
    // resized for each output line.
    BasicDerivedSequence derivedSeq = new BasicDerivedSequence(sequence, false, false, 0, 0);
    for (int lineStart = 0; lineStart < sequence.Count; lineStart += _maxLineLength)
    {
        derivedSeq.RangeStart = lineStart;

        // The last line may be shorter than _maxLineLength.
        derivedSeq.RangeLength = Math.Min(_maxLineLength, sequence.Count - lineStart);
        writer.WriteLine(derivedSeq.ToString());
    }

    writer.Flush();
}
// Writes the data of a non-empty sequence to the specified writer as a block of
// header lines: an opening line with the generic molecule type and the display ID,
// the lower-cased sequence text in chunks of at most _maxSequenceSymbolsPerLine
// symbols, and a closing "end-<type>" line. Empty sequences produce no output.
// The incoming value of 'type' is never read; it is replaced by the generic type
// string derived from the sequence's MoleculeType.
private void WriteSeqData(ISequence sequence, string type, TextWriter writer)
{
    if (sequence.Count <= 0)
    {
        return;
    }

    string genericType = GetGenericTypeString(sequence.MoleculeType);
    WriteHeaderLine(writer, genericType, sequence.DisplayID);

    // A single derived sequence acts as a window that is repositioned per line.
    BasicDerivedSequence window = new BasicDerivedSequence(sequence, false, false, 0, 0);
    int offset = 0;
    while (offset < sequence.Count)
    {
        window.RangeStart = offset;
        window.RangeLength = Math.Min(_maxSequenceSymbolsPerLine, sequence.Count - offset);

        string chunk = window.ToString().ToLower(CultureInfo.InvariantCulture);
        WriteHeaderLine(writer, chunk);

        offset += _maxSequenceSymbolsPerLine;
    }

    WriteHeaderLine(writer, "end-" + genericType);
}
//
// Builds a new list holding, for each input sequence and in the same order, its
// ReverseComplement. When a reverse complement is a BasicDerivedSequence its
// DisplayID gets the suffix " Reverse".
//
// DISCUSSION:
// Should there be an easily accessed indicator that this is a reversed sequence?
// And should we be able to get the 'base' version even if it is a sub-sequence?
//
static IList<ISequence> ReverseComplementSequenceList(IList<ISequence> sequenceList)
{
    List<ISequence> reversedList = new List<ISequence>();

    foreach (ISequence original in sequenceList)
    {
        ISequence reversed = original.ReverseComplement;

        // Only BasicDerivedSequence instances are tagged; other types are added as-is.
        BasicDerivedSequence taggable = reversed as BasicDerivedSequence;
        if (taggable != null)
        {
            taggable.DisplayID += " Reverse";
        }

        reversedList.Add(reversed);
    }

    return reversedList;
}
//
// Builds a new list in which every input sequence is immediately followed by its
// ReverseComplement. When a reverse complement is a BasicDerivedSequence its
// DisplayID gets the suffix " Reverse".
//
// DISCUSSION:
// Should there be an easily accessed indicator that this is a reversed sequence?
//
static IList<ISequence> AddReverseComplementsToSequenceList(IList<ISequence> sequenceList)
{
    List<ISequence> pairedList = new List<ISequence>();

    foreach (ISequence original in sequenceList)
    {
        ISequence reversed = original.ReverseComplement;

        // Only BasicDerivedSequence instances are tagged; other types are added as-is.
        BasicDerivedSequence taggable = reversed as BasicDerivedSequence;
        if (taggable != null)
        {
            taggable.DisplayID += " Reverse";
        }

        // Original first, then its reverse complement.
        pairedList.Add(original);
        pairedList.Add(reversed);
    }

    return pairedList;
}
/// <summary>
/// Runs the PAMSAM multiple sequence aligner on a protein benchmark alignment file:
/// the benchmark alignment is parsed, un-aligned, written to a temporary FASTA file,
/// parsed back, aligned, and the scores and aligned sequences of every stage are
/// written to the console. The temporary file is removed when the test ends.
/// </summary>
public void TestMsaBenchMarkLargeDataset()
{
    // Test on a protein benchmark dataset
    ISequenceParser parser = new FastaParser();
    string filepath = @"testdata\FASTA\Protein\Balibase\RV913\BOX032.xml.afa";
    IList<ISequence> orgSequences = parser.Parse(filepath);
    IList<ISequence> sequences = MsaUtils.UnAlign(orgSequences);
    int numberOfSequences = orgSequences.Count;
    String outputFilePath = @"tempBOX032.xml.afa";

    try
    {
        // Overwrite rather than append: a file left over from an earlier failed run
        // would otherwise get this run's sequences added after the stale ones.
        // 'using' guarantees the writer is closed even if writing fails.
        using (StreamWriter writer = new StreamWriter(outputFilePath, false))
        {
            foreach (ISequence sequence in sequences)
            {
                writer.WriteLine(">" + sequence.ID);

                // write sequence in lines of at most 60 symbols
                BasicDerivedSequence derivedSeq =
                    new BasicDerivedSequence(sequence, false, false, 0, 0);
                for (int lineStart = 0; lineStart < sequence.Count; lineStart += 60)
                {
                    derivedSeq.RangeStart = lineStart;
                    derivedSeq.RangeLength = Math.Min(60, sequence.Count - lineStart);
                    writer.WriteLine(derivedSeq.ToString());
                }

                writer.Flush();
            }
        }

        // Re-read the un-aligned sequences from the file that was just written.
        sequences.Clear();
        sequences = parser.Parse(outputFilePath);

        Console.WriteLine("Original sequences are:");
        for (int i = 0; i < numberOfSequences; ++i)
        {
            Console.WriteLine(sequences[i].ToString());
        }

        Console.WriteLine("Benchmark sequences are:");
        for (int i = 0; i < numberOfSequences; ++i)
        {
            Console.WriteLine(orgSequences[i].ToString());
        }

        PAMSAMMultipleSequenceAligner.FasterVersion = false;
        PAMSAMMultipleSequenceAligner.UseWeights = false;
        PAMSAMMultipleSequenceAligner.UseStageB = true;
        PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

        int gapOpenPenalty = -13;
        int gapExtendPenalty = -5;
        int kmerLength = 3;

        int numberOfDegrees = 2; //Environment.ProcessorCount;
        int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

        SimilarityMatrix similarityMatrix =
            new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);

        DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
        UpdateDistanceMethodsTypes hierarchicalClusteringMethodName =
            UpdateDistanceMethodsTypes.Average;
        ProfileAlignerNames profileAlignerName =
            ProfileAlignerNames.NeedlemanWunschProfileAligner;
        ProfileScoreFunctionNames profileProfileFunctionName =
            ProfileScoreFunctionNames.WeightedInnerProduct;

        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
            sequences, MoleculeType.Protein, kmerLength, distanceFunctionName,
            hierarchicalClusteringMethodName, profileAlignerName,
            profileProfileFunctionName, similarityMatrix, gapOpenPenalty,
            gapExtendPenalty, numberOfPartitions, numberOfDegrees);

        Console.WriteLine(
            "Benchmark SPS score is: {0}",
            MsaUtils.MultipleAlignmentScoreFunction(
                orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));

        // Stage 1
        Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
        for (int i = 0; i < msa.AlignedSequencesA.Count; ++i)
        {
            Console.WriteLine(msa.AlignedSequencesA[i].ToString());
        }
        Console.WriteLine(
            "Alignment score Q is: {0}",
            MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
        Console.WriteLine(
            "Alignment score TC is: {0}",
            MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

        // Stage 2
        Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
        for (int i = 0; i < msa.AlignedSequencesB.Count; ++i)
        {
            Console.WriteLine(msa.AlignedSequencesB[i].ToString());
        }
        Console.WriteLine(
            "Alignment score Q is: {0}",
            MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
        Console.WriteLine(
            "Alignment score TC is: {0}",
            MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

        // Stage 3
        Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
        for (int i = 0; i < msa.AlignedSequencesC.Count; ++i)
        {
            Console.WriteLine(msa.AlignedSequencesC[i].ToString());
        }
        Console.WriteLine(
            "Alignment score Q is: {0}",
            MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
        Console.WriteLine(
            "Alignment score TC is: {0}",
            MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

        // Final alignment
        Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
        for (int i = 0; i < msa.AlignedSequences.Count; ++i)
        {
            Console.WriteLine(msa.AlignedSequences[i].ToString());
        }
        Console.WriteLine(
            "Alignment score Q is: {0}",
            MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
        Console.WriteLine(
            "Alignment score TC is: {0}",
            MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
    }
    finally
    {
        // Remove the temporary file on failure as well as on success so that it
        // cannot leak into a later run.
        if (File.Exists(outputFilePath))
        {
            File.Delete(outputFilePath);
        }
    }
}
/// <summary>
/// Creates an instance of each sequence type (Sequence, BasicDerivedSequence,
/// DerivedSequence, QualitativeSequence, SegmentedSequence and SparseSequence) and
/// passes each one to TestListOfByteMethods. Most types are exercised twice: once in
/// their default or writable configuration and once with UseEncoding switched on.
/// </summary>
public void TestListOfByteOnISequence()
{
    // Sequence
    Sequence writableSequence = new Sequence(DnaAlphabet.Instance, "ACT") { IsReadOnly = false };
    TestListOfByteMethods(writableSequence);

    Sequence encodedSequence = new Sequence(DnaAlphabet.Instance, "ACT") { UseEncoding = true };
    TestListOfByteMethods(encodedSequence);

    // BasicDerivedSequence - the last constructor argument is read from
    // encodedSequence.Count at the time each instance is constructed.
    BasicDerivedSequence plainBasicDerived = new BasicDerivedSequence(
        new Sequence(DnaAlphabet.Instance, "ACT"), false, false, 0, encodedSequence.Count);
    TestListOfByteMethods(plainBasicDerived);

    BasicDerivedSequence encodedBasicDerived = new BasicDerivedSequence(
        new Sequence(DnaAlphabet.Instance, "ACT"), false, false, 0, encodedSequence.Count);
    encodedBasicDerived.UseEncoding = true;
    TestListOfByteMethods(encodedBasicDerived);

    // DerivedSequence
    DerivedSequence plainDerived =
        new DerivedSequence(new Sequence(DnaAlphabet.Instance, "ACT"));
    TestListOfByteMethods(plainDerived);

    DerivedSequence encodedDerived =
        new DerivedSequence(new Sequence(DnaAlphabet.Instance, "ACT"));
    encodedDerived.UseEncoding = true;
    TestListOfByteMethods(encodedDerived);

    // QualitativeSequence
    QualitativeSequence writableQualitative =
        new QualitativeSequence(DnaAlphabet.Instance, FastQFormatType.Illumina, "ACT");
    writableQualitative.IsReadOnly = false;
    TestListOfByteMethods(writableQualitative);

    QualitativeSequence encodedQualitative =
        new QualitativeSequence(DnaAlphabet.Instance, FastQFormatType.Illumina, "ACT");
    encodedQualitative.UseEncoding = true;
    TestListOfByteMethods(encodedQualitative);

    // SegmentedSequence made of two writable segments, "A" and "CT"
    Sequence firstSegment = new Sequence(DnaAlphabet.Instance, "A");
    firstSegment.IsReadOnly = false;
    Sequence secondSegment = new Sequence(DnaAlphabet.Instance, "CT");
    secondSegment.IsReadOnly = false;

    List<ISequence> segments = new List<ISequence>();
    segments.Add(firstSegment);
    segments.Add(secondSegment);

    SegmentedSequence segmented = new SegmentedSequence(segments);
    TestListOfByteMethods(segmented);

    // SparseSequence holding A, C and T
    List<ISequenceItem> sparseItems = new List<ISequenceItem>();
    sparseItems.Add(DnaAlphabet.Instance.A);
    sparseItems.Add(DnaAlphabet.Instance.C);
    sparseItems.Add(DnaAlphabet.Instance.T);

    SparseSequence sparse = new SparseSequence(DnaAlphabet.Instance, 0, sparseItems);
    sparse.IsReadOnly = false;
    TestListOfByteMethods(sparse);

    // No test code for VirtualSequence
}
/// <summary>
/// Returns a sequence containing the bases of the specified sequence that are selected
/// by the location, honouring the location's operator:
/// Complement yields a new Sequence holding the complemented text of the selected
/// bases; Order yields a SegmentedSequence of the sub-location sequences; Join yields
/// a single Sequence with the sub-location sequences concatenated; any other operator
/// yields the plain start..end range.
///
/// For example,
/// if location is "join(100..200, J00089.1:10..50, J00090.2:30..40)"
/// then bases from 100 to 200 are taken from the sequence parameter, and the
/// referredSequences are passed on so that the J00089.1 and J00090.2 accessions can
/// be resolved by the range overload of GetSubSequence and by the sub-locations.
/// </summary>
/// <param name="location">Location instance.</param>
/// <param name="sequence">Sequence instance from which the sub sequence has to be returned.</param>
/// <param name="referredSequences">A dictionary containing Accession numbers as keys and
/// Sequences as values, this will be used when the location or sublocations contains
/// accession.</param>
/// <returns>The sequence described by the location.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="location"/> or <paramref name="sequence"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when a Complement location has more than one sub-location, or when a
/// location whose operator is not Complement, Order or Join has sub-locations.
/// </exception>
public ISequence GetSubSequence(ILocation location, ISequence sequence, Dictionary<string, ISequence> referredSequences)
{
    if (location == null)
    {
        throw new ArgumentNullException(Resource.ParameterNameLocation);
    }

    if (sequence == null)
    {
        throw new ArgumentNullException(Resource.ParameterNameSequence);
    }

    // Scratch wrapper; its Source is replaced by the branches that need it.
    BasicDerivedSequence wrapper = new BasicDerivedSequence(sequence, false, false, -1, -1);

    switch (location.Operator)
    {
        case LocationOperator.Complement:
        {
            if (location.SubLocations.Count > 1)
            {
                throw new ArgumentException(Resource.ComplementWithMorethanOneSubLocs);
            }

            // Complement either the single sub-location or the location's own range.
            if (location.SubLocations.Count > 0)
            {
                wrapper.Source =
                    location.SubLocations[0].GetSubSequence(sequence, referredSequences);
            }
            else
            {
                wrapper.Source = GetSubSequence(
                    location.Start, location.End, location.Accession,
                    location.Separator, sequence, referredSequences);
            }

            wrapper.Complemented = true;
            return new Sequence(sequence.Alphabet, wrapper.ToString());
        }

        case LocationOperator.Order:
        {
            List<ISequence> orderedParts = new List<ISequence>();
            if (location.SubLocations.Count > 0)
            {
                foreach (ILocation subLocation in location.SubLocations)
                {
                    orderedParts.Add(subLocation.GetSubSequence(sequence, referredSequences));
                }
            }
            else
            {
                // No sub-locations: the only segment is a copy of the location's range.
                wrapper.Source = GetSubSequence(
                    location.Start, location.End, location.Accession,
                    location.Separator, sequence, referredSequences);
                orderedParts.Add(new Sequence(sequence.Alphabet, wrapper.ToString()));
            }

            return new SegmentedSequence(orderedParts);
        }

        case LocationOperator.Join:
        {
            if (location.SubLocations.Count <= 0)
            {
                return GetSubSequence(
                    location.Start, location.End, location.Accession,
                    location.Separator, sequence, referredSequences);
            }

            // Resolve every sub-location first, then concatenate their text.
            List<ISequence> joinedParts = new List<ISequence>();
            foreach (ILocation subLocation in location.SubLocations)
            {
                joinedParts.Add(subLocation.GetSubSequence(sequence, referredSequences));
            }

            Sequence joined = new Sequence(sequence.Alphabet);
            foreach (ISequence part in joinedParts)
            {
                joined.InsertRange(joined.Count, part.ToString());
            }

            return joined;
        }

        default:
            break;
    }

    // Remaining operators must not carry sub-locations.
    if (location.SubLocations.Count > 0)
    {
        throw new ArgumentException(Resource.NoneWithSubLocs);
    }

    return GetSubSequence(
        location.Start, location.End, location.Accession,
        location.Separator, sequence, referredSequences);
}
// The headers for all sequences go at the top of the file before any features.
// Writes, in order: the gff-version line, a source-version line (only when every
// sequence carries the same "source" and "version" metadata), today's date, the type
// header(s), the sequence data blocks (only when ShouldWriteSequenceData is set), and
// a sequence-region line for every sequence that has "start" and "end" metadata.
// The date and the lower-cased sequence text are produced with the invariant culture
// so that the output does not depend on the culture of the current thread.
private void WriteHeaders(ICollection<ISequence> sequenceList, TextWriter writer)
{
    // look for file-scope data that is common to all sequences; null signifies no match
    string source = null;
    string version = null;
    string type = null;
    bool firstSeq = true;

    foreach (ISequence sequence in sequenceList)
    {
        if (firstSeq)
        {
            // source and version go together; can't output one without the other
            if (sequence.Metadata.ContainsKey("source") && sequence.Metadata.ContainsKey("version"))
            {
                source = sequence.Metadata["source"] as string;
                version = sequence.Metadata["version"] as string;
            }

            // map to generic string; e.g. mRNA, tRNA -> RNA
            type = GetGenericTypeString(sequence.MoleculeType);

            firstSeq = false;
        }
        else
        {
            // source and version go together; can't output one without the other
            if (source != null)
            {
                bool sourceAndVersionMatchOthers =
                    sequence.Metadata.ContainsKey("source") &&
                    sequence.Metadata.ContainsKey("version") &&
                    source == (sequence.Metadata["source"] as string) &&
                    version == (sequence.Metadata["version"] as string);

                // set both to null if this seq source and version don't match previous ones
                if (!sourceAndVersionMatchOthers)
                {
                    source = null;
                    version = null;
                }
            }

            // set type to null if this seq type doesn't match previous types
            if (type != null && type != GetGenericTypeString(sequence.MoleculeType))
            {
                type = null;
            }
        }
    }

    // formatting using gff version 2
    WriteHeaderLine(writer, "gff-version", "2");

    // only output source if they all match
    if (source != null)
    {
        WriteHeaderLine(writer, "source-version", source, version);
    }

    // today's date; the invariant culture keeps the Gregorian calendar and ASCII digits
    // regardless of the current thread culture
    WriteHeaderLine(
        writer,
        "date",
        DateTime.Today.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture));

    // type header
    if (type == null)
    {
        foreach (ISequence sequence in sequenceList)
        {
            type = GetGenericTypeString(sequence.MoleculeType);

            // only output seq-specific type header if this seq won't have its type
            // output as part of a sequence data header; don't need to output if DNA,
            // as DNA is default
            if (type != MoleculeType.DNA.ToString() &&
                (!ShouldWriteSequenceData || sequence.Count == 0))
            {
                WriteHeaderLine(writer, "type", type, sequence.DisplayID);
            }
        }
    }
    else
    {
        // output that the types all match; don't need to output if DNA, as DNA is default
        if (type != MoleculeType.DNA.ToString())
        {
            WriteHeaderLine(writer, "type", type);
        }
    }

    // sequence data
    if (ShouldWriteSequenceData)
    {
        foreach (ISequence sequence in sequenceList)
        {
            if (sequence.Count > 0)
            {
                type = GetGenericTypeString(sequence.MoleculeType);

                WriteHeaderLine(writer, type, sequence.DisplayID);

                // one derived sequence is reused as a window that moves along the
                // sequence, one output line at a time
                BasicDerivedSequence derivedSeq =
                    new BasicDerivedSequence(sequence, false, false, 0, 0);
                for (int lineStart = 0; lineStart < sequence.Count; lineStart += _maxSequenceSymbolsPerLine)
                {
                    derivedSeq.RangeStart = lineStart;
                    derivedSeq.RangeLength = Math.Min(
                        _maxSequenceSymbolsPerLine, sequence.Count - lineStart);

                    // invariant lower-casing avoids culture-specific case mappings
                    WriteHeaderLine(
                        writer,
                        derivedSeq.ToString().ToLower(System.Globalization.CultureInfo.InvariantCulture));
                }

                WriteHeaderLine(writer, "end-" + type);
            }
        }
    }

    // sequence-region header
    foreach (ISequence sequence in sequenceList)
    {
        if (sequence.Metadata.ContainsKey("start") && sequence.Metadata.ContainsKey("end"))
        {
            WriteHeaderLine(
                writer,
                "sequence-region",
                sequence.DisplayID,
                sequence.Metadata["start"] as string,
                sequence.Metadata["end"] as string);
        }
    }
}