/// <summary>
/// Builds a DNA SparseSequence through the (alphabet, index, sequence items) constructor
/// and checks the resulting count and per-symbol statistics.
/// </summary>
public void ValidateDnaSparseSequenceConstAlpIndexByteList()
{
    const string completionMessage =
        "SparseSequence BVT: Validation of SparseSequence(alp, index, seq items) constructor is completed";

    // Encode the four symbols and hand them over as an explicit item list.
    byte[] encoded = encodingObj.GetBytes("AGCT");
    IEnumerable<byte> items = new List<byte>
    {
        encoded[0],
        encoded[1],
        encoded[2],
        encoded[3]
    };

    var sparse = new SparseSequence(Alphabets.DNA, 4, items);

    Assert.IsNotNull(sparse);
    Assert.IsNotNull(sparse.Statistics);
    Assert.AreEqual(8, sparse.Count);

    // Each of the four supplied symbols must be counted exactly once.
    SequenceStatistics statistics = sparse.Statistics;
    Assert.AreEqual(1, statistics.GetCount('A'));
    Assert.AreEqual(1, statistics.GetCount('G'));
    Assert.AreEqual(1, statistics.GetCount('C'));
    Assert.AreEqual(1, statistics.GetCount('T'));

    Console.WriteLine(completionMessage);
    ApplicationLog.WriteLine(completionMessage);
}
/// <summary>
/// Calculate GC content for a read.
/// </summary>
/// <param name="seqStats">SequenceStatistics object holding the symbol counts of the read</param>
/// <param name="length">The length of the read</param>
/// <returns>
/// GC percentage of a read, computed as 100 * (count of 'G' + 'C' + 'S') / length.
/// Returns 0 when <paramref name="length"/> is zero or negative.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="seqStats"/> is null.</exception>
public double GCContentBySequence(SequenceStatistics seqStats, long length)
{
    if (seqStats == null)
    {
        throw new ArgumentNullException("seqStats");
    }

    // An empty read has no meaningful GC ratio; without this guard the floating-point
    // division below yields NaN/Infinity (or a negative percentage for negative lengths).
    if (length <= 0)
    {
        return 0.0;
    }

    // 'S' is counted together with G and C - presumably the ambiguity code for
    // "G or C"; confirm against the alphabet definition.
    var gcCount = seqStats.GetCount('G') + seqStats.GetCount('C') + seqStats.GetCount('S');

    return 100 * (double)gcCount / length;
}
/// <summary>
/// Verifies that counts and fractions for a sequence mixing 'a' and 'A' are the same
/// whether queried by upper-case char, lower-case char, or byte value 65.
/// </summary>
public void CreateStatsWithMixedcaseLetterSequence()
{
    const int expectedCount = 8;
    const double expectedFraction = 1.0;

    var statistics = new SequenceStatistics(new Sequence(Alphabets.DNA, "aAaAaAaA"));

    // Counts: every one of the eight symbols is an 'A', regardless of case.
    Assert.AreEqual(expectedCount, statistics.GetCount('A'));
    Assert.AreEqual(expectedCount, statistics.GetCount(65));
    Assert.AreEqual(expectedCount, statistics.GetCount('a'));

    // Fractions: 'A' makes up the whole sequence.
    Assert.AreEqual(expectedFraction, statistics.GetFraction('A'));
    Assert.AreEqual(expectedFraction, statistics.GetFraction(65));
    Assert.AreEqual(expectedFraction, statistics.GetFraction('a'));
}
/// <summary>
/// Verifies the statistics of a one-symbol DNA sequence: 'A' is counted once and
/// makes up the whole sequence, while C, G and T are absent.
/// </summary>
public void CreateSimpleStatsWithSingleLetterSequence()
{
    var statistics = new SequenceStatistics(new Sequence(Alphabets.DNA, "A"));

    // The single symbol, looked up by char and by byte value.
    Assert.AreEqual(1, statistics.GetCount('A'));
    Assert.AreEqual(1, statistics.GetCount(65));

    // Symbols that do not occur in the sequence.
    Assert.AreEqual(0, statistics.GetCount('C'));
    Assert.AreEqual(0, statistics.GetCount('G'));
    Assert.AreEqual(0, statistics.GetCount('T'));

    Assert.AreEqual(1.0, statistics.GetFraction('A'));
    Assert.AreEqual(1.0, statistics.GetFraction(65));
}
/// <summary>
/// Loads the first sequence from the file named by <c>Filename</c> and prints, for every
/// symbol of that sequence's alphabet, its count and fraction within the sequence.
/// Prints "Could not load sequence." when no parser matches the file name or the
/// file yields no sequence.
/// </summary>
static void DumpStatsForOneSequence()
{
    ISequence oneSequence = null;

    var parser = SequenceParsers.FindParserByFileName(Filename);
    if (parser != null)
    {
        try
        {
            // FirstOrDefault rather than First: a file without any sequence should reach
            // the "Could not load sequence." message below instead of throwing.
            oneSequence = parser.Parse().FirstOrDefault();
        }
        finally
        {
            // Release the parser even when parsing throws.
            parser.Close();
        }
    }

    if (oneSequence == null)
    {
        Console.WriteLine("Could not load sequence.");
        return;
    }

    SequenceStatistics stats = new SequenceStatistics(oneSequence);
    foreach (var symbol in oneSequence.Alphabet)
    {
        Console.WriteLine("{0} = Count={1}, Fraction={2}",
            (char)symbol, stats.GetCount(symbol), stats.GetFraction(symbol));
    }
}
/// <summary>
/// Builds a DNA SparseSequence through the (alphabet, index, byte) constructor and
/// checks that the single supplied symbol shows up in its statistics.
/// </summary>
public void ValidateDnaSparseSequenceConstAlpIndexByte()
{
    // Only the first encoded symbol ('A') is passed to the constructor.
    byte[] encoded = Encoding.ASCII.GetBytes("AGCT");
    byte firstSymbol = encoded[0];

    SparseSequence sparse = new SparseSequence(Alphabets.DNA, 1, firstSymbol);

    Assert.IsNotNull(sparse);
    Assert.IsNotNull(sparse.Statistics);

    SequenceStatistics statistics = sparse.Statistics;
    Assert.AreEqual(1, statistics.GetCount('A'));

    ApplicationLog.WriteLine(
        "SparseSequence BVT: Validation of SparseSequence(alp, index, byte) constructor is completed");
}
/// <summary>
/// Renders the input sequence as a text report: the sequence ID, the total length with a
/// count for every symbol of its alphabet, then the symbols laid out 50 per line in
/// groups of 10, each line prefixed by its right-aligned 1-based start position.
/// </summary>
/// <param name="context">The execution context under which the activity executes.</param>
/// <returns>The formatted report text.</returns>
protected override string Execute(CodeActivityContext context)
{
    const int symbolsPerLine = 50;
    const int symbolsPerGroup = 10;
    const int positionWidth = 5;

    var sequence = Sequence.Get(context);
    var report = new StringBuilder();

    // Header: ID line followed by the statistics line.
    report.AppendLine(sequence.ID);
    report.Append("Statistics: ").Append(sequence.Count).Append(" Total");

    var stats = new SequenceStatistics(sequence);
    foreach (var symbol in sequence.Alphabet)
    {
        report.AppendFormat(" - {0}:", (char)symbol).Append(stats.GetCount(symbol));
    }

    report.AppendLine().AppendLine();

    // Body: one symbol at a time, inserting position prefixes, group gaps and line breaks.
    for (int index = 0; index < sequence.Count; index++)
    {
        int offsetInLine = index % symbolsPerLine;

        if (offsetInLine == 0)
        {
            // Start of a line: right-align the 1-based position; wider numbers are left as is.
            string position = (index + 1).ToString(CultureInfo.InvariantCulture);
            report.Append(position.PadLeft(positionWidth));
        }

        if (index % symbolsPerGroup == 0)
        {
            report.Append(' ');
        }

        report.Append((char)sequence[index]);

        if (offsetInLine == symbolsPerLine - 1)
        {
            report.AppendLine();
        }
    }

    report.AppendLine();
    return report.ToString();
}
/// <summary>
/// Parses every sequence from the file named by <c>Filename</c> and, for each column index
/// present in all sequences, prints the non-gap symbol (taken from the first sequence's
/// alphabet) that occurs most often in that column, together with its count.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    var parser = SequenceParsers.FindParserByFileName(Filename);
    if (parser == null)
    {
        Console.WriteLine("No parser found.");
        return;
    }

    List<ISequence> allSequences = parser
        .Parse()
        .ToList();

    // Indexing [0] and Min() below both throw on an empty list, so stop early.
    if (allSequences.Count == 0)
    {
        Console.WriteLine("No sequences found.");
        return;
    }

    IAlphabet alphabet = allSequences[0].Alphabet;

    // Only columns that exist in every sequence are examined.
    long totalColums = allSequences.Min(s => s.Count);

    for (int column = 0; column < totalColums; column++)
    {
        // Build a sequence out of this column: one symbol from each parsed sequence.
        ISequence newSequence = new Sequence(
            AmbiguousRnaAlphabet.Instance,
            allSequences.Select(s => s[column]).ToArray());

        SequenceStatistics stats = new SequenceStatistics(newSequence);

        // Most frequent non-gap symbol in this column, or null when none occurs.
        var topCount = alphabet
            .Where(symbol => !alphabet.CheckIsGap(symbol))
            .Select(symbol => new
            {
                Symbol = (char)symbol,
                Count = stats.GetCount(symbol),
                Frequency = stats.GetFraction(symbol)
            })
            .Where(tc => tc.Count > 0)
            .OrderByDescending(c => c.Count)
            .FirstOrDefault();

        if (topCount != null)
        {
            // NOTE(review): the last value printed is the number of columns, not the number
            // of sequences the count was taken over - confirm this is the intended output.
            Console.WriteLine("{0}: {1} = {2} of {3}",
                column, topCount.Symbol, topCount.Count, totalColums);
        }
    }
}
/// <summary>
/// Builds a protein SparseSequence through the (alphabet, index, byte) constructor and
/// checks that the single supplied symbol shows up in its statistics.
/// </summary>
public void ValidateProteinSparseSequenceConstAlpIndexByte()
{
    const string completionMessage =
        "SparseSequence P1: Validation of SparseSequence(alp, index, byte) constructor is completed";

    // Only the first encoded symbol ('K') is passed to the constructor.
    byte[] encoded = encodingObj.GetBytes("KIEG");
    byte firstSymbol = encoded[0];

    var sparse = new SparseSequence(Alphabets.Protein, 1, firstSymbol);

    Assert.IsNotNull(sparse);
    Assert.IsNotNull(sparse.Statistics);

    SequenceStatistics statistics = sparse.Statistics;
    Assert.AreEqual(1, statistics.GetCount('K'));

    Console.WriteLine(completionMessage);
    ApplicationLog.WriteLine(completionMessage);
}
/// <summary>
/// Builds a protein SparseSequence through the (alphabet, index, sequence items)
/// constructor and checks the resulting count and per-symbol statistics.
/// </summary>
public void ValidateProteinSparseSequenceConstAlpIndexByteList()
{
    // Encode the four symbols and hand them over as an explicit item list.
    byte[] encoded = Encoding.ASCII.GetBytes("KIEG");
    IEnumerable<byte> items = new List<byte>
    {
        encoded[0],
        encoded[1],
        encoded[2],
        encoded[3]
    };

    SparseSequence sparse = new SparseSequence(Alphabets.Protein, 4, items);

    Assert.IsNotNull(sparse);
    Assert.IsNotNull(sparse.Statistics);
    Assert.AreEqual(8, sparse.Count);

    // Each of the four supplied symbols must be counted exactly once.
    SequenceStatistics statistics = sparse.Statistics;
    Assert.AreEqual(1, statistics.GetCount('K'));
    Assert.AreEqual(1, statistics.GetCount('I'));
    Assert.AreEqual(1, statistics.GetCount('E'));
    Assert.AreEqual(1, statistics.GetCount('G'));

    ApplicationLog.WriteLine(
        "SparseSequence P1: Validation of SparseSequence(alp, index, seq items) constructor is completed");
}
/// <summary>
/// Enumerates SymbolCounts and checks that every entry agrees with GetCount and that
/// exactly five entries are produced for a sequence made of A, C, G, T and '-'.
/// </summary>
public void CreateStatsAndConsumeEnumerable()
{
    var statistics = new SequenceStatistics(new Sequence(Alphabets.DNA, "ACGT--ACGT--ACGT--"));

    int entriesSeen = 0;
    foreach (var entry in statistics.SymbolCounts)
    {
        // Item1 is the symbol, Item2 the count reported for it.
        Assert.AreEqual(entry.Item2, statistics.GetCount(entry.Item1));
        entriesSeen++;
    }

    Assert.AreEqual(5, entriesSeen);
}
/// <summary>
/// Verifies counts and fractions for a 16-symbol sequence that alternates each base
/// (once lower-case, once upper-case) with a gap character.
/// </summary>
public void CreateStatsWithSeveralMixedcaseLetterSequence()
{
    const int perBaseCount = 2;
    const int gapCount = 8;
    const double perBaseFraction = 2.0 / 16.0;
    const double gapFraction = .5;

    var statistics = new SequenceStatistics(new Sequence(Alphabets.DNA, "a-c-g-t-A-C-G-T-"));

    // Each base is reported twice, whichever case is used for the lookup.
    Assert.AreEqual(perBaseCount, statistics.GetCount('A'));
    Assert.AreEqual(perBaseCount, statistics.GetCount('a'));
    Assert.AreEqual(perBaseCount, statistics.GetCount('C'));
    Assert.AreEqual(perBaseCount, statistics.GetCount('c'));
    Assert.AreEqual(perBaseCount, statistics.GetCount('G'));
    Assert.AreEqual(perBaseCount, statistics.GetCount('g'));
    Assert.AreEqual(perBaseCount, statistics.GetCount('T'));
    Assert.AreEqual(perBaseCount, statistics.GetCount('t'));

    // Gaps fill the remaining half of the sequence.
    Assert.AreEqual(gapCount, statistics.GetCount('-'));

    Assert.AreEqual(perBaseFraction, statistics.GetFraction('A'));
    Assert.AreEqual(perBaseFraction, statistics.GetFraction('C'));
    Assert.AreEqual(perBaseFraction, statistics.GetFraction('G'));
    Assert.AreEqual(perBaseFraction, statistics.GetFraction('T'));

    // The gap fraction, looked up by char and by byte value 45.
    Assert.AreEqual(gapFraction, statistics.GetFraction('-'));
    Assert.AreEqual(gapFraction, statistics.GetFraction(45));
}
/// <summary>
/// Builds a text report for <c>Sequence</c> and stores it in <c>Data</c>: the sequence ID,
/// the total length (plus G/A/T/C or G/A/U/C counts when the alphabet is DNA or RNA),
/// then the symbols laid out 50 per line in groups of 10, each line prefixed by its
/// right-aligned 1-based start position.
/// </summary>
/// <param name="executionContext">The execution context.</param>
/// <returns>The execution status; always <c>ActivityExecutionStatus.Closed</c>.</returns>
protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
{
    const int symbolsPerLine = 50;
    const int symbolsPerGroup = 10;
    const int positionWidth = 5;

    var report = new StringBuilder();

    // Header: ID line followed by the statistics line.
    report.AppendLine(Sequence.ID);
    report.Append("Statistics: ").Append(Sequence.Count).Append(" Total");

    var stats = new SequenceStatistics(Sequence);

    // Per-base counts are only written for the plain DNA and RNA alphabets.
    if (Sequence.Alphabet == Alphabets.DNA)
    {
        report.Append(" - G: ").Append(stats.GetCount(Alphabets.DNA.G));
        report.Append(" - A: ").Append(stats.GetCount(Alphabets.DNA.A));
        report.Append(" - T: ").Append(stats.GetCount(Alphabets.DNA.T));
        report.Append(" - C: ").Append(stats.GetCount(Alphabets.DNA.C));
    }
    else if (Sequence.Alphabet == Alphabets.RNA)
    {
        report.Append(" - G: ").Append(stats.GetCount(Alphabets.RNA.G));
        report.Append(" - A: ").Append(stats.GetCount(Alphabets.RNA.A));
        report.Append(" - U: ").Append(stats.GetCount(Alphabets.RNA.U));
        report.Append(" - C: ").Append(stats.GetCount(Alphabets.RNA.C));
    }

    report.AppendLine().AppendLine();

    // Body: one symbol at a time, inserting position prefixes, group gaps and line breaks.
    for (int index = 0; index < Sequence.Count; index++)
    {
        int offsetInLine = index % symbolsPerLine;

        if (offsetInLine == 0)
        {
            // Start of a line: right-align the 1-based position; wider numbers are left as is.
            report.Append((index + 1).ToString().PadLeft(positionWidth));
        }

        if (index % symbolsPerGroup == 0)
        {
            report.Append(' ');
        }

        report.Append((char)Sequence[index]);

        if (offsetInLine == symbolsPerLine - 1)
        {
            report.AppendLine();
        }
    }

    report.AppendLine();

    Data = report.ToString();
    return ActivityExecutionStatus.Closed;
}
/// <summary>
/// Walks the SymbolCounts enumeration, checking each reported count against GetCount,
/// and confirms that five entries are yielded for a sequence of A, C, G, T and '-'.
/// </summary>
public void CreateStatsAndConsumeEnumerable()
{
    ISequence dna = new Sequence(Alphabets.DNA, "ACGT--ACGT--ACGT--");
    var symbolStats = new SequenceStatistics(dna);

    var visited = 0;
    foreach (var pair in symbolStats.SymbolCounts)
    {
        // The count carried by the pair must match a direct lookup of its symbol.
        Assert.AreEqual(pair.Item2, symbolStats.GetCount(pair.Item1));
        visited++;
    }

    Assert.AreEqual(5, visited);
}