/// <summary>
/// Builds a single consensus sequence from a collection of named sequences.
/// </summary>
/// <remarks>
/// Whether the input is treated as DNA or as amino acids is decided from the FIRST
/// sequence only; every sequence is then parsed the same way, always with
/// <c>MixtureSemantics.Uncertainty</c>. No length check is performed here, and the
/// consensus itself is computed by <c>GetAaSeqConsensus</c>.
/// </remarks>
/// <param name="seqs">
/// The sequences to summarize. Must contain at least one element, because the first
/// element is inspected; an empty list makes the <c>seqs[0]</c> access throw
/// <see cref="System.ArgumentOutOfRangeException"/>.
/// </param>
/// <returns>
/// A <c>NamedSequence</c> named "consensus" whose sequence is the string form of the
/// computed consensus.
/// </returns>
public static NamedSequence GetConsensus(List<NamedSequence> seqs)
{
    // The first sequence determines the alphabet used to parse all of them.
    bool isDna = seqs[0].IsDna();

    List<AASeq> aaSeqs = new List<AASeq>(seqs.Count);
    foreach (NamedSequence seq in seqs)
    {
        AASeq aaSeq = isDna
            ? DnaSeq.GetInstance(seq.Sequence, MixtureSemantics.Uncertainty)
            : AASeq.GetInstance(seq.Sequence, MixtureSemantics.Uncertainty);
        aaSeqs.Add(aaSeq);
    }

    AASeq consensusAaSeq = GetAaSeqConsensus(aaSeqs);
    return new NamedSequence("consensus", consensusAaSeq.ToString());
}
/// <summary>
/// Factory that constructs a <c>DnaSeq</c> from its string form and hands it back
/// typed as <c>AASeq</c>.
/// </summary>
/// <remarks>
/// Declared with <c>new</c>, so it hides an inherited member of the same signature
/// rather than overriding it.
/// </remarks>
/// <param name="dnaSeqAsString">The sequence text, passed straight to the <c>DnaSeq</c> constructor.</param>
/// <param name="mixtureSemantics">The mixture semantics, passed straight to the <c>DnaSeq</c> constructor.</param>
/// <param name="offset">The offset, passed straight to the <c>DnaSeq</c> constructor.</param>
/// <returns>The newly constructed <c>DnaSeq</c>.</returns>
public new static AASeq GetInstance(string dnaSeqAsString, MixtureSemantics mixtureSemantics, int offset)
{
    return new DnaSeq(dnaSeqAsString, mixtureSemantics, offset);
}
/// <summary>
/// Writes the sequences as a tab-separated table of per-position, per-residue
/// indicator variables.
/// </summary>
/// <remarks>
/// The first line is a header: "Var" followed by every sequence name. Each later line
/// describes one variable, labelled <c>{1-based position}@{residue}</c>, with one cell
/// per sequence: 1, 0, or the string form of <c>MissingStatistics.GetInstance()</c>
/// when the value is missing. A variable is written only if both 0 and 1 occur among
/// its non-missing cells, or if only 1 occurs and <c>KeepOneValueVariables</c> is set.
/// The first sequence alone decides whether all sequences are parsed as DNA.
/// The writer is flushed at the end but not closed.
/// </remarks>
/// <param name="sequences">Sequences to tabulate; the first element is inspected, so the list must not be empty.</param>
/// <param name="writer">Destination for the table.</param>
public void WriteAsTable(List<NamedSequence> sequences, TextWriter writer)
{
    // Parse every sequence and index it by name.
    CaseIdToAASeq parsedByName = CaseIdToAASeq.GetInstance();
    bool treatAsDna = sequences[0].IsDna();
    foreach (NamedSequence named in sequences)
    {
        AASeq parsed;
        if (treatAsDna)
        {
            parsed = DnaSeq.GetInstance(named.Sequence, MixtureSemantics);
        }
        else
        {
            parsed = AASeq.GetInstance(named.Sequence, MixtureSemantics);
        }
        parsedByName.Add(named.Name, parsed);
    }

    // Header row: "Var" followed by one column per sequence.
    List<string> headerCells = new List<string>(sequences.Count + 1) { "Var" };
    headerCells.AddRange(sequences.Select(s => s.Name));
    writer.WriteLine(headerCells.StringJoin("\t"));

    int longest = parsedByName.Dictionary.Values.Max(s => s.Count);
    for (int position = 0; position < longest; position++)
    {
        // Row labels are 1-based: "<position>@<residue>".
        string labelPrefix = (position + 1) + "@";

        foreach (char residue in parsedByName.EveryAminoAcid(position))
        {
            string rowLabel = labelPrefix + residue;
            int?[] cells = new int?[sequences.Count];

            // Only 0 and 1 can occur as non-missing values, so two flags are enough
            // to know which distinct values were seen.
            bool sawZero = false;
            bool sawOne = false;

            for (int column = 0; column < sequences.Count; column++)
            {
                Set<char> observed = parsedByName.Dictionary[sequences[column].Name][position];

                int? indicator;
                if (observed.Contains('?') || observed.Count == 0)
                {
                    // Unknown or absent residue: missing.
                    indicator = null;
                }
                else if (!observed.Contains(residue))
                {
                    indicator = 0;
                }
                else if (observed.Count > 1 && MixtureSemantics == MixtureSemantics.Uncertainty)
                {
                    // A mixture containing the residue is ambiguous under Uncertainty semantics.
                    indicator = null;
                }
                else if (MixtureSemantics != MixtureSemantics.Pure || observed.Count == 1)
                {
                    indicator = 1;
                }
                else
                {
                    // Pure semantics: a mixture does not count as having the residue.
                    indicator = 0;
                }

                cells[column] = indicator;
                if (indicator.HasValue)
                {
                    if (indicator.Value == 1)
                    {
                        sawOne = true;
                    }
                    else
                    {
                        sawZero = true;
                    }
                }
            }

            // Emit when the variable actually varies, or when it is constantly 1 and
            // such one-valued variables are requested.
            if (sawOne && (sawZero || KeepOneValueVariables))
            {
                IEnumerable<string> renderedCells = cells.Select(
                    c => c.HasValue ? c.Value.ToString() : MissingStatistics.GetInstance().ToString());
                writer.WriteLine(Helper.CreateTabString(rowLabel, renderedCells.StringJoin("\t")));
            }
        }
    }

    writer.Flush();
}