// Example input rows (case id followed by its amino-acid sequence):
//   1189MB   MEPVDPNLEPWNHPGSQPKTPCTNCYCKHCSYHCLVCFQTKGLGISYGRK
//   J112MA   MEPVDPNLEPWNHPGSQPITACNKCYCKYCSYHCLVCFQTKGLGISYGRK
//   1157M3M  MEPVDPNLEPWNHPGSQPKTPCNKCYCKHCSYHCLVCFQTKGLGISYGRK
//   1195MB   MEPVDPNLEPWNHPGSQPKTPCNKCYCKYCSYHCLVCFQTKGLGISYGRK

/// <summary>
/// Reads a tab-delimited table with the columns "cid" and "aaSeq" and returns a
/// <c>CaseIdToAASeq</c> holding one parsed sequence per row.
/// </summary>
/// <param name="textReader">Source of the table; handed to <c>SpecialFunctions.TabFileTable</c>.</param>
/// <param name="mixtureSemantics">Forwarded unchanged to <c>AASeq.GetInstance</c> for every row.</param>
/// <param name="offset">Forwarded unchanged to <c>AASeq.GetInstance</c> for every row.</param>
/// <returns>The populated collection, keyed by each row's "cid" value.</returns>
static public CaseIdToAASeq GetInstance(TextReader textReader, MixtureSemantics mixtureSemantics, int offset)
{
    CaseIdToAASeq result = CaseIdToAASeq.GetInstance();

    foreach (Dictionary<string, string> fields in SpecialFunctions.TabFileTable(textReader, "cid\taaSeq", false))
    {
        // Read both columns up front (id first), then parse the sequence text.
        string id = fields["cid"];
        string sequenceText = fields["aaSeq"];

        AASeq parsed = AASeq.GetInstance(sequenceText, mixtureSemantics, offset);
        result.Add(id, parsed);
    }

    return result;
}
/// <summary>
/// Builds a consensus sequence from <paramref name="aaSeqs"/>. For every 0-based position up to
/// the length of the longest input, the key of the first entry returned by
/// <c>AASeq.CountAasAtPos</c> is appended to the result.
/// </summary>
/// <param name="aaSeqs">Sequences to summarize; they may differ in length.</param>
/// <returns>
/// An <c>AASeq</c> created from the assembled text with <c>MixtureSemantics.Uncertainty</c>.
/// </returns>
public static AASeq GetAaSeqConsensus(List<AASeq> aaSeqs)
{
    // Length of the longest input. Stays at -1 for an empty list, which skips the loop below.
    int longest = -1;
    foreach (AASeq candidate in aaSeqs)
    {
        int candidateLength = candidate.Count;
        if (candidateLength > longest)
        {
            longest = candidateLength;
        }
    }

    StringBuilder text = new StringBuilder();
    int position = 0;
    while (position < longest)
    {
        // NOTE(review): presumably CountAasAtPos orders its entries most-frequent first, so
        // element [0] is the consensus residue -- confirm against AASeq.CountAasAtPos.
        text.Append(AASeq.CountAasAtPos(aaSeqs, position)[0].Key);
        position++;
    }

    return AASeq.GetInstance(text.ToString(), MixtureSemantics.Uncertainty);
}
/// <summary>
/// Computes the consensus of a set of named sequences and returns it as a new
/// <c>NamedSequence</c> named "consensus".
/// </summary>
/// <param name="seqs">
/// Sequences to summarize. The first element decides, via <c>IsDna()</c>, whether every
/// sequence is parsed with <c>DnaSeq.GetInstance</c> or with <c>AASeq.GetInstance</c>.
/// </param>
/// <returns>
/// A <c>NamedSequence</c> whose text is the string form of the result of
/// <c>GetAaSeqConsensus</c> over the parsed sequences.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="seqs"/> is empty (the first element is read unconditionally).
/// </exception>
public static NamedSequence GetConsensus(List<NamedSequence> seqs)
{
    bool isDna = seqs[0].IsDna();

    // Parse every sequence with uncertainty semantics. Inputs may differ in length;
    // GetAaSeqConsensus works out the longest length itself, so none is tracked here.
    List<AASeq> aaSeqs = new List<AASeq>(seqs.Count);
    foreach (NamedSequence seq in seqs)
    {
        AASeq aaSeq = isDna
            ? DnaSeq.GetInstance(seq.Sequence, MixtureSemantics.Uncertainty)
            : AASeq.GetInstance(seq.Sequence, MixtureSemantics.Uncertainty);
        aaSeqs.Add(aaSeq);
    }

    AASeq consensusAaSeq = GetAaSeqConsensus(aaSeqs);
    return new NamedSequence("consensus", consensusAaSeq.ToString());
}
/// <summary>
/// Writes <paramref name="sequences"/> to <paramref name="writer"/> as a tab-separated table
/// of per-position, per-residue indicator variables, then flushes the writer.
/// </summary>
/// <remarks>
/// The first row is "Var" followed by every sequence name. Each later row is labelled
/// "&lt;1-based position&gt;@&lt;residue&gt;" and holds one cell per sequence: 1, 0, or the
/// string form of <c>MissingStatistics.GetInstance()</c> when the value is missing.
/// A row is written only when its non-missing cells contain both 0 and 1, or when
/// <c>KeepOneValueVariables</c> is set and every non-missing cell is 1.
/// </remarks>
/// <param name="sequences">
/// Sequences to tabulate. The first element decides, via <c>IsDna()</c>, how all are parsed.
/// </param>
/// <param name="writer">Destination for the table.</param>
public void WriteAsTable(List<NamedSequence> sequences, TextWriter writer)
{
    CaseIdToAASeq seqByName = CaseIdToAASeq.GetInstance();
    bool parseAsDna = sequences[0].IsDna();

    foreach (NamedSequence named in sequences)
    {
        string name = named.Name;
        AASeq parsed = parseAsDna
            ? DnaSeq.GetInstance(named.Sequence, MixtureSemantics)
            : AASeq.GetInstance(named.Sequence, MixtureSemantics);
        seqByName.Add(name, parsed);
    }

    // Header row: the variable-name column followed by one column per sequence.
    List<string> headerCells = new List<string>(sequences.Count + 1) { "Var" };
    headerCells.AddRange(sequences.Select(s => s.Name));
    writer.WriteLine(headerCells.StringJoin("\t"));

    int longest = seqByName.Dictionary.Values.Max(a => a.Count);

    for (int pos0 = 0; pos0 < longest; pos0++)
    {
        foreach (char residue in seqByName.EveryAminoAcid(pos0))
        {
            string rowLabel = (pos0 + 1) + "@" + residue;
            int?[] indicators = new int?[sequences.Count];

            // Track which non-missing values (only 0 or 1 are possible) occur in this row.
            bool sawZero = false;
            bool sawOne = false;

            for (int column = 0; column < sequences.Count; column++)
            {
                Set<char> observed = seqByName.Dictionary[sequences[column].Name][pos0];

                int? indicator;
                if (observed.Contains('?') || observed.Count == 0)
                {
                    // Unknown or absent residue: missing.
                    indicator = null;
                }
                else if (observed.Count > 1
                         && MixtureSemantics == MixtureSemantics.Uncertainty
                         && observed.Contains(residue))
                {
                    // Under uncertainty semantics, a mixture containing the residue is missing.
                    indicator = null;
                }
                else if (observed.Contains(residue)
                         && (MixtureSemantics != MixtureSemantics.Pure || observed.Count == 1))
                {
                    // Residue present; under pure semantics it must be the only one observed.
                    indicator = 1;
                }
                else
                {
                    indicator = 0;
                }

                indicators[column] = indicator;
                if (indicator == 1)
                {
                    sawOne = true;
                }
                else if (indicator == 0)
                {
                    sawZero = true;
                }
            }

            bool varies = sawZero && sawOne;
            if (varies || (KeepOneValueVariables && sawOne && !sawZero))
            {
                string cells = indicators
                    .Select(v => v.HasValue ? v.ToString() : MissingStatistics.GetInstance().ToString())
                    .StringJoin("\t");
                writer.WriteLine(Helper.CreateTabString(rowLabel, cells));
            }
        }
    }

    writer.Flush();
}