// NOTE(review): the rows below look like sample input (case id followed by sequence) — confirm.
// /*
// 1189MB MEPVDPNLEPWNHPGSQPKTPCTNCYCKHCSYHCLVCFQTKGLGISYGRK
// J112MA MEPVDPNLEPWNHPGSQPITACNKCYCKYCSYHCLVCFQTKGLGISYGRK
// 1157M3M MEPVDPNLEPWNHPGSQPKTPCNKCYCKHCSYHCLVCFQTKGLGISYGRK
// 1195MB MEPVDPNLEPWNHPGSQPKTPCNKCYCKYCSYHCLVCFQTKGLGISYGRK
// */

/// <summary>
/// Builds a <see cref="CaseIdToAASeq"/> from tabular text. For every row produced by
/// <c>SpecialFunctions.TabFileTable</c>, the "cid" field becomes the key and the "aaSeq"
/// field is converted with <c>AASeq.GetInstance</c> and stored under that key.
/// </summary>
/// <param name="textReader">Reader handed to <c>SpecialFunctions.TabFileTable</c> together with the header <c>"cid\taaSeq"</c>.</param>
/// <param name="mixtureSemantics">Forwarded unchanged to <c>AASeq.GetInstance</c> for every row.</param>
/// <param name="offset">Forwarded unchanged to <c>AASeq.GetInstance</c> for every row.</param>
/// <returns>The collection populated with one entry per row.</returns>
public static CaseIdToAASeq GetInstance(TextReader textReader, MixtureSemantics mixtureSemantics, int offset)
{
    CaseIdToAASeq result = CaseIdToAASeq.GetInstance();

    // NOTE(review): the meaning of the trailing 'false' is defined by TabFileTable, which is not visible here.
    foreach (Dictionary<string, string> fields in SpecialFunctions.TabFileTable(textReader, "cid\taaSeq", false))
    {
        string id = fields["cid"]; //!!!const
        AASeq parsedSequence = AASeq.GetInstance(fields["aaSeq"], mixtureSemantics, offset); //!!!const
        result.Add(id, parsedSequence);
    }

    return result;
}
/// <summary>
/// Writes the given sequences to <paramref name="writer"/> as a tab-separated indicator table.
/// The header row is "Var" followed by every sequence name. Each candidate row is labelled
/// "position@symbol" (position is 1-based) and holds, per sequence, 1, 0, or the missing
/// marker obtained from <c>MissingStatistics.GetInstance().ToString()</c>.
/// </summary>
/// <remarks>
/// A row is written only when the non-missing cells contain both 0 and 1, or when
/// <c>KeepOneValueVariables</c> is set and the only non-missing value is 1.
/// The writer is flushed at the end but not closed.
/// </remarks>
/// <param name="sequences">
/// Sequences to tabulate. The first element is consulted (via <c>IsDna()</c>) to decide whether
/// all of them are parsed with <c>DnaSeq.GetInstance</c> or <c>AASeq.GetInstance</c>, so the
/// list must contain at least one element.
/// </param>
/// <param name="writer">Destination for the header and data rows.</param>
public void WriteAsTable(List<NamedSequence> sequences, TextWriter writer)
{
    CaseIdToAASeq parsedByName = CaseIdToAASeq.GetInstance();

    // The first sequence decides how every sequence is parsed.
    bool treatAsDna = sequences[0].IsDna();
    foreach (NamedSequence named in sequences)
    {
        string name = named.Name;
        var parsed = treatAsDna
            ? DnaSeq.GetInstance(named.Sequence, MixtureSemantics)
            : AASeq.GetInstance(named.Sequence, MixtureSemantics);
        parsedByName.Add(name, parsed);
    }

    // Header: the variable column followed by one column per sequence name.
    string headerLine = new[] { "Var" }.Concat(sequences.Select(s => s.Name)).StringJoin("\t");
    writer.WriteLine(headerLine);

    int longest = parsedByName.Dictionary.Values.Max(s => s.Count);
    for (int position = 0; position < longest; position++)
    {
        foreach (char symbol in parsedByName.EveryAminoAcid(position))
        {
            string rowLabel = (position + 1) + "@" + symbol;

            // Cells default to null, which is rendered as the missing marker.
            int?[] cells = new int?[sequences.Count];
            HashSet<int> distinctKnown = new HashSet<int>();

            for (int column = 0; column < sequences.Count; column++)
            {
                Set<char> observed = parsedByName.Dictionary[sequences[column].Name][position];

                // Unknown or empty observation: leave the cell missing.
                if (observed.Contains('?') || observed.Count == 0)
                {
                    continue;
                }

                // Under Uncertainty semantics, a mixture that includes this symbol is also missing.
                if (observed.Count > 1
                    && MixtureSemantics == MixtureSemantics.Uncertainty
                    && observed.Contains(symbol))
                {
                    continue;
                }

                // Under Pure semantics the symbol only counts when it is the sole observation.
                bool present = observed.Contains(symbol)
                    && (MixtureSemantics != MixtureSemantics.Pure || observed.Count == 1);

                int indicator = present ? 1 : 0;
                cells[column] = indicator;
                distinctKnown.Add(indicator);
            }

            bool emitRow = distinctKnown.Count > 1
                || (KeepOneValueVariables && distinctKnown.Count == 1 && distinctKnown.Contains(1));
            if (!emitRow)
            {
                continue;
            }

            string renderedCells = cells
                .Select(cell => cell.HasValue ? cell.Value.ToString() : MissingStatistics.GetInstance().ToString())
                .StringJoin("\t");
            writer.WriteLine(Helper.CreateTabString(rowLabel, renderedCells));
        }
    }

    writer.Flush();
}