/// <summary>
/// Scans every sequence in <paramref name="caseToCompressedAASeq"/> and tallies, per mer string,
/// how often a non-ambiguous mer of length <paramref name="merLength"/> starts at each position label.
/// </summary>
/// <param name="merLength">Window length handed to <c>SubSeqEnumeration</c>.</param>
/// <param name="textWriterForWarnings">Receives one warning line each time a mer string recurs within the same case.</param>
/// <param name="caseToCompressedAASeq">Case id to sequence map; every entry is visited.</param>
/// <returns>
/// Mer string mapped to (position label mapped to count). The label is whatever
/// <c>mer.OriginalAA1Position(0)</c> returns for the mer's first residue.
/// </returns>
private static Dictionary<string, Dictionary<string, int>> CreateMerStringToOriginalAA0PositionToCount(int merLength, TextWriter textWriterForWarnings, Dictionary<string, AASeq> caseToCompressedAASeq)
{
    Dictionary<string, Dictionary<string, int>> countsByMer = new Dictionary<string, Dictionary<string, int>>();

    foreach (KeyValuePair<string, AASeq> caseIdAndAASeq in caseToCompressedAASeq)
    {
        string caseId = caseIdAndAASeq.Key;
        AASeq sequenceForCase = caseIdAndAASeq.Value;

        // Mer strings already met in this case; only used to decide whether to warn.
        Set<string> mersSeenInCase = new Set<string>();

        foreach (AASeq mer in sequenceForCase.SubSeqEnumeration(merLength))
        {
            if (!mer.Ambiguious)
            {
                string merString = mer.ToString();

                // A repeat is reported but still counted below.
                bool isRepeat = mersSeenInCase.Contains(merString);
                if (isRepeat)
                {
                    textWriterForWarnings.WriteLine("Warning: Mer '{0}' appears again in case '{1}'", merString, caseId);
                }
                mersSeenInCase.AddNewOrOld(merString);

                string startPositionLabel = mer.OriginalAA1Position(0);

                // NOTE(review): the result is only populated if GetValueOrDefault registers a
                // fresh inner dictionary in countsByMer when the key is missing - confirm.
                Dictionary<string, int> countsByPosition = SpecialFunctions.GetValueOrDefault(countsByMer, merString);
                int previousCount = SpecialFunctions.GetValueOrDefault(countsByPosition, startPositionLabel);
                countsByPosition[startPositionLabel] = 1 + previousCount;
            }
        }
    }

    return countsByMer;
}
/// <summary>
/// Produces a new sequence from <paramref name="aaSeqIn"/> in which positions equal to
/// <c>Delete</c> are skipped and copying ends at the first position equal to <c>Stop</c>.
/// </summary>
/// <param name="caseId">Only used inside the warning message.</param>
/// <param name="aaSeqIn">Source sequence; its <c>Mixture</c> flag is carried over to the result.</param>
/// <param name="errorStream">Receives a warning when a <c>Stop</c> occurs before the last position.</param>
/// <returns>A new <c>AASeq</c> holding the kept positions together with their original position labels.</returns>
static public AASeq GetCompressedInstance(string caseId, AASeq aaSeqIn, TextWriter errorStream)
{
    AASeq compressed = new AASeq(aaSeqIn.Mixture);
    compressed.Sequence = new List<Set<char>>();
    compressed._originalAA1PositionTableOrNull = new List<string>();

    for (int index = 0; index < aaSeqIn.Count; ++index)
    {
        Set<char> residues = aaSeqIn[index];
        string positionLabel = aaSeqIn.OriginalAA1Position(index);

        bool isDelete = residues.Equals(Delete); //!!!const
        if (!isDelete)
        {
            if (residues.Equals(Stop)) //!!!const
            {
                // A stop is expected only at the very end; anywhere else is reported,
                // and in both cases nothing after it is copied.
                bool isLastPosition = (index == aaSeqIn.Count - 1);
                if (!isLastPosition)
                {
                    errorStream.WriteLine("Warning: The sequence for case id '{0}' contains a '*' before the last position", caseId);
                }
                break;
            }

            compressed.Sequence.Add(residues);
            compressed._originalAA1PositionTableOrNull.Add(positionLabel);
        }
    }

    return compressed;
}
/// <summary>
/// Creates an <c>AASeq</c> whose <c>Sequence</c> is the result of <c>CreateSequence</c>
/// applied to <paramref name="aaSeqAsString"/>. No position table is assigned here.
/// </summary>
/// <param name="aaSeqAsString">Text form of the sequence, passed unchanged to <c>CreateSequence</c>.</param>
/// <param name="mixture">Forwarded to the <c>AASeq</c> constructor.</param>
/// <returns>The newly constructed sequence.</returns>
static public AASeq GetInstance(string aaSeqAsString, bool mixture)
{
    AASeq parsed = new AASeq(mixture);
    parsed.Sequence = CreateSequence(aaSeqAsString);

    return parsed;
}
/// <summary>
/// Lazily yields every contiguous window of <paramref name="merLength"/> positions,
/// from start index 0 up to and including <c>Sequence.Count - merLength</c>.
/// </summary>
/// <param name="merLength">Number of positions in each yielded sub-sequence.</param>
/// <returns>One <c>AASeq</c> per start index, each built by <c>SubSeqAA0Pos</c>.</returns>
internal IEnumerable<AASeq> SubSeqEnumeration(int merLength)
{
    int startIndex = 0;

    // The bound is re-read on every pass, exactly as a for-loop condition would be.
    while (startIndex <= Sequence.Count - merLength)
    {
        yield return SubSeqAA0Pos(startIndex, merLength);
        ++startIndex;
    }
}
/// <summary>
/// Creates an <c>AASeq</c> from its text form and attaches a caller-supplied table of
/// original position labels, one per sequence position.
/// </summary>
/// <param name="aaSeqAsString">Text form of the sequence, passed unchanged to <c>CreateSequence</c>.</param>
/// <param name="originalAA1PositionTable">
/// Position labels; must have as many entries as the parsed sequence. The list is stored by
/// reference, not copied.
/// </param>
/// <param name="mixture">Forwarded to the <c>AASeq</c> constructor.</param>
/// <returns>The newly constructed sequence.</returns>
static public AASeq GetInstance(string aaSeqAsString, List<string> originalAA1PositionTable, bool mixture)
{
    AASeq labelled = new AASeq(mixture);
    labelled.Sequence = CreateSequence(aaSeqAsString);

    bool lengthsAgree = (labelled.Count == originalAA1PositionTable.Count);
    SpecialFunctions.CheckCondition(lengthsAgree, "aaSeq and position table must be same length");

    labelled._originalAA1PositionTableOrNull = originalAA1PositionTable;
    return labelled;
}
/// <summary>
/// Attempts to extract the window of <paramref name="merLength"/> positions that starts at the
/// 0-based index <paramref name="aa0Pos"/>. Reports failure instead of handing an invalid
/// window to <c>SubSeqAA0Pos</c>.
/// </summary>
/// <param name="aa0Pos">0-based start index of the window.</param>
/// <param name="merLength">Number of positions requested; a negative value is rejected.</param>
/// <param name="aaSeq">The extracted sub-sequence on success; <c>null</c> on failure.</param>
/// <returns>
/// <c>true</c> when the window lies entirely inside <c>Sequence</c>; otherwise <c>false</c>.
/// </returns>
public bool TrySubSeqAA0Pos(int aa0Pos, int merLength, out AASeq aaSeq)
{
    // The upper bound is written as "merLength > Count - aa0Pos" rather than
    // "aa0Pos + merLength > Count": once aa0Pos is known to be non-negative the subtraction
    // cannot overflow, whereas the addition wraps to a negative number for large arguments
    // and would let an out-of-range window through. A negative length is never a valid
    // window, so a Try-method reports it as failure as well.
    if (aa0Pos < 0 || merLength < 0 || merLength > this.Sequence.Count - aa0Pos)
    {
        aaSeq = null;
        return false;
    }

    aaSeq = SubSeqAA0Pos(aa0Pos, merLength);
    return true;
}
//public void CreateSparseFile(string outputFileName, bool keepOneValueVariables)
//{
//    CreateSparseFile(outputFileName, keepOneValueVariables);
//}

/// <summary>
/// Builds a new dictionary in which every stored sequence is replaced by the result of
/// <c>AASeq.GetCompressedInstance</c>; the stored sequences themselves are not modified here.
/// </summary>
/// <param name="textWriter">Passed to <c>GetCompressedInstance</c> as its warning/error stream.</param>
/// <returns>Case id mapped to its compressed sequence, one entry per stored case.</returns>
private Dictionary<string, AASeq> RemoveDeletesAndStopsFromData(TextWriter textWriter)
{
    Dictionary<string, AASeq> compressedByCaseId = new Dictionary<string, AASeq>();

    foreach (KeyValuePair<string, AASeq> entry in _caseIdToAASeq)
    {
        string caseId = entry.Key;
        AASeq compressed = AASeq.GetCompressedInstance(caseId, entry.Value, textWriter);
        compressedByCaseId.Add(caseId, compressed);
    }

    return compressedByCaseId;
}
/// <summary>
/// Registers <paramref name="aaSeq"/> under <paramref name="caseId"/>. The first sequence added
/// fixes <c>SequenceLength</c>; a later sequence of a different length only triggers a console
/// warning and is still stored.
/// </summary>
/// <param name="caseId">Identifier of the case; must not already be present.</param>
/// <param name="aaSeq">Sequence to store for that case.</param>
public void Add(string caseId, AASeq aaSeq)
{
    bool isNewCase = !_caseIdToAASeq.ContainsKey(caseId);
    SpecialFunctions.CheckCondition(isNewCase, string.Format("caseId {0} appears more than once", caseId));

    int length = aaSeq.Count;
    if (SequenceLength == null)
    {
        // First sequence seen: it defines the reference length.
        SequenceLength = length;
    }
    else if (SequenceLength != length)
    {
        // SequenceLength keeps the length of the first sequence; the mismatch is only reported.
        Console.WriteLine("Warning: Not all amino acid sequences are of the same length");
    }

    _caseIdToAASeq.Add(caseId, aaSeq);
}
/// <summary>
/// Returns <c>false</c> when <paramref name="obj"/> is not an <c>AASeq</c>; otherwise returns the
/// result of comparing the two <c>Sequence</c> members with <c>==</c>.
/// </summary>
/// <param name="obj">Object to compare with this instance.</param>
/// <returns><c>true</c> when <c>Sequence == other.Sequence</c> holds.</returns>
public override bool Equals(object obj)
{
    AASeq other = obj as AASeq;
    if (other == null)
    {
        return false;
    }

    // NOTE(review): unless the declared type of Sequence overloads ==, this compares the two
    // references, so separately built sequences with the same content are not equal.
    // Confirm this is intended and matches GetHashCode before changing it.
    return Sequence == other.Sequence;
}
// Sample rows (case id followed by amino acid sequence):
//   1189MB   MEPVDPNLEPWNHPGSQPKTPCTNCYCKHCSYHCLVCFQTKGLGISYGRK
//   J112MA   MEPVDPNLEPWNHPGSQPITACNKCYCKYCSYHCLVCFQTKGLGISYGRK
//   1157M3M  MEPVDPNLEPWNHPGSQPKTPCNKCYCKHCSYHCLVCFQTKGLGISYGRK
//   1195MB   MEPVDPNLEPWNHPGSQPKTPCNKCYCKYCSYHCLVCFQTKGLGISYGRK

/// <summary>
/// Reads rows through <c>SpecialFunctions.TabFileTable</c> using the header <c>"cid\taaSeq"</c>
/// and adds one <c>AASeq</c> per row to a fresh <c>CaseIdToAASeq</c>.
/// </summary>
/// <param name="textReader">Source of the table.</param>
/// <param name="mixture">Forwarded to <c>AASeq.GetInstance</c> for every row.</param>
/// <returns>The populated collection.</returns>
static public CaseIdToAASeq GetInstance(TextReader textReader, bool mixture)
{
    CaseIdToAASeq result = CaseIdToAASeq.GetInstance();

    foreach (Dictionary<string, string> row in SpecialFunctions.TabFileTable(textReader, "cid\taaSeq", false))
    {
        string id = row["cid"]; //!!!const
        string residues = row["aaSeq"]; //!!!const

        result.Add(id, AASeq.GetInstance(residues, mixture));
    }

    return result;
}
/// <summary>
/// Builds a new <c>AASeq</c> covering <paramref name="merLength"/> positions starting at the
/// 0-based index <paramref name="aa0Pos"/>, carrying over this instance's <c>Mixture</c> flag and
/// the original position label of every covered position.
/// </summary>
/// <param name="aa0Pos">0-based start index within <c>Sequence</c>.</param>
/// <param name="merLength">Number of positions to take.</param>
/// <returns>The sub-sequence; its position table always gets one label per covered position.</returns>
public AASeq SubSeqAA0Pos(int aa0Pos, int merLength)
{
    List<Set<char>> window = SpecialFunctions.SubList(Sequence, aa0Pos, merLength);

    AASeq mer = new AASeq(Mixture);
    mer.Sequence = window;
    mer._originalAA1PositionTableOrNull = new List<string>();

    // Labels are resolved through OriginalAA1Position rather than copied from this
    // instance's table, so the result has an explicit table in every case.
    int endExclusive = aa0Pos + merLength;
    for (int sourceIndex = aa0Pos; sourceIndex < endExclusive; ++sourceIndex)
    {
        mer._originalAA1PositionTableOrNull.Add(OriginalAA1Position(sourceIndex));
    }

    return mer;
}
/// <summary>
/// Asks every sequence whether it contains the given mer and collects the definite answers.
/// Cases for which <c>ContainsMer</c> returns <c>null</c> are left out of both results.
/// </summary>
/// <param name="merAsString">Mer to look for; also used to build the regex via <c>AASeq.CreateMerRegex</c>.</param>
/// <param name="caseToCompressedAASeq">Case id to sequence map to test.</param>
/// <param name="valueToNonZeroCount">Receives, per answer (<c>true</c>/<c>false</c>), the tally of cases that gave it.</param>
/// <returns>Case id mapped to the answer, for the cases that produced one.</returns>
private Dictionary<string, bool> FindMerValues(string merAsString, Dictionary<string, AASeq> caseToCompressedAASeq, out Dictionary<bool, int> valueToNonZeroCount)
{
    Regex merPattern = AASeq.CreateMerRegex(merAsString);
    Dictionary<string, bool> answerByCase = new Dictionary<string, bool>();
    valueToNonZeroCount = new Dictionary<bool, int>();

    foreach (KeyValuePair<string, AASeq> entry in caseToCompressedAASeq)
    {
        bool? verdict = entry.Value.ContainsMer(merAsString, merPattern);
        if (!verdict.HasValue)
        {
            // No definite answer for this case: it is treated as missing.
            continue;
        }

        bool present = verdict.Value;
        answerByCase.Add(entry.Key, present);

        int tallySoFar = SpecialFunctions.GetValueOrDefault(valueToNonZeroCount, present);
        valueToNonZeroCount[present] = 1 + tallySoFar;
    }

    return answerByCase;
}
/// <summary>
/// Lazily produces one line per (position, amino acid, case) for which a 0/1 value can be
/// decided. Each line is built by <c>SpecialFunctions.CreateTabString</c> from three fields:
/// the variable name "aa@position", the case id, and 1 or 0.
/// </summary>
/// <param name="keepOneValueVariables">
/// When <c>false</c>, positions with a single amino acid and variables whose decided values are
/// not both 0 and 1 are skipped; when <c>true</c>, nothing is filtered out.
/// </param>
/// <returns>The generated lines; empty when no cases are stored.</returns>
public IEnumerable<string> SparseLineEnumeration(bool keepOneValueVariables)
{
    if (_caseIdToAASeq.Count == 0)
    {
        Debug.Assert(SequenceLength == null); // real assert
        yield break;
    }

    SpecialFunctions.CheckCondition(SequenceLength != null, "This converter to sparse assumes all sequences have the same length");

    // Sample of a sparse table kept from the original comment
    // (NOTE(review): it shows four columns with T/F values, while the code below emits
    // three fields with 1/0 - presumably an older layout; confirm):
    //   n1pos aa pid val
    //   880   A  3   F
    //   880   A  5   F
    //   880   A  9   F
    //   880   A  13  F
    //   880   A  14  F
    //   880   A  15  T
    //   ...

    for (int aa0Pos = 0; aa0Pos < (int)SequenceLength; ++aa0Pos)
    {
        Set<char> aminoAcidsHere = EveryAminoAcid(aa0Pos);
        if (!keepOneValueVariables && aminoAcidsHere.Count == 1)
        {
            continue;
        }

        // Position label shared by all cases at this index; taken from the first case
        // visited and then checked against every other one.
        string posName = null;

        foreach (char aa in aminoAcidsHere)
        {
            Set<bool> distinctValues = Set<bool>.GetInstance();
            Dictionary<string, bool> valueByCase = new Dictionary<string, bool>();

            foreach (KeyValuePair<string, AASeq> entry in _caseIdToAASeq)
            {
                string caseId = entry.Key;
                AASeq aaSeq = entry.Value;
                //SpecialFunctions.CheckCondition(aaSeq.IsUsingOriginalPositions(), "This converter to sparse assumes all sequences are using their original positions");

                Set<char> strainAASet = aaSeq[aa0Pos];

                string labelForCase = aaSeq.OriginalAA1Position(aa0Pos);
                if (posName == null)
                {
                    posName = labelForCase;
                }
                else
                {
                    SpecialFunctions.CheckCondition(posName == labelForCase);
                }

                // Value rules (target amino acid / what the case has):
                //   missing: e.g. A/Any, or A/AB when the sequence is not a mixture
                //   1:       e.g. A/A
                //   0:       e.g. A/B or A/BCD, or A/AB when the sequence is a mixture
                if (strainAASet.Equals(AASeq.Any))
                {
                    continue; // missing
                }

                bool value;
                if (!strainAASet.Contains(aa))
                {
                    value = false;
                }
                else if (strainAASet.Count <= 1)
                {
                    value = true;
                }
                else if (aaSeq.Mixture)
                {
                    value = false;
                }
                else
                {
                    continue; // missing
                }

                valueByCase.Add(caseId, value);
                distinctValues.AddNewOrOld(value);
            }

            SpecialFunctions.CheckCondition(posName != null);

            // Unless asked to keep everything, a variable is only useful when both
            // values actually occur.
            if (!keepOneValueVariables && distinctValues.Count != 2)
            {
                continue;
            }

            foreach (KeyValuePair<string, bool> caseIdAndVal in valueByCase)
            {
                // Amino acid first, then position (the reverse order was used previously).
                string variableName = string.Format("{1}@{0}", posName, aa);
                yield return SpecialFunctions.CreateTabString(
                    variableName, caseIdAndVal.Key, caseIdAndVal.Value ? 1 : 0);
            }
        }
    }
}
/// <summary>
/// 1-based counterpart of <c>TrySubSeqAA0Pos</c>: shifts <paramref name="aa1Pos"/> down by one
/// and delegates.
/// </summary>
/// <param name="aa1Pos">1-based start position of the window.</param>
/// <param name="merLength">Number of positions requested.</param>
/// <param name="aaSeq">Set by <c>TrySubSeqAA0Pos</c>.</param>
/// <returns>Whatever <c>TrySubSeqAA0Pos</c> returns for the shifted index.</returns>
public bool TrySubSeqAA1Pos(int aa1Pos, int merLength, out AASeq aaSeq)
{
    int aa0Pos = aa1Pos - 1;
    return TrySubSeqAA0Pos(aa0Pos, merLength, out aaSeq);
}