/// <summary>
/// Builds a new <c>AASeq</c> from <paramref name="aaSeqIn"/> that omits every position equal to
/// <c>Delete</c> and ends just before the first position equal to <c>Stop</c>.
/// The original-position label of each kept position is carried over alongside it.
/// </summary>
/// <param name="caseId">Case identifier; used only in the warning message.</param>
/// <param name="aaSeqIn">Sequence to compress. It is read, not modified, by this method.</param>
/// <param name="errorStream">Receives a warning line when a stop is found before the last position.</param>
/// <returns>A new sequence created with the same <c>Mixture</c> setting as the input.</returns>
static public AASeq GetCompressedInstance(string caseId, AASeq aaSeqIn, TextWriter errorStream)
{
    AASeq compressed = new AASeq(aaSeqIn.Mixture);
    compressed.Sequence = new List<Set<char>>();
    compressed._originalAA1PositionTableOrNull = new List<string>();

    for (int index = 0; index < aaSeqIn.Count; index++)
    {
        Set<char> residues = aaSeqIn[index];
        // Looked up before the Delete/Stop checks, exactly as the input is walked position by position.
        string originalPosition = aaSeqIn.OriginalAA1Position(index);

        // Author's note (!!!const): Delete and Stop are intended to be treated as constants.
        if (!residues.Equals(Delete))
        {
            if (residues.Equals(Stop))
            {
                // A stop anywhere except the final position is reported, and the rest is dropped either way.
                bool stopIsLast = (index == aaSeqIn.Count - 1);
                if (!stopIsLast)
                {
                    errorStream.WriteLine("Warning: The sequence for case id '{0}' contains a '*' before the last position", caseId);
                }
                break;
            }

            compressed.Sequence.Add(residues);
            compressed._originalAA1PositionTableOrNull.Add(originalPosition);
        }
        // Positions equal to Delete are skipped without being copied.
    }

    return compressed;
}
/// <summary>
/// Lazily enumerates sparse-format lines describing, for every position and every amino acid
/// reported by <c>EveryAminoAcid</c> at that position, whether each case has that amino acid.
/// Each line is produced by <c>SpecialFunctions.CreateTabString(variableName, caseId, value)</c>,
/// where the variable name is <c>"{aminoAcid}@{positionName}"</c> and the value is 1 or 0.
/// </summary>
/// <param name="keepOneValueVariables">
/// When false, positions with exactly one amino acid are skipped, and so are variables whose
/// recorded values do not include both true and false.
/// </param>
/// <returns>The sparse lines; nothing is yielded when there are no cases.</returns>
public IEnumerable<string> SparseLineEnumeration(bool keepOneValueVariables)
{
    if (_caseIdToAASeq.Count == 0)
    {
        Debug.Assert(SequenceLength == null); // real assert
        yield break;
    }

    SpecialFunctions.CheckCondition(SequenceLength != null, "This converter to sparse assumes all sequences have the same length");

    /* Layout sketch left by the original author:
     *   n1pos  aa  pid  val
     *   880    A   3    F
     *   880    A   5    F
     *   880    A   9    F
     *   880    A   13   F
     *   880    A   14   F
     *   880    A   15   T
     *   ...
     */
    for (int position0 = 0; position0 < (int)SequenceLength; position0++)
    {
        Set<char> observedAminoAcids = EveryAminoAcid(position0);
        if (!keepOneValueVariables && observedAminoAcids.Count == 1)
        {
            continue;
        }

        // Filled in from the first case visited, then checked against every later lookup.
        string positionLabel = null;

        foreach (char aminoAcid in observedAminoAcids)
        {
            Set<bool> seenValues = Set<bool>.GetInstance();
            Dictionary<string, bool> valueByCase = new Dictionary<string, bool>();

            foreach (string caseId in _caseIdToAASeq.Keys)
            {
                AASeq caseSeq = _caseIdToAASeq[caseId];
                Set<char> caseAminoAcids = caseSeq[position0];

                if (positionLabel == null)
                {
                    positionLabel = caseSeq.OriginalAA1Position(position0);
                }
                else
                {
                    SpecialFunctions.CheckCondition(positionLabel == caseSeq.OriginalAA1Position(position0));
                }

                // Classification of this case for the current amino acid (written target/observed):
                //   missing (null): A/Any, or A/AB when the sequence is not a mixture
                //   1 (true):       A/A
                //   0 (false):      A/B or A/BCD, or A/AB when the sequence is a mixture
                bool? indicator;
                if (caseAminoAcids.Equals(AASeq.Any))
                {
                    indicator = null;
                }
                else if (!caseAminoAcids.Contains(aminoAcid))
                {
                    indicator = false;
                }
                else if (caseAminoAcids.Count > 1)
                {
                    if (caseSeq.Mixture)
                    {
                        indicator = false;
                    }
                    else
                    {
                        indicator = null;
                    }
                }
                else
                {
                    indicator = true;
                }

                if (indicator.HasValue)
                {
                    valueByCase.Add(caseId, indicator.Value);
                    seenValues.AddNewOrOld(indicator.Value);
                }
            }

            SpecialFunctions.CheckCondition(positionLabel != null);

            // Unless one-valued variables are kept, both true and false must have been recorded.
            if (!keepOneValueVariables && seenValues.Count != 2)
            {
                continue;
            }

            // Amino acid first, then the position label.
            string variableName = string.Format("{1}@{0}", positionLabel, aminoAcid);
            foreach (KeyValuePair<string, bool> entry in valueByCase)
            {
                int flag = entry.Value ? 1 : 0;
                yield return SpecialFunctions.CreateTabString(variableName, entry.Key, flag);
            }
        }
    }
}