/// <summary>
/// Lazily produces one tab-delimited line per (protein, mer, case) combination,
/// built with <c>Helper.CreateTabString(variableName, caseId, 0 or 1)</c>, where
/// the variable name is <c>protein + "@" + mer</c> and the last field is 1 when
/// <c>ContainsMer</c> reported true for that case and 0 when it reported false.
/// </summary>
/// <param name="keepOneValueVariables">
/// When false, a (protein, mer) variable is emitted only if both a true and a
/// false value were observed among the cases; when true, every variable is emitted.
/// </param>
/// <param name="merLength">Length passed through to <c>EveryUnambiguousStopFreeMer</c>.</param>
/// <returns>
/// A deferred sequence of tab strings; empty when there are no cases. Because this
/// is an iterator, the checks below run on enumeration, not at call time.
/// </returns>
public IEnumerable<string> SparseLineMerEnumeration(bool keepOneValueVariables, int merLength)
{
    // Nothing to enumerate when no cases are loaded.
    if (_caseIdToAASeq.Count == 0)
    {
        Debug.Assert(SequenceLengthOrNull == null); // real assert
        yield break;
    }

    Helper.CheckCondition(SequenceLengthOrNull != null, "This converter to sparse assumes all sequences have the same length");

    Dictionary<string, AASeq> compressedByCase = RemoveDeletesAndStopsFromData(false, Console.Error);

    foreach (string mer in EveryUnambiguousStopFreeMer(merLength, compressedByCase))
    {
        Regex merPattern = AASeq.CreateMerRegex(mer);

        //!!!look for similar code elsewhere
        foreach (string protein in EveryProtein())
        {
            Set<bool> seenValues = Set<bool>.GetInstance();
            Dictionary<string, bool> valueByCase = new Dictionary<string, bool>();

            foreach (KeyValuePair<string, AASeq> entry in compressedByCase)
            {
                AASeq sequence = entry.Value;
                Helper.CheckCondition(sequence.MixtureSemantics == MixtureSemantics.Uncertainty, "Code does not expect Mixture semantics");

                bool? maybeContains = sequence.ContainsMer(mer, merPattern, protein);
                if (!maybeContains.HasValue)
                {
                    // A null answer means this case contributes no value for the variable.
                    continue;
                }

                bool contains = maybeContains.Value;
                valueByCase.Add(entry.Key, contains);
                seenValues.AddNewOrOld(contains);
            }

            // Skip variables that took a single value unless the caller asked to keep them.
            if (!keepOneValueVariables && seenValues.Count != 2)
            {
                continue;
            }

            string variableName = protein + "@" + mer;
            foreach (KeyValuePair<string, bool> caseAndValue in valueByCase)
            {
                int indicator = caseAndValue.Value ? 1 : 0;
                yield return Helper.CreateTabString(variableName, caseAndValue.Key, indicator);
            }
        }
    }
}
/// <summary>
/// Evaluates <c>ContainsMer</c> for the given mer against every case's sequence and
/// collects the non-null answers, while tallying how many cases produced each answer.
/// </summary>
/// <param name="merAsString">The mer to look for; also used to build the regex via <c>AASeq.CreateMerRegex</c>.</param>
/// <param name="caseToCompressedAASeq">Map from case id to the sequence to test.</param>
/// <param name="valueToNonZeroCount">
/// Receives a new dictionary keyed by the observed answer (true/false). Each case with
/// a non-null answer bumps that key's entry by one.
/// NOTE(review): <c>GetValueOrDefault</c> is not defined in this view; presumably it
/// yields 0 for a missing key - confirm.
/// </param>
/// <returns>Map from case id to the answer, omitting cases whose answer was null.</returns>
private Dictionary<string, bool> FindMerValues(string merAsString, Dictionary<string, AASeq> caseToCompressedAASeq, out Dictionary<bool, int> valueToNonZeroCount)
{
    Regex merPattern = AASeq.CreateMerRegex(merAsString);
    valueToNonZeroCount = new Dictionary<bool, int>();
    Dictionary<string, bool> answerByCase = new Dictionary<string, bool>();

    foreach (KeyValuePair<string, AASeq> entry in caseToCompressedAASeq)
    {
        bool? maybeContains = entry.Value.ContainsMer(merAsString, merPattern);
        if (!maybeContains.HasValue)
        {
            // No answer for this case: it is neither recorded nor counted.
            continue;
        }

        bool contains = maybeContains.Value;
        answerByCase.Add(entry.Key, contains);

        int previousCount = valueToNonZeroCount.GetValueOrDefault(contains);
        valueToNonZeroCount[contains] = 1 + previousCount;
    }

    return answerByCase;
}