/// <summary>
/// Enumerates every assignment obtained from the current champion by taking one HLA out
/// and putting one switchable HLA in.
/// </summary>
/// <remarks>
/// Champion HLAs that are in the known HLA set are never taken out. A single working set is
/// mutated before each <c>yield</c> and restored afterwards, so the set handed to
/// <c>TrueCollection.GetInstance</c> changes once enumeration resumes.
/// NOTE(review): whether <c>TrueCollection.GetInstance</c> copies the set is not visible here - confirm.
/// </remarks>
/// <returns>One <c>TrueCollection</c> per (removed HLA, added HLA) pair.</returns>
override public IEnumerable<TrueCollection> Collection()
{
    Set<Hla> workingSet = CreateAssignmentAsSet(QmmrModelOnePeptide.BestHlaAssignmentSoFar.Champ);

    foreach (Hla hlaToDrop in QmmrModelOnePeptide.BestHlaAssignmentSoFar.Champ)
    {
        bool isKnown = QmmrModelOnePeptide.QmrrModelMissingAssignment.KnownHlaSet.Contains(hlaToDrop);
        if (!isKnown)
        {
            // Take the champion HLA out, then try each candidate replacement in its place.
            workingSet.Remove(hlaToDrop);

            foreach (Hla hlaToAdd in QmmrModelOnePeptide.QmrrModelMissingAssignment.SwitchableHlasOfRespondingPatients)
            {
                if (workingSet.Contains(hlaToAdd))
                {
                    continue;
                }

                workingSet.AddNew(hlaToAdd);
                Debug.Assert(workingSet.Count == QmmrModelOnePeptide.BestHlaAssignmentSoFar.Champ.Count); // real assert
                yield return TrueCollection.GetInstance(workingSet);
                workingSet.Remove(hlaToAdd);
            }

            // Put the champion HLA back so the next outer iteration starts from the full assignment.
            workingSet.AddNew(hlaToDrop);
            Debug.Assert(workingSet.Count == QmmrModelOnePeptide.BestHlaAssignmentSoFar.Champ.Count); // real assert
        }
    }
}
/// <summary>
/// Adds per-epitope-position features to <paramref name="featureSet"/>: amino-acid identity
/// features, chemical-property features, and the conjunction of each with every feature in
/// <paramref name="hlaishFeatureSet"/>.
/// </summary>
/// <param name="includeChemicalProperties">When true, adds a <c>HasAAProp</c> feature for each property listed for the residue.</param>
/// <param name="includeAAFeatures">When true, adds an <c>IsAA</c> feature for the residue.</param>
/// <param name="substractChemAACrissCrossFeatures">When true, skips conjunctions pairing an <c>IsAA</c> feature with a <c>HasAAProp</c> feature (in either direction).</param>
/// <param name="nec">Source of the epitope; positions run over <c>nec.E</c>.</param>
/// <param name="necAndHla">Instance each generated feature is asserted to evaluate to true on.</param>
/// <param name="hlaishFeatureSet">Features combined with each per-position feature via <c>And</c>.</param>
/// <param name="featureSet">Receives every generated feature.</param>
private static void AddEiFeatures(bool includeChemicalProperties, bool includeAAFeatures, bool substractChemAACrissCrossFeatures, NEC nec, Pair<NEC, Hla> necAndHla, Set<IHashableFeature> hlaishFeatureSet, Set<IHashableFeature> featureSet)
{
    for (int position0 = 0; position0 < nec.E.Length; ++position0)
    {
        // Position features are numbered from 1.
        IHashableFeature positionFeature = E.GetInstance(position0 + 1);
        string residue = GetAminoAcidFromEpitopePosition(nec, position0);

        if (includeAAFeatures)
        {
            IsAA residueFeature = IsAA.GetInstance(residue, positionFeature);
            featureSet.AddNew(residueFeature);
            Debug.Assert((bool)residueFeature.Evaluate(necAndHla)); // real assert - must only generate true features

            foreach (IHashableFeature hlaish in hlaishFeatureSet)
            {
                bool isCrissCross = substractChemAACrissCrossFeatures && hlaish is HasAAProp;
                if (!isCrissCross)
                {
                    And hlaishAndResidue = And.GetInstance(hlaish, residueFeature);
                    featureSet.AddNew(hlaishAndResidue);
                    Debug.Assert((bool)hlaishAndResidue.Evaluate(necAndHla)); // real assert - must only generate true features
                }
            }
        }

        //All of the above with AA replaced by chemical property of AA
        if (includeChemicalProperties)
        {
            foreach (string propertyName in VirusCount.KmerProperties.AaToPropList[residue])
            {
                HasAAProp propertyFeature = HasAAProp.GetInstance(propertyName, positionFeature);
                featureSet.AddNew(propertyFeature);
                Debug.Assert((bool)propertyFeature.Evaluate(necAndHla)); // real assert - must only generate true features

                foreach (IHashableFeature hlaish in hlaishFeatureSet)
                {
                    bool isCrissCross = substractChemAACrissCrossFeatures && hlaish is IsAA;
                    if (!isCrissCross)
                    {
                        And hlaishAndProperty = And.GetInstance(hlaish, propertyFeature);
                        featureSet.AddNew(hlaishAndProperty);
                        Debug.Assert((bool)hlaishAndProperty.Evaluate(necAndHla)); // real assert - must only generate true features
                    }
                }
            }
        }
    }
}
////!!!same logic is elsewhere. Look for common heading
//private static Dictionary<string, Dictionary<string, double>> LoadReactTableUnfiltered(TextReader patientReader, out Set<string> cidsInReactTable)
//{
//    cidsInReactTable = Set<string>.GetInstance();
//    Dictionary<string, Dictionary<string, double>> reactTable = new Dictionary<string, Dictionary<string, double>>();
//    string header = "peptide cid magnitude";
//    foreach (Dictionary<string, string> row in SpecialFunctions.TabFileTable(patientReader, header, false))
//    {
//        string cid = row["cid"];
//        cidsInReactTable.AddNewOrOld(cid);
//        string peptide = row["peptide"];
//        double amount = double.Parse(row["magnitude"]);
//        Dictionary<string, double> peptideToAmount = SpecialFunctions.GetValueOrDefault(reactTable, peptide);
//        peptideToAmount.Add(cid, amount);
//    }
//    return reactTable;
//}

/// <summary>
/// Reads (peptide, knownHLA) records from <paramref name="datareader"/> and groups the HLAs by peptide.
/// </summary>
/// <remarks>
/// Both fields are trimmed; a record with an empty peptide or empty HLA name is skipped but still
/// counted in the record total written to the console. A repeated HLA for the same peptide is
/// reported through <c>SpecialFunctions.CheckCondition</c>.
/// NOTE(review): relies on <c>SpecialFunctions.GetValueOrDefault</c> storing a new set in the table
/// when the peptide is missing - confirm against that helper.
/// </remarks>
/// <param name="datareader">Reader exposing "peptide" and "knownHLA" columns.</param>
/// <param name="caseName">Used only in the duplicate-HLA error message.</param>
/// <returns>Map from peptide to its set of known HLAs.</returns>
private Dictionary<string, Set<Hla>> LoadKnownTable(DbDataReader datareader, string caseName)
{
    //!!!code appears elsewhere. Look for common header
    Dictionary<string, Set<Hla>> peptideToKnownHlas = new Dictionary<string, Set<Hla>>();
    int peptideOrdinal = datareader.GetOrdinal("peptide");
    int hlaOrdinal = datareader.GetOrdinal("knownHLA");
    int recordCount = 0;

    while (datareader.Read())
    {
        recordCount++;
        string peptide = datareader.GetString(peptideOrdinal).Trim();
        string hlaName = datareader.GetString(hlaOrdinal).Trim();

        if (peptide.Length != 0 && hlaName.Length != 0)
        {
            Set<Hla> hlasForPeptide = SpecialFunctions.GetValueOrDefault(peptideToKnownHlas, peptide);
            Hla hla = HlaFactory.GetGroundInstance(hlaName);

            bool isFirstOccurrence = !hlasForPeptide.Contains(hla);
            string duplicateMessage = string.Format("Hla {0} is duplicated in {1}known.txt, for peptide {2}", hla, caseName, peptide);
            SpecialFunctions.CheckCondition(isFirstOccurrence, duplicateMessage);

            hlasForPeptide.AddNew(hla); //!!!const
        }
    }

    Console.WriteLine("{0}: number of records read: {1}", "Known table", recordCount);
    return peptideToKnownHlas;
}
/// <summary>
/// Maps a single nucleotide code character to the set of bases it stands for.
/// </summary>
/// <remarks>
/// The character is upper-cased first. An unambiguous code yields a one-element set; an ambiguous
/// code yields one element per character of its entry in <c>Ambiguous1LetterNucCodeToChoices</c>.
/// </remarks>
/// <param name="ch">Nucleotide code, in either case.</param>
/// <returns>The set of bases for <paramref name="ch"/>.</returns>
/// <exception cref="ArgumentException">The upper-cased character is in neither code table.</exception>
protected override MBT.Escience.Set<char> GetResidueSetFromCharAndCheckThatValid(char ch)
{
    char code = char.ToUpper(ch);
    Biology biology = Biology.GetInstance();

    if (biology.Unambiguous1LetterNucCodes.Contains(code))
    {
        return new Set<char>(code);
    }

    if (!biology.Ambiguous1LetterNucCodeToChoices.ContainsKey(code))
    {
        throw new ArgumentException("Do not know dna base " + code);
    }

    // Ambiguous code: expand the choices string into a set of individual bases.
    string choices = biology.Ambiguous1LetterNucCodeToChoices[code];
    Set<char> bases = new Set<char>();
    foreach (char baseChar in choices)
    {
        bases.AddNew(baseChar);
    }
    return bases;
}
/// <summary>
/// Walks every non-null entry of <c>EpitopeLearningDataList</c> and yields a (mer, normalized HLA)
/// pair once for each distinct (mer, original HLA) combination encountered.
/// </summary>
/// <remarks>
/// The label stored with each original HLA is not consulted here.
/// </remarks>
/// <returns>(mer, normalized HLA) pairs; the same pair may be yielded more than once.</returns>
public IEnumerable<Pair<string, Hla>> PositiveExampleEnumeration()
{
    //Don't repeat if the Mer/OriHla has already been seen, but DO repeat if the mer/HlaNorm has appeared before
    Set<Pair<string, Hla>> seenMerAndOriginalHla = new Set<Pair<string, Hla>>();

    foreach (EpitopeLearningDataDupHlaOK learningData in EpitopeLearningDataList)
    {
        if (null != learningData)
        {
            foreach (KeyValuePair<string, Dictionary<Hla, Dictionary<Hla, bool>>> merEntry in learningData)
            {
                string mer = merEntry.Key;

                foreach (KeyValuePair<Hla, Dictionary<Hla, bool>> normalizedEntry in merEntry.Value)
                {
                    Pair<string, Hla> merAndNormalizedHla = new Pair<string, Hla>(mer, normalizedEntry.Key);

                    foreach (Hla originalHla in normalizedEntry.Value.Keys)
                    {
                        Pair<string, Hla> merAndOriginalHla = new Pair<string, Hla>(mer, originalHla);
                        if (seenMerAndOriginalHla.Contains(merAndOriginalHla))
                        {
                            continue;
                        }

                        seenMerAndOriginalHla.AddNew(merAndOriginalHla);
                        yield return merAndNormalizedHla;
                    }
                }
            }
        }
    }
}
/// <summary>
/// Yields each distinct mer of length <paramref name="merLength"/> found in the given sequences,
/// skipping mers flagged ambiguous and mers whose text contains "*".
/// </summary>
/// <param name="merLength">Length passed to <c>SubSeqEnumeration</c>.</param>
/// <param name="caseToCompressedAASeq">Sequences to scan; only the values are used.</param>
/// <returns>Each qualifying mer string, once, in first-seen order.</returns>
private IEnumerable<string> EveryUnambiguousStopFreeMer(int merLength, Dictionary<string, AASeq> caseToCompressedAASeq)
{
    Set<string> alreadyYielded = Set<string>.GetInstance();

    foreach (AASeq sequence in caseToCompressedAASeq.Values)
    {
        foreach (AASeq candidate in sequence.SubSeqEnumeration(merLength))
        {
            if (!candidate.Ambiguous)
            {
                string merText = candidate.ToString();
                if (!merText.Contains("*"))
                {
                    Debug.Assert(!merText.Contains("-") && !merText.Contains("?")); // real assert

                    if (!alreadyYielded.Contains(merText))
                    {
                        alreadyYielded.AddNew(merText);
                        yield return merText;
                    }
                }
            }
        }
    }
}
/// <summary>
/// Streams a sparse "var / cid / val" table, yielding one (variable, caseId-to-value) pair per
/// run of consecutive rows that share the same variable.
/// </summary>
/// <remarks>
/// The file must be grouped by variable: a variable that starts a second run, or a case id that
/// repeats within one variable, is reported through <c>SpecialFunctions.CheckCondition</c>.
/// </remarks>
/// <param name="sparseFileName">File handed to <c>SpecialFunctions.TabFileTable</c>.</param>
/// <returns>One pair per variable, in file order.</returns>
static public IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> LoadSparseFileEnumeration(string sparseFileName) //where T1:ISufficientStatistics
{
    Set<string> startedVariables = new Set<string>();
    Pair<string, Dictionary<string, SufficientStatistics>> currentGroup = null;

    foreach (Dictionary<string, string> row in SpecialFunctions.TabFileTable(sparseFileName, "var\tcid\tval", false))
    {
        string variable = row["var"];

        // A change of variable closes the group being accumulated.
        bool variableChanged = currentGroup != null && currentGroup.First != variable;
        if (variableChanged)
        {
            yield return currentGroup;
            currentGroup = null;
        }

        if (currentGroup == null)
        {
            bool isFirstRun = !startedVariables.Contains(variable);
            string notGroupedMessage = string.Format("Input file ({0}) is not grouped by variable. Variable {1} appears in multiple places", sparseFileName, variable);
            SpecialFunctions.CheckCondition(isFirstRun, notGroupedMessage);

            startedVariables.AddNew(variable);
            currentGroup = new Pair<string, Dictionary<string, SufficientStatistics>>(variable, new Dictionary<string, SufficientStatistics>());
        }

        string caseId = row["cid"];
        SufficientStatistics val = SufficientStatistics.Parse(row["val"]);
        SpecialFunctions.CheckCondition(!currentGroup.Second.ContainsKey(caseId), string.Format("Input file ({0}) for var {1} contains multiple entries for caseId {2}", sparseFileName, variable, caseId));
        currentGroup.Second.Add(caseId, val);
    }

    // Flush the final group, if the file had any rows.
    if (currentGroup != null)
    {
        yield return currentGroup;
    }
}
/// <summary>
/// Parses a sequence string into a list with one character set per position.
/// </summary>
/// <remarks>
/// Characters between '{' and '}' are collected into one set for a single position. '?' and '-'
/// become single-element sets and may not appear inside braces. A blank is always rejected. Any
/// other character is validated through <c>GetResidueSetFromCharAndCheckThatValid</c>.
/// NOTE(review): inside braces the raw character is added, not the set returned by
/// <c>GetResidueSetFromCharAndCheckThatValid</c> - confirm that this is intended.
/// </remarks>
/// <param name="aaSeqAsString">Sequence text to parse.</param>
/// <returns>One set per sequence position.</returns>
public List<Set<char>> CreateCharSetList(string aaSeqAsString)
{
    List<Set<char>> positions = new List<Set<char>>();
    Set<char> openSet = null; // non-null only while inside a '{' ... '}' group

    foreach (char ch in aaSeqAsString)
    {
        if (ch == '{')
        {
            Helper.CheckCondition(openSet == null, "Nested '{''s are not allowed in aaSeq strings");
            openSet = new Set<char>();
            positions.Add(openSet);
        }
        else if (ch == '}')
        {
            Helper.CheckCondition(openSet != null, "'}' must follow a '{' in aaSeq strings");
            Helper.CheckCondition(openSet.Count > 0, "Empty sets not allow in aaSeq strings");
            openSet = null;
        }
        else if (ch == ' ')
        {
            Helper.CheckCondition(false, "Sequences should not contain blanks. Use '?' for missing.");
        }
        else if (ch == '?' || ch == '-')
        {
            Helper.CheckCondition(openSet == null, string.Format("'{0}' must not appear in sets", ch));
            positions.Add(Set<char>.GetInstance(ch));
        }
        else
        {
            // Always called, even inside braces, so that the character is validated.
            Set<char> residues = GetResidueSetFromCharAndCheckThatValid(ch);
            if (openSet != null)
            {
                openSet.AddNew(ch);
            }
            else
            {
                positions.Add(residues);
            }
        }
    }

    Helper.CheckCondition(openSet == null, "Missing '}' in aaSeq string");
    return positions;
}
/// <summary>
/// Copies the HLAs of <paramref name="startingAssignment"/> into a new set.
/// </summary>
/// <param name="startingAssignment">Assignment whose HLAs are enumerated.</param>
/// <returns>A new set holding each enumerated HLA.</returns>
internal static Set<Hla> CreateAssignmentAsSet(TrueCollection startingAssignment)
{
    Set<Hla> copy = Set<Hla>.GetInstance();
    foreach (Hla member in startingAssignment)
    {
        copy.AddNew(member);
    }

    return copy;
}
/// <summary>
/// Splits a dataset name on '+' and ',' and returns the lower-cased parts as a set.
/// </summary>
/// <remarks>
/// Lower-casing uses the invariant culture so the resulting names do not depend on the current
/// thread culture (for example, 'I' must not become a dotless 'ı' under tr-TR).
/// </remarks>
/// <param name="datasetName">One or more source names separated by '+' or ','.</param>
/// <returns>The set of lower-cased source names.</returns>
private static Set<string> CreateSourceSet(string datasetName)
{
    Set<string> sourceSet = Set<string>.GetInstance();
    foreach (string source in datasetName.Split('+', ','))
    {
        sourceSet.AddNew(source.ToLowerInvariant());
    }

    return sourceSet;
}
/// <summary>
/// Rebuilds the four "interesting position" tables (<c>InterestingToAAExact</c>,
/// <c>InterestingToAAAsPrefix</c>, <c>InterestingHlaSeqToPropertiesExact</c>,
/// <c>InterestingHlaSeqToPropertiesAsPrefix</c>) from the given HLA sequences.
/// </summary>
/// <remarks>
/// For each position of the first sequence, counts the amino acid at that position across all
/// sequences and, per property, how many sequences do and do not have it. A position is recorded
/// when <c>SumOfNonMax</c> of the amino-acid counts reaches the threshold from
/// <c>ReadSumOfNonMaxMin</c>; within such a position, a property is recorded when
/// <c>SumOfNonMax</c> of its true/false counts also reaches the threshold.
/// NOTE(review): every sequence is indexed up to the first sequence's length, so sequences are
/// presumably all the same length - confirm.
/// NOTE(review): relies on <c>SpecialFunctions.GetValueOrDefault</c> storing a new value in the
/// dictionary when the key is missing - confirm against that helper.
/// </remarks>
/// <param name="hlaNameToSequence">HLA sequences keyed by name; only the values are used.</param>
private static void SetUpInterestingHlaPositionsForAAFeaturesTable(Dictionary<string, string> hlaNameToSequence)
{
    int threshold = ReadSumOfNonMaxMin();
    string referenceSequence = SpecialFunctions.First(hlaNameToSequence.Values);

    InterestingToAAExact = Set<HlaSeq>.GetInstance();
    InterestingToAAAsPrefix = Set<HlaSeq>.GetInstance();
    InterestingHlaSeqToPropertiesExact = new Dictionary<HlaSeq, Set<string>>();
    InterestingHlaSeqToPropertiesAsPrefix = new Dictionary<HlaSeq, Set<string>>();

    for (int index0 = 0; index0 < referenceSequence.Length; ++index0)
    {
        Dictionary<char, int> residueCounts = new Dictionary<char, int>();
        Dictionary<string, Dictionary<bool, int>> propertyCounts = new Dictionary<string, Dictionary<bool, int>>();

        // Tally residues and property membership at this position across every sequence.
        foreach (string sequence in hlaNameToSequence.Values)
        {
            char residue = sequence[index0];
            residueCounts[residue] = SpecialFunctions.GetValueOrDefault(residueCounts, residue) + 1;
            //string aaAsString = Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[aa];

            foreach (KeyValuePair<string, Set<char>> propertyEntry in VirusCount.KmerProperties.GetInstance().PropertyToAACharSet)
            {
                bool hasProperty = propertyEntry.Value.Contains(residue);
                Dictionary<bool, int> countsByValue = SpecialFunctions.GetValueOrDefault(propertyCounts, propertyEntry.Key);
                countsByValue[hasProperty] = 1 + SpecialFunctions.GetValueOrDefault(countsByValue, hasProperty);
            }
        }

        if (SumOfNonMax(residueCounts.Values) < threshold)
        {
            continue;
        }

        // HlaSeq positions are numbered from 1.
        HlaSeq exactSeq = HlaSeq.GetInstance(index0 + 1, false);
        InterestingToAAExact.AddNew(exactSeq);
        HlaSeq prefixSeq = HlaSeq.GetInstance(index0 + 1, true);
        InterestingToAAAsPrefix.AddNew(prefixSeq);

        foreach (KeyValuePair<string, Dictionary<bool, int>> propertyEntry in propertyCounts)
        {
            if (SumOfNonMax(propertyEntry.Value.Values) < threshold)
            {
                continue;
            }

            Set<string> exactProperties = SpecialFunctions.GetValueOrDefault(InterestingHlaSeqToPropertiesExact, exactSeq);
            exactProperties.AddNew(propertyEntry.Key);

            Set<string> prefixProperties = SpecialFunctions.GetValueOrDefault(InterestingHlaSeqToPropertiesAsPrefix, prefixSeq);
            prefixProperties.AddNew(propertyEntry.Key);
        }
    }
}
/// <summary>
/// Toggles membership of an HLA in the assignment: removes it when present, adds it when absent.
/// </summary>
/// <param name="assignmentAsSet">Set modified in place.</param>
/// <param name="hlaOfRepondingPatients">HLA whose membership is flipped.</param>
private static void BitFlip(Set<Hla> assignmentAsSet, Hla hlaOfRepondingPatients)
{
    bool isPresent = assignmentAsSet.Contains(hlaOfRepondingPatients);
    if (!isPresent)
    {
        assignmentAsSet.AddNew(hlaOfRepondingPatients);
        return;
    }

    assignmentAsSet.Remove(hlaOfRepondingPatients);
}
/// <summary>
/// Adds an <c>IsHla</c> feature for <paramref name="hla"/> to the set, unless HLA features are subtracted.
/// </summary>
/// <param name="subtractHlaFeatures">When true, nothing is added.</param>
/// <param name="hla">HLA the feature is created for.</param>
/// <param name="necAndHla">Instance the new feature is asserted to evaluate to true on.</param>
/// <param name="hlaishEnumeration">Set receiving the feature; the reference itself is not reassigned here.</param>
private static void CreateAndAddHlaFeature(bool subtractHlaFeatures, Hla hla, Pair<NEC, Hla> necAndHla, ref Set<IHashableFeature> hlaishEnumeration)
{
    if (!subtractHlaFeatures)
    {
        IsHla hlaFeature = IsHla.GetInstance(hla);
        hlaishEnumeration.AddNew(hlaFeature);
        Debug.Assert((bool)hlaFeature.Evaluate(necAndHla)); // real assert - must only generate true features
    }
}
/// <summary>
/// Enumerates every assignment that differs from the current champion in exactly one switchable
/// HLA: the HLA is removed if the champion contains it and added otherwise.
/// </summary>
/// <remarks>
/// A single working set is flipped before each <c>yield</c> and flipped back afterwards, so the
/// set handed to <c>TrueCollection.GetInstance</c> changes once enumeration resumes.
/// NOTE(review): whether <c>TrueCollection.GetInstance</c> copies the set is not visible here - confirm.
/// </remarks>
/// <returns>One <c>TrueCollection</c> per switchable HLA.</returns>
override public IEnumerable<TrueCollection> Collection()
{
    Set<Hla> workingSet = CreateAssignmentAsSet(QmmrModelOnePeptide.BestHlaAssignmentSoFar.Champ);

    foreach (Hla candidate in QmmrModelOnePeptide.QmrrModelMissingAssignment.SwitchableHlasOfRespondingPatients)
    {
        bool wasPresent = workingSet.Contains(candidate);

        // Flip the candidate's membership ...
        if (wasPresent)
        {
            workingSet.Remove(candidate);
        }
        else
        {
            workingSet.AddNew(candidate);
        }

        yield return TrueCollection.GetInstance(workingSet);

        // ... and flip it back to restore the starting assignment.
        if (wasPresent)
        {
            workingSet.AddNew(candidate);
        }
        else
        {
            workingSet.Remove(candidate);
        }
    }
}
/// <summary>
/// Resets <c>_knownTable</c> and, when the "useKnownList" optimization parameter equals 1,
/// fills it from the known-HLA file, grouping HLAs by peptide.
/// </summary>
/// <remarks>
/// NOTE(review): relies on <c>SpecialFunctions.GetValueOrDefault</c> storing a new set in the table
/// when the peptide is missing - confirm against that helper.
/// </remarks>
private void ReadKnownTable()
{
    _knownTable = new Dictionary<string, Set<Hla>>();

    bool useKnownList = OptimizationParameterList["useKnownList"].Value == 1;
    if (!useKnownList)
    {
        return;
    }

    foreach (Dictionary<string, string> row in SpecialFunctions.TabFileTable(KnownFileName, "peptide knownHLA", false)) //!!!const
    {
        string peptide = row["peptide"]; //!!!const
        Set<Hla> hlasForPeptide = SpecialFunctions.GetValueOrDefault(_knownTable, peptide);
        Hla knownHla = HlaFactory.GetGroundInstance(row["knownHLA"]); //!!!const
        hlasForPeptide.AddNew(knownHla);
    }
}
/// <summary>
/// Returns the input sequences with case-insensitive duplicates removed, keeping the first
/// occurrence of each sequence and preserving input order.
/// </summary>
/// <remarks>
/// Sequences are compared by their invariant-culture lower-cased text, so the result does not
/// depend on the current thread culture (for example, 'I' must not fold to a dotless 'ı' under
/// tr-TR). The folded key is computed once per item.
/// </remarks>
/// <param name="seqs">Sequences to filter.</param>
/// <returns>A new list holding the first occurrence of each distinct sequence.</returns>
static public List<NamedSequence> RemoveDuplicateSeqs(List<NamedSequence> seqs)
{
    Set<string> observedSeqs = new Set<string>();
    List<NamedSequence> result = new List<NamedSequence>(seqs.Count);

    foreach (NamedSequence seq in seqs)
    {
        string key = seq.Sequence.ToLowerInvariant();
        if (!observedSeqs.Contains(key))
        {
            result.Add(seq);
            observedSeqs.AddNew(key);
        }
    }

    return result;
}
/// <summary>
/// Collects the one-letter codes of every amino acid whose bit for the named property is set.
/// </summary>
/// <param name="propertyName">Key into <c>KmerProperties.PropertyToNumber</c>.</param>
/// <returns>Set of one-letter amino acid codes having the property.</returns>
public static Set<char> PropertyNameToAASet(string propertyName)
{
    KmerProperties properties = KmerProperties.GetInstance();
    int bitIndex = properties.PropertyToNumber[propertyName];
    Set<char> matching = Set<char>.GetInstance();

    foreach (KeyValuePair<string, bool[]> entry in properties.AABits)
    {
        if (!entry.Value[bitIndex])
        {
            continue;
        }

        // AABits keys are looked up in the three-letter table and stored as one-letter codes.
        char oneLetter = Biology.GetInstance().ThreeLetterAminoAcidAbbrevTo1Letter[entry.Key];
        matching.AddNew(oneLetter);
    }

    return matching;
}
/// <summary>
/// For each position of the first sequence, builds a tab-joined key from the first and third
/// '@'-separated parts of the position name and, when the key is new, records it in
/// <paramref name="seenTriple"/> and appends it to the list for its protein.
/// </summary>
/// <remarks>
/// NOTE(review): relies on <c>GetValueOrDefault</c> on the dictionary returning a list that is
/// stored in the dictionary when the protein is missing - confirm against that helper.
/// </remarks>
/// <param name="seenTriple">Keys already recorded; extended in place.</param>
/// <param name="proteinToTripleList">Per-protein key lists; extended in place.</param>
public void TriplesAppend(ref Set<string> seenTriple, ref Dictionary<string, List<string>> proteinToTripleList)
{
    AASeq firstSeq = GetFirstAASeq();

    for (int index0 = 0; index0 < (int)SequenceLengthOrNull; ++index0)
    {
        string[] nameParts = firstSeq.OriginalAA1Position(index0).Split('@');
        string protein = nameParts[0];
        string hxb2Pos = nameParts[2];
        string key = Helper.CreateTabString(protein, hxb2Pos);

        if (seenTriple.Contains(key))
        {
            continue;
        }

        seenTriple.AddNew(key);
        proteinToTripleList.GetValueOrDefault(protein).Add(key);
    }
}
/// <summary>
/// Adds a supertype feature for <paramref name="hla"/> to the set, unless supertype features are
/// subtracted or no supertype table is configured.
/// </summary>
/// <remarks>
/// When <paramref name="supertypeTableSource"/> is <c>SupertypeTableSource.None</c> there is no
/// feature to create, so the method returns without touching the set. Previously a null feature
/// was added to the set and then dereferenced inside <c>Debug.Assert</c>.
/// </remarks>
/// <param name="supertypeTableSource">Supertype table to use, or <c>SupertypeTableSource.None</c>.</param>
/// <param name="subtractSupertypeFeatures">When true, nothing is added.</param>
/// <param name="hla">HLA the feature is created for.</param>
/// <param name="necAndHla">Instance the new feature is asserted to evaluate to true on.</param>
/// <param name="hlaishEnumeration">Set receiving the feature; the reference itself is not reassigned here.</param>
/// <param name="assembly">Passed through to <c>IsSupertypeFromFile.GetInstance</c>.</param>
/// <param name="resourcePrefix">Passed through to <c>IsSupertypeFromFile.GetInstance</c>.</param>
private static void CreateAndAddFeatureSupertype(string supertypeTableSource, bool subtractSupertypeFeatures, Hla hla, Pair<NEC, Hla> necAndHla, ref Set<IHashableFeature> hlaishEnumeration, Assembly assembly, string resourcePrefix)
{
    if (subtractSupertypeFeatures)
    {
        return;
    }

    IHashableFeature featureSupertype;
    switch (supertypeTableSource)
    {
        case SupertypeTableSource.None:
            // No supertype table configured: there is no feature to add.
            return;
        default:
            featureSupertype = (IHashableFeature)IsSupertypeFromFile.GetInstance(hla, supertypeTableSource, assembly, resourcePrefix);
            break;
    }

    hlaishEnumeration.AddNew(featureSupertype);
    Debug.Assert((bool)((Feature)featureSupertype).Evaluate(necAndHla)); // real assert - must only generate true features
}
/// <summary>
/// Parses an amino acid sequence string into a list with one character set per position.
/// </summary>
/// <remarks>
/// Characters between '{' and '}' are collected into one set for a single position. '?' and '-'
/// become single-element sets and may not appear inside braces. A blank is always rejected. Any
/// other character must be a one-letter code that maps to a known amino acid.
/// </remarks>
/// <param name="aaSeqAsString">Sequence text to parse.</param>
/// <returns>One set per sequence position.</returns>
private static List<Set<char>> CreateSequence(string aaSeqAsString)
{
    List<Set<char>> positions = new List<Set<char>>();
    Set<char> openSet = null; // non-null only while inside a '{' ... '}' group

    foreach (char ch in aaSeqAsString)
    {
        if (ch == '{')
        {
            SpecialFunctions.CheckCondition(openSet == null, "Nested '{''s are not allowed in aaSeq strings");
            openSet = new Set<char>();
            positions.Add(openSet);
        }
        else if (ch == '}')
        {
            SpecialFunctions.CheckCondition(openSet != null, "'}' must follow a '{' in aaSeq strings");
            SpecialFunctions.CheckCondition(openSet.Count > 0, "Empty sets not allow in aaSeq strings");
            openSet = null;
        }
        else if (ch == ' ')
        {
            SpecialFunctions.CheckCondition(false, "Sequences should not contain blanks. Use '?' for missing.");
        }
        else if (ch == '?' || ch == '-')
        {
            SpecialFunctions.CheckCondition(openSet == null, string.Format("'{0}' must not appear in sets", ch));
            positions.Add(Set<char>.GetInstance(ch));
        }
        else
        {
            //!!!most this to Biology?
            bool isOneLetterCode = Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter.ContainsKey(ch);
            SpecialFunctions.CheckCondition(isOneLetterCode, string.Format("The character {0} is not an amino acid", ch));

            string threeLetterName = Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[ch];
            bool isKnown = Biology.GetInstance().KnownAminoAcid(threeLetterName);
            SpecialFunctions.CheckCondition(isKnown, string.Format("The character {0} is not a standard amino acid", ch));

            if (openSet != null)
            {
                openSet.AddNew(ch);
            }
            else
            {
                positions.Add(Set<char>.GetInstance(ch));
            }
        }
    }

    SpecialFunctions.CheckCondition(openSet == null, "Missing '}' in aaSeq string");
    return positions;
}