//!!!very similar to other code
/// <summary>
/// Reads a tab-delimited file of labeled examples and returns a map from
/// (NEC, Hla) pair to its boolean label.
/// </summary>
/// <param name="hlaFactory">Factory whose GetGroundInstance turns the "hla" column text into an Hla.</param>
/// <param name="fileName">File handed to SpecialFunctions.TabFileTable with the header "N\tepitope\tC\thla\tlabel".</param>
/// <param name="dedup">
/// When true, a repeated (NEC, Hla) pair is skipped as long as its label agrees with the one
/// already stored. When false, a repeated pair reaches Dictionary.Add, which throws ArgumentException.
/// </param>
/// <returns>The table of examples; the value is true exactly when the "label" column was "1".</returns>
/// <remarks>
/// Validation goes through SpecialFunctions.CheckCondition (presumably throws when the
/// condition is false -- confirm against its definition): the epitope must be a legal peptide,
/// the label must be "0" or "1", and with dedup a repeated pair must not carry a contradictory label.
/// </remarks>
public static Dictionary <Pair <NEC, Hla>, bool> ReadTable(HlaFactory hlaFactory, string fileName, bool dedup)
{
    Dictionary<Pair<NEC, Hla>, bool> table = new Dictionary<Pair<NEC, Hla>, bool>();

    foreach (Dictionary<string, string> row in SpecialFunctions.TabFileTable(fileName, "N\tepitope\tC\thla\tlabel", false))
    {
        string n = row["N"];
        string epitope = row["epitope"];
        SpecialFunctions.CheckCondition(Biology.GetInstance().LegalPeptide(epitope), string.Format("Peptide, '{0}', contains illegal char.", epitope));
        string c = row["C"];

        NEC nec = NEC.GetInstance(n, epitope, c);
        Hla hla = hlaFactory.GetGroundInstance(row["hla"]);

        string labelString = row["label"];
        SpecialFunctions.CheckCondition(labelString == "0" || labelString == "1", "Expect label to be '0' or '1'");

        Pair<NEC, Hla> pair = new Pair<NEC, Hla>(nec, hla);
        bool labelAsBool = (labelString == "1");

        // One hash lookup instead of ContainsKey followed by the indexer.
        bool existingLabel;
        if (dedup && table.TryGetValue(pair, out existingLabel))
        {
            SpecialFunctions.CheckCondition(existingLabel == labelAsBool, "The example " + pair.ToString() + " appears with contradictory labels.");
            continue;
        }

        table.Add(pair, labelAsBool);
    }

    return table;
}
//!!!very similar to other code
/// <summary>
/// Reads a tab-delimited file of epitopes and returns a map from epitope to its
/// "differenceCount" column.
/// </summary>
/// <param name="fileName">
/// File handed to SpecialFunctions.TabFileTable with the header
/// "epitope\tdifferenceCount\tid\tstartAABase1". The "id" and "startAABase1" columns are part
/// of the header but are not read here.
/// </param>
/// <returns>Map from epitope string to its difference count.</returns>
/// <remarks>
/// Side effect: raises the static MaxMaxMismatches to the largest difference count seen.
/// A repeated epitope reaches Dictionary.Add, which throws ArgumentException.
/// A non-integer "differenceCount" makes int.Parse throw FormatException (or OverflowException).
/// </remarks>
public static Dictionary <string, int> ReadTable(string fileName)
{
    Dictionary<string, int> table = new Dictionary<string, int>();

    foreach (Dictionary<string, string> row in SpecialFunctions.TabFileTable(fileName, "epitope\tdifferenceCount\tid\tstartAABase1", false))
    {
        string epitope = row["epitope"];
        SpecialFunctions.CheckCondition(Biology.GetInstance().LegalPeptide(epitope), string.Format("Peptide, '{0}', contains illegal char.", epitope));

        // The file is machine-readable data, so parse with the invariant culture rather than
        // whatever culture the current thread happens to have.
        int differenceCount = int.Parse(row["differenceCount"], System.Globalization.CultureInfo.InvariantCulture);

        MaxMaxMismatches = Math.Max(MaxMaxMismatches, differenceCount);
        table.Add(epitope, differenceCount);
    }

    return table;
}
//static private Dictionary<Pair<NEC, Hla>, bool> CloseHuman = null;
//GeneratorType.Hla | GeneratorType.Position | GeneratorType.Property | GeneratorType.AndHla | GeneratorType.Zero6Supertype | GeneratorType.AndZero6Supertype
/// <summary>
/// Builds the feature set for a single (NEC, Hla) example.
/// The result starts from the HLA / supertype features and then, depending on the flags,
/// gains "Ei" features, flank features, epitope adjacent-pair features and
/// flank/epitope junction features.
/// </summary>
/// <param name="entity">The example; it is cast directly to Pair{NEC, Hla}, so any other runtime type fails the cast.</param>
/// <param name="supertypeTableSource">Passed through unchanged to CreateAndAddFeatureSupertype.</param>
/// <param name="flankSizeOrNull">
/// When null the NEC from <paramref name="entity"/> is used as-is; otherwise it is replaced by
/// NEC.GetInstance(originalNec, flankSize) (presumably a copy with flanks cut to that size -- confirm).
/// The junction features at the end are only produced when this is non-null and greater than zero.
/// </param>
/// <param name="includeFlankNECFeatures">Gates every flank, epitope-pair and junction feature below.</param>
/// <param name="includeChemicalProperties">Enables the property-based (InProperty / property SubSeq) variants.</param>
/// <param name="includeAAFeatures">Enables the amino-acid-based (In / SubSeq) variants.</param>
/// <param name="addEiFeatures">When true, AddEiFeatures is called before any flank features are added.</param>
/// <returns>The accumulated feature set.</returns>
private static Set <IHashableFeature> GenerateFeatureSet(
    object entity,
    string supertypeTableSource,
    int?flankSizeOrNull,
    bool includeFlankNECFeatures,
    bool includeChemicalProperties,
    bool includeAAFeatures,
    bool addEiFeatures
    )
{
    // Hard-coded switches: all four are fixed to false in this version of the method.
    bool includeAndHlaAndSTWithEpitopeAdjFeatures = false;
    bool subtractSupertypeFeatures = false;
    bool subtractHlaFeatures = false;
    bool substractChemAACrissCrossFeatures = false;

    // With the switch above fixed to false this condition is always true; it only matters if that switch is ever turned on.
    SpecialFunctions.CheckCondition(!includeAndHlaAndSTWithEpitopeAdjFeatures || includeFlankNECFeatures);

    // Unpack the example and, when a flank size is given, rebuild the NEC with that flank size.
    Pair <NEC, Hla> necAndHlaX = (Pair <NEC, Hla>)entity;
    NEC nec = (null == flankSizeOrNull) ? necAndHlaX.First : NEC.GetInstance(necAndHlaX.First, (int)flankSizeOrNull);
    Hla hla = necAndHlaX.Second;
    Debug.Assert(nec.N.Length == nec.C.Length); // real assert

    // From here on, necAndHla (not the raw entity) is the example most features are generated from.
    Pair <NEC, Hla> necAndHla = new Pair <NEC, Hla>(nec, hla);

    // HLA-ish features are collected separately because they are also handed to AddEiFeatures and
    // AddFeatureWithOptionalAndHlaAndST below.
    Set <IHashableFeature> hlaishFeatureSet = new Set <IHashableFeature>();
    CreateAndAddHlaFeature(subtractHlaFeatures, hla, necAndHla, ref hlaishFeatureSet);
    CreateAndAddFeatureSupertype(supertypeTableSource, subtractSupertypeFeatures, hla, necAndHla, ref hlaishFeatureSet, Assembly.GetExecutingAssembly(), Predictor.ResourceString);

    // The result set is seeded from the HLA-ish set (presumably a copy -- confirm Set.GetInstance semantics).
    Set <IHashableFeature> featureSet = Set <IHashableFeature> .GetInstance(hlaishFeatureSet);

    if (addEiFeatures)
    {
        AddEiFeatures(includeChemicalProperties, includeAAFeatures, substractChemAACrissCrossFeatures, nec, necAndHla, hlaishFeatureSet, featureSet);
    }

    // ---- Flank features (N flank and C flank) ----
    if (includeFlankNECFeatures)
    {
        // Built even when includeAAFeatures is false, because the debug check below always consumes it.
        List <IHashableFeature> aaInNFlankFeatureList = new List <IHashableFeature>(In.GetAASeqInRegionInstance(1, necAndHla, NFlank.GetInstance()));
        DebugCheckThatEvaluatesToTrue(necAndHla, aaInNFlankFeatureList);

        if (includeAAFeatures)
        {
            // NOTE(review): AddNewRange (as opposed to AddNewOrOldRange used further down) presumably
            // requires every element to be new to the set -- confirm before reordering these calls.
            featureSet.AddNewRange(aaInNFlankFeatureList); //AA in N flank
            featureSet.AddNewRange(In.GetAASeqInRegionInstance(2, necAndHla, NFlank.GetInstance())); //AA1-AA2 in Nflank
            featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(NFlank.GetInstance(), false, 1, necAndHla)); //AA@x in N flank (numbering is 5 4 3 2 1)
            featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(NFlank.GetInstance(), false, 2, necAndHla)); //AA1-AA2@x in Nflank (x is position of AA2, i.e., the smaller number)
            featureSet.AddNewRange(In.GetAASeqInRegionInstance(1, necAndHla, CFlank.GetInstance())); //AA in Cflank
            featureSet.AddNewRange(In.GetAASeqInRegionInstance(2, necAndHla, CFlank.GetInstance())); //AA1-AA2 in Cflank
            featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(CFlank.GetInstance(), true, 1, necAndHla)); //AA@x in C flank (numbering is 1 2 3 4 5)
            featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(CFlank.GetInstance(), true, 2, necAndHla)); //AA1-AA2@x in Cflank (x is position of AA1, i.e., the smaller number)
        }

        if (includeChemicalProperties)
        {
            // Property-based counterparts of the AA features above: length-1 sequences first, then length-2,
            // each for the N flank and the C flank, both region-wide and position-specific.
            featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(1, necAndHla, NFlank.GetInstance()));
            featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(NFlank.GetInstance(), false, 1, necAndHla));
            featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(1, necAndHla, CFlank.GetInstance()));
            featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(CFlank.GetInstance(), true, 1, necAndHla));
            featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, NFlank.GetInstance()));
            featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(NFlank.GetInstance(), false, 2, necAndHla));
            featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, CFlank.GetInstance()));
            featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(CFlank.GetInstance(), true, 2, necAndHla));
        }
    }

    // ---- Epitope adjacent-pair features and flank/epitope junction features ----
    if (includeFlankNECFeatures)
    {
        if (includeAAFeatures)
        {
            //EV in Epitope
            AddFeatureWithOptionalAndHlaAndST(In.GetAASeqInRegionInstance(2, necAndHla, Epitope.GetInstance()), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);//AA1-AA2 in Epitope
            //RR in Epitope[@1-2]
            AddFeatureWithOptionalAndHlaAndST(SubSeq.GetInSubSeqEnumeration(Epitope.GetInstance(), true, 2, necAndHla), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);//AA1-AA2@x in Epitope (x is position of AA1, i.e., the smaller number)
        }

        if (includeChemicalProperties)
        {
            //polar,cyclic in Epitope
            AddFeatureWithOptionalAndHlaAndST(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, Epitope.GetInstance()), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);
            //polar,large in Epitope[@8-9]
            AddFeatureWithOptionalAndHlaAndST(SubSeq.GetInPropertySubSeqEnumeration(Epitope.GetInstance(), true, 2, necAndHla), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);
        }

        //AA1-AA2 in Nflank,Epitope, etc
        // Junction features pair the residue on each side of the N-flank/epitope boundary and of the
        // epitope/C-flank boundary. They are skipped when no positive flank size was requested.
        if (null != flankSizeOrNull && (int)flankSizeOrNull > 0)
        {
            // NOTE(review): the Evaluate calls below run against the raw `entity`, while the asserts
            // further down evaluate against the resized `necAndHla` -- confirm the two always agree.
            string epitope = (string)Epitope.GetInstance().Evaluate(entity);

            // Last residue of the N flank.
            SubSeq lastNAAFeature = SubSeq.GetInstance(1, 1, false, NFlank.GetInstance());
            string lastNAA = (string)lastNAAFeature.Evaluate(entity);
            In inLastNAA = In.GetInstance(lastNAA, lastNAAFeature);

            // First residue of the epitope.
            SubSeq firstEAAFeature = SubSeq.GetInstance(1, 1, true, Epitope.GetInstance());
            string firstEAA = (string)firstEAAFeature.Evaluate(entity);
            Debug.Assert(firstEAA == epitope.Substring(0, 1));// real assert
            In inFirstEAA = In.GetInstance(firstEAA, firstEAAFeature);

            // Last residue of the epitope (position epitope.Length, counted from the start).
            SubSeq lastEAAFeature = SubSeq.GetInstance(epitope.Length, epitope.Length, true, Epitope.GetInstance());
            string lastEAA = (string)lastEAAFeature.Evaluate(entity);
            In inLastEAA = In.GetInstance(lastEAA, lastEAAFeature);

            // First residue of the C flank.
            SubSeq firstCAAFeature = SubSeq.GetInstance(1, 1, true, CFlank.GetInstance());
            string firstCAA = (string)firstCAAFeature.Evaluate(entity);
            In inFirstCAA = In.GetInstance(firstCAA, firstCAAFeature);

            if (includeAAFeatures)
            {
                // Amino-acid junction pairs; these two are added with checkThatNew = true.
                And andLastNNAAFirstEAA = And.GetInstance(inLastNAA, inFirstEAA);
                AddFeatureWithOptionalAndHlaAndST(andLastNNAAFirstEAA, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ true, ref featureSet);
                And andLastEAAFirstCAA = And.GetInstance(inLastEAA, inFirstCAA);
                AddFeatureWithOptionalAndHlaAndST(andLastEAAFirstCAA, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ true, ref featureSet);
            }

            if (includeChemicalProperties)
            {
                // Property junction pairs: every property of the residue on one side of the boundary is
                // combined with every property of the residue on the other side. The [0] indexers take
                // the first character of each one-residue string, so they fail on an empty string.
                foreach (string lastNProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[lastNAA[0]]])
                {
                    InProperty inLastNProperty = InProperty.GetInstance(lastNProperty, lastNAAFeature);
                    foreach (string firstEProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[firstEAA[0]]])
                    {
                        InProperty inFirstEProperty = InProperty.GetInstance(firstEProperty, firstEAAFeature); //!!!get this out of the loop?
                        And andLastNPropertyFirstEProperty = And.GetInstance(inLastNProperty, inFirstEProperty);
                        AddFeatureWithOptionalAndHlaAndST(andLastNPropertyFirstEProperty, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ false, ref featureSet);
                        Debug.Assert((bool)andLastNPropertyFirstEProperty.Evaluate(necAndHla));
                    }
                }

                // Same construction for the epitope / C-flank boundary.
                foreach (string lastEProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[lastEAA[0]]])
                {
                    InProperty inlastEProperty = InProperty.GetInstance(lastEProperty, lastEAAFeature);
                    foreach (string firstCProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[firstCAA[0]]])
                    {
                        InProperty infirstCProperty = InProperty.GetInstance(firstCProperty, firstCAAFeature); //!!!get this out of the loop?
                        And andlastEPropertyfirstCProperty = And.GetInstance(inlastEProperty, infirstCProperty);
                        AddFeatureWithOptionalAndHlaAndST(andlastEPropertyfirstCProperty, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ false, ref featureSet);
                        Debug.Assert((bool)andlastEPropertyfirstCProperty.Evaluate(necAndHla));
                    }
                }
            }
        }
    }

    return(featureSet);
}
/// <summary>
/// Parses an aaSeq string into a list with one character set per sequence position.
/// A bare amino-acid letter, '?' or '-' contributes the set returned by Set{char}.GetInstance(ch);
/// a brace group such as "{AV}" contributes a single set holding every letter inside the braces.
/// </summary>
/// <param name="aaSeqAsString">The sequence text, e.g. one-letter amino-acid codes with optional "{...}" groups.</param>
/// <returns>The per-position sets, in input order.</returns>
/// <remarks>
/// Every rule is enforced through SpecialFunctions.CheckCondition: no nested or unclosed braces,
/// no empty groups, no blanks, no '?' or '-' inside a group, and every other character must be a
/// known amino-acid abbreviation.
/// </remarks>
private static List <Set <char> > CreateSequence(string aaSeqAsString)
{
    List<Set<char>> positions = new List<Set<char>>();

    // Non-null only while we are between a '{' and its matching '}'.
    Set<char> openGroup = null;

    foreach (char symbol in aaSeqAsString)
    {
        if (symbol == '{')
        {
            SpecialFunctions.CheckCondition(openGroup == null, "Nested '{''s are not allowed in aaSeq strings");
            openGroup = new Set<char>();
            // The group is registered immediately and filled in as its members are read.
            positions.Add(openGroup);
        }
        else if (symbol == '}')
        {
            SpecialFunctions.CheckCondition(openGroup != null, "'}' must follow a '{' in aaSeq strings");
            SpecialFunctions.CheckCondition(openGroup.Count > 0, "Empty sets not allow in aaSeq strings");
            openGroup = null;
        }
        else if (symbol == ' ')
        {
            SpecialFunctions.CheckCondition(false, "Sequences should not contain blanks. Use '?' for missing.");
        }
        else if (symbol == '?' || symbol == '-')
        {
            SpecialFunctions.CheckCondition(openGroup == null, string.Format("'{0}' must not appear in sets", symbol));
            positions.Add(Set<char>.GetInstance(symbol));
        }
        else
        {
            //!!!move this to Biology?
            SpecialFunctions.CheckCondition(Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter.ContainsKey(symbol), string.Format("The character {0} is not an amino acid", symbol));
            string threeLetterName = Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[symbol];
            SpecialFunctions.CheckCondition(Biology.GetInstance().KnownAminoAcid(threeLetterName), string.Format("The character {0} is not a standard amino acid", symbol));

            if (openGroup != null)
            {
                openGroup.AddNew(symbol);
            }
            else
            {
                positions.Add(Set<char>.GetInstance(symbol));
            }
        }
    }

    SpecialFunctions.CheckCondition(openGroup == null, "Missing '}' in aaSeq string");
    return positions;
}