/// <summary>
/// Builds an NEC around the window of <paramref name="protein"/> that starts at
/// <paramref name="position"/> and is as long as <paramref name="mer"/>.
/// A flank that would extend past either end of the protein is set to null.
/// </summary>
/// <param name="flankingSize">Number of residues wanted on each side of the epitope.</param>
/// <param name="mer">Only its length is used; the epitope text is taken from the protein.</param>
/// <param name="protein">Sequence the epitope and flanks are cut from.</param>
/// <param name="position">0-based start of the epitope inside the protein.</param>
/// <returns>The NEC whose N and/or C may be null.</returns>
public static NEC GetInstanceWithPossibleNulls(int flankingSize, string mer, string protein, int position)
{
    string epitope = protein.Substring(position, mer.Length);

    // Left flank: null when the match sits too close to the start of the protein.
    int leftStart = position - flankingSize;
    string leftFlank = (leftStart < 0)
        ? null
        : protein.Substring(leftStart, flankingSize);

    // Right flank: null when the match sits too close to the end of the protein.
    int rightStart = position + mer.Length;
    string rightFlank = (rightStart + flankingSize > protein.Length)
        ? null
        : protein.Substring(rightStart, flankingSize);

    return NEC.GetInstance(leftFlank, epitope, rightFlank);
}
/// <summary>
/// Yields a single And(AnyIn, AnyIn) feature when the N-flank region contains at least one
/// character from <paramref name="nflankAny"/> and the NTermE region contains at least one
/// character from <paramref name="nTermEAny"/>; yields nothing otherwise.
/// </summary>
public static IEnumerable<IHashableFeature> GetNFlankAny1AndNTermEAny2(NEC nec, string nflankAny, string nTermEAny)
{
    NFlank nFlank = NFlank.GetInstance();
    string nFlankText = nFlank.FeatureFunction(nec);
    if (nFlankText.IndexOfAny(nflankAny.ToCharArray()) != -1)
    {
        // The NTermE region is only looked at once the N-flank test has passed.
        NTermE nTermE = NTermE.GetInstance();
        string nTermEText = nTermE.FeatureFunction(nec);
        if (nTermEText.IndexOfAny(nTermEAny.ToCharArray()) != -1)
        {
            AnyIn anyInNFlank = AnyIn.GetInstance(nflankAny, nFlank);
            AnyIn anyInNTermE = AnyIn.GetInstance(nTermEAny, nTermE);
            And conjunction = And.GetInstance(anyInNFlank, anyInNTermE);
            Debug.Assert((bool)conjunction.Evaluate(nec)); // real assert - must only generate true features
            yield return conjunction;
        }
    }
}
/// <summary>
/// Creates a copy of <paramref name="input"/> whose flanks are trimmed to
/// <paramref name="flankSize"/> residues: the last residues of N and the first residues of C
/// (i.e. the ones adjacent to the epitope). E is passed through unchanged.
/// </summary>
/// <param name="input">NEC whose N and C are each at least <paramref name="flankSize"/> long.</param>
/// <param name="flankSize">Length of each flank in the result.</param>
/// <returns>The trimmed NEC.</returns>
internal static NEC GetInstance(NEC input, int flankSize)
{
    // The check requires the existing flanks to be at least as long as the requested size,
    // so the message is worded to match (the previous wording stated the inequality backwards).
    SpecialFunctions.CheckCondition(
        input.N.Length >= flankSize && input.C.Length >= flankSize,
        "The new flank size should not be larger than the input flank size");

    // Drop the leading surplus of N so that its trailing flankSize residues remain.
    string trimmedN = input.N.Remove(0, input.N.Length - flankSize);
    // Keep the leading flankSize residues of C.
    string trimmedC = input.C.Substring(0, flankSize);

    return NEC.GetInstance(trimmedN, input.E, trimmedC);
}
/// <summary>
/// Creates an NEC whose N, E and C have the same lengths as those of
/// <paramref name="sample"/>, each produced by RandomAAString from
/// <paramref name="aaToCount"/>.
/// </summary>
public static NEC GetRandomInstance(NEC sample, Dictionary<char, int> aaToCount, ref Random random)
{
    // Generated in N, E, C order; every call is handed the same Random by reference.
    string randomN = RandomAAString(aaToCount, sample.N.Length, ref random);
    string randomE = RandomAAString(aaToCount, sample.E.Length, ref random);
    string randomC = RandomAAString(aaToCount, sample.C.Length, ref random);

    NEC randomNec = NEC.GetInstance(randomN, randomE, randomC);
    return randomNec;
}
/// <summary>
/// Looks up the entry of OneLetterAminoAcidAbbrevTo3Letter for the character at 0-based
/// index <paramref name="i"/> of the epitope (nec.E) and checks that it is not null.
/// </summary>
private static string GetAminoAcidFromEpitopePosition(NEC nec, int i)
{
    char oneLetterCode = nec.E[i];
    string threeLetterCode = VirusCount.Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[oneLetterCode];
    SpecialFunctions.CheckCondition(threeLetterCode != null);
    return threeLetterCode;
}
/// <summary>
/// Splits <paramref name="peptide"/> into consecutive N, E and C pieces of the given lengths
/// and wraps them in an NEC. The three lengths must add up to the peptide length.
/// </summary>
public static NEC GetInstance(string peptide, int nLength, int eLength, int cLength)
{
    SpecialFunctions.CheckCondition(peptide.Length == nLength + eLength + cLength, "NEC.GetInstance lengths are wrong");

    int epitopeStart = nLength;
    int cFlankStart = nLength + eLength;

    string nPiece = peptide.Substring(0, nLength);
    string ePiece = peptide.Substring(epitopeStart, eLength);
    string cPiece = peptide.Substring(cFlankStart, cLength);

    NEC result = GetInstance(nPiece, ePiece, cPiece);
    Debug.Assert(result.N + result.E + result.C == peptide); // real assert
    return result;
}
/// <summary>
/// Builds an NEC whose epitope is <paramref name="mer"/> and whose N and C flanks are whatever
/// FindConsensusOrNull returns for the N and C values of <paramref name="necSet"/>
/// (either flank may therefore be null).
/// </summary>
public static NEC GetConsensusInstanceWithPossibleNullFlanks(string mer, Set<NEC> necSet, ref Random random)
{
    DebugCheckMerSameLengthAsEInSet(mer, necSet);

    // N is resolved before C; both calls share the same Random by reference.
    string consensusN = FindConsensusOrNull(necSet, GetN, ref random);
    string consensusC = FindConsensusOrNull(necSet, GetC, ref random);

    return NEC.GetInstance(consensusN, mer, consensusC);
}
/// <summary>
/// Creates an NEC from its three parts and caches their comma-delimited string form
/// in _asString.
/// </summary>
public static NEC GetInstance(string n, string e, string c)
{
    NEC instance = new NEC();
    instance.N = n;
    instance.E = e;
    instance.C = c;

    string delimited = SpecialFunctions.CreateDelimitedString(",", n, e, c);
    instance._asString = delimited;
    return instance;
}
/// <summary>
/// Returns the epitope (E) of the given entity.
/// The parameter is untyped so that both an NEC and a Pair&lt;NEC, Hla&gt; can be passed;
/// anything that is not an NEC is cast to the pair and its First is used.
/// </summary>
public string FeatureFunction(object entity)
{
    NEC direct = entity as NEC;
    if (null == direct)
    {
        // Not an NEC: treat it as a pair (the cast throws if it is neither).
        return ((Pair<NEC, Hla>)entity).First.E;
    }
    return direct.E;
}
/// <summary>
/// Two NECs are equal when their cached string forms (_asString) are equal.
/// Anything that is not an NEC, including null, is unequal.
/// </summary>
public override bool Equals(object obj)
{
    NEC that = obj as NEC;
    if (that == null)
    {
        return false;
    }
    return that._asString.Equals(_asString);
}
/// <summary>
/// Parses a comma-separated "N,E,C" string into an NEC. Exactly three fields are required;
/// a field whose text is the literal word null becomes a null reference.
/// </summary>
public static NEC GetInstance(string necString)
{
    string[] parts = necString.Split(',');
    SpecialFunctions.CheckCondition(parts.Length == 3);

    for (int index = 0; index < parts.Length; ++index)
    {
        // Map the textual marker back to a real null; leave every other field as is.
        parts[index] = (parts[index] == "null") ? null : parts[index];
    }

    return NEC.GetInstance(parts[0], parts[1], parts[2]);
}
/// <summary>
/// Evaluates <paramref name="regionFeature"/> on the pair to get a region string, enumerates
/// its length-<paramref name="k"/> substrings (collected into a Set), and for every property
/// combination of each substring yields an InProperty feature on that region.
/// </summary>
public static IEnumerable<IHashableFeature> GetPropertySeqInRegionInstance(int k, Pair<NEC, Hla> necAndHla, IHashableFeature regionFeature)
{
    // Not used below; kept so that First is still read exactly as before.
    NEC nec = necAndHla.First;

    Feature regionAsFeature = (Feature)regionFeature;
    string regionText = (string)regionAsFeature.Evaluate(necAndHla);
    Set<string> kmerSet = Set<string>.GetInstance(SpecialFunctions.SubstringEnumeration(regionText, k));

    foreach (string kmer in kmerSet)
    {
        foreach (List<string> combination in KmerProperties.EveryPropertyCombination(kmer))
        {
            string joinedProperties = SpecialFunctions.Join(",", combination);
            InProperty propertyFeature = InProperty.GetInstance(joinedProperties, regionFeature);
            Debug.Assert((bool)propertyFeature.Evaluate(necAndHla)); // real assert - must only generate true features
            yield return propertyFeature;
        }
    }
}
/// <summary>
/// Returns the first two characters of the epitope (nec.E).
/// </summary>
public string FeatureFunction(NEC nec)
{
    const int prefixLength = 2;
    return nec.E.Substring(0, prefixLength);
}
/// <summary>
/// For each length-1 substring of the N-flank region (collected into a Set), yields
/// And(In(aa, NFlank), Not(In(aa, NTermE))) when the Not part evaluates to true for
/// <paramref name="nec"/>.
/// </summary>
public static IEnumerable<IHashableFeature> GetAndNFlankNotNTermEEnumeration(NEC nec)
{
    NFlank nFlank = NFlank.GetInstance();
    string nFlankText = nFlank.FeatureFunction(nec);
    Set<string> residueSet = Set<string>.GetInstance(SpecialFunctions.SubstringEnumeration(nFlankText, 1));

    foreach (string residue in residueSet)
    {
        Not notInNTermE = Not.GetInstance(In.GetInstance(residue, NTermE.GetInstance()));

        // Only generate the feature when the amino acid is not in NTermE
        if (!(bool)notInNTermE.Evaluate(nec))
        {
            continue;
        }

        In inNFlank = In.GetInstance(residue, nFlank);
        And conjunction = And.GetInstance(inNFlank, notInNTermE);
        Debug.Assert((bool)conjunction.Evaluate(nec)); // real assert - must only generate true features
        yield return conjunction;
    }
}
/// <summary>
/// Accessor for the N part of <paramref name="nec"/>, in method form.
/// </summary>
public static string GetN(NEC nec)
{
    string nPart = nec.N;
    return nPart;
}
/// <summary>
/// Returns the last character of the epitope (nec.E) as a one-character string.
/// </summary>
public string FeatureFunction(NEC nec)
{
    string epitope = nec.E;
    int lastIndex = epitope.Length - 1;
    return epitope.Substring(lastIndex);
}
/// <summary>
/// Accessor for the E part of <paramref name="nec"/>, in method form.
/// </summary>
public static string GetE(NEC nec)
{
    string ePart = nec.E;
    return ePart;
}
/// <summary>
/// Adds per-position epitope features to <paramref name="featureSet"/>.
/// For every position of nec.E (numbered from 1) this adds, depending on the flags:
/// an IsAA feature for the residue plus its And-combination with each feature in
/// <paramref name="hlaishFeatureSet"/>, and a HasAAProp feature for each property of the
/// residue plus its And-combination with each feature in <paramref name="hlaishFeatureSet"/>.
/// </summary>
/// <param name="includeChemicalProperties">Generate the HasAAProp features.</param>
/// <param name="includeAAFeatures">Generate the IsAA features.</param>
/// <param name="substractChemAACrissCrossFeatures">
/// When true, IsAA features are not combined with HasAAProp members of the hlaish set,
/// and HasAAProp features are not combined with IsAA members of it.
/// </param>
/// <param name="nec">Source of the epitope.</param>
/// <param name="necAndHla">Entity the debug assertions evaluate each new feature against.</param>
/// <param name="hlaishFeatureSet">Features to And with each generated feature.</param>
/// <param name="featureSet">Receives every generated feature via AddNew.</param>
private static void AddEiFeatures(bool includeChemicalProperties, bool includeAAFeatures, bool substractChemAACrissCrossFeatures, NEC nec, Pair<NEC, Hla> necAndHla, Set<IHashableFeature> hlaishFeatureSet, Set<IHashableFeature> featureSet)
{
    for (int position = 1; position <= nec.E.Length; ++position)
    {
        IHashableFeature positionFeature = E.GetInstance(position);
        string aminoAcid = GetAminoAcidFromEpitopePosition(nec, position - 1);

        if (includeAAFeatures)
        {
            IsAA isAminoAcid = IsAA.GetInstance(aminoAcid, positionFeature);
            featureSet.AddNew(isAminoAcid);
            Debug.Assert((bool)isAminoAcid.Evaluate(necAndHla)); // real assert - must only generate true features

            foreach (IHashableFeature hlaish in hlaishFeatureSet)
            {
                bool isCrissCross = substractChemAACrissCrossFeatures && hlaish is HasAAProp;
                if (!isCrissCross)
                {
                    And hlaishAndAminoAcid = And.GetInstance(hlaish, isAminoAcid);
                    featureSet.AddNew(hlaishAndAminoAcid);
                    Debug.Assert((bool)hlaishAndAminoAcid.Evaluate(necAndHla)); // real assert - must only generate true features
                }
            }
        }

        // All of the above with AA replaced by chemical property of AA
        if (includeChemicalProperties)
        {
            foreach (string property in VirusCount.KmerProperties.AaToPropList[aminoAcid])
            {
                HasAAProp hasProperty = HasAAProp.GetInstance(property, positionFeature);
                featureSet.AddNew(hasProperty);
                Debug.Assert((bool)hasProperty.Evaluate(necAndHla)); // real assert - must only generate true features

                foreach (IHashableFeature hlaish in hlaishFeatureSet)
                {
                    bool isCrissCross = substractChemAACrissCrossFeatures && hlaish is IsAA;
                    if (!isCrissCross)
                    {
                        And hlaishAndProperty = And.GetInstance(hlaish, hasProperty);
                        featureSet.AddNew(hlaishAndProperty);
                        Debug.Assert((bool)hlaishAndProperty.Evaluate(necAndHla)); // real assert - must only generate true features
                    }
                }
            }
        }
    }
}
//static private Dictionary<Pair<NEC, Hla>, bool> CloseHuman = null;

//GeneratorType.Hla | GeneratorType.Position | GeneratorType.Property | GeneratorType.AndHla | GeneratorType.Zero6Supertype | GeneratorType.AndZero6Supertype
/// <summary>
/// Builds the feature set for one (NEC, Hla) entity.
/// The set starts from the features added by CreateAndAddHlaFeature and
/// CreateAndAddFeatureSupertype and is then extended, according to the flags, with
/// per-position epitope features, flank features, epitope pair features, and features
/// for the two flank/epitope junctions.
/// </summary>
/// <param name="entity">Must be a Pair&lt;NEC, Hla&gt;; it is cast unconditionally.</param>
/// <param name="supertypeTableSource">Passed through to CreateAndAddFeatureSupertype.</param>
/// <param name="flankSizeOrNull">
/// When set, the NEC is re-created with flanks of this size before features are generated;
/// the junction features are only generated when it is set and greater than zero.
/// </param>
/// <param name="includeFlankNECFeatures">Enables the flank, epitope-pair and junction features.</param>
/// <param name="includeChemicalProperties">Enables the property-based variants.</param>
/// <param name="includeAAFeatures">Enables the amino-acid-based variants.</param>
/// <param name="addEiFeatures">Enables the per-position epitope features (AddEiFeatures).</param>
/// <returns>The populated feature set.</returns>
private static Set<IHashableFeature> GenerateFeatureSet(
    object entity,
    string supertypeTableSource,
    int? flankSizeOrNull,
    bool includeFlankNECFeatures,
    bool includeChemicalProperties,
    bool includeAAFeatures,
    bool addEiFeatures)
{
    // Switches that are currently fixed to false.
    bool includeAndHlaAndSTWithEpitopeAdjFeatures = false;
    bool subtractSupertypeFeatures = false;
    bool subtractHlaFeatures = false;
    bool substractChemAACrissCrossFeatures = false;

    SpecialFunctions.CheckCondition(!includeAndHlaAndSTWithEpitopeAdjFeatures || includeFlankNECFeatures);

    // Unpack the entity and, if requested, re-create the NEC with the requested flank size.
    Pair<NEC, Hla> originalPair = (Pair<NEC, Hla>)entity;
    NEC nec = originalPair.First;
    if (flankSizeOrNull.HasValue)
    {
        nec = NEC.GetInstance(nec, flankSizeOrNull.Value);
    }
    Hla hla = originalPair.Second;
    Debug.Assert(nec.N.Length == nec.C.Length); // real assert
    Pair<NEC, Hla> necAndHla = new Pair<NEC, Hla>(nec, hla);

    // HLA and supertype features; these are also what later features may be combined with.
    Set<IHashableFeature> hlaishFeatureSet = new Set<IHashableFeature>();
    CreateAndAddHlaFeature(subtractHlaFeatures, hla, necAndHla, ref hlaishFeatureSet);
    CreateAndAddFeatureSupertype(supertypeTableSource, subtractSupertypeFeatures, hla, necAndHla, ref hlaishFeatureSet, Assembly.GetExecutingAssembly(), Predictor.ResourceString);

    Set<IHashableFeature> featureSet = Set<IHashableFeature>.GetInstance(hlaishFeatureSet);

    if (addEiFeatures)
    {
        AddEiFeatures(includeChemicalProperties, includeAAFeatures, substractChemAACrissCrossFeatures, nec, necAndHla, hlaishFeatureSet, featureSet);
    }

    // Everything below is flank/NEC related.
    if (!includeFlankNECFeatures)
    {
        return featureSet;
    }

    // ---- Flank features ----
    List<IHashableFeature> aaInNFlankFeatureList = new List<IHashableFeature>(In.GetAASeqInRegionInstance(1, necAndHla, NFlank.GetInstance()));
    DebugCheckThatEvaluatesToTrue(necAndHla, aaInNFlankFeatureList);

    if (includeAAFeatures)
    {
        // AA in N flank
        featureSet.AddNewRange(aaInNFlankFeatureList);
        // AA1-AA2 in Nflank
        featureSet.AddNewRange(In.GetAASeqInRegionInstance(2, necAndHla, NFlank.GetInstance()));
        // AA@x in N flank (numbering is 5 4 3 2 1)
        featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(NFlank.GetInstance(), false, 1, necAndHla));
        // AA1-AA2@x in Nflank (x is position of AA2, i.e., the smaller number)
        featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(NFlank.GetInstance(), false, 2, necAndHla));
        // AA in Cflank
        featureSet.AddNewRange(In.GetAASeqInRegionInstance(1, necAndHla, CFlank.GetInstance()));
        // AA1-AA2 in Cflank
        featureSet.AddNewRange(In.GetAASeqInRegionInstance(2, necAndHla, CFlank.GetInstance()));
        // AA@x in C flank (numbering is 1 2 3 4 5)
        featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(CFlank.GetInstance(), true, 1, necAndHla));
        // AA1-AA2@x in Cflank (x is position of AA1, i.e., the smaller number)
        featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(CFlank.GetInstance(), true, 2, necAndHla));
    }

    if (includeChemicalProperties)
    {
        // Property variants: length 1 for both flanks first, then length 2.
        featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(1, necAndHla, NFlank.GetInstance()));
        featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(NFlank.GetInstance(), false, 1, necAndHla));
        featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(1, necAndHla, CFlank.GetInstance()));
        featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(CFlank.GetInstance(), true, 1, necAndHla));

        featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, NFlank.GetInstance()));
        featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(NFlank.GetInstance(), false, 2, necAndHla));
        featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, CFlank.GetInstance()));
        featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(CFlank.GetInstance(), true, 2, necAndHla));
    }

    // ---- Pair features inside the epitope ----
    if (includeAAFeatures)
    {
        // EV in Epitope (AA1-AA2 in Epitope)
        AddFeatureWithOptionalAndHlaAndST(In.GetAASeqInRegionInstance(2, necAndHla, Epitope.GetInstance()), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);

        // RR in Epitope[@1-2] (AA1-AA2@x in Epitope; x is position of AA1, i.e., the smaller number)
        AddFeatureWithOptionalAndHlaAndST(SubSeq.GetInSubSeqEnumeration(Epitope.GetInstance(), true, 2, necAndHla), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);
    }

    if (includeChemicalProperties)
    {
        // polar,cyclic in Epitope
        AddFeatureWithOptionalAndHlaAndST(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, Epitope.GetInstance()), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);

        // polar,large in Epitope[@8-9]
        AddFeatureWithOptionalAndHlaAndST(SubSeq.GetInPropertySubSeqEnumeration(Epitope.GetInstance(), true, 2, necAndHla), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);
    }

    // ---- Junction features: AA1-AA2 in Nflank,Epitope, etc ----
    bool hasPositiveFlankSize = flankSizeOrNull.HasValue && flankSizeOrNull.Value > 0;
    if (!hasPositiveFlankSize)
    {
        return featureSet;
    }

    // The region values in this section are evaluated against the caller's original entity,
    // not the re-created necAndHla pair.
    string epitope = (string)Epitope.GetInstance().Evaluate(entity);

    SubSeq lastNAAFeature = SubSeq.GetInstance(1, 1, false, NFlank.GetInstance());
    string lastNAA = (string)lastNAAFeature.Evaluate(entity);
    In inLastNAA = In.GetInstance(lastNAA, lastNAAFeature);

    SubSeq firstEAAFeature = SubSeq.GetInstance(1, 1, true, Epitope.GetInstance());
    string firstEAA = (string)firstEAAFeature.Evaluate(entity);
    Debug.Assert(firstEAA == epitope.Substring(0, 1)); // real assert
    In inFirstEAA = In.GetInstance(firstEAA, firstEAAFeature);

    SubSeq lastEAAFeature = SubSeq.GetInstance(epitope.Length, epitope.Length, true, Epitope.GetInstance());
    string lastEAA = (string)lastEAAFeature.Evaluate(entity);
    In inLastEAA = In.GetInstance(lastEAA, lastEAAFeature);

    SubSeq firstCAAFeature = SubSeq.GetInstance(1, 1, true, CFlank.GetInstance());
    string firstCAA = (string)firstCAAFeature.Evaluate(entity);
    In inFirstCAA = In.GetInstance(firstCAA, firstCAAFeature);

    if (includeAAFeatures)
    {
        And andLastNAAFirstEAA = And.GetInstance(inLastNAA, inFirstEAA);
        AddFeatureWithOptionalAndHlaAndST(andLastNAAFirstEAA, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ true, ref featureSet);

        And andLastEAAFirstCAA = And.GetInstance(inLastEAA, inFirstCAA);
        AddFeatureWithOptionalAndHlaAndST(andLastEAAFirstCAA, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ true, ref featureSet);
    }

    if (includeChemicalProperties)
    {
        // N-flank / epitope junction: every property of the last N residue paired with
        // every property of the first epitope residue.
        foreach (string lastNProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[lastNAA[0]]])
        {
            InProperty inLastNProperty = InProperty.GetInstance(lastNProperty, lastNAAFeature);
            foreach (string firstEProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[firstEAA[0]]])
            {
                //!!!get this out of the loop?
                InProperty inFirstEProperty = InProperty.GetInstance(firstEProperty, firstEAAFeature);
                And andLastNPropertyFirstEProperty = And.GetInstance(inLastNProperty, inFirstEProperty);
                AddFeatureWithOptionalAndHlaAndST(andLastNPropertyFirstEProperty, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ false, ref featureSet);
                Debug.Assert((bool)andLastNPropertyFirstEProperty.Evaluate(necAndHla));
            }
        }

        // Epitope / C-flank junction: every property of the last epitope residue paired with
        // every property of the first C residue.
        foreach (string lastEProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[lastEAA[0]]])
        {
            InProperty inLastEProperty = InProperty.GetInstance(lastEProperty, lastEAAFeature);
            foreach (string firstCProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[firstCAA[0]]])
            {
                //!!!get this out of the loop?
                InProperty inFirstCProperty = InProperty.GetInstance(firstCProperty, firstCAAFeature);
                And andLastEPropertyFirstCProperty = And.GetInstance(inLastEProperty, inFirstCProperty);
                AddFeatureWithOptionalAndHlaAndST(andLastEPropertyFirstCProperty, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ false, ref featureSet);
                Debug.Assert((bool)andLastEPropertyFirstCProperty.Evaluate(necAndHla));
            }
        }
    }

    return featureSet;
}
/// <summary>
/// Tries to build an NEC for an epitope located in a consensus sequence, taking five
/// non-gap residues on each side as the N and C flanks. Gap characters ('-') in the
/// consensus are skipped while collecting the flanks.
/// </summary>
/// <param name="epitopeString">Epitope text; becomes E of the result.</param>
/// <param name="aa1PosStart">1-based position of the first epitope residue in the consensus.</param>
/// <param name="aa1PosLast">1-based position of the last epitope residue in the consensus.</param>
/// <param name="aaSeqConsensus">Consensus sequence the flanks are read from.</param>
/// <param name="nec">The created NEC, or null when a flank cannot be filled.</param>
/// <returns>
/// True on success; false (after writing a warning to the console) when either end of the
/// consensus is reached before the flank on that side is complete.
/// </returns>
public static bool TryCreateInstance(string epitopeString, int aa1PosStart, int aa1PosLast, AASeq aaSeqConsensus, out NEC nec)
{
    AASeq aaSeqSubConsensus = aaSeqConsensus.SubSeqAA1Pos(aa1PosStart, epitopeString.Length);
    Debug.Assert(aaSeqSubConsensus.OriginalAA1Position(0) == aa1PosStart.ToString()); // real assert

    string consensusAsString = aaSeqConsensus.ToString();
    int flankingSize = 5;

    // N flank: pre-sized buffer that is filled from back to front while walking left.
    StringBuilder sbN = new StringBuilder(new string(' ', flankingSize));
    int sbNIndex = flankingSize - 1;
    for (int aa1Pos = aa1PosStart - 1; ; --aa1Pos)
    {
        if (aa1Pos < 1)
        {
            // Walking left fills the N region (the message used to name the c region here).
            Console.WriteLine("Warning: for epitope {0}, the epitope position is too close to the left of the protein to create a n region of length {1}. Skipping epitope", epitopeString, flankingSize);
            nec = null;
            return false;
        }

        char aa = consensusAsString[aa1Pos - 1];
        if (aa == '-')
        {
            continue; // gaps do not count towards the flank
        }

        sbN[sbNIndex] = aa;
        --sbNIndex;
        if (sbNIndex < 0)
        {
            break;
        }
    }

    // C flank: appended in order while walking right.
    StringBuilder sbC = new StringBuilder();
    for (int aa1Pos = aa1PosLast + 1; ; ++aa1Pos)
    {
        if (aa1Pos > consensusAsString.Length)
        {
            // Walking right fills the C region (the message used to name the n region here).
            Console.WriteLine("Warning: for epitope {0}, the epitope position is too close to the right of the protein to create a c region of length {1}. Skipping epitope", epitopeString, flankingSize);
            nec = null;
            return false;
        }

        char aa = consensusAsString[aa1Pos - 1];
        if (aa == '-')
        {
            continue; // gaps do not count towards the flank
        }

        sbC.Append(aa);
        if (sbC.Length == flankingSize)
        {
            break;
        }
    }

    nec = NEC.GetInstance(sbN.ToString(), epitopeString, sbC.ToString());
    return true;
}
/// <summary>
/// Accessor for the C part of <paramref name="nec"/>, in method form.
/// </summary>
public static string GetC(NEC nec)
{
    string cPart = nec.C;
    return cPart;
}