//!!!very similar to other code
        public static Dictionary <Pair <NEC, Hla>, bool> ReadTable(HlaFactory hlaFactory, string fileName, bool dedup)
        {
            Dictionary <Pair <NEC, Hla>, bool> table = new Dictionary <Pair <NEC, Hla>, bool>();

            foreach (Dictionary <string, string> row in SpecialFunctions.TabFileTable(fileName, "N\tepitope\tC\thla\tlabel", false))
            {
                string n       = row["N"];
                string epitope = row["epitope"];
                SpecialFunctions.CheckCondition(Biology.GetInstance().LegalPeptide(epitope), string.Format("Peptide, '{0}', contains illegal char.", epitope));
                string c           = row["C"];
                NEC    nec         = NEC.GetInstance(n, epitope, c);
                Hla    hla         = hlaFactory.GetGroundInstance(row["hla"]);
                string labelString = row["label"];
                SpecialFunctions.CheckCondition(labelString == "0" || labelString == "1", "Expect label to be '0' or '1'");
                Pair <NEC, Hla> pair        = new Pair <NEC, Hla>(nec, hla);
                bool            labelAsBool = (labelString == "1");
                if (dedup && table.ContainsKey(pair))
                {
                    SpecialFunctions.CheckCondition(table[pair] == labelAsBool, "The example " + pair.ToString() + " appears with contradictory labels.");
                    continue;
                }
                table.Add(pair, labelAsBool);
            }
            return(table);
        }
        //!!!very similar to other code
        public static Dictionary <string, int> ReadTable(string fileName)
        {
            Dictionary <string, int> table = new Dictionary <string, int>();

            foreach (Dictionary <string, string> row in SpecialFunctions.TabFileTable(fileName, "epitope\tdifferenceCount\tid\tstartAABase1", false))
            {
                string epitope = row["epitope"];
                SpecialFunctions.CheckCondition(Biology.GetInstance().LegalPeptide(epitope), string.Format("Peptide, '{0}', contains illegal char.", epitope));
                int differenceCount = int.Parse(row["differenceCount"]);
                MaxMaxMismatches = Math.Max(MaxMaxMismatches, differenceCount);
                table.Add(epitope, differenceCount);
            }
            return(table);
        }
        //static private Dictionary<Pair<NEC, Hla>, bool> CloseHuman = null;

        //GeneratorType.Hla | GeneratorType.Position | GeneratorType.Property | GeneratorType.AndHla | GeneratorType.Zero6Supertype | GeneratorType.AndZero6Supertype
        private static Set <IHashableFeature> GenerateFeatureSet(
            object entity, string supertypeTableSource,
            int?flankSizeOrNull,
            bool includeFlankNECFeatures,
            bool includeChemicalProperties, bool includeAAFeatures,
            bool addEiFeatures
            )
        {
            bool includeAndHlaAndSTWithEpitopeAdjFeatures = false;
            bool subtractSupertypeFeatures         = false;
            bool subtractHlaFeatures               = false;
            bool substractChemAACrissCrossFeatures = false;


            SpecialFunctions.CheckCondition(!includeAndHlaAndSTWithEpitopeAdjFeatures || includeFlankNECFeatures);

            Pair <NEC, Hla> necAndHlaX = (Pair <NEC, Hla>)entity;
            NEC             nec        = (null == flankSizeOrNull) ? necAndHlaX.First : NEC.GetInstance(necAndHlaX.First, (int)flankSizeOrNull);
            Hla             hla        = necAndHlaX.Second;

            Debug.Assert(nec.N.Length == nec.C.Length); // real assert
            Pair <NEC, Hla> necAndHla = new Pair <NEC, Hla>(nec, hla);

            Set <IHashableFeature> hlaishFeatureSet = new Set <IHashableFeature>();

            CreateAndAddHlaFeature(subtractHlaFeatures, hla, necAndHla, ref hlaishFeatureSet);
            CreateAndAddFeatureSupertype(supertypeTableSource, subtractSupertypeFeatures, hla, necAndHla, ref hlaishFeatureSet, Assembly.GetExecutingAssembly(), Predictor.ResourceString);

            Set <IHashableFeature> featureSet = Set <IHashableFeature> .GetInstance(hlaishFeatureSet);

            if (addEiFeatures)
            {
                AddEiFeatures(includeChemicalProperties, includeAAFeatures, substractChemAACrissCrossFeatures, nec, necAndHla, hlaishFeatureSet, featureSet);
            }



            if (includeFlankNECFeatures)
            {
                List <IHashableFeature> aaInNFlankFeatureList = new List <IHashableFeature>(In.GetAASeqInRegionInstance(1, necAndHla, NFlank.GetInstance()));
                DebugCheckThatEvaluatesToTrue(necAndHla, aaInNFlankFeatureList);



                if (includeAAFeatures)
                {
                    featureSet.AddNewRange(aaInNFlankFeatureList);                                                    //AA in N flank
                    featureSet.AddNewRange(In.GetAASeqInRegionInstance(2, necAndHla, NFlank.GetInstance()));          //AA1-AA2 in Nflank
                    featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(NFlank.GetInstance(), false, 1, necAndHla)); //AA@x in N flank (numbering is 5 4 3 2 1)
                    featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(NFlank.GetInstance(), false, 2, necAndHla)); //AA1-AA2@x in Nflank (x is position of AA2, i.e., the smaller number)


                    featureSet.AddNewRange(In.GetAASeqInRegionInstance(1, necAndHla, CFlank.GetInstance()));         //AA in Cflank
                    featureSet.AddNewRange(In.GetAASeqInRegionInstance(2, necAndHla, CFlank.GetInstance()));         //AA1-AA2 in Cflank
                    featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(CFlank.GetInstance(), true, 1, necAndHla)); //AA@x in C flank (numbering is 1 2 3 4 5)
                    featureSet.AddNewRange(SubSeq.GetInSubSeqEnumeration(CFlank.GetInstance(), true, 2, necAndHla)); //AA1-AA2@x in Cflank (x is position of AA1, i.e., the smaller number)
                }

                if (includeChemicalProperties)
                {
                    featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(1, necAndHla, NFlank.GetInstance()));
                    featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(NFlank.GetInstance(), false, 1, necAndHla));
                    featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(1, necAndHla, CFlank.GetInstance()));
                    featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(CFlank.GetInstance(), true, 1, necAndHla));
                    featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, NFlank.GetInstance()));
                    featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(NFlank.GetInstance(), false, 2, necAndHla));
                    featureSet.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, CFlank.GetInstance()));
                    featureSet.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(CFlank.GetInstance(), true, 2, necAndHla));
                }
            }
            if (includeFlankNECFeatures)
            {
                if (includeAAFeatures)
                {
                    //EV in Epitope
                    AddFeatureWithOptionalAndHlaAndST(In.GetAASeqInRegionInstance(2, necAndHla, Epitope.GetInstance()), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);//AA1-AA2 in Epitope

                    //RR in Epitope[@1-2]
                    AddFeatureWithOptionalAndHlaAndST(SubSeq.GetInSubSeqEnumeration(Epitope.GetInstance(), true, 2, necAndHla), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);//AA1-AA2@x in Epitope (x is position of AA1, i.e., the smaller number)
                }
                if (includeChemicalProperties)
                {
                    //polar,cyclic in Epitope
                    AddFeatureWithOptionalAndHlaAndST(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, Epitope.GetInstance()), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);
                    //polar,large in Epitope[@8-9]
                    AddFeatureWithOptionalAndHlaAndST(SubSeq.GetInPropertySubSeqEnumeration(Epitope.GetInstance(), true, 2, necAndHla), includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, false, ref featureSet);
                }

                //AA1-AA2 in Nflank,Epitope, etc
                if (null != flankSizeOrNull && (int)flankSizeOrNull > 0)
                {
                    string epitope = (string)Epitope.GetInstance().Evaluate(entity);

                    SubSeq lastNAAFeature = SubSeq.GetInstance(1, 1, false, NFlank.GetInstance());
                    string lastNAA        = (string)lastNAAFeature.Evaluate(entity);
                    In     inLastNAA      = In.GetInstance(lastNAA, lastNAAFeature);

                    SubSeq firstEAAFeature = SubSeq.GetInstance(1, 1, true, Epitope.GetInstance());
                    string firstEAA        = (string)firstEAAFeature.Evaluate(entity);
                    Debug.Assert(firstEAA == epitope.Substring(0, 1));// real assert
                    In inFirstEAA = In.GetInstance(firstEAA, firstEAAFeature);

                    SubSeq lastEAAFeature = SubSeq.GetInstance(epitope.Length, epitope.Length, true, Epitope.GetInstance());
                    string lastEAA        = (string)lastEAAFeature.Evaluate(entity);
                    In     inLastEAA      = In.GetInstance(lastEAA, lastEAAFeature);

                    SubSeq firstCAAFeature = SubSeq.GetInstance(1, 1, true, CFlank.GetInstance());
                    string firstCAA        = (string)firstCAAFeature.Evaluate(entity);
                    In     inFirstCAA      = In.GetInstance(firstCAA, firstCAAFeature);

                    if (includeAAFeatures)
                    {
                        And andLastNNAAFirstEAA = And.GetInstance(inLastNAA, inFirstEAA);
                        AddFeatureWithOptionalAndHlaAndST(andLastNNAAFirstEAA, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ true, ref featureSet);

                        And andLastEAAFirstCAA = And.GetInstance(inLastEAA, inFirstCAA);
                        AddFeatureWithOptionalAndHlaAndST(andLastEAAFirstCAA, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ true, ref featureSet);
                    }

                    if (includeChemicalProperties)
                    {
                        foreach (string lastNProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[lastNAA[0]]])
                        {
                            InProperty inLastNProperty = InProperty.GetInstance(lastNProperty, lastNAAFeature);

                            foreach (string firstEProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[firstEAA[0]]])
                            {
                                InProperty inFirstEProperty = InProperty.GetInstance(firstEProperty, firstEAAFeature); //!!!get this out of the loop?
                                And        andLastNPropertyFirstEProperty = And.GetInstance(inLastNProperty, inFirstEProperty);
                                AddFeatureWithOptionalAndHlaAndST(andLastNPropertyFirstEProperty, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ false, ref featureSet);
                                Debug.Assert((bool)andLastNPropertyFirstEProperty.Evaluate(necAndHla));
                            }
                        }
                        foreach (string lastEProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[lastEAA[0]]])
                        {
                            InProperty inlastEProperty = InProperty.GetInstance(lastEProperty, lastEAAFeature);

                            foreach (string firstCProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[firstCAA[0]]])
                            {
                                InProperty infirstCProperty = InProperty.GetInstance(firstCProperty, firstCAAFeature); //!!!get this out of the loop?
                                And        andlastEPropertyfirstCProperty = And.GetInstance(inlastEProperty, infirstCProperty);
                                AddFeatureWithOptionalAndHlaAndST(andlastEPropertyfirstCProperty, includeAndHlaAndSTWithEpitopeAdjFeatures, hlaishFeatureSet, /*checkThatNew*/ false, ref featureSet);
                                Debug.Assert((bool)andlastEPropertyfirstCProperty.Evaluate(necAndHla));
                            }
                        }
                    }
                }
            }

            return(featureSet);
        }
Beispiel #4
0
        private static List <Set <char> > CreateSequence(string aaSeqAsString)
        {
            List <Set <char> > sequence = new List <Set <char> >();

            Set <char> set = null;

            foreach (char ch in aaSeqAsString)
            {
                switch (ch)
                {
                case '{':
                {
                    SpecialFunctions.CheckCondition(set == null, "Nested '{''s are not allowed in aaSeq strings");
                    set = new Set <char>();
                    sequence.Add(set);
                    break;
                }

                case '}':
                {
                    SpecialFunctions.CheckCondition(set != null, "'}' must follow a '{' in aaSeq strings");
                    SpecialFunctions.CheckCondition(set.Count > 0, "Empty sets not allow in aaSeq strings");
                    set = null;
                    break;
                }

                case ' ':
                {
                    SpecialFunctions.CheckCondition(false, "Sequences should not contain blanks. Use '?' for missing.");
                    break;
                }

                case '?':
                case '-':
                {
                    SpecialFunctions.CheckCondition(set == null, string.Format("'{0}' must not appear in sets", ch));
                    sequence.Add(Set <char> .GetInstance(ch));
                    break;
                }

                default:
                {
                    //!!!most this to Biology?
                    SpecialFunctions.CheckCondition(Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter.ContainsKey(ch),
                                                    string.Format("The character {0} is not an amino acid", ch));
                    string aminoAcid = Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[ch];
                    SpecialFunctions.CheckCondition(Biology.GetInstance().KnownAminoAcid(aminoAcid),
                                                    string.Format("The character {0} is not a standard amino acid", ch));
                    if (set == null)
                    {
                        sequence.Add(Set <char> .GetInstance(ch));
                    }
                    else
                    {
                        set.AddNew(ch);
                    }
                    break;
                }
                }
            }
            SpecialFunctions.CheckCondition(set == null, "Missing '}' in aaSeq string");
            return(sequence);
        }