/// <summary>
        /// Lazily yields at most one feature: an <c>And</c> of two <c>AnyIn</c> features, one built from
        /// <paramref name="nflankAny"/> over the NFlank feature and one built from
        /// <paramref name="nTermEAny"/> over the NTermE feature.
        /// Nothing is yielded unless the NFlank region of <paramref name="nec"/> contains at least one
        /// character of <paramref name="nflankAny"/> and its NTermE region contains at least one
        /// character of <paramref name="nTermEAny"/>.
        /// </summary>
        /// <param name="nec">Entity whose NFlank and NTermE regions are inspected.</param>
        /// <param name="nflankAny">Characters searched for in the NFlank region.</param>
        /// <param name="nTermEAny">Characters searched for in the NTermE region.</param>
        /// <returns>An empty sequence, or a sequence holding the single combined feature.</returns>
        public static IEnumerable <IHashableFeature> GetNFlankAny1AndNTermEAny2(NEC nec, string nflankAny, string nTermEAny)
        {
            // First half: bail out before touching NTermE when the flank has none of the characters.
            NFlank flank     = NFlank.GetInstance();
            string flankText = flank.FeatureFunction(nec);
            bool   flankHit  = flankText.IndexOfAny(nflankAny.ToCharArray()) >= 0;
            if (!flankHit)
            {
                yield break;
            }

            // Second half: same membership test on the NTermE region.
            NTermE nTerm     = NTermE.GetInstance();
            string nTermText = nTerm.FeatureFunction(nec);
            bool   nTermHit  = nTermText.IndexOfAny(nTermEAny.ToCharArray()) >= 0;
            if (!nTermHit)
            {
                yield break;
            }

            And combined = And.GetInstance(
                AnyIn.GetInstance(nflankAny, flank),
                AnyIn.GetInstance(nTermEAny, nTerm));

            Debug.Assert((bool)combined.Evaluate(nec)); // real assert - only features that evaluate to true may be produced
            yield return(combined);
        }
 /// <summary>
 /// Adds <paramref name="feature"/> to <paramref name="featureSet"/> and, when
 /// <paramref name="includeAndHlaAndST"/> is true, additionally adds one <c>And</c> of every element
 /// of <paramref name="hlaishEnumeration"/> with that feature.
 /// </summary>
 /// <param name="feature">The feature to add.</param>
 /// <param name="includeAndHlaAndST">Whether the conjunctions with <paramref name="hlaishEnumeration"/> are added too.</param>
 /// <param name="hlaishEnumeration">Features to conjoin with <paramref name="feature"/>; only enumerated when <paramref name="includeAndHlaAndST"/> is true.</param>
 /// <param name="checkThatNew">Forwarded unchanged as the second argument of every <c>featureSet.Add</c> call.</param>
 /// <param name="featureSet">Set that receives the features. It is passed by reference but never reassigned here.</param>
 private static void AddFeatureWithOptionalAndHlaAndST(IHashableFeature feature, bool includeAndHlaAndST, IEnumerable <IHashableFeature> hlaishEnumeration, bool checkThatNew, ref Set <IHashableFeature> featureSet)
 {
     // The bare feature is always added.
     featureSet.Add(feature, checkThatNew);

     if (!includeAndHlaAndST)
     {
         return;
     }

     foreach (IHashableFeature hlaish in hlaishEnumeration)
     {
         featureSet.Add(And.GetInstance(hlaish, feature), checkThatNew);
     }
 }
        /// <summary>
        /// Walks every epitope position of <paramref name="nec"/> and adds to
        /// <paramref name="featureSet"/> the amino-acid feature (<c>IsAA</c>) and/or the
        /// chemical-property features (<c>HasAAProp</c>) for that position, each one on its own and
        /// And-ed with the members of <paramref name="hlaishFeatureSet"/>.
        /// </summary>
        /// <param name="includeChemicalProperties">When true, <c>HasAAProp</c> features are generated.</param>
        /// <param name="includeAAFeatures">When true, <c>IsAA</c> features are generated.</param>
        /// <param name="substractChemAACrissCrossFeatures">
        /// When true, an <c>IsAA</c> feature is not conjoined with <c>HasAAProp</c> members of
        /// <paramref name="hlaishFeatureSet"/>, and a <c>HasAAProp</c> feature is not conjoined with
        /// <c>IsAA</c> members.
        /// </param>
        /// <param name="nec">Supplies the epitope (<c>nec.E</c>) whose positions are visited.</param>
        /// <param name="necAndHla">Only used in debug assertions that each generated feature evaluates to true.</param>
        /// <param name="hlaishFeatureSet">Features to conjoin with each per-position feature.</param>
        /// <param name="featureSet">Receives every generated feature through <c>AddNew</c>.</param>
        private static void AddEiFeatures(bool includeChemicalProperties, bool includeAAFeatures, bool substractChemAACrissCrossFeatures, NEC nec, Pair <NEC, Hla> necAndHla, Set <IHashableFeature> hlaishFeatureSet, Set <IHashableFeature> featureSet)
        {
            // The E feature is numbered from 1; the amino-acid lookup takes the matching 0-based index.
            for (int position = 1; position <= nec.E.Length; ++position)
            {
                IHashableFeature positionFeature = E.GetInstance(position);
                string           residue         = GetAminoAcidFromEpitopePosition(nec, position - 1);

                if (includeAAFeatures)
                {
                    IsAA isResidue = IsAA.GetInstance(residue, positionFeature);
                    featureSet.AddNew(isResidue);
                    Debug.Assert((bool)isResidue.Evaluate(necAndHla)); // real assert - only true features may be generated

                    foreach (IHashableFeature hlaish in hlaishFeatureSet)
                    {
                        bool skipCrissCross = substractChemAACrissCrossFeatures && hlaish is HasAAProp;
                        if (!skipCrissCross)
                        {
                            And hlaishAndResidue = And.GetInstance(hlaish, isResidue);
                            featureSet.AddNew(hlaishAndResidue);
                            Debug.Assert((bool)hlaishAndResidue.Evaluate(necAndHla)); // real assert - only true features may be generated
                        }
                    }
                }

                if (includeChemicalProperties)
                {
                    foreach (string propertyName in VirusCount.KmerProperties.AaToPropList[residue])
                    {
                        HasAAProp hasProperty = HasAAProp.GetInstance(propertyName, positionFeature);
                        featureSet.AddNew(hasProperty);
                        Debug.Assert((bool)hasProperty.Evaluate(necAndHla)); // real assert - only true features may be generated

                        foreach (IHashableFeature hlaish in hlaishFeatureSet)
                        {
                            bool skipCrissCross = substractChemAACrissCrossFeatures && hlaish is IsAA;
                            if (!skipCrissCross)
                            {
                                And hlaishAndProperty = And.GetInstance(hlaish, hasProperty);
                                featureSet.AddNew(hlaishAndProperty);
                                Debug.Assert((bool)hlaishAndProperty.Evaluate(necAndHla)); // real assert - only true features may be generated
                            }
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Lazily yields, for each distinct sequence obtained from the NFlank region of
        /// <paramref name="nec"/>, an <c>And</c> of "sequence In NFlank" with "Not (sequence In NTermE)",
        /// but only for sequences whose Not-feature evaluates to true on <paramref name="nec"/>.
        /// </summary>
        /// <param name="nec">Entity whose NFlank region supplies the candidate sequences.</param>
        /// <returns>The combined features, one per qualifying sequence.</returns>
        public static IEnumerable <IHashableFeature> GetAndNFlankNotNTermEEnumeration(NEC nec)
        {
            NFlank flank     = NFlank.GetInstance();
            string flankText = flank.FeatureFunction(nec);

            // Collect the candidates into a Set first.
            // presumably SubstringEnumeration(text, 1) yields the length-1 substrings - TODO confirm
            Set <string> candidates = Set <string> .GetInstance(SpecialFunctions.SubstringEnumeration(flankText, 1));

            foreach (string candidate in candidates)
            {
                Not notInNTermE = Not.GetInstance(In.GetInstance(candidate, NTermE.GetInstance()));

                // Skip candidates that do occur in NTermE: the conjunction would not hold for them.
                if (!(bool)notInNTermE.Evaluate(nec))
                {
                    continue;
                }

                In  inFlank  = In.GetInstance(candidate, flank);
                And combined = And.GetInstance(inFlank, notInNTermE);
                Debug.Assert((bool)combined.Evaluate(nec)); // real assert - only true features may be generated
                yield return(combined);
            }
        }
 /// <summary>
 /// Creates the keep-test selected by the prefix of <paramref name="keepTestName"/>. The text after
 /// the prefix, where a test takes one, is passed on as that test's argument. Names that match none
 /// of the prefixes handled here are delegated to <c>KeepTest&lt;TRow&gt;.GetInstance</c>.
 /// </summary>
 /// <remarks>
 /// Prefixes are compared ordinally, so the <c>Substring(Prefix.Length)</c> calls always strip
 /// exactly the characters that were matched. The checks run top to bottom and the first match wins,
 /// so the order must be kept wherever one prefix could start with another.
 /// NOTE(review): KeepSpecificRows is tested before KeepSpecificRow, presumably for that reason -
 /// the Prefix values are not visible here, confirm before reordering.
 /// </remarks>
 /// <param name="inputDirectory">Forwarded to the fallback factory only.</param>
 /// <param name="binarySeqFileName">Forwarded to the fallback factory only.</param>
 /// <param name="hlaFileName">Not used by this method; see the note on the fallback call.</param>
 /// <param name="keepTestName">Name of the test, starting with one of the known prefixes.</param>
 /// <param name="merSize">Forwarded to the fallback factory only.</param>
 /// <param name="pidToCaseName">Forwarded to the fallback factory only.</param>
 /// <returns>The keep-test instance described by <paramref name="keepTestName"/>.</returns>
 /// <exception cref="System.FormatException">
 /// The argument following a KeepEndOfGag prefix is not a boolean, or the one following a K1 prefix
 /// is not an integer.
 /// </exception>
 new public static KeepTest <Dictionary <string, string> > GetInstance(string inputDirectory,
                                                                        string binarySeqFileName, string hlaFileName, string keepTestName, int merSize, Dictionary <int, string> pidToCaseName)
 {
     //!!!would be nice if classes could parse themselves

     // Test names are identifiers, not linguistic text: match them ordinally.
     const System.StringComparison ordinal = System.StringComparison.Ordinal;
     const string jointGagPolPrefix        = "JointGagPolTest";

     if (keepTestName.StartsWith(KeepEndOfGag.Prefix, ordinal))
     {
         bool keepIt = bool.Parse(keepTestName.Substring(KeepEndOfGag.Prefix.Length));
         return(KeepEndOfGag.GetInstance(keepIt));
     }
     if (keepTestName.StartsWith(K1.Prefix, ordinal))
     {
         // Machine-readable number: parse it independently of the current culture.
         int k1 = int.Parse(keepTestName.Substring(K1.Prefix.Length), System.Globalization.CultureInfo.InvariantCulture);
         return(K1.GetInstance(k1));
     }
     // (A K2 test used to be handled at this point; it is currently disabled.)
     if (keepTestName.StartsWith(KeepNonOverlappingAA.Prefix, ordinal))
     {
         return(KeepNonOverlappingAA.GetInstance());
     }
     if (keepTestName.StartsWith(KeepGene.Prefix, ordinal))
     {
         string geneRange = keepTestName.Substring(KeepGene.Prefix.Length);
         return(KeepGene.GetInstance(geneRange));
     }
     if (keepTestName.StartsWith(KeepSpecificRows.Prefix, ordinal))
     {
         return(KeepSpecificRows.GetInstance(keepTestName.Substring(KeepSpecificRows.Prefix.Length)));
     }
     if (keepTestName.StartsWith(KeepSpecificRow.Prefix, ordinal))
     {
         return(KeepSpecificRow.GetInstance(keepTestName.Substring(KeepSpecificRow.Prefix.Length)));
     }
     if (keepTestName.StartsWith(KeepSpecificGenes.Prefix, ordinal))
     {
         return(KeepSpecificGenes.GetInstance(keepTestName.Substring(KeepSpecificGenes.Prefix.Length)));
     }
     if (keepTestName.StartsWith(KeepOneOfAAPair.Prefix, ordinal))
     {
         return(KeepOneOfAAPair.GetInstance());
     }
     if (keepTestName.StartsWith(KeepAllButSamePosition.Prefix, ordinal))
     {
         return(KeepAllButSamePosition.GetInstance());
     }
     if (keepTestName.StartsWith(KeepAllButSameDeletion.Prefix, ordinal))
     {
         return(KeepAllButSameDeletion.GetInstance());
     }
     if (keepTestName.StartsWith(KeepNonTrivialRows.Prefix, ordinal))
     {
         return(new KeepNonTrivialRows());
     }
     if (keepTestName.StartsWith(KeepTestTemp.Prefix, ordinal))
     {
         return(KeepTestTemp.GetInstance());
     }
     // (KeepPollockOneDirection and KeepFisherOneDirection used to be handled at this point; both are currently disabled.)
     if (keepTestName.StartsWith(KeepNonRare.Prefix, ordinal))
     {
         return(KeepNonRare.GetInstance(keepTestName.Substring(KeepNonRare.Prefix.Length)));
     }
     if (keepTestName.StartsWith(KeepPredictorTargetPairs.Prefix, ordinal))
     {
         return(KeepPredictorTargetPairs.GetInstance(keepTestName.Substring(KeepPredictorTargetPairs.Prefix.Length)));
     }
     if (keepTestName.StartsWith(jointGagPolPrefix, ordinal))
     {
         // Composite test: all three component tests And-ed together; the text after the
         // prefix is the argument of the KeepSpecificGenes component.
         return(And <Dictionary <string, string> > .GetInstance(
                    //KeepRandom<Dictionary<string,string>>.GetInstance(0, 0.001), // how do we make it the same when we count and when we really run through it?
                    KeepOneOfAAPair.GetInstance(),
                    KeepNonOverlappingAA.GetInstance(),
                    KeepSpecificGenes.GetInstance(keepTestName.Substring(jointGagPolPrefix.Length))));
     }

     // Unknown here: let the generic factory decide.
     // NOTE(review): null is forwarded in place of hlaFileName, so the caller's value is dropped - confirm this is intended.
     return(KeepTest <TRow> .GetInstance(inputDirectory, binarySeqFileName, null, keepTestName, merSize, pidToCaseName));
 }
        //static private Dictionary<Pair<NEC, Hla>, bool> CloseHuman = null;

        //GeneratorType.Hla | GeneratorType.Position | GeneratorType.Property | GeneratorType.AndHla | GeneratorType.Zero6Supertype | GeneratorType.AndZero6Supertype
        /// <summary>
        /// Builds the feature set for one (NEC, Hla) pair: the HLA and supertype features, optionally
        /// the per-epitope-position features, and optionally the flank, epitope and flank/epitope
        /// junction features.
        /// </summary>
        /// <param name="entity">Must be a <c>Pair&lt;NEC, Hla&gt;</c>; it is cast directly.</param>
        /// <param name="supertypeTableSource">Forwarded to <c>CreateAndAddFeatureSupertype</c>.</param>
        /// <param name="flankSizeOrNull">
        /// When non-null, the NEC is re-derived through <c>NEC.GetInstance(nec, size)</c>; a value
        /// greater than zero also enables the junction features.
        /// </param>
        /// <param name="includeFlankNECFeatures">Enables every flank, epitope-pair and junction feature.</param>
        /// <param name="includeChemicalProperties">Enables the property-based variants of the features.</param>
        /// <param name="includeAAFeatures">Enables the amino-acid-based variants of the features.</param>
        /// <param name="addEiFeatures">Enables the per-position features added by <c>AddEiFeatures</c>.</param>
        /// <returns>The populated feature set.</returns>
        private static Set <IHashableFeature> GenerateFeatureSet(
            object entity, string supertypeTableSource,
            int?flankSizeOrNull,
            bool includeFlankNECFeatures,
            bool includeChemicalProperties, bool includeAAFeatures,
            bool addEiFeatures
            )
        {
            // Options that are currently hard-wired off.
            bool andHlaishWithEpitopeAdjFeatures = false;
            bool dropSupertypeFeatures           = false;
            bool dropHlaFeatures                 = false;
            bool dropChemAACrissCrossFeatures    = false;

            SpecialFunctions.CheckCondition(includeFlankNECFeatures || !andHlaishWithEpitopeAdjFeatures);

            Pair <NEC, Hla> sourcePair = (Pair <NEC, Hla>)entity;
            NEC             nec;
            if (flankSizeOrNull.HasValue)
            {
                nec = NEC.GetInstance(sourcePair.First, flankSizeOrNull.Value);
            }
            else
            {
                nec = sourcePair.First;
            }
            Hla hla = sourcePair.Second;

            Debug.Assert(nec.N.Length == nec.C.Length); // real assert
            Pair <NEC, Hla> necAndHla = new Pair <NEC, Hla>(nec, hla);

            // HLA-like features: kept separately so they can be conjoined with other features later.
            Set <IHashableFeature> hlaishFeatures = new Set <IHashableFeature>();
            CreateAndAddHlaFeature(dropHlaFeatures, hla, necAndHla, ref hlaishFeatures);
            CreateAndAddFeatureSupertype(supertypeTableSource, dropSupertypeFeatures, hla, necAndHla, ref hlaishFeatures, Assembly.GetExecutingAssembly(), Predictor.ResourceString);

            // The result is seeded from the HLA-like features.
            Set <IHashableFeature> features = Set <IHashableFeature> .GetInstance(hlaishFeatures);

            if (addEiFeatures)
            {
                AddEiFeatures(includeChemicalProperties, includeAAFeatures, dropChemAACrissCrossFeatures, nec, necAndHla, hlaishFeatures, features);
            }

            // Everything below is a flank/epitope feature.
            if (!includeFlankNECFeatures)
            {
                return(features);
            }

            // ---- Flank features ----
            List <IHashableFeature> singleAAInNFlank = new List <IHashableFeature>(In.GetAASeqInRegionInstance(1, necAndHla, NFlank.GetInstance()));
            DebugCheckThatEvaluatesToTrue(necAndHla, singleAAInNFlank);

            if (includeAAFeatures)
            {
                // N flank
                features.AddNewRange(singleAAInNFlank);                                                         // AA in N flank
                features.AddNewRange(In.GetAASeqInRegionInstance(2, necAndHla, NFlank.GetInstance()));          // AA1-AA2 in N flank
                features.AddNewRange(SubSeq.GetInSubSeqEnumeration(NFlank.GetInstance(), false, 1, necAndHla)); // AA@x in N flank (numbering is 5 4 3 2 1)
                features.AddNewRange(SubSeq.GetInSubSeqEnumeration(NFlank.GetInstance(), false, 2, necAndHla)); // AA1-AA2@x in N flank (x is position of AA2, i.e., the smaller number)

                // C flank
                features.AddNewRange(In.GetAASeqInRegionInstance(1, necAndHla, CFlank.GetInstance()));         // AA in C flank
                features.AddNewRange(In.GetAASeqInRegionInstance(2, necAndHla, CFlank.GetInstance()));         // AA1-AA2 in C flank
                features.AddNewRange(SubSeq.GetInSubSeqEnumeration(CFlank.GetInstance(), true, 1, necAndHla)); // AA@x in C flank (numbering is 1 2 3 4 5)
                features.AddNewRange(SubSeq.GetInSubSeqEnumeration(CFlank.GetInstance(), true, 2, necAndHla)); // AA1-AA2@x in C flank (x is position of AA1, i.e., the smaller number)
            }

            if (includeChemicalProperties)
            {
                // Property analogues of the above, length 1 first and then length 2;
                // these are added with AddNewOrOldRange rather than AddNewRange.
                features.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(1, necAndHla, NFlank.GetInstance()));
                features.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(NFlank.GetInstance(), false, 1, necAndHla));
                features.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(1, necAndHla, CFlank.GetInstance()));
                features.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(CFlank.GetInstance(), true, 1, necAndHla));
                features.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, NFlank.GetInstance()));
                features.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(NFlank.GetInstance(), false, 2, necAndHla));
                features.AddNewOrOldRange(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, CFlank.GetInstance()));
                features.AddNewOrOldRange(SubSeq.GetInPropertySubSeqEnumeration(CFlank.GetInstance(), true, 2, necAndHla));
            }

            // ---- Pair features inside the epitope ----
            if (includeAAFeatures)
            {
                // e.g. "EV in Epitope": AA1-AA2 in Epitope
                AddFeatureWithOptionalAndHlaAndST(In.GetAASeqInRegionInstance(2, necAndHla, Epitope.GetInstance()), andHlaishWithEpitopeAdjFeatures, hlaishFeatures, false, ref features);

                // e.g. "RR in Epitope[@1-2]": AA1-AA2@x in Epitope (x is position of AA1, i.e., the smaller number)
                AddFeatureWithOptionalAndHlaAndST(SubSeq.GetInSubSeqEnumeration(Epitope.GetInstance(), true, 2, necAndHla), andHlaishWithEpitopeAdjFeatures, hlaishFeatures, false, ref features);
            }
            if (includeChemicalProperties)
            {
                // e.g. "polar,cyclic in Epitope"
                AddFeatureWithOptionalAndHlaAndST(InProperty.GetPropertySeqInRegionInstance(2, necAndHla, Epitope.GetInstance()), andHlaishWithEpitopeAdjFeatures, hlaishFeatures, false, ref features);
                // e.g. "polar,large in Epitope[@8-9]"
                AddFeatureWithOptionalAndHlaAndST(SubSeq.GetInPropertySubSeqEnumeration(Epitope.GetInstance(), true, 2, necAndHla), andHlaishWithEpitopeAdjFeatures, hlaishFeatures, false, ref features);
            }

            // ---- Junction features (pairs that straddle a flank/epitope boundary) ----
            // Only produced when an explicit, positive flank size was requested.
            if (!flankSizeOrNull.HasValue || flankSizeOrNull.Value <= 0)
            {
                return(features);
            }

            // NOTE(review): the residues below are read from the original entity, while the asserts
            // further down use necAndHla (built from the flank-resized NEC) - confirm both agree.
            string epitopeText = (string)Epitope.GetInstance().Evaluate(entity);

            SubSeq nFlankEndFeature = SubSeq.GetInstance(1, 1, false, NFlank.GetInstance());
            string nFlankEndAA      = (string)nFlankEndFeature.Evaluate(entity);
            In     inNFlankEnd      = In.GetInstance(nFlankEndAA, nFlankEndFeature);

            SubSeq epitopeStartFeature = SubSeq.GetInstance(1, 1, true, Epitope.GetInstance());
            string epitopeStartAA      = (string)epitopeStartFeature.Evaluate(entity);
            Debug.Assert(epitopeStartAA == epitopeText.Substring(0, 1)); // real assert
            In inEpitopeStart = In.GetInstance(epitopeStartAA, epitopeStartFeature);

            SubSeq epitopeEndFeature = SubSeq.GetInstance(epitopeText.Length, epitopeText.Length, true, Epitope.GetInstance());
            string epitopeEndAA      = (string)epitopeEndFeature.Evaluate(entity);
            In     inEpitopeEnd      = In.GetInstance(epitopeEndAA, epitopeEndFeature);

            SubSeq cFlankStartFeature = SubSeq.GetInstance(1, 1, true, CFlank.GetInstance());
            string cFlankStartAA      = (string)cFlankStartFeature.Evaluate(entity);
            In     inCFlankStart      = In.GetInstance(cFlankStartAA, cFlankStartFeature);

            if (includeAAFeatures)
            {
                // N-flank/epitope junction
                And nJunction = And.GetInstance(inNFlankEnd, inEpitopeStart);
                AddFeatureWithOptionalAndHlaAndST(nJunction, andHlaishWithEpitopeAdjFeatures, hlaishFeatures, /*checkThatNew*/ true, ref features);

                // epitope/C-flank junction
                And cJunction = And.GetInstance(inEpitopeEnd, inCFlankStart);
                AddFeatureWithOptionalAndHlaAndST(cJunction, andHlaishWithEpitopeAdjFeatures, hlaishFeatures, /*checkThatNew*/ true, ref features);
            }

            if (includeChemicalProperties)
            {
                // Every (property of the N-flank end, property of the epitope start) combination.
                foreach (string nFlankEndProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[nFlankEndAA[0]]])
                {
                    InProperty inNFlankEndProperty = InProperty.GetInstance(nFlankEndProperty, nFlankEndFeature);

                    foreach (string epitopeStartProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[epitopeStartAA[0]]])
                    {
                        InProperty inEpitopeStartProperty = InProperty.GetInstance(epitopeStartProperty, epitopeStartFeature); //!!!get this out of the loop?
                        And        nJunctionProperties    = And.GetInstance(inNFlankEndProperty, inEpitopeStartProperty);
                        AddFeatureWithOptionalAndHlaAndST(nJunctionProperties, andHlaishWithEpitopeAdjFeatures, hlaishFeatures, /*checkThatNew*/ false, ref features);
                        Debug.Assert((bool)nJunctionProperties.Evaluate(necAndHla));
                    }
                }

                // Every (property of the epitope end, property of the C-flank start) combination.
                foreach (string epitopeEndProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[epitopeEndAA[0]]])
                {
                    InProperty inEpitopeEndProperty = InProperty.GetInstance(epitopeEndProperty, epitopeEndFeature);

                    foreach (string cFlankStartProperty in KmerProperties.AaToPropList[Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[cFlankStartAA[0]]])
                    {
                        InProperty inCFlankStartProperty = InProperty.GetInstance(cFlankStartProperty, cFlankStartFeature); //!!!get this out of the loop?
                        And        cJunctionProperties   = And.GetInstance(inEpitopeEndProperty, inCFlankStartProperty);
                        AddFeatureWithOptionalAndHlaAndST(cJunctionProperties, andHlaishWithEpitopeAdjFeatures, hlaishFeatures, /*checkThatNew*/ false, ref features);
                        Debug.Assert((bool)cJunctionProperties.Evaluate(necAndHla));
                    }
                }
            }

            return(features);
        }