Ejemplo n.º 1
0
        /// <summary>
        /// Resolves <paramref name="parameter"/> to a ground Hla and returns a set built from that single Hla.
        /// </summary>
        /// <param name="parameter">The Hla name passed to HlaFactoryNoConstraints.GetGroundInstance.</param>
        /// <param name="hlaSet">Not read by this method; the membership check that used it is commented out.</param>
        /// <param name="supertypeMap">Not read by this method.</param>
        /// <returns>The result of Set&lt;Hla&gt;.GetInstance for the resolved Hla.</returns>
        static public Set <Hla> HlaSetInternal(string parameter, Set <Hla> hlaSet, Dictionary <string, Set <Hla> > supertypeMap)
        {
            Hla groundHla = HlaFactoryNoConstraints.GetGroundInstance(parameter);

            // The membership check against hlaSet is currently disabled:
            //SpecialFunctions.CheckCondition(hlaSet.Contains(hla), string.Format("Hla value of {0} is unknown", parameter));
            Set <Hla> singleton = Set <Hla> .GetInstance(groundHla);
            return singleton;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Configures this predictor for the named model: selects the featurizer and data sources,
        /// deserializes the Logistic model from an embedded resource, and loads the epitope learning
        /// data for each source name into EpitopeLearningDataList.
        /// </summary>
        /// <param name="modelName">Model name, matched case-insensitively. Only "lanliedb03062007" is recognized;
        /// any other value fails the SpecialFunctions.CheckCondition call in the default branch.</param>
        /// <param name="eLength">Length of the blank E part used to build SampleNEC.</param>
        /// <param name="ncLength">Not read by this method.</param>
        /// <param name="hlaForNormalization">Maps each Hla found in the learning data to the Hla used as its
        /// normalization key; also stored in HlaForNormalization.</param>
        private void LoadUpPredictor(string modelName, int eLength, int ncLength, Converter <Hla, Hla> hlaForNormalization)
        {
            //Load up the predictor

            string featurerizerName;

            // Use the invariant culture for lowercasing: with a culture-sensitive ToLower() the 'I' in a
            // name such as "lanlIedb03062007" becomes a dotless i under Turkish cultures and the case
            // label below would never match.
            switch (modelName.ToLowerInvariant())
            {
            //!!!would be better not to have multiple of these switch statements around - looks like a job for a Class
            case "lanliedb03062007":
                // NOTE(review): this literal looks like it was mangled by an e-mail obfuscation filter - confirm the intended featurizer name.
                featurerizerName   = "[email protected]";
                SampleNEC          = NEC.GetInstance("", new string(' ', eLength), "");
                HlaFactory         = HlaFactory.GetFactory("MixedWithB15AndA68");
                SourceDataFileName = "lanlIedb03062007.pos.source.txt";
                NameList           = new string[] { "LANL", "IEDB" };
                break;

            default:
                SpecialFunctions.CheckCondition(false, "Don't know what featurerizer to use for the model");
                featurerizerName   = null;
                SourceDataFileName = null;
                NameList           = null;
                break;
            }
            // NOTE(review): this result is not used below; the featurizer is created again when
            // Logistic.FeatureGenerator is assigned. Kept so the call sequence is unchanged.
            Converter <object, Set <IHashableFeature> > featurizer = FeatureLib.CreateFeaturizer(featurerizerName);

            //GeneratorType generatorType = GeneratorType.ComboAndZero6SuperType;
            //FeatureSerializer featureSerializer = PositiveNegativeExperimentCollection.GetFeatureSerializer();
            //KmerDefinition = kmerDefinition;
            //HlaResolution hlaResolution = HlaResolution.ABMixed;

            // The resource name is built from the part of modelName before the first '.' and the N, E, C lengths of SampleNEC.
            string resourceName = string.Format("maxentModel{0}{1}{2}{3}.xml", modelName.Split('.')[0], SampleNEC.N.Length, SampleNEC.E.Length, SampleNEC.C.Length);

            EpitopeLearningDataList = new List <EpitopeLearningDataDupHlaOK>();
            using (StreamReader streamReader = Predictor.OpenResource(resourceName))
            {
                Logistic = (Logistic)FeatureLib.FeatureSerializer.FromXmlStreamReader(streamReader);
                //Logistic.FeatureGenerator = EpitopeFeatureGenerator.GetInstance(KmerDefinition, generatorType, featureSerializer).GenerateFeatureSet;
                Logistic.FeatureGenerator = FeatureLib.CreateFeaturizer(featurerizerName);
                foreach (string name in NameList)
                {
                    EpitopeLearningData epitopeLearningDataX = EpitopeLearningData.GetDbWhole(HlaFactory, SampleNEC.E.Length, name, SourceDataFileName);
                    Debug.Assert(epitopeLearningDataX.Count > 0, "Expect given data to have some data");
                    //!!!combine with previous step
                    // Re-index the data as: mer -> normalization Hla -> original Hla -> label.
                    EpitopeLearningDataDupHlaOK epitopeLearningData = new EpitopeLearningDataDupHlaOK(epitopeLearningDataX.Name);
                    foreach (KeyValuePair <Pair <string, Hla>, bool> merAndHlaAndLabel in epitopeLearningDataX)
                    {
                        Hla hlaIn  = merAndHlaAndLabel.Key.Second;
                        Hla hlaOut = hlaForNormalization(hlaIn);

                        Dictionary <Hla, Dictionary <Hla, bool> > hla2ToHlaToLabel = SpecialFunctions.GetValueOrDefault(epitopeLearningData, merAndHlaAndLabel.Key.First);
                        Dictionary <Hla, bool> hlaToLabel = SpecialFunctions.GetValueOrDefault(hla2ToHlaToLabel, hlaOut);
                        hlaToLabel.Add(hlaIn, merAndHlaAndLabel.Value);
                    }

                    EpitopeLearningDataList.Add(epitopeLearningData);
                }
            }

            HlaForNormalization = hlaForNormalization;
        }
        /// <summary>
        /// Returns the supertype string for <paramref name="hla"/> according to <paramref name="hasBlanks"/>.
        /// </summary>
        /// <param name="hla">The Hla whose Name is looked up; not read when the spec is SupertypeSpec.None.</param>
        /// <param name="hasBlanks">Selects the blanks form, the no-blanks form, or no supertype at all.</param>
        /// <returns>The Zero6 supertype string (with or without blanks), or "none" for SupertypeSpec.None.
        /// Any other spec value fails SpecialFunctions.CheckCondition; null is returned after that call.</returns>
        private static string SetSupertypeAny(Hla hla, SupertypeSpec hasBlanks)
        {
            if (hasBlanks == SupertypeSpec.None)
            {
                return "none";    //!!!const
            }

            if (hasBlanks == SupertypeSpec.HasBlanksTrue)
            {
                HlaToLength withBlanks = HlaToLength.GetInstanceABMixed(hla.Name);
                return withBlanks.ToZero6SupertypeBlanksString();
            }

            if (hasBlanks == SupertypeSpec.HasBlanksFalse)
            {
                HlaToLength withoutBlanks = HlaToLength.GetInstanceABMixed(hla.Name);
                return withoutBlanks.ToZero6SupertypeNoBlanksString();
            }

            // Unrecognized spec value.
            SpecialFunctions.CheckCondition(false, "unknown SupertypeSpec: " + hasBlanks.ToString());
            return null;
        }
        /// <summary>
        /// Computes the prior log odds for <paramref name="hla"/> at length <paramref name="k"/> and adds it
        /// to the per-length table obtained from KToHlaToPriorLogOdds.
        /// </summary>
        /// <param name="hla">Key under which the value is added; Dictionary.Add throws if it is already present.</param>
        /// <param name="lengthToSmoothedCount">Smoothed count per length; must contain <paramref name="k"/>.</param>
        /// <param name="smoothedTotal">Denominator for the relative frequency.</param>
        /// <param name="k">The length being processed.</param>
        private void AddToHlaToPriorLogOdds(Hla hla, Dictionary <int, int> lengthToSmoothedCount, int smoothedTotal, int k)
        {
            double smoothedCountForK = (double)lengthToSmoothedCount[k];
            double relativeFrequency = smoothedCountForK / (double)smoothedTotal;

            Dictionary <Hla, double> priorLogOddsByHla = SpecialFunctions.GetValueOrDefault(KToHlaToPriorLogOdds, k);

            // The relative frequency is scaled by 1/.25 (presumably a uniform share over four lengths - TODO confirm)
            // and by the training ratio before being converted to log odds.
            double scaledOdds = (relativeFrequency / .25) * RatioOfTrueToFalseTrainingExample;
            priorLogOddsByHla.Add(hla, SpecialFunctions.LogOdds(scaledOdds));
        }
        /// <summary>
        /// Builds a ground Hla from the first three characters of <paramref name="hlaIn"/>'s string form.
        /// </summary>
        /// <param name="hlaIn">The Hla to trim; its ToString() must be at least three characters long.</param>
        /// <returns>The ground instance obtained from SingletonSpecification.HlaFactoryNoConstraints for the trimmed name.</returns>
        static public Hla TrimToHla2(Hla hlaIn)
        {
            string fullName = hlaIn.ToString();
            bool   isLongEnough = fullName.Length >= 3;

            SpecialFunctions.CheckCondition(isLongEnough, string.Format("Expected hla name to have length of at least 3 (a class and two digits), but it is '{0}'.", fullName));

            string trimmedName = fullName.Substring(0, 3);
            return SingletonSpecification.HlaFactoryNoConstraints.GetGroundInstance(trimmedName);
        }
        /// <summary>
        /// Adds <paramref name="hla"/> to the set stored in _supertypeMap under its supertype,
        /// unless the supertype string is "unknown" or "none".
        /// </summary>
        /// <param name="hla">The Hla to register under its supertype.</param>
        private void AddToSupertypeMap(Hla hla)
        {
            string supertypeAny = SetSupertypeAny(hla, HasBlanks);

            //!!!"unknown" is a misnomer. Should be "none" or null, but don't want to change it because it is already in useful models.
            if (supertypeAny == "unknown" || supertypeAny == "none")
            {
                return;
            }

            Set <Hla> members = SpecialFunctions.GetValueOrDefault(_supertypeMap, supertypeAny);
            members.AddNewOrOld(hla);
        }
        /// <summary>
        /// Compares this instance with <paramref name="obj"/>.
        /// </summary>
        /// <param name="obj">The object to compare with.</param>
        /// <returns>false when <paramref name="obj"/> is not an Hla (or is null); otherwise the result of
        /// applying the == operator to this instance and the Hla.</returns>
        public override bool Equals(object obj)
        {
            Hla other = obj as Hla;

            // Not an Hla (or null): cannot be equal.
            if (null == other)
            {
                return false;
            }

            return this == other;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Creates a Prediction and copies each argument into the property of the matching name.
        /// </summary>
        /// <param name="inputPeptide">Stored in InputPeptide.</param>
        /// <param name="hla">Stored in Hla.</param>
        /// <param name="posteriorProbability">Stored in PosteriorProbability.</param>
        /// <param name="weightOfEvidence">Stored in WeightOfEvidence.</param>
        /// <param name="nec">Stored in NEC.</param>
        /// <param name="eStartPosition">Stored in EStartPosition.</param>
        /// <param name="eLastPosition">Stored in ELastPosition.</param>
        /// <param name="source">Stored in Source.</param>
        /// <returns>The newly populated Prediction.</returns>
        public static Prediction GetInstance(string inputPeptide, Hla hla, double posteriorProbability, double weightOfEvidence, NEC nec, int eStartPosition, int eLastPosition, string source)
        {
            Prediction result = new Prediction();

            // What was scored.
            result.InputPeptide = inputPeptide;
            result.Hla = hla;

            // The scores.
            result.PosteriorProbability = posteriorProbability;
            result.WeightOfEvidence = weightOfEvidence;

            // Where it was found and where it came from.
            result.NEC = nec;
            result.EStartPosition = eStartPosition;
            result.ELastPosition = eLastPosition;
            result.Source = source;

            return result;
        }
        /// <summary>
        /// Reports whether <paramref name="hla"/>'s Name equals one of the '/'-separated parts of this instance's Name.
        /// </summary>
        /// <param name="hla">The Hla to look for; must satisfy IsGround.</param>
        /// <returns>true if any part of Name matches <paramref name="hla"/>'s Name; otherwise false.</returns>
        public override bool IsMoreGeneralThan(Hla hla)
        {
            Debug.Assert(!IsGround);                       // real assert
            SpecialFunctions.CheckCondition(hla.IsGround); //We only have code for this

            bool matched = false;
            foreach (string alternativeName in Name.Split('/'))
            {
                if (alternativeName == hla.Name)
                {
                    matched = true;
                    break;
                }
            }

            return matched;
        }
Ejemplo n.º 10
0
        //static PatchPatternFactory PatchPatternFactory = PatchPatternFactory.GetFactory("strings");

        //private static PatchPattern CreateStringPatchPattern(string peptide)
        //{
        //    PatchPatternBuilder aPatchPatternBuilder = PatchPatternFactory.GetBuilder();
        //    aPatchPatternBuilder.AppendGroundDisjunct(peptide);
        //    PatchPattern patchPattern = aPatchPatternBuilder.ToPatchPattern();
        //    return patchPattern;
        //}


        /// <summary>
        /// Returns the probability for the given NEC and Hla, and reports which source lists contain the pair.
        /// </summary>
        /// <param name="nec">The peptide split into N, E and C parts; each part must have the same length as the matching part of SampleNEC.</param>
        /// <param name="hla">The Hla to predict for.</param>
        /// <param name="modelOnly">When true, the model's probability is returned even if the pair is on a source list.</param>
        /// <param name="source">Receives the matching sources joined with "+"; "" when there are none.</param>
        /// <returns>The Logistic model's value when no source lists the pair or <paramref name="modelOnly"/> is true; otherwise 1.0.</returns>
        public double Predict(NEC nec, Hla hla, bool modelOnly, out string source)
        {
            Debug.Assert(HlaFactory.IsGroundOrAbstractInstance(hla.ToString())); // real assert
            SpecialFunctions.CheckCondition(nec.N.Length == SampleNEC.N.Length && nec.E.Length == SampleNEC.E.Length && nec.C.Length == SampleNEC.C.Length,
                                            string.Format("Length of peptide must be {0},{1},{2}", SampleNEC.N.Length, SampleNEC.E.Length, SampleNEC.C.Length));
            Pair <NEC, Hla> necAndHla = new Pair <NEC, Hla>(nec, hla);

            List <Pair <string, Hla> > sourceAndOriginalHlaCollection = ListAllSourcesContainingThisMerAndHlaToLength(necAndHla);

            source = SpecialFunctions.Join("+", sourceAndOriginalHlaCollection); //Will be "" if list is empty

            // Consult the model only when the pair is not on any source list, or when the caller insists on it.
            bool useModel = sourceAndOriginalHlaCollection.Count == 0 || modelOnly;
            double probability = useModel ? (double)Logistic.EvaluateViaCache(necAndHla) : 1.0;

            return probability;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Averages, over all patient rows, the noisy-or combination of the per-Hla predictions for <paramref name="nec"/>.
        /// </summary>
        /// <param name="patientTable">One dictionary per patient. For each entry the Hla name is the first character of the key followed by the value.</param>
        /// <param name="nec">The peptide to predict for.</param>
        /// <param name="modelOnly">Passed through to the per-Hla Predict call.</param>
        /// <returns>The sum of the per-patient noisy-or values divided by the number of rows (NaN for an empty table, since 0.0 is divided by 0).</returns>
        public double Predict(List <Dictionary <string, string> > patientTable, NEC nec, bool modelOnly)
        {
            double sumOverPatients = 0.0;

            foreach (Dictionary <string, string> patientRow in patientTable)
            {
                // Running product of (1 - p) over this patient's Hlas.
                double complementProduct = 1.0;
                foreach (KeyValuePair <string, string> columnAndValue in patientRow)
                {
                    string hlaName = columnAndValue.Key.Substring(0, 1) + columnAndValue.Value;
                    Hla    hla     = HlaFactory.GetGroundInstance(hlaName);
                    Debug.Assert(nec.N.Length == SampleNEC.N.Length && nec.E.Length == SampleNEC.E.Length && nec.C.Length == SampleNEC.C.Length); // real assert

                    string unusedSource;
                    double probability = Predict(nec, hla, modelOnly, out unusedSource);
                    complementProduct = complementProduct * (1.0 - probability);
                }

                sumOverPatients += 1.0 - complementProduct;
            }

            return sumOverPatients / (double)patientTable.Count;
        }
        ////!!! this could be moved into a class
        //private object CreateKey(Prediction prediction, Best display)
        //{
        //    switch (display)
        //    {
        //        case Best.overall:
        //            return "best";
        //        case Best.perHla:
        //            return prediction.Hla;
        //        case Best.perPrediction:
        //            return prediction;
        //        case  Best.perLength:
        //            return prediction.K;
        //        case Best.perHlaAndLength:
        //            return new Pair<Hla, int>(prediction.Hla, prediction.K);
        //        default:
        //            SpecialFunctions.CheckCondition(false, "Don't know how to display " + display.ToString());
        //            return null;
        //    }
        //}

        /// <summary>
        ///  Lazily yields one Prediction for every combination of epitope length, window start index and hla.
        ///  HlaSetSpecification class choices:
        ///        HlaSetSpecification.Singleton - Means that an Hla will be given and it is the only hla to be considered
        ///        HlaSetSpecification.Supertype - Means that a supertype will be given and its hlas should be considered
        ///        HlaSetSpecification.All - Means to consider all known hlas
        /// </summary>
        /// <param name="inputPeptide">a string of amino acids</param>
        /// <param name="merLength">A value from the MerLength enum, which includes MerLength.scan, MerLength.given, MerLength.Eight, etc</param>
        /// <param name="dOfCenter">Passed unchanged to StartIndexEnumeration, which chooses the window start indexes; may be null.</param>
        /// <param name="hlaSetSpecification">A predefined HlaSetSpecification class.</param>
        /// <param name="hlaOrSupertypeOrNull">The hla or supertype required by HlaSetSpecification, or null for HlaSetSpecification.All</param>
        /// <param name="modelOnly">If should report the probability from the model, even when the epitope is on a source list.</param>
        /// <returns>A deferred sequence of predictions; nothing is computed until it is enumerated. The start and
        /// last positions stored in each Prediction are startIndex + NCLength + 1 and startIndex + NCLength + eLength.</returns>
        public IEnumerable <Prediction> PredictionEnumeration(string inputPeptide, MerLength merLength, int?dOfCenter, HlaSetSpecification hlaSetSpecification, string hlaOrSupertypeOrNull, bool modelOnly)
        {
            Set <Hla> candidateHlas = HlaSet(hlaSetSpecification, hlaOrSupertypeOrNull);

            foreach (int eLength in KEnumeration(merLength, inputPeptide.Length))
            {
                Predictor predictorForLength = KToPredictor[eLength];
                Dictionary <Hla, double> priorLogOddsByHla = KToHlaToPriorLogOdds[eLength];

                // A window is the epitope plus NCLength flanking positions on each side.
                int windowLength = NCLength + eLength + NCLength;
                foreach (int startIndex in StartIndexEnumeration(inputPeptide.Length, windowLength, dOfCenter))
                {
                    string window = inputPeptide.Substring(startIndex, windowLength);
                    NEC    nec    = NEC.GetInstance(window, NCLength, eLength, NCLength);
                    foreach (Hla hla in candidateHlas)
                    {
                        Hla    normalizedHla = HlaForNormalization(hla);
                        double priorLogOdds;
                        bool   hasPrior = priorLogOddsByHla.TryGetValue(normalizedHla, out priorLogOdds);
                        if (!hasPrior)
                        {
                            // No table entry: either a checked error, or fall back to the log odds of the training ratio.
                            SpecialFunctions.CheckCondition(!RaiseErrorIfNotFoundInNormalizationTable, string.Format("Hla '{0}' (which is '{1}' for the purposes of normalization) and is not found in the normalization table", hla, normalizedHla));
                            priorLogOdds = SpecialFunctions.LogOdds(RatioOfTrueToFalseTrainingExample);
                        }

                        string source;
                        double modelProbability = predictorForLength.Predict(nec, hla, modelOnly, out source);
                        double modelLogOdds     = SpecialFunctions.LogOdds(modelProbability);

                        double correctedLogOdds     = modelLogOdds + priorLogOdds;
                        double posteriorProbability = SpecialFunctions.InverseLogOdds(correctedLogOdds);
                        double weightOfEvidence     = correctedLogOdds - SpecialFunctions.LogOdds(RatioOfTrueToFalseTrainingExample);

                        yield return Prediction.GetInstance(inputPeptide, hla, posteriorProbability, weightOfEvidence, nec, startIndex + NCLength + 1, startIndex + NCLength + eLength, source);
                    }
                }
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Collects, from every entry of EpitopeLearningDataList, the (source name, original Hla) pairs recorded
        /// for the given epitope under the normalized form of the given Hla.
        /// </summary>
        /// <param name="necAndHlaIn">First supplies the epitope (its E part); Second is the Hla, which is passed through HlaForNormalization before lookup.</param>
        /// <returns>A new list of matches; empty when no source contains the pair.</returns>
        private List <Pair <string, Hla> > ListAllSourcesContainingThisMerAndHlaToLength(Pair <NEC, Hla> necAndHlaIn)
        {
            string epitope       = necAndHlaIn.First.E;
            Hla    normalizedHla = HlaForNormalization(necAndHlaIn.Second);
            List <Pair <string, Hla> > matches = new List <Pair <string, Hla> >();

            foreach (EpitopeLearningDataDupHlaOK sourceData in EpitopeLearningDataList)
            {
                // Skip missing sources and sources that do not list this epitope.
                if (null == sourceData || !sourceData.ContainsKey(epitope))
                {
                    continue;
                }

                Dictionary <Hla, Dictionary <Hla, bool> > byNormalizedHla = sourceData[epitope];
                Dictionary <Hla, bool> labelByOriginalHla;
                if (!byNormalizedHla.TryGetValue(normalizedHla, out labelByOriginalHla))
                {
                    continue;
                }

                foreach (KeyValuePair <Hla, bool> originalHlaAndLabel in labelByOriginalHla)
                {
                    Debug.Assert(originalHlaAndLabel.Value); // real assert
                    matches.Add(new Pair <string, Hla>(sourceData.Name, originalHlaAndLabel.Key));
                }
            }

            return matches;
        }
 /// <summary>
 /// Base implementation; it always throws. Derived classes are expected to override it.
 /// </summary>
 /// <param name="possibleCause">Not read by this implementation.</param>
 /// <returns>Never returns.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public virtual bool IsMoreGeneralThan(Hla possibleCause)
 {
     // NotImplementedException derives from Exception, so existing catch (Exception) handlers still work;
     // the message text is unchanged.
     throw new NotImplementedException("The method or operation is not implemented.");
 }
 /// <summary>
 /// Returns its argument unchanged.
 /// </summary>
 /// <param name="hla">The Hla to return.</param>
 /// <returns>The same reference that was passed in.</returns>
 static public Hla Identity(Hla hla)
 {
     return hla;
 }