/// <summary>
/// Builds the ground <see cref="Hla"/> for <paramref name="parameter"/> using
/// <c>HlaFactoryNoConstraints</c> and returns it wrapped by <c>Set&lt;Hla&gt;.GetInstance</c>.
/// </summary>
/// <param name="parameter">Text handed to <c>HlaFactoryNoConstraints.GetGroundInstance</c>.</param>
/// <param name="hlaSet">Not read by the current implementation (the membership check is disabled below).</param>
/// <param name="supertypeMap">Not read by the current implementation.</param>
/// <returns>The value of <c>Set&lt;Hla&gt;.GetInstance</c> for the created Hla (presumably a one-element set).</returns>
public static Set<Hla> HlaSetInternal(string parameter, Set<Hla> hlaSet, Dictionary<string, Set<Hla>> supertypeMap)
{
    Hla groundHla = HlaFactoryNoConstraints.GetGroundInstance(parameter);

    // Membership validation is intentionally disabled:
    //SpecialFunctions.CheckCondition(hlaSet.Contains(hla), string.Format("Hla value of {0} is unknown", parameter));

    Set<Hla> result = Set<Hla>.GetInstance(groundHla);
    return result;
}
//!!!very similar to other code
/// <summary>
/// Reads a tab-delimited file with the columns N, epitope, C, hla and label and returns a
/// dictionary from each (NEC, Hla) pair to its boolean label ("1" is true, "0" is false).
/// </summary>
/// <param name="hlaFactory">Factory used to turn the "hla" column into a ground <see cref="Hla"/>.</param>
/// <param name="fileName">File passed to <c>SpecialFunctions.TabFileTable</c>.</param>
/// <param name="dedup">
/// When true, a repeated (NEC, Hla) pair is skipped after checking that its label agrees with the
/// one already stored. When false, a repeated pair reaches <c>Dictionary.Add</c>, which throws
/// <see cref="ArgumentException"/> for a duplicate key.
/// </param>
/// <returns>The populated label table.</returns>
/// <remarks>
/// Validation goes through <c>SpecialFunctions.CheckCondition</c>; presumably it throws when the
/// condition is false (its implementation is not visible here).
/// </remarks>
public static Dictionary<Pair<NEC, Hla>, bool> ReadTable(HlaFactory hlaFactory, string fileName, bool dedup)
{
    Dictionary<Pair<NEC, Hla>, bool> labelByExample = new Dictionary<Pair<NEC, Hla>, bool>();

    foreach (Dictionary<string, string> fields in SpecialFunctions.TabFileTable(fileName, "N\tepitope\tC\thla\tlabel", false))
    {
        string nFlank = fields["N"];
        string epitope = fields["epitope"];
        bool epitopeIsLegal = Biology.GetInstance().LegalPeptide(epitope);
        SpecialFunctions.CheckCondition(epitopeIsLegal, string.Format("Peptide, '{0}', contains illegal char.", epitope));
        string cFlank = fields["C"];

        NEC nec = NEC.GetInstance(nFlank, epitope, cFlank);
        Hla hla = hlaFactory.GetGroundInstance(fields["hla"]);

        string labelText = fields["label"];
        bool isPositive = (labelText == "1");
        SpecialFunctions.CheckCondition(isPositive || labelText == "0", "Expect label to be '0' or '1'");

        Pair<NEC, Hla> example = new Pair<NEC, Hla>(nec, hla);

        // Only consult the table for duplicates when de-duplication was requested.
        bool storedLabel;
        if (dedup && labelByExample.TryGetValue(example, out storedLabel))
        {
            // A repeat is tolerated only if it carries the same label as before.
            SpecialFunctions.CheckCondition(storedLabel == isPositive, "The example " + example.ToString() + " appears with contradictory labels.");
            continue;
        }

        labelByExample.Add(example, isPositive);
    }

    return labelByExample;
}
/// <summary>
/// Computes, for every row of <paramref name="patientTable"/>, the noisy-OR of the per-HLA
/// predictions for <paramref name="nec"/>, and returns the mean of those values over all rows.
/// </summary>
/// <param name="patientTable">
/// One dictionary per patient. Each entry is turned into an HLA name by joining the first
/// character of the key with the entry's value.
/// </param>
/// <param name="nec">The NEC to score; its N, E and C lengths are asserted (debug builds) to match <c>SampleNEC</c>.</param>
/// <param name="modelOnly">Forwarded unchanged to the per-HLA <c>Predict</c> overload.</param>
/// <returns>
/// The sum of the per-row noisy-OR values divided by the row count. With an empty table this is
/// 0.0 / 0, i.e. NaN.
/// </returns>
public double Predict(List<Dictionary<string, string>> patientTable, NEC nec, bool modelOnly)
{
    double sumOverPatients = 0.0;

    foreach (Dictionary<string, string> patient in patientTable)
    {
        // Running product of (1 - probability) across this row's entries.
        double complementProduct = 1.0;

        foreach (KeyValuePair<string, string> entry in patient)
        {
            string hlaName = entry.Key.Substring(0, 1) + entry.Value;
            Hla hla = HlaFactory.GetGroundInstance(hlaName);

            Debug.Assert(nec.N.Length == SampleNEC.N.Length && nec.E.Length == SampleNEC.E.Length && nec.C.Length == SampleNEC.C.Length); // real assert

            string unusedSource; // required by the overload's out parameter; never read
            double probability = Predict(nec, hla, modelOnly, out unusedSource);
            complementProduct *= 1.0 - probability;
        }

        // Noisy-OR for this patient: one minus the product of the complements.
        sumOverPatients += 1.0 - complementProduct;
    }

    return sumOverPatients / (double)patientTable.Count;
}
//internal EpitopeLearningData[] Split(int cCrossValPart, Random aRandom)
//{
//    List<KeyValuePair<MerAndHlaToLength, bool>> shuffleList = new List<KeyValuePair<MerAndHlaToLength, bool>>();
//    foreach (KeyValuePair<Pair<string, Hla>, bool> merAndHlaToLengthWithLabel in this)
//    {
//        shuffleList.Add(merAndHlaToLengthWithLabel);
//        int iRandomPos = aRandom.Next(shuffleList.Count);
//        shuffleList[shuffleList.Count - 1] = shuffleList[iRandomPos];
//        shuffleList[iRandomPos] = merAndHlaToLengthWithLabel;
//    }
//    EpitopeLearningData[] rgrg = new EpitopeLearningData[cCrossValPart];
//    for (int irgrg = 0; irgrg < rgrg.Length; ++irgrg)
//    {
//        rgrg[irgrg] = new EpitopeLearningData(string.Format("{0}{1}", Name, irgrg));
//    }
//    for (int iShuffleList = 0; iShuffleList < shuffleList.Count; ++iShuffleList)
//    {
//        KeyValuePair<MerAndHlaToLength, bool> merAndHlaToLengthWithLabel = shuffleList[iShuffleList];
//        int iSet = iShuffleList * cCrossValPart / shuffleList.Count;
//        rgrg[iSet].Add(merAndHlaToLengthWithLabel.Key, merAndHlaToLengthWithLabel.Value);
//    }
//    return rgrg;
//}

/// <summary>
/// Loads labelled (peptide, Hla) examples from a table with the columns peptide, hla, source and
/// label, keeping only rows whose peptide has length <paramref name="eLength"/> and whose source
/// set overlaps the source set derived from <paramref name="datasetName"/>.
/// </summary>
/// <param name="hlaFactory">Factory used to turn the "hla" column into a ground <see cref="Hla"/>.</param>
/// <param name="eLength">Required peptide length; rows with any other length are skipped.</param>
/// <param name="datasetName">Names the result and is parsed by <c>CreateSourceSet</c> into the wanted sources.</param>
/// <param name="fileOrResourceName">Input passed to <c>Predictor.TabFileTable</c>.</param>
/// <returns>An <see cref="EpitopeLearningData"/> mapping each kept (peptide, Hla) pair to true for label "1" and false for "0".</returns>
/// <remarks>
/// Rows are stored through the indexer, so a later row with the same pair replaces the earlier value.
/// </remarks>
public static EpitopeLearningData GetDbWhole(HlaFactory hlaFactory, int eLength, string datasetName, string fileOrResourceName)
{
    Set<string> wantedSources = CreateSourceSet(datasetName);
    EpitopeLearningData learningData = new EpitopeLearningData(datasetName);

    //SpecialFunctions.CheckCondition(hlaResolution.Equals(HlaResolution.ABMixed));
    foreach (Dictionary<string, string> fields in Predictor.TabFileTable(fileOrResourceName, "peptide hla source label", false))
    {
        string peptide = fields["peptide"];
        bool peptideIsLegal = Biology.GetInstance().LegalPeptide(peptide);
        SpecialFunctions.CheckCondition(peptideIsLegal, string.Format("Peptide, '{0}', contains illegal char.", peptide));

        // Skip peptides of the wrong length.
        if (peptide.Length != eLength) //!!!const
        {
            continue;
        }

        // Skip rows that come from none of the wanted sources.
        Set<string> rowSources = CreateSourceSet(fields["source"]);
        //Debug.Assert(providedSet.IsSubsetOf(Set<string>.GetInstance(new string[] { "Aplus", "LANL", "IEDB" }))); // real assert
        if (rowSources.IntersectionIsEmpty(wantedSources))
        {
            continue;
        }

        Hla hla = hlaFactory.GetGroundInstance(fields["hla"]);
        //HlaToLength hlaToLength = HlaToLength.GetInstance(hla, hlaResolution);
        Pair<string, Hla> example = new Pair<string, Hla>(peptide, hla);
        //MerAndHlaToLength aMerAndHlaToLength = MerAndHlaToLength.GetInstance(peptide, hlaToLength, kmerDefinition);

        string labelText = fields["label"];
        bool isPositive = (labelText == "1");
        // NOTE(review): the message says "will be ignored", but this goes through CheckCondition,
        // which presumably fails rather than skipping the row; confirm the intended behaviour.
        SpecialFunctions.CheckCondition(isPositive || labelText == "0", string.Format("Warning: Epitope example {0} has unknown label {1} and will be ignored.", example, labelText));

        learningData[example] = isPositive;
    }

    return learningData;
}