/// <summary>
/// Builds a new collection holding every entry of this collection whose key
/// is not present in <paramref name="other"/>.
/// </summary>
/// <param name="other">The collection whose keys are excluded from the result.</param>
/// <returns>
/// A new <see cref="EpitopeLearningData"/> whose name is this collection's name and
/// <paramref name="other"/>'s name joined by '-'. Neither input is modified.
/// </returns>
internal EpitopeLearningData Subtract(EpitopeLearningData other)
{
    string differenceName = string.Format("{0}-{1}", Name, other.Name);
    EpitopeLearningData difference = new EpitopeLearningData(differenceName);

    foreach (KeyValuePair<Pair<string, Hla>, bool> entry in this)
    {
        if (other.ContainsKey(entry.Key))
        {
            // Present in both collections: leave it out of the difference.
            continue;
        }

        difference[entry.Key] = entry.Value;
    }

    return difference;
}
//internal static EpitopeLearningData GetSyfpeithiCollection(HlaResolution hlaResolution, KmerDefinition kmerDefinition)
//{
//    Debug.Assert(kmerDefinition.FullMerCount == kmerDefinition.EpitopeMerCount); //!!!this is all we have code for
//    Debug.Assert(8 <= kmerDefinition.EpitopeMerCount && kmerDefinition.EpitopeMerCount <= 11);
//    Debug.Assert(hlaResolution is ABMixedHlaResolution); //!!!this is all we have code for
//    EpitopeLearningData rg = new EpitopeLearningData("Syfpeithi");
//    string fileInput = @"W_2005-04-05_09_57_55_syfpeithiClean.txt";//!!!const
//    using (StreamReader streamreader = Predictor.OpenResource(fileInput))
//    {
//        string line = streamreader.ReadLine();
//        Debug.Assert(line == "Epitope ID Epitope Sequence HLA kMerLength Anchor 1 Anchor 2 Anchor 3");
//        while (null != (line = streamreader.ReadLine()))
//        {
//            string[] fieldCollection = line.Split('\t');
//            SpecialFunctions.CheckCondition(fieldCollection.Length >= 4); //!!!raise error
//            string aaSequence = fieldCollection[1].Trim();
//            if (aaSequence.Length != kmerDefinition.FullMerCount)
//            {
//                continue;
//            }
//            if (aaSequence.Contains("X"))
//            {
//                continue;
//            }
//            string hlaAsString = fieldCollection[2].Trim();
//            HlaToLength hlaToLength = HlaToLength.GetInstance(hlaAsString, hlaResolution);
//            MerAndHlaToLength aMerAndHlaToLength = MerAndHlaToLength.GetInstance(aaSequence, hlaToLength, kmerDefinition);
//            rg[aMerAndHlaToLength] = true;
//        }
//    }
//    return rg;
//}

//internal static EpitopeLearningData GetMhcPepCollection(HlaResolution hlaResolution, KmerDefinition kmerDefinition)
//{
//    Debug.Fail("The MHC Pep data needs it's A68's expanded before it can be used");
//    Debug.Assert(kmerDefinition.EpitopeMerCount == 9 && kmerDefinition.FullMerCount == 9); //!!!this is the case we have code for
//    EpitopeLearningData rg = new EpitopeLearningData("MhcPep");
//    string fileInput = @"W_2005-04-05_11_43_49_MHCPEPcleanDuplicatesRemoved.txt"; //!!!const
//    using (StreamReader streamreader = Predictor.OpenResource(fileInput))
//    {
//        string line = streamreader.ReadLine();
//        Debug.Assert(line == "Epitopes Original HLA (4 digit where possible) Our Canonical HLA Length");
//        while (null != (line = streamreader.ReadLine()))
//        {
//            string[] fieldCollection = line.Split('\t');
//            SpecialFunctions.CheckCondition(fieldCollection.Length == 4); //!!!raise error
//            string aaSequence = fieldCollection[0].Trim();
//            if (aaSequence.Length != kmerDefinition.FullMerCount)
//            {
//                continue;
//            }
//            string hlaAsString = fieldCollection[2].Trim();
//            if (hlaAsString == "B15" || hlaAsString == "A68")
//            {
//                Debug.WriteLine("Skipping 2-digit B15 and A68's");
//                continue;
//            }
//            HlaToLength hlaToLength = HlaToLength.GetInstanceABMixed(hlaAsString);
//            int length = int.Parse(fieldCollection[3]);
//            SpecialFunctions.CheckCondition(length == aaSequence.Length); //!!!raise error
//            MerAndHlaToLength aMerAndHlaToLength = MerAndHlaToLength.GetInstance(aaSequence, hlaToLength, kmerDefinition);
//            rg[aMerAndHlaToLength] = true;
//        }
//    }
//    return rg;
//}

/// <summary>
/// Merges any number of collections into one new collection.
/// </summary>
/// <param name="rgrg">The collections to merge, processed in argument order.</param>
/// <returns>
/// A new <see cref="EpitopeLearningData"/> containing every entry of every input.
/// Entries are written through the indexer in argument order, so for a key that
/// occurs in several inputs the value from the last one processed is the one assigned last.
/// The result's name is the input names appended in order, with '+' inserted
/// before a name whenever the name built so far is non-empty.
/// </returns>
public static EpitopeLearningData Union(params EpitopeLearningData[] rgrg)
{
    EpitopeLearningData merged = new EpitopeLearningData(null);
    StringBuilder combinedName = new StringBuilder();

    for (int index = 0; index < rgrg.Length; ++index)
    {
        EpitopeLearningData part = rgrg[index];

        // Separator only once something has already been written to the name.
        if (combinedName.Length != 0)
        {
            combinedName.Append('+');
        }
        combinedName.Append(part.Name);

        foreach (KeyValuePair<Pair<string, Hla>, bool> entry in part)
        {
            merged[entry.Key] = entry.Value;
        }
    }

    merged.Name = combinedName.ToString();
    return merged;
}
//internal EpitopeLearningData[] Split(int cCrossValPart, Random aRandom)
//{
//    List<KeyValuePair<MerAndHlaToLength, bool>> shuffleList = new List<KeyValuePair<MerAndHlaToLength, bool>>();
//    foreach (KeyValuePair<Pair<string, Hla>, bool> merAndHlaToLengthWithLabel in this)
//    {
//        shuffleList.Add(merAndHlaToLengthWithLabel);
//        int iRandomPos = aRandom.Next(shuffleList.Count);
//        shuffleList[shuffleList.Count - 1] = shuffleList[iRandomPos];
//        shuffleList[iRandomPos] = merAndHlaToLengthWithLabel;
//    }
//    EpitopeLearningData[] rgrg = new EpitopeLearningData[cCrossValPart];
//    for (int irgrg = 0; irgrg < rgrg.Length; ++irgrg)
//    {
//        rgrg[irgrg] = new EpitopeLearningData(string.Format("{0}{1}", Name, irgrg));
//    }
//    for (int iShuffleList = 0; iShuffleList < shuffleList.Count; ++iShuffleList)
//    {
//        KeyValuePair<MerAndHlaToLength, bool> merAndHlaToLengthWithLabel = shuffleList[iShuffleList];
//        int iSet = iShuffleList * cCrossValPart / shuffleList.Count;
//        rgrg[iSet].Add(merAndHlaToLengthWithLabel.Key, merAndHlaToLengthWithLabel.Value);
//    }
//    return rgrg;
//}

/// <summary>
/// Loads labelled (peptide, HLA) examples from a tab-delimited table into a new collection.
/// </summary>
/// <param name="hlaFactory">Factory used to turn each row's "hla" text into an <c>Hla</c> via <c>GetGroundInstance</c>.</param>
/// <param name="eLength">Required peptide length; rows whose peptide has any other length are skipped.</param>
/// <param name="datasetName">
/// Name given to the returned collection. It is also passed to <c>CreateSourceSet</c> to obtain
/// the set of wanted sources (NOTE(review): the exact parsing is defined by <c>CreateSourceSet</c>, not visible here).
/// </param>
/// <param name="fileOrResourceName">
/// Passed to <c>Predictor.TabFileTable</c> together with the header "peptide hla source label"
/// (presumably a file path or embedded resource name — confirm against that helper).
/// </param>
/// <returns>
/// A collection keyed by (peptide, HLA) whose value is <c>true</c> when the row's label is "1"
/// and <c>false</c> when it is "0".
/// </returns>
/// <remarks>
/// Every row's peptide is validated before the length filter is applied, and the label is validated
/// only for rows that pass both the length and the source filters. Both validations go through
/// <c>SpecialFunctions.CheckCondition</c> (presumably throws on failure — confirm).
/// </remarks>
public static EpitopeLearningData GetDbWhole(HlaFactory hlaFactory, int eLength, string datasetName, string fileOrResourceName)
{
    Set<string> requestedSources = CreateSourceSet(datasetName);
    EpitopeLearningData examples = new EpitopeLearningData(datasetName);

    foreach (Dictionary<string, string> record in Predictor.TabFileTable(fileOrResourceName, "peptide hla source label", false))
    {
        string peptide = record["peptide"];

        // The legality check runs on every row, even those the length filter discards below.
        bool peptideIsLegal = Biology.GetInstance().LegalPeptide(peptide);
        string illegalPeptideMessage = string.Format("Peptide, '{0}', contains illegal char.", peptide);
        SpecialFunctions.CheckCondition(peptideIsLegal, illegalPeptideMessage);

        if (peptide.Length == eLength)
        {
            Set<string> rowSources = CreateSourceSet(record["source"]);

            // Keep the row only if it shares at least one source with the requested set.
            if (!rowSources.IntersectionIsEmpty(requestedSources))
            {
                Hla hla = hlaFactory.GetGroundInstance(record["hla"]);
                Pair<string, Hla> key = new Pair<string, Hla>(peptide, hla);

                string label = record["label"];
                bool labelIsBinary = label == "0" || label == "1";
                string badLabelMessage = string.Format("Warning: Epitope example {0} has unknown label {1} and will be ignored.", key, label);
                SpecialFunctions.CheckCondition(labelIsBinary, badLabelMessage);

                examples[key] = (label == "1");
            }
        }
    }

    return examples;
}
/// <summary>
/// Merges this collection with <paramref name="other"/> by delegating to the
/// static params overload of <c>Union</c>, with this collection first.
/// </summary>
/// <param name="other">The collection to merge with this one.</param>
/// <returns>The new collection produced by the static <c>Union</c> overload.</returns>
internal EpitopeLearningData Union(EpitopeLearningData other)
{
    EpitopeLearningData[] operands = new EpitopeLearningData[] { this, other };
    return Union(operands);
}