示例#1
0
        /// <summary>
        /// Builds a new data set containing every entry of this instance whose key is not present in <paramref name="other"/>.
        /// </summary>
        /// <param name="other">The data set whose keys are excluded from the result.</param>
        /// <returns>
        /// A new instance whose name is this instance's name, a '-', and <paramref name="other"/>'s name;
        /// each retained key is given the value stored for it in this instance.
        /// </returns>
        internal EpitopeLearningData Subtract(EpitopeLearningData other)
        {
            string differenceName = string.Format("{0}-{1}", Name, other.Name);
            EpitopeLearningData difference = new EpitopeLearningData(differenceName);

            foreach (Pair<string, Hla> candidateKey in Keys)
            {
                // Anything the other set also knows about is left out of the difference.
                if (other.ContainsKey(candidateKey))
                {
                    continue;
                }

                difference[candidateKey] = this[candidateKey];
            }

            return difference;
        }
示例#2
0
        //internal static EpitopeLearningData GetSyfpeithiCollection(HlaResolution hlaResolution, KmerDefinition kmerDefinition)
        //{
        //    Debug.Assert(kmerDefinition.FullMerCount == kmerDefinition.EpitopeMerCount); //!!!this is all we have code for
        //    Debug.Assert(8 <= kmerDefinition.EpitopeMerCount && kmerDefinition.EpitopeMerCount <= 11);
        //    Debug.Assert(hlaResolution is ABMixedHlaResolution); //!!!this is all we have code for

        //    EpitopeLearningData rg = new EpitopeLearningData("Syfpeithi");

        //    string fileInput = @"W_2005-04-05_09_57_55_syfpeithiClean.txt";//!!!const

        //    using (StreamReader streamreader = Predictor.OpenResource(fileInput))
        //    {
        //        string line = streamreader.ReadLine();
        //        Debug.Assert(line == "Epitope ID	Epitope Sequence	HLA	kMerLength	Anchor 1	Anchor 2	Anchor 3");
        //        while (null != (line = streamreader.ReadLine()))
        //        {
        //            string[] fieldCollection = line.Split('\t');
        //            SpecialFunctions.CheckCondition(fieldCollection.Length >= 4); //!!!raise error


        //            string aaSequence = fieldCollection[1].Trim();
        //            if (aaSequence.Length != kmerDefinition.FullMerCount)
        //            {
        //                continue;
        //            }
        //            if (aaSequence.Contains("X"))
        //            {
        //                continue;
        //            }


        //            string hlaAsString = fieldCollection[2].Trim();

        //            HlaToLength hlaToLength = HlaToLength.GetInstance(hlaAsString, hlaResolution);

        //            MerAndHlaToLength aMerAndHlaToLength = MerAndHlaToLength.GetInstance(aaSequence, hlaToLength, kmerDefinition);

        //            rg[aMerAndHlaToLength] = true;
        //        }
        //    }
        //    return rg;
        //}

        //internal static EpitopeLearningData GetMhcPepCollection(HlaResolution hlaResolution, KmerDefinition kmerDefinition)
        //{
        //    Debug.Fail("The MHC Pep data needs it's A68's expanded before it can be used");
        //    Debug.Assert(kmerDefinition.EpitopeMerCount == 9 && kmerDefinition.FullMerCount == 9); //!!!this is the case we have code for
        //    EpitopeLearningData rg = new EpitopeLearningData("MhcPep");
        //    string fileInput = @"W_2005-04-05_11_43_49_MHCPEPcleanDuplicatesRemoved.txt"; //!!!const

        //    using (StreamReader streamreader = Predictor.OpenResource(fileInput))
        //    {
        //        string line = streamreader.ReadLine();
        //        Debug.Assert(line == "Epitopes	Original HLA (4 digit where possible)	Our Canonical HLA	Length");
        //        while (null != (line = streamreader.ReadLine()))
        //        {
        //            string[] fieldCollection = line.Split('\t');
        //            SpecialFunctions.CheckCondition(fieldCollection.Length == 4); //!!!raise error


        //            string aaSequence = fieldCollection[0].Trim();
        //            if (aaSequence.Length != kmerDefinition.FullMerCount)
        //            {
        //                continue;
        //            }


        //            string hlaAsString = fieldCollection[2].Trim();
        //            if (hlaAsString == "B15" || hlaAsString == "A68")
        //            {
        //                Debug.WriteLine("Skipping 2-digit B15 and A68's");
        //                continue;
        //            }
        //            HlaToLength hlaToLength = HlaToLength.GetInstanceABMixed(hlaAsString);

        //            int length = int.Parse(fieldCollection[3]);
        //            SpecialFunctions.CheckCondition(length == aaSequence.Length); //!!!raise error

        //            MerAndHlaToLength aMerAndHlaToLength = MerAndHlaToLength.GetInstance(aaSequence, hlaToLength, kmerDefinition);
        //            rg[aMerAndHlaToLength] = true;
        //        }
        //    }
        //    return rg;
        //}

        /// <summary>
        /// Merges any number of data sets into one new data set.
        /// </summary>
        /// <param name="rgrg">The data sets to merge, visited in the order given.</param>
        /// <returns>
        /// A new instance into which every entry of every input has been assigned through the indexer, input by input
        /// (presumably a later input's value replaces an earlier one for the same key; confirm against the indexer).
        /// Its name is built by appending the input names in order, inserting '+' before a name whenever the
        /// text accumulated so far is non-empty.
        /// </returns>
        public static EpitopeLearningData Union(params EpitopeLearningData[] rgrg)
        {
            // The final name is only known once all inputs have been visited, so the instance starts without one.
            EpitopeLearningData merged = new EpitopeLearningData(null);
            StringBuilder combinedName = new StringBuilder();

            foreach (EpitopeLearningData part in rgrg)
            {
                // The separator depends on what has been written so far, not on the position in the array.
                if (combinedName.Length != 0)
                {
                    combinedName.Append('+');
                }
                combinedName.Append(part.Name);

                foreach (KeyValuePair<Pair<string, Hla>, bool> labeledExample in part)
                {
                    merged[labeledExample.Key] = labeledExample.Value;
                }
            }

            merged.Name = combinedName.ToString();
            return merged;
        }
示例#3
0
        //internal EpitopeLearningData[] Split(int cCrossValPart, Random aRandom)
        //{
        //    List<KeyValuePair<MerAndHlaToLength, bool>> shuffleList = new List<KeyValuePair<MerAndHlaToLength, bool>>();
        //    foreach (KeyValuePair<Pair<string, Hla>, bool> merAndHlaToLengthWithLabel in this)
        //    {
        //        shuffleList.Add(merAndHlaToLengthWithLabel);
        //        int iRandomPos = aRandom.Next(shuffleList.Count);
        //        shuffleList[shuffleList.Count - 1] = shuffleList[iRandomPos];
        //        shuffleList[iRandomPos] = merAndHlaToLengthWithLabel;
        //    }

        //    EpitopeLearningData[] rgrg = new EpitopeLearningData[cCrossValPart];
        //    for (int irgrg = 0; irgrg < rgrg.Length; ++irgrg)
        //    {
        //        rgrg[irgrg] = new EpitopeLearningData(string.Format("{0}{1}", Name, irgrg));
        //    }
        //    for (int iShuffleList = 0; iShuffleList < shuffleList.Count; ++iShuffleList)
        //    {
        //        KeyValuePair<MerAndHlaToLength, bool> merAndHlaToLengthWithLabel = shuffleList[iShuffleList];
        //        int iSet = iShuffleList * cCrossValPart / shuffleList.Count;
        //        rgrg[iSet].Add(merAndHlaToLengthWithLabel.Key, merAndHlaToLengthWithLabel.Value);
        //    }
        //    return rgrg;
        //}


        /// <summary>
        /// Reads labeled peptide/HLA examples from a tab-delimited table and collects the rows that match the
        /// requested peptide length and sources.
        /// </summary>
        /// <param name="hlaFactory">Factory whose <c>GetGroundInstance</c> turns each row's "hla" field into an <c>Hla</c>.</param>
        /// <param name="eLength">Rows whose peptide length differs from this value are skipped.</param>
        /// <param name="datasetName">Name given to the returned data set; also passed to <c>CreateSourceSet</c> to obtain the wanted sources.</param>
        /// <param name="fileOrResourceName">Passed to <c>Predictor.TabFileTable</c> as the table to read.</param>
        /// <returns>
        /// A data set in which each accepted (peptide, HLA) pair is assigned <c>true</c> when its label is "1"
        /// and <c>false</c> otherwise.
        /// </returns>
        public static EpitopeLearningData GetDbWhole(HlaFactory hlaFactory, int eLength, string datasetName, string fileOrResourceName)
        {
            Set<string> requestedSources = CreateSourceSet(datasetName);
            EpitopeLearningData examples = new EpitopeLearningData(datasetName);

            foreach (Dictionary<string, string> row in Predictor.TabFileTable(fileOrResourceName, "peptide	hla	source	label", false))
            {
                // The legality check runs before the length filter, so it applies to peptides of every length.
                string peptide = row["peptide"];
                bool peptideIsLegal = Biology.GetInstance().LegalPeptide(peptide);
                SpecialFunctions.CheckCondition(peptideIsLegal, string.Format("Peptide, '{0}', contains illegal char.", peptide));

                if (eLength != peptide.Length)
                {
                    continue;
                }

                // Keep the row only if at least one of its sources is among the requested ones.
                Set<string> rowSources = CreateSourceSet(row["source"]);
                if (rowSources.IntersectionIsEmpty(requestedSources))
                {
                    continue;
                }

                Hla hla = hlaFactory.GetGroundInstance(row["hla"]);
                Pair<string, Hla> example = new Pair<string, Hla>(peptide, hla);

                // NOTE(review): the message says the example "will be ignored", but nothing here skips the row;
                // what happens on an unknown label depends on SpecialFunctions.CheckCondition - confirm.
                string label = row["label"];
                bool isPositive = label == "1";
                SpecialFunctions.CheckCondition(isPositive || label == "0", string.Format("Warning: Epitope example {0} has unknown label {1} and will be ignored.", example, label));

                examples[example] = isPositive;
            }

            return examples;
        }
示例#4
0
 /// <summary>
 /// Merges this data set with <paramref name="other"/> by delegating to the <c>Union</c> overload that accepts several data sets.
 /// </summary>
 /// <param name="other">The data set to merge with this one.</param>
 /// <returns>The result of <c>Union(this, other)</c>.</returns>
 internal EpitopeLearningData Union(EpitopeLearningData other)
 {
     EpitopeLearningData merged = Union(this, other);
     return merged;
 }