/// <summary>
        /// Maps every one-letter amino acid code to the string made of all one-letter
        /// codes that share its equivalence class (the code itself included).
        /// </summary>
        /// <returns>
        /// A <see cref="Hashtable"/> keyed by one-letter code (char); each value is the
        /// string of one-letter codes belonging to that code's class.
        /// </returns>
        static internal Hashtable GetEqClassCollection()
        {
            Biology biology = Biology.GetInstance();

            // Pass 1: class name -> accumulated one-letter codes of its members.
            SortedList membersByClass = new SortedList();
            foreach (string threeLetter in biology.AminoAcidEquivalence.Keys)
            {
                char   oneLetter = biology.ThreeLetterAminoAcidAbbrevTo1Letter[threeLetter];
                string className = biology.AminoAcidEquivalence[threeLetter];

                // The SortedList indexer yields null for a class that has not been seen yet.
                StringBuilder members = (StringBuilder)membersByClass[className];
                if (members == null)
                {
                    members = new StringBuilder();
                    membersByClass.Add(className, members);
                }
                members.Append(oneLetter);
            }

            // Pass 2: point each member letter at the full member string of its class.
            Hashtable letterToClassMembers = new Hashtable();
            foreach (StringBuilder classBuilder in membersByClass.Values)
            {
                string classMembers = classBuilder.ToString();
                for (int i = 0; i < classMembers.Length; ++i)
                {
                    // Add (not the indexer) so a letter listed twice still raises an error.
                    letterToClassMembers.Add(classMembers[i], classMembers);
                }
            }

            return letterToClassMembers;
        }
 /// <summary>
 /// Returns the shared <see cref="Biology"/> instance, creating it on first use.
 /// </summary>
 /// <remarks>No locking is performed around the creation.</remarks>
 static public Biology GetInstance()
 {
     // Fast path: the instance already exists.
     if (Singleton != null)
     {
         return Singleton;
     }

     Singleton = new Biology();
     return Singleton;
 }
        /// <summary>
        /// Looks up the property list of each amino acid in <paramref name="aaSeq"/> and
        /// hands the per-position lists to <c>SpecialFunctions.EveryCombination</c>.
        /// </summary>
        /// <param name="aaSeq">Sequence of one-letter amino acid codes.</param>
        /// <returns>Whatever <c>SpecialFunctions.EveryCombination</c> yields for those lists.</returns>
        public static IEnumerable <List <string> > EveryPropertyCombination(string aaSeq)
        {
            List <List <string> > perPositionProperties = new List <List <string> >();

            for (int position = 0; position < aaSeq.Length; ++position)
            {
                // AaToPropList is keyed by the three-letter abbreviation, so translate first.
                string        threeLetter = Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[aaSeq[position]];
                List <string> properties  = KmerProperties.AaToPropList[threeLetter];
                perPositionProperties.Add(properties);
            }

            return SpecialFunctions.EveryCombination(perPositionProperties);
        }
        /// <summary>
        /// Collects the one-letter codes of all amino acids whose bit for the named
        /// property is set.
        /// </summary>
        /// <param name="propertyName">Property name; must be a key of <c>PropertyToNumber</c>.</param>
        /// <returns>The set of matching one-letter amino acid codes.</returns>
        public static Set <char> PropertyNameToAASet(string propertyName)
        {
            KmerProperties properties    = KmerProperties.GetInstance();
            int            propertyIndex = properties.PropertyToNumber[propertyName];
            Set <char>     matchingAAs   = Set <char> .GetInstance();

            foreach (KeyValuePair <string, bool[]> entry in properties.AABits)
            {
                // Skip amino acids that do not have this property.
                if (!entry.Value[propertyIndex])
                {
                    continue;
                }

                // AABits is keyed by three-letter abbreviation; the result uses one-letter codes.
                char oneLetter = Biology.GetInstance().ThreeLetterAminoAcidAbbrevTo1Letter[entry.Key];
                matchingAAs.AddNew(oneLetter);
            }

            return matchingAAs;
        }
// Example #5
// 0
        //internal EpitopeLearningData[] Split(int cCrossValPart, Random aRandom)
        //{
        //    List<KeyValuePair<MerAndHlaToLength, bool>> shuffleList = new List<KeyValuePair<MerAndHlaToLength, bool>>();
        //    foreach (KeyValuePair<Pair<string, Hla>, bool> merAndHlaToLengthWithLabel in this)
        //    {
        //        shuffleList.Add(merAndHlaToLengthWithLabel);
        //        int iRandomPos = aRandom.Next(shuffleList.Count);
        //        shuffleList[shuffleList.Count - 1] = shuffleList[iRandomPos];
        //        shuffleList[iRandomPos] = merAndHlaToLengthWithLabel;
        //    }

        //    EpitopeLearningData[] rgrg = new EpitopeLearningData[cCrossValPart];
        //    for (int irgrg = 0; irgrg < rgrg.Length; ++irgrg)
        //    {
        //        rgrg[irgrg] = new EpitopeLearningData(string.Format("{0}{1}", Name, irgrg));
        //    }
        //    for (int iShuffleList = 0; iShuffleList < shuffleList.Count; ++iShuffleList)
        //    {
        //        KeyValuePair<MerAndHlaToLength, bool> merAndHlaToLengthWithLabel = shuffleList[iShuffleList];
        //        int iSet = iShuffleList * cCrossValPart / shuffleList.Count;
        //        rgrg[iSet].Add(merAndHlaToLengthWithLabel.Key, merAndHlaToLengthWithLabel.Value);
        //    }
        //    return rgrg;
        //}


        /// <summary>
        /// Reads a tab-separated table of labelled peptide/HLA examples and returns those
        /// of the requested length whose source overlaps the requested dataset.
        /// </summary>
        /// <param name="hlaFactory">Factory used to turn the "hla" column into an <c>Hla</c>.</param>
        /// <param name="eLength">Required peptide length; rows of any other length are skipped.</param>
        /// <param name="datasetName">Names the result and, via <c>CreateSourceSet</c>, the wanted sources.</param>
        /// <param name="fileOrResourceName">Table to read; passed to <c>Predictor.TabFileTable</c>.</param>
        /// <returns>Learning data mapping each (peptide, HLA) pair to true when its label is "1".</returns>
        public static EpitopeLearningData GetDbWhole(HlaFactory hlaFactory, int eLength, string datasetName, string fileOrResourceName)
        {
            Set <string>        requestedSources = CreateSourceSet(datasetName);
            EpitopeLearningData learningData     = new EpitopeLearningData(datasetName);

            foreach (Dictionary <string, string> row in Predictor.TabFileTable(fileOrResourceName, "peptide	hla	source	label", false))
            {
                string peptide               = row["peptide"];
                bool   peptideIsLegal        = Biology.GetInstance().LegalPeptide(peptide);
                string illegalPeptideMessage = string.Format("Peptide, '{0}', contains illegal char.", peptide);
                SpecialFunctions.CheckCondition(peptideIsLegal, illegalPeptideMessage);

                // Skip rows of another length, then rows whose sources share nothing with the
                // requested ones. The source column is only read when the length matches.
                if (peptide.Length != eLength || CreateSourceSet(row["source"]).IntersectionIsEmpty(requestedSources))
                {
                    continue;
                }

                Pair <string, Hla> peptideAndHla = new Pair <string, Hla>(peptide, hlaFactory.GetGroundInstance(row["hla"]));

                // Only "0" and "1" are accepted as labels; "1" marks a positive example.
                string label      = row["label"];
                bool   isPositive = (label == "1");
                SpecialFunctions.CheckCondition(isPositive || label == "0", string.Format("Warning: Epitope example {0} has unknown label {1} and will be ignored.", peptideAndHla, label));

                learningData[peptideAndHla] = isPositive;
            }

            return learningData;
        }
        /// <summary>
        /// Builds, for every amino acid key except "STOP" and "DELETE", the list of
        /// property names whose bit is set for that amino acid.
        /// </summary>
        /// <returns>A dictionary from amino acid key to its list of property names.</returns>
        public static Dictionary <string, List <string> > CreateAAToPropList()
        {
            Dictionary <string, List <string> > propertiesByAA = new Dictionary <string, List <string> >();

            foreach (string aa in Biology.GetInstance().AminoAcidCollection.Keys)
            {
                // "STOP" and "DELETE" get no entry in the result.
                if (aa != "STOP" && aa != "DELETE")
                {
                    KmerProperties kmerProperties = KmerProperties.GetInstance();
                    bool[]         bits           = kmerProperties.AABits[aa];
                    List <string>  propertyNames  = new List <string>();

                    // Bit i being set means the amino acid has property AAPropertyCollection[i].
                    int propertyIndex = 0;
                    foreach (bool isSet in bits)
                    {
                        if (isSet)
                        {
                            propertyNames.Add(kmerProperties.AAPropertyCollection[propertyIndex]);
                        }
                        ++propertyIndex;
                    }

                    propertiesByAA.Add(aa, propertyNames);
                }
            }

            return propertiesByAA;
        }
        /// <summary>
        /// Loads the amino acid similarity table from the "SimilarityOfAminoAcids.txt"
        /// resource, validates it, and records every listed pair through <c>AddPair</c>.
        /// </summary>
        /// <remarks>
        /// Lines starting with "//" are skipped. Every other line must have exactly three
        /// space-separated parts; the first character of the first part is the heading
        /// amino acid. Headings must be strictly increasing, each line must mention 20
        /// distinct amino acids across its parts, and 20 headings must be seen in total.
        /// Violations are reported through <c>SpecialFunctions.CheckCondition</c>.
        /// </remarks>
        private void ReadFile()
        {
            // Start with empty forward/backward tables for each conservation level.
            for (HowConsevered howConsevered = HowConsevered.Conserved; howConsevered <= HowConsevered.SemiConserved; ++howConsevered)
            {
                HowConseveredToForward[(int)howConsevered]  = new SortedList();
                HowConseveredToBackward[(int)howConsevered] = new SortedList();
            }

            string inputFileName = @"SimilarityOfAminoAcids.txt";

            // Used as a set (values are always null): headings seen so far.
            SortedList rgHeadings = new SortedList();

            using (StreamReader streamreaderInputFile = Predictor.OpenResource(inputFileName))
            {
                string sLine;
                char   cPrevHeading = '\0';
                while (null != (sLine = streamreaderInputFile.ReadLine()))
                {
                    // Comment lines; ordinal comparison keeps the test culture-independent.
                    if (sLine.StartsWith("//", System.StringComparison.Ordinal))
                    {
                        continue;
                    }

                    //There must be a line for every amino acid and they must be in alpha order
                    string[] tableParts = sLine.Split(' ');                     //!!!const
                    SpecialFunctions.CheckCondition(tableParts.Length == 3);    //!!!raise error

                    // A leading space yields an empty first part. Validate it in every build
                    // configuration so the indexing below cannot run past the end of the string.
                    SpecialFunctions.CheckCondition(tableParts[0].Length > 0, string.Format("Line '{0}' of {1} must start with an amino acid letter.", sLine, inputFileName));
                    char cHeading = tableParts[0][0];
                    SpecialFunctions.CheckCondition(cPrevHeading < cHeading);                                                       //!!!raise error
                    cPrevHeading = cHeading;
                    SpecialFunctions.CheckCondition(Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter.ContainsKey(cHeading)); //!!!raise error
                    SpecialFunctions.CheckCondition(!rgHeadings.ContainsKey(cHeading));                                             //!!!raise error
                    rgHeadings.Add(cHeading, null);

                    //We must see every amino acid in the line;
                    // Used as a set (values are always null): amino acids seen on this line.
                    SortedList rgInLine = new SortedList();
                    foreach (string part in tableParts)
                    {
                        foreach (char cAA in part)
                        {
                            SpecialFunctions.CheckCondition(Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter.ContainsKey(cAA)); //!!!raise error
                            SpecialFunctions.CheckCondition(!rgInLine.ContainsKey(cAA));                                               //!!!raise error
                            rgInLine.Add(cAA, null);
                        }
                    }
                    SpecialFunctions.CheckCondition(rgInLine.Count == 20);                     //!!!raise error

                    // The parts are indexed by the numeric value of HowConsevered.
                    // NOTE(review): presumably Conserved == 1 and SemiConserved == 2, so that
                    // part 0 is the heading column; confirm against the enum declaration.
                    // Conserved pairs are also registered as semi-conserved.
                    foreach (char cAA in tableParts[(int)HowConsevered.Conserved])
                    {
                        AddPair(cHeading, cAA, HowConsevered.Conserved);
                        AddPair(cHeading, cAA, HowConsevered.SemiConserved);
                    }

                    foreach (char cAA in tableParts[(int)HowConsevered.SemiConserved])
                    {
                        AddPair(cHeading, cAA, HowConsevered.SemiConserved);
                    }
                }
            }
            SpecialFunctions.CheckCondition(rgHeadings.Count == 20);            //!!!raise error
        }