/// <summary>
        /// Enumerates the assignments obtained from the current champion assignment by replacing one
        /// of its HLAs with one switchable HLA that is not already in the assignment.
        /// HLAs contained in the known-HLA set are never replaced.
        /// </summary>
        /// <remarks>
        /// A single working set is mutated and reused across all yields; each yielded value is built
        /// from it with TrueCollection.GetInstance.
        /// NOTE(review): whether GetInstance copies the set is not visible here - confirm before
        /// holding on to yielded values across iterations.
        /// </remarks>
        /// <returns>A lazily evaluated sequence of one-for-one swap neighbours of the champion.</returns>
        override public IEnumerable <TrueCollection> Collection()
        {
            Set <Hla> workingSet = CreateAssignmentAsSet(QmmrModelOnePeptide.BestHlaAssignmentSoFar.Champ);

            foreach (Hla hlaToSwapOut in QmmrModelOnePeptide.BestHlaAssignmentSoFar.Champ)
            {
                bool isKnownHla = QmmrModelOnePeptide.QmrrModelMissingAssignment.KnownHlaSet.Contains(hlaToSwapOut);
                if (!isKnownHla)
                {
                    // Take the HLA out, try every candidate in its place, then put it back.
                    workingSet.Remove(hlaToSwapOut);
                    foreach (Hla hlaToSwapIn in QmmrModelOnePeptide.QmrrModelMissingAssignment.SwitchableHlasOfRespondingPatients)
                    {
                        if (workingSet.Contains(hlaToSwapIn))
                        {
                            continue;
                        }

                        workingSet.AddNew(hlaToSwapIn);
                        // real assert - a swap must not change the size of the assignment
                        Debug.Assert(workingSet.Count == QmmrModelOnePeptide.BestHlaAssignmentSoFar.Champ.Count);
                        yield return(TrueCollection.GetInstance(workingSet));

                        workingSet.Remove(hlaToSwapIn);
                    }
                    workingSet.AddNew(hlaToSwapOut);
                    // real assert - the working set is back to the size of the champion
                    Debug.Assert(workingSet.Count == QmmrModelOnePeptide.BestHlaAssignmentSoFar.Champ.Count);
                }
            }
        }
        /// <summary>
        /// For every epitope position of <paramref name="nec"/>, adds the requested per-position
        /// features to <paramref name="featureSet"/>: the amino-acid feature (IsAA) and/or one
        /// chemical-property feature (HasAAProp) per property of the residue, plus the conjunction
        /// (And) of each of those with every feature in <paramref name="hlaishFeatureSet"/>.
        /// </summary>
        /// <param name="includeChemicalProperties">When true, HasAAProp features and their conjunctions are generated.</param>
        /// <param name="includeAAFeatures">When true, IsAA features and their conjunctions are generated.</param>
        /// <param name="substractChemAACrissCrossFeatures">
        /// When true, an IsAA feature is not combined with a HasAAProp hla-ish feature, and a
        /// HasAAProp feature is not combined with an IsAA hla-ish feature.
        /// </param>
        /// <param name="nec">Supplies the epitope (nec.E) whose positions are walked.</param>
        /// <param name="necAndHla">The example every generated feature is asserted (debug builds only) to evaluate to true on.</param>
        /// <param name="hlaishFeatureSet">Features that each per-position feature is conjoined with.</param>
        /// <param name="featureSet">Receives all generated features.</param>
        private static void AddEiFeatures(bool includeChemicalProperties, bool includeAAFeatures, bool substractChemAACrissCrossFeatures, NEC nec, Pair <NEC, Hla> necAndHla, Set <IHashableFeature> hlaishFeatureSet, Set <IHashableFeature> featureSet)
        {
            for (int epitopeIndex = 0; epitopeIndex < nec.E.Length; ++epitopeIndex)
            {
                // The position feature is created from the index shifted by one.
                IHashableFeature positionFeature = E.GetInstance(epitopeIndex + 1);
                string           residue         = GetAminoAcidFromEpitopePosition(nec, epitopeIndex);

                if (includeAAFeatures)
                {
                    IsAA isAAFeature = IsAA.GetInstance(residue, positionFeature);
                    featureSet.AddNew(isAAFeature);
                    Debug.Assert((bool)isAAFeature.Evaluate(necAndHla)); // real assert - must only generate true features

                    foreach (IHashableFeature hlaish in hlaishFeatureSet)
                    {
                        bool isCrissCross = substractChemAACrissCrossFeatures && hlaish is HasAAProp;
                        if (!isCrissCross)
                        {
                            And conjunction = And.GetInstance(hlaish, isAAFeature);
                            featureSet.AddNew(conjunction);
                            Debug.Assert((bool)conjunction.Evaluate(necAndHla)); // real assert - must only generate true features
                        }
                    }
                }

                if (includeChemicalProperties)
                {
                    // Same as above, with the amino acid replaced by each of its chemical properties.
                    foreach (string propertyName in VirusCount.KmerProperties.AaToPropList[residue])
                    {
                        HasAAProp hasPropFeature = HasAAProp.GetInstance(propertyName, positionFeature);
                        featureSet.AddNew(hasPropFeature);
                        Debug.Assert((bool)hasPropFeature.Evaluate(necAndHla)); // real assert - must only generate true features

                        foreach (IHashableFeature hlaish in hlaishFeatureSet)
                        {
                            bool isCrissCross = substractChemAACrissCrossFeatures && hlaish is IsAA;
                            if (!isCrissCross)
                            {
                                And conjunction = And.GetInstance(hlaish, hasPropFeature);
                                featureSet.AddNew(conjunction);
                                Debug.Assert((bool)conjunction.Evaluate(necAndHla)); // real assert - must only generate true features
                            }
                        }
                    }
                }
            }
        }
예제 #3
0
        ////!!!same logic is elsewhere. Look for common heading
        //private static Dictionary<string, Dictionary<string, double>> LoadReactTableUnfiltered(TextReader patientReader, out Set<string> cidsInReactTable)
        //{
        //    cidsInReactTable = Set<string>.GetInstance();

        //    Dictionary<string, Dictionary<string, double>> reactTable = new Dictionary<string, Dictionary<string, double>>();
        //    string header = "peptide	cid	magnitude";
        //    foreach (Dictionary<string, string> row in SpecialFunctions.TabFileTable(patientReader, header, false))
        //    {
        //        string cid = row["cid"];
        //        cidsInReactTable.AddNewOrOld(cid);
        //        string peptide = row["peptide"];
        //        double amount = double.Parse(row["magnitude"]);

        //        Dictionary<string, double> peptideToAmount = SpecialFunctions.GetValueOrDefault(reactTable, peptide);
        //        peptideToAmount.Add(cid, amount);
        //    }
        //    return reactTable;
        //}

        /// <summary>
        /// Reads (peptide, knownHLA) rows from <paramref name="datareader"/> and groups the HLAs by
        /// peptide. Rows whose trimmed peptide or HLA name is empty are skipped (but still counted in
        /// the record total written to the console).
        /// </summary>
        /// <param name="datareader">Reader exposing string columns named "peptide" and "knownHLA".</param>
        /// <param name="caseName">Only used to build the message reported when an HLA is duplicated for a peptide.</param>
        /// <returns>A table from peptide to the set of HLAs listed for it.</returns>
        private Dictionary <string, Set <Hla> > LoadKnownTable(DbDataReader datareader, string caseName)
        {
            //!!!code appears elsewhere. Look for common header
            Dictionary <string, Set <Hla> > peptideToKnownHlas = new Dictionary <string, Set <Hla> >();

            int peptideOrdinal = datareader.GetOrdinal("peptide");
            int hlaOrdinal     = datareader.GetOrdinal("knownHLA");

            int recordCount = 0;

            while (datareader.Read())
            {
                recordCount++;

                string peptide = datareader.GetString(peptideOrdinal).Trim();
                string hlaName = datareader.GetString(hlaOrdinal).Trim();

                if (peptide.Length != 0 && hlaName.Length != 0)
                {
                    // NOTE(review): relies on GetValueOrDefault returning the set stored in the table
                    // (creating it when missing) - confirm against SpecialFunctions.
                    Set <Hla> hlasForPeptide = SpecialFunctions.GetValueOrDefault(peptideToKnownHlas, peptide);
                    Hla       groundHla      = HlaFactory.GetGroundInstance(hlaName);
                    SpecialFunctions.CheckCondition(!hlasForPeptide.Contains(groundHla), string.Format("Hla {0} is duplicated in {1}known.txt, for peptide {2}", groundHla, caseName, peptide));
                    hlasForPeptide.AddNew(groundHla); //!!!const
                }
            }

            Console.WriteLine("{0}: number of records read: {1}", "Known table", recordCount);

            return(peptideToKnownHlas);
        }
예제 #4
0
        /// <summary>
        /// Maps a single nucleotide character (case-insensitively) to the set of bases it stands for.
        /// An unambiguous code yields a one-element set; an ambiguous code yields one element per
        /// character of its choices string.
        /// </summary>
        /// <param name="ch">The character to interpret; it is upper-cased before lookup.</param>
        /// <returns>The set of bases represented by <paramref name="ch"/>.</returns>
        /// <exception cref="ArgumentException">The character is neither an unambiguous nor an ambiguous code.</exception>
        protected override MBT.Escience.Set <char> GetResidueSetFromCharAndCheckThatValid(char ch)
        {
            char    upperCased = char.ToUpper(ch);
            Biology biology    = Biology.GetInstance();

            if (biology.Unambiguous1LetterNucCodes.Contains(upperCased))
            {
                return(new Set <char>(upperCased));
            }

            if (!biology.Ambiguous1LetterNucCodeToChoices.ContainsKey(upperCased))
            {
                throw new ArgumentException("Do not know dna base " + upperCased);
            }

            // Expand the ambiguity code into its individual choices.
            Set <char> choices = new Set <char>();
            foreach (char choice in biology.Ambiguous1LetterNucCodeToChoices[upperCased])
            {
                choices.AddNew(choice);
            }

            return(choices);
        }
예제 #5
0
        /// <summary>
        /// Walks every non-null entry of EpitopeLearningDataList and yields the (mer, normalized HLA)
        /// pair once for each (mer, original HLA) pair that has not been seen before.
        /// </summary>
        /// <remarks>
        /// De-duplication is on (mer, original HLA) only, so the same (mer, normalized HLA) pair can
        /// be yielded more than once. The boolean label stored with each original HLA is not read here.
        /// </remarks>
        /// <returns>A lazily evaluated sequence of (mer, normalized HLA) pairs.</returns>
        public IEnumerable <Pair <string, Hla> > PositiveExampleEnumeration()
        {
            //Don't repeat if the Mer/OriHla has already been seen, but DO repeat if the mer/HlaNorm has appeared before
            Set <Pair <string, Hla> > seenMerAndHlaOri = new Set <Pair <string, Hla> >();

            foreach (EpitopeLearningDataDupHlaOK learningData in EpitopeLearningDataList)
            {
                if (null == learningData)
                {
                    continue;
                }

                foreach (KeyValuePair <string, Dictionary <Hla, Dictionary <Hla, bool> > > merEntry in learningData)
                {
                    string mer = merEntry.Key;
                    foreach (KeyValuePair <Hla, Dictionary <Hla, bool> > hlaNormEntry in merEntry.Value)
                    {
                        // One pair instance per normalized HLA; it is yielded once per unseen original HLA.
                        Pair <string, Hla> merAndHlaNorm = new Pair <string, Hla>(mer, hlaNormEntry.Key);
                        foreach (KeyValuePair <Hla, bool> hlaOriEntry in hlaNormEntry.Value)
                        {
                            Pair <string, Hla> merAndHlaOri = new Pair <string, Hla>(mer, hlaOriEntry.Key);
                            if (seenMerAndHlaOri.Contains(merAndHlaOri))
                            {
                                continue;
                            }

                            seenMerAndHlaOri.AddNew(merAndHlaOri);
                            yield return(merAndHlaNorm);
                        }
                    }
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Yields, once each, the string form of every sub-sequence of length
        /// <paramref name="merLength"/> found in the given sequences that is not ambiguous and does
        /// not contain "*".
        /// </summary>
        /// <param name="merLength">Length passed to SubSeqEnumeration.</param>
        /// <param name="caseToCompressedAASeq">Only the values (the sequences) are used; the keys are ignored.</param>
        /// <returns>A lazily evaluated sequence of distinct mers in first-encounter order.</returns>
        private IEnumerable <string> EveryUnambiguousStopFreeMer(int merLength, Dictionary <string, AASeq> caseToCompressedAASeq)
        {
            Set <string> alreadyYielded = Set <string> .GetInstance();

            foreach (AASeq sequence in caseToCompressedAASeq.Values)
            {
                foreach (AASeq candidate in sequence.SubSeqEnumeration(merLength))
                {
                    if (!candidate.Ambiguous)
                    {
                        string candidateText = candidate.ToString();
                        if (!candidateText.Contains("*"))
                        {
                            // real assert - an unambiguous mer is expected to contain no gap or missing markers
                            Debug.Assert(!candidateText.Contains("-") && !candidateText.Contains("?"));
                            if (!alreadyYielded.Contains(candidateText))
                            {
                                alreadyYielded.AddNew(candidateText);
                                yield return(candidateText);
                            }
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Streams a tab-delimited sparse file with columns var, cid and val, yielding one
        /// (variable, caseId-to-value) pair per run of consecutive rows that share the same variable.
        /// </summary>
        /// <remarks>
        /// The file must be grouped by variable: a variable that starts a second run, or a case id
        /// repeated within one variable, is reported through SpecialFunctions.CheckCondition.
        /// </remarks>
        /// <param name="sparseFileName">Path of the file to read; also quoted in error messages.</param>
        /// <returns>A lazily evaluated sequence with one entry per variable, in file order.</returns>
        static public IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > LoadSparseFileEnumeration(string sparseFileName)
        {
            Set <string> finishedOrOpenVariables = new Set <string>();

            // The group currently being filled; null until the first row has been read.
            Pair <string, Dictionary <string, SufficientStatistics> > currentGroup = null;

            foreach (Dictionary <string, string> row in SpecialFunctions.TabFileTable(sparseFileName, "var\tcid\tval", false))
            {
                string variable = row["var"];

                bool startsNewGroup = currentGroup == null || currentGroup.First != variable;
                if (startsNewGroup)
                {
                    // Hand out the completed group before opening the next one.
                    if (currentGroup != null)
                    {
                        yield return(currentGroup);
                    }

                    SpecialFunctions.CheckCondition(!finishedOrOpenVariables.Contains(variable), string.Format("Input file ({0}) is not grouped by variable. Variable {1} appears in multiple places", sparseFileName, variable));
                    finishedOrOpenVariables.AddNew(variable);
                    currentGroup = new Pair <string, Dictionary <string, SufficientStatistics> >(variable, new Dictionary <string, SufficientStatistics>());
                }

                string caseId = row["cid"];
                SufficientStatistics parsedValue = SufficientStatistics.Parse(row["val"]);

                SpecialFunctions.CheckCondition(!currentGroup.Second.ContainsKey(caseId), string.Format("Input file ({0}) for var {1} contains multiple entries for caseId {2}", sparseFileName, variable, caseId));
                currentGroup.Second.Add(caseId, parsedValue);
            }

            // The last group has no following row to trigger its yield.
            if (currentGroup != null)
            {
                yield return(currentGroup);
            }
        }
예제 #8
0
        /// <summary>
        /// Parses a sequence string into a list with one character set per position. A run enclosed
        /// in '{' ... '}' becomes a single position holding all enclosed characters; '?' and '-'
        /// become single-character positions; any other character is validated through
        /// GetResidueSetFromCharAndCheckThatValid.
        /// </summary>
        /// <remarks>
        /// Violations (nested or unbalanced braces, an empty set, a blank, '?' or '-' inside a set)
        /// are reported through Helper.CheckCondition.
        /// </remarks>
        /// <param name="aaSeqAsString">The sequence text to parse.</param>
        /// <returns>The per-position character sets, in input order.</returns>
        public List <Set <char> > CreateCharSetList(string aaSeqAsString)
        {
            List <Set <char> > positions = new List <Set <char> >();

            // Non-null only while the parser is between a '{' and its matching '}'.
            Set <char> openSet = null;

            foreach (char ch in aaSeqAsString)
            {
                if (ch == '{')
                {
                    Helper.CheckCondition(openSet == null, "Nested '{''s are not allowed in aaSeq strings");
                    openSet = new Set <char>();
                    positions.Add(openSet);
                }
                else if (ch == '}')
                {
                    Helper.CheckCondition(openSet != null, "'}' must follow a '{' in aaSeq strings");
                    Helper.CheckCondition(openSet.Count > 0, "Empty sets not allow in aaSeq strings");
                    openSet = null;
                }
                else if (ch == ' ')
                {
                    Helper.CheckCondition(false, "Sequences should not contain blanks. Use '?' for missing.");
                }
                else if (ch == '?' || ch == '-')
                {
                    Helper.CheckCondition(openSet == null, string.Format("'{0}' must not appear in sets", ch));
                    positions.Add(Set <char> .GetInstance(ch));
                }
                else
                {
                    Set <char> residueSet = GetResidueSetFromCharAndCheckThatValid(ch);

                    if (openSet != null)
                    {
                        // NOTE(review): inside braces the raw character is stored, not the contents of
                        // residueSet (which is used here only for validation) - confirm this is intended.
                        openSet.AddNew(ch);
                    }
                    else
                    {
                        positions.Add(residueSet);
                    }
                }
            }
            Helper.CheckCondition(openSet == null, "Missing '}' in aaSeq string");
            return(positions);
        }
        /// <summary>
        /// Builds a new set containing every HLA enumerated from <paramref name="startingAssignment"/>.
        /// </summary>
        /// <param name="startingAssignment">The assignment whose HLAs are copied.</param>
        /// <returns>A freshly created set holding the assignment's HLAs.</returns>
        internal static Set <Hla> CreateAssignmentAsSet(TrueCollection startingAssignment)
        {
            Set <Hla> copy = Set <Hla> .GetInstance();

            foreach (Hla member in startingAssignment)
            {
                copy.AddNew(member);
            }

            return(copy);
        }
예제 #10
0
        /// <summary>
        /// Splits a dataset name on '+' and ',' and returns the lower-cased parts as a set.
        /// </summary>
        /// <param name="datasetName">Source names joined by '+' or ','.</param>
        /// <returns>The set of lower-cased source names.</returns>
        private static Set <string> CreateSourceSet(string datasetName)
        {
            char[]       separators = { '+', ',' };
            Set <string> sources    = Set <string> .GetInstance();

            foreach (string part in datasetName.Split(separators))
            {
                string lowered = part.ToLower();
                sources.AddNew(lowered);
            }

            return(sources);
        }
        /// <summary>
        /// Rebuilds the four static "interesting position" tables from the given HLA sequences.
        /// For each position, residues and per-property true/false values are counted across all
        /// sequences; a position is recorded when SumOfNonMax of its residue counts reaches the
        /// threshold from ReadSumOfNonMaxMin, and a property is recorded for that position when
        /// SumOfNonMax of its true/false counts reaches the same threshold.
        /// </summary>
        /// <remarks>
        /// Positions are walked over the length of the first sequence and every sequence is indexed
        /// at each of them, so all sequences must be at least that long.
        /// NOTE(review): relies on SpecialFunctions.GetValueOrDefault returning the stored entry
        /// (creating it when missing) - confirm against SpecialFunctions.
        /// </remarks>
        /// <param name="hlaNameToSequence">Only the values (the sequences) are used.</param>
        private static void SetUpInterestingHlaPositionsForAAFeaturesTable(Dictionary <string, string> hlaNameToSequence)
        {
            int    threshold         = ReadSumOfNonMaxMin();
            string referenceSequence = SpecialFunctions.First(hlaNameToSequence.Values);

            InterestingToAAExact                  = Set <HlaSeq> .GetInstance();
            InterestingToAAAsPrefix               = Set <HlaSeq> .GetInstance();
            InterestingHlaSeqToPropertiesExact    = new Dictionary <HlaSeq, Set <string> >();
            InterestingHlaSeqToPropertiesAsPrefix = new Dictionary <HlaSeq, Set <string> >();

            for (int pos0 = 0; pos0 < referenceSequence.Length; ++pos0)
            {
                // Tally, at this position, each residue and each property's true/false split.
                Dictionary <char, int> residueCounts = new Dictionary <char, int>();
                Dictionary <string, Dictionary <bool, int> > propertySplits = new Dictionary <string, Dictionary <bool, int> >();

                foreach (string hlaSequence in hlaNameToSequence.Values)
                {
                    char residue = hlaSequence[pos0];
                    residueCounts[residue] = SpecialFunctions.GetValueOrDefault(residueCounts, residue) + 1;

                    foreach (KeyValuePair <string, Set <char> > propertyEntry in VirusCount.KmerProperties.GetInstance().PropertyToAACharSet)
                    {
                        bool hasProperty = propertyEntry.Value.Contains(residue);
                        Dictionary <bool, int> split = SpecialFunctions.GetValueOrDefault(propertySplits, propertyEntry.Key);
                        split[hasProperty] = 1 + SpecialFunctions.GetValueOrDefault(split, hasProperty);
                    }
                }

                if (SumOfNonMax(residueCounts.Values) >= threshold)
                {
                    // HlaSeq instances are created from the position shifted by one.
                    HlaSeq exactSeq = HlaSeq.GetInstance(pos0 + 1, false);
                    InterestingToAAExact.AddNew(exactSeq);

                    HlaSeq prefixSeq = HlaSeq.GetInstance(pos0 + 1, true);
                    InterestingToAAAsPrefix.AddNew(prefixSeq);

                    foreach (KeyValuePair <string, Dictionary <bool, int> > splitEntry in propertySplits)
                    {
                        if (SumOfNonMax(splitEntry.Value.Values) >= threshold)
                        {
                            Set <string> exactProperties = SpecialFunctions.GetValueOrDefault(InterestingHlaSeqToPropertiesExact, exactSeq);
                            exactProperties.AddNew(splitEntry.Key);
                            Set <string> prefixProperties = SpecialFunctions.GetValueOrDefault(InterestingHlaSeqToPropertiesAsPrefix, prefixSeq);
                            prefixProperties.AddNew(splitEntry.Key);
                        }
                    }
                }
            }
        }
 /// <summary>
 /// Toggles membership of <paramref name="hlaOfRepondingPatients"/> in
 /// <paramref name="assignmentAsSet"/>: removes it when present, adds it when absent.
 /// </summary>
 /// <param name="assignmentAsSet">The set to modify in place.</param>
 /// <param name="hlaOfRepondingPatients">The HLA whose membership is flipped.</param>
 private static void BitFlip(Set <Hla> assignmentAsSet, Hla hlaOfRepondingPatients)
 {
     bool isAbsent = !assignmentAsSet.Contains(hlaOfRepondingPatients);

     if (isAbsent)
     {
         assignmentAsSet.AddNew(hlaOfRepondingPatients);
     }
     else
     {
         assignmentAsSet.Remove(hlaOfRepondingPatients);
     }
 }
        /// <summary>
        /// Adds the IsHla feature for <paramref name="hla"/> to <paramref name="hlaishEnumeration"/>,
        /// unless HLA features are being subtracted.
        /// </summary>
        /// <param name="subtractHlaFeatures">When true, nothing is added.</param>
        /// <param name="hla">The HLA the feature is created for.</param>
        /// <param name="necAndHla">The example the new feature is asserted (debug builds only) to evaluate to true on.</param>
        /// <param name="hlaishEnumeration">Receives the feature; the reference itself is not reassigned here.</param>
        private static void CreateAndAddHlaFeature(bool subtractHlaFeatures, Hla hla, Pair <NEC, Hla> necAndHla, ref Set <IHashableFeature> hlaishEnumeration)
        {
            if (!subtractHlaFeatures)
            {
                IsHla isHlaFeature = IsHla.GetInstance(hla);

                hlaishEnumeration.AddNew(isHlaFeature);
                Debug.Assert((bool)isHlaFeature.Evaluate(necAndHla)); // real assert - must only generate true features
            }
        }
        /// <summary>
        /// Enumerates the assignments that differ from the current champion assignment in exactly
        /// one switchable HLA: each switchable HLA is removed if present, or added if absent.
        /// </summary>
        /// <remarks>
        /// A single working set is mutated and reused across all yields, and is restored after each
        /// one. NOTE(review): whether TrueCollection.GetInstance copies the set is not visible here -
        /// confirm before holding on to yielded values across iterations.
        /// </remarks>
        /// <returns>A lazily evaluated sequence of single-flip neighbours of the champion.</returns>
        override public IEnumerable <TrueCollection> Collection()
        {
            Set <Hla> workingSet = CreateAssignmentAsSet(QmmrModelOnePeptide.BestHlaAssignmentSoFar.Champ);

            foreach (Hla candidate in QmmrModelOnePeptide.QmrrModelMissingAssignment.SwitchableHlasOfRespondingPatients)
            {
                bool wasPresent = workingSet.Contains(candidate);

                // Flip the candidate's membership...
                if (wasPresent)
                {
                    workingSet.Remove(candidate);
                }
                else
                {
                    workingSet.AddNew(candidate);
                }

                yield return(TrueCollection.GetInstance(workingSet));

                // ...and flip it back so the next candidate starts from the champion again.
                if (wasPresent)
                {
                    workingSet.AddNew(candidate);
                }
                else
                {
                    workingSet.Remove(candidate);
                }
            }
        }
예제 #15
0
 /// <summary>
 /// Resets _knownTable and, when the "useKnownList" optimization parameter equals 1, fills it
 /// from the tab-delimited file KnownFileName (columns peptide and knownHLA), grouping the
 /// HLAs by peptide.
 /// </summary>
 /// <remarks>
 /// NOTE(review): relies on SpecialFunctions.GetValueOrDefault returning the set stored in the
 /// table (creating it when missing) - confirm against SpecialFunctions.
 /// </remarks>
 private void ReadKnownTable()
 {
     _knownTable = new Dictionary <string, Set <Hla> >();

     // The table stays empty unless the known list is switched on.
     if (OptimizationParameterList["useKnownList"].Value != 1)
     {
         return;
     }

     foreach (Dictionary <string, string> row in SpecialFunctions.TabFileTable(KnownFileName, "peptide	knownHLA", false))    //!!!const
     {
         string    peptide        = row["peptide"];                          //!!!const
         Hla       groundHla      = HlaFactory.GetGroundInstance(row["knownHLA"]);   //!!!const
         Set <Hla> hlasForPeptide = SpecialFunctions.GetValueOrDefault(_knownTable, peptide);
         hlasForPeptide.AddNew(groundHla);
     }
 }
예제 #16
0
        /// <summary>
        /// Returns the input sequences with later duplicates dropped. Two entries are duplicates
        /// when their Sequence strings are equal after lower-casing; the first occurrence is kept
        /// and the input order is preserved.
        /// </summary>
        /// <param name="seqs">The sequences to filter; the list itself is not modified.</param>
        /// <returns>A new list holding the first occurrence of each distinct sequence.</returns>
        static public List <NamedSequence> RemoveDuplicateSeqs(List <NamedSequence> seqs)
        {
            Set <string>         seenLowercased = new Set <string>();
            List <NamedSequence> distinct       = new List <NamedSequence>(seqs.Count);

            foreach (NamedSequence candidate in seqs)
            {
                // Lower-case once and use the result both for the lookup and for the insert.
                string key = candidate.Sequence.ToLower();
                if (seenLowercased.Contains(key))
                {
                    continue;
                }

                distinct.Add(candidate);
                seenLowercased.AddNew(key);
            }

            return(distinct);
        }
        /// <summary>
        /// Returns the one-letter codes of all amino acids whose property bit for
        /// <paramref name="propertyName"/> is set in KmerProperties.AABits.
        /// </summary>
        /// <param name="propertyName">A key of KmerProperties.PropertyToNumber.</param>
        /// <returns>The set of one-letter amino-acid codes that have the property.</returns>
        public static Set <char> PropertyNameToAASet(string propertyName)
        {
            KmerProperties properties    = KmerProperties.GetInstance();
            int            propertyIndex = properties.PropertyToNumber[propertyName];
            Set <char>     matchingAAs   = Set <char> .GetInstance();

            foreach (KeyValuePair <string, bool[]> entry in properties.AABits)
            {
                bool hasProperty = entry.Value[propertyIndex];
                if (!hasProperty)
                {
                    continue;
                }

                // AABits is keyed by three-letter abbreviation; the result uses one-letter codes.
                char oneLetterCode = Biology.GetInstance().ThreeLetterAminoAcidAbbrevTo1Letter[entry.Key];
                matchingAAs.AddNew(oneLetterCode);
            }

            return(matchingAAs);
        }
예제 #18
0
        /// <summary>
        /// For every position of the first sequence, builds a tab-joined key from the first and third
        /// '@'-separated parts of the position's original name and, if that key has not been seen yet,
        /// records it in <paramref name="seenTriple"/> and appends it to the list kept for its protein.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the key is called a "triple" but is built from two fields (protein and
        /// hxb2 position) - confirm the naming. Also relies on GetValueOrDefault returning the list
        /// stored in the dictionary (creating it when missing) - confirm.
        /// </remarks>
        /// <param name="seenTriple">Keys recorded so far; updated in place.</param>
        /// <param name="proteinToTripleList">Per-protein key lists; updated in place.</param>
        public void TriplesAppend(ref Set <string> seenTriple, ref Dictionary <string, List <string> > proteinToTripleList)
        {
            AASeq firstSeq = GetFirstAASeq();

            for (int pos0 = 0; pos0 < (int)SequenceLengthOrNull; ++pos0)
            {
                // The position name is split on '@'; parts 0 and 2 are the ones used.
                string[] nameParts = firstSeq.OriginalAA1Position(pos0).Split('@');
                string   protein   = nameParts[0];
                string   hxb2Pos   = nameParts[2];

                string key = Helper.CreateTabString(protein, hxb2Pos);
                if (seenTriple.Contains(key))
                {
                    continue;
                }

                seenTriple.AddNew(key);
                List <string> keysForProtein = proteinToTripleList.GetValueOrDefault(protein);
                keysForProtein.Add(key);
            }
        }
        /// <summary>
        /// Adds the supertype feature for <paramref name="hla"/>, looked up from the given supertype
        /// table, to <paramref name="hlaishEnumeration"/>. Nothing is added when supertype features
        /// are being subtracted or when the table source is SupertypeTableSource.None.
        /// </summary>
        /// <param name="supertypeTableSource">Identifies the supertype table; SupertypeTableSource.None means there is no table.</param>
        /// <param name="subtractSupertypeFeatures">When true, nothing is added.</param>
        /// <param name="hla">The HLA the feature is created for.</param>
        /// <param name="necAndHla">The example the new feature is asserted (debug builds only) to evaluate to true on.</param>
        /// <param name="hlaishEnumeration">Receives the feature; the reference itself is not reassigned here.</param>
        /// <param name="assembly">Passed through to IsSupertypeFromFile.GetInstance.</param>
        /// <param name="resourcePrefix">Passed through to IsSupertypeFromFile.GetInstance.</param>
        private static void CreateAndAddFeatureSupertype(string supertypeTableSource, bool subtractSupertypeFeatures, Hla hla, Pair <NEC, Hla> necAndHla, ref Set <IHashableFeature> hlaishEnumeration, Assembly assembly, string resourcePrefix)
        {
            if (subtractSupertypeFeatures)
            {
                return;
            }

            IHashableFeature featureSupertype;

            switch (supertypeTableSource)
            {
            case SupertypeTableSource.None:
                // No table means no feature. Previously a null feature fell through to the code
                // below, where it was added to the set and dereferenced in the assert.
                return;

            default:
                featureSupertype = (IHashableFeature)IsSupertypeFromFile.GetInstance(hla, supertypeTableSource, assembly, resourcePrefix);
                break;
            }

            hlaishEnumeration.AddNew(featureSupertype);
            Debug.Assert((bool)((Feature)featureSupertype).Evaluate(necAndHla)); // real assert - must only generate true features
        }
예제 #20
0
        /// <summary>
        /// Parses an amino-acid sequence string into a list with one character set per position.
        /// A run enclosed in '{' ... '}' becomes a single position holding all enclosed characters;
        /// '?' and '-' become single-character positions; any other character must be a one-letter
        /// amino-acid code that Biology reports as a known amino acid.
        /// </summary>
        /// <remarks>
        /// Violations (nested or unbalanced braces, an empty set, a blank, '?' or '-' inside a set,
        /// an unrecognised character) are reported through SpecialFunctions.CheckCondition.
        /// </remarks>
        /// <param name="aaSeqAsString">The sequence text to parse.</param>
        /// <returns>The per-position character sets, in input order.</returns>
        private static List <Set <char> > CreateSequence(string aaSeqAsString)
        {
            List <Set <char> > positions = new List <Set <char> >();

            // Non-null only while the parser is between a '{' and its matching '}'.
            Set <char> openSet = null;

            foreach (char ch in aaSeqAsString)
            {
                if (ch == '{')
                {
                    SpecialFunctions.CheckCondition(openSet == null, "Nested '{''s are not allowed in aaSeq strings");
                    openSet = new Set <char>();
                    positions.Add(openSet);
                }
                else if (ch == '}')
                {
                    SpecialFunctions.CheckCondition(openSet != null, "'}' must follow a '{' in aaSeq strings");
                    SpecialFunctions.CheckCondition(openSet.Count > 0, "Empty sets not allow in aaSeq strings");
                    openSet = null;
                }
                else if (ch == ' ')
                {
                    SpecialFunctions.CheckCondition(false, "Sequences should not contain blanks. Use '?' for missing.");
                }
                else if (ch == '?' || ch == '-')
                {
                    SpecialFunctions.CheckCondition(openSet == null, string.Format("'{0}' must not appear in sets", ch));
                    positions.Add(Set <char> .GetInstance(ch));
                }
                else
                {
                    //!!!move this to Biology?
                    // First the character must map to a three-letter abbreviation, then that
                    // abbreviation must be a known amino acid.
                    SpecialFunctions.CheckCondition(Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter.ContainsKey(ch),
                                                    string.Format("The character {0} is not an amino acid", ch));
                    string threeLetterCode = Biology.GetInstance().OneLetterAminoAcidAbbrevTo3Letter[ch];
                    SpecialFunctions.CheckCondition(Biology.GetInstance().KnownAminoAcid(threeLetterCode),
                                                    string.Format("The character {0} is not a standard amino acid", ch));

                    if (openSet != null)
                    {
                        openSet.AddNew(ch);
                    }
                    else
                    {
                        positions.Add(Set <char> .GetInstance(ch));
                    }
                }
            }
            SpecialFunctions.CheckCondition(openSet == null, "Missing '}' in aaSeq string");
            return(positions);
        }