Example #1
0
        //-----------------------------



        //-----------------------------------------------------------------------------------------------------



        /// <summary>
        /// Restricts the scan list of every protein in MyProteinList to the scans that are
        /// also present in AllSQTScans, removes the proteins left without any scan, and
        /// then calls RebuildPeptideListFromUpdatedProteinList().
        /// </summary>
        public void RebuildProteinsFromScans()
        {
            // Keep, for each protein, only the (distinct) scans that also appear in AllSQTScans.
            MyProteinList.AsParallel().ForAll(protein =>
            {
                protein.Scans = protein.Scans.Intersect(AllSQTScans).ToList();
            });

            // A protein with no remaining scans is dropped from the list.
            MyProteinList.RemoveAll(protein => protein.Scans.Count < 1);

            RebuildPeptideListFromUpdatedProteinList();
        }
Example #2
0
        /// <summary>
        /// Removes every protein whose locus starts with the given decoy tag and then
        /// calls RebuildScansFromModifiedProteinList().
        /// </summary>
        /// <param name="decoyTag">Prefix that marks a locus as a decoy entry.</param>
        /// <returns>The number of proteins removed from MyProteinList.</returns>
        public int RemoveDecoyProteins(string decoyTag)
        {
            // Loci and decoy tags are identifiers rather than linguistic text, so the prefix
            // test is ordinal: the result does not depend on the current culture.
            int removedCount = MyProteinList.RemoveAll(
                a => a.Locus.StartsWith(decoyTag, System.StringComparison.Ordinal));

            RebuildScansFromModifiedProteinList();
            return removedCount;
        }
Example #3
0
        /// <summary>
        /// Rebuilds MyPeptideList from the peptide results of the proteins in MyProteinList,
        /// keeping one PeptideResult per cleaned peptide sequence that is listed in
        /// AllCleanedPeptideSequences.
        /// </summary>
        private void RebuildPeptideListFromUpdatedProteinList()
        {
            Console.WriteLine("  Building peptide list from protein list");

            // The membership test below runs once per peptide of every protein; a hash set
            // keeps it O(1) instead of a linear scan of the whole sequence list each time.
            HashSet<string> cleanedSequences = new HashSet<string>(AllCleanedPeptideSequences);

            // Group all accepted peptide results by their cleaned sequence.
            Dictionary<string, List<PeptideResult>> peptidesBySequence =
                (from prot in MyProteinList.AsParallel()
                 from pep in prot.PeptideResults
                 where cleanedSequences.Contains(pep.CleanedPeptideSequence)
                 group pep by pep.CleanedPeptideSequence into g
                 select g).ToDictionary(g => g.Key, g => g.ToList());

            // Built in a local list and published at the end, so MyPeptideList is only
            // replaced once the whole rebuild has succeeded.
            List<PeptideResult> rebuiltPeptides = new List<PeptideResult>(peptidesBySequence.Count);

            foreach (KeyValuePair<string, List<PeptideResult>> kvp in peptidesBySequence)
            {
                List<PeptideResult> candidates = kvp.Value;

                //Patch pointed out by tiago balbuena to deal with peptides of same sequence but different flanking aa.
                bool hasDifferentFullSequences =
                    candidates.Count > 1 &&
                    candidates.Select(pr => pr.PeptideSequence).Distinct().Count() > 1;

                if (hasDifferentFullSequences)
                {
                    // Merge the scans of all variants into a single surrogate peptide result
                    // keyed by the cleaned sequence.
                    List<SQTScan> allScans = new List<SQTScan>();
                    foreach (PeptideResult pr in candidates)
                    {
                        allScans.AddRange(pr.MyScans);
                    }

                    rebuiltPeptides.Add(new PeptideResult(kvp.Key, allScans.Distinct().ToList()));
                }
                else
                {
                    // All entries share the same full sequence: the first one represents the group.
                    rebuiltPeptides.Add(candidates[0]);
                }
            }

            MyPeptideList = rebuiltPeptides;

            Console.WriteLine("  Done building peptide list");
        }
Example #4
0
        /// <summary>
        /// Applies the Bayesian Discriminant function at protein level: trains a two-class
        /// model (label 1 for proteins without the decoy tag, label -1 for proteins whose
        /// locus starts with myParams.LabeledDecoyTag), stores a score in every protein's
        /// BayesianScore, and removes the lowest-scoring proteins until the remaining list
        /// has a false discovery rate no greater than myParams.ProteinFDR.
        /// </summary>
        /// <exception cref="System.ArgumentException">
        /// Thrown when the trained model does not contain exactly two classes.
        /// </exception>
        public void BayesianCleaningAtProteinLevel()
        {
            int minNoExamplesPerClass = 5;

            // Decoy loci are recognised by an identifier prefix, so the comparison is
            // ordinal (culture-independent). Defined once so every step below agrees.
            Func<MyProtein, bool> isDecoy =
                p => p.Locus.StartsWith(myParams.LabeledDecoyTag, StringComparison.Ordinal);

            PatternTools.GaussianDiscriminant.Discriminant gd = new PatternTools.GaussianDiscriminant.Discriminant();
            List<int> dims = new List<int> { 0, 1, 2, 3, 4, 5 };

            int negativeClassExampleCounter = 0;

            // Build the training matrix: one labelled row per protein.
            foreach (MyProtein p in MyProteinList)
            {
                int label = 1;
                if (isDecoy(p))
                {
                    label = -1;
                    negativeClassExampleCounter++;
                }

                gd.MySparseMatrix.addRow(new sparseMatrixRow(label, dims, p.InputVector));
            }

            //We need to make sure everything is working properly in this new normalization
            //This greatly degrades the classifier!!! never use!!!!
            //gd.MySparseMatrix.NormalizeAllColumnsToRangeFrom0To1New();

            Console.WriteLine("Target examples for protein model = " + gd.MySparseMatrix.theMatrixInRows.FindAll(a => a.Lable == 1).Count);
            Console.WriteLine("Decoy examples for protein model = " + gd.MySparseMatrix.theMatrixInRows.FindAll(a => a.Lable == -1).Count);
            gd.Model(false, new List<int>(), minNoExamplesPerClass, true, false);

            if (gd.ClassLableClassPackageDic.Keys.Count != 2)
            {
                throw new System.ArgumentException("Not enough examples to generate protein classification model.  No available negative datapoints: " + negativeClassExampleCounter);
            }

            // Score every protein; each iteration writes only to its own protein.
            Parallel.ForEach(MyProteinList, r =>
            {
                //The result is ordered by class number
                var results = gd.Classify(r.InputVector.ToArray());
                r.BayesianScore = results[0].Score - results[1].Score;
            });

            // Highest score first, so candidates for removal sit at the end of the list.
            MyProteinList.Sort((a, b) => b.BayesianScore.CompareTo(a.BayesianScore));

            // Now lets do the filtering.
            // keepCount is the number of leading proteins retained; decoysKept is how many
            // of those are decoys. The FDR of the current selection is tested BEFORE a
            // protein is dropped, so a list that already satisfies the threshold is left
            // intact (previously the last protein was always removed).
            int keepCount = MyProteinList.Count;
            int decoysKept = MyProteinList.Count(isDecoy);

            while (keepCount > 0)
            {
                //Calculate FDR; keepCount > 0 guarantees a non-zero denominator.
                double fdr = (double)decoysKept / (double)keepCount;

                if (fdr <= myParams.ProteinFDR)
                {
                    break;
                }

                // Drop the current lowest-scoring protein from the selection.
                if (isDecoy(MyProteinList[keepCount - 1]))
                {
                    decoysKept--;
                }
                keepCount--;
            }

            MyProteinList.RemoveRange(keepCount, MyProteinList.Count - keepCount);

            //Must call this method to correct for the removed proteins
            RebuildScansFromModifiedProteinList();
        }