//-----------------------------

//-----------------------------------------------------------------------------------------------------

/// <summary>
/// Restricts each protein's scan list to the scans that are also present in
/// <c>AllSQTScans</c>, removes the proteins left without any scan, and then
/// rebuilds the peptide list from the remaining proteins.
/// </summary>
public void RebuildProteinsFromScans()
{
    // The set of scans to keep does not change while the proteins are processed, so it is
    // read once here instead of once per protein from inside the parallel workers.
    var survivingScans = AllSQTScans.ToList();

    // Each worker only assigns the Scans of the protein it was handed.
    MyProteinList.AsParallel().ForAll(a => a.Scans = a.Scans.Intersect(survivingScans).ToList());

    // Proteins with no remaining scan are dropped.
    MyProteinList.RemoveAll(a => a.Scans.Count == 0);

    RebuildPeptideListFromUpdatedProteinList();
}
/// <summary>
/// Removes every protein whose locus starts with <paramref name="decoyTag"/> and then calls
/// <c>RebuildScansFromModifiedProteinList</c> so the rest of the state follows the protein list.
/// </summary>
/// <param name="decoyTag">
/// Locus prefix that marks a decoy entry. It is compared ordinally (exact characters,
/// case-sensitive). Note that an empty string is a prefix of every locus and therefore
/// removes all proteins.
/// </param>
/// <returns>The number of proteins that were removed.</returns>
public int RemoveDecoyProteins(string decoyTag)
{
    // The tag is an identifier prefix, not linguistic text, so use an ordinal comparison
    // rather than the culture-sensitive default of StartsWith(string).
    int removedCount = MyProteinList.RemoveAll(a => a.Locus.StartsWith(decoyTag, StringComparison.Ordinal));

    RebuildScansFromModifiedProteinList();

    return removedCount;
}
/// <summary>
/// Rebuilds <c>MyPeptideList</c> from the peptide results of the proteins currently in
/// <c>MyProteinList</c>. Only peptides whose cleaned sequence appears in
/// <c>AllCleanedPeptideSequences</c> are considered, and exactly one entry is kept per
/// cleaned sequence.
/// </summary>
private void RebuildPeptideListFromUpdatedProteinList()
{
    Console.WriteLine(" Building peptide list from protein list");

    // A hash set gives constant-time membership tests inside the query below; the previous
    // list lookup scanned every accepted sequence for every peptide of every protein.
    // The set is only read by the parallel query.
    HashSet<string> cleanedSequences = new HashSet<string>(AllCleanedPeptideSequences);

    // Group the accepted peptides of all proteins by their cleaned sequence.
    Dictionary<string, List<PeptideResult>> seqCounter =
        (from prot in MyProteinList.AsParallel()
         from pep in prot.PeptideResults
         where cleanedSequences.Contains(pep.CleanedPeptideSequence)
         group pep by pep.CleanedPeptideSequence into g
         select new { Sequence = g.Key, Peptides = g }).ToDictionary(a => a.Sequence, a => a.Peptides.ToList());

    List<PeptideResult> rebuiltPeptides = new List<PeptideResult>(seqCounter.Count);

    foreach (KeyValuePair<string, List<PeptideResult>> kvp in seqCounter)
    {
        List<PeptideResult> candidates = kvp.Value;

        // Patch pointed out by Tiago Balbuena to deal with peptides of the same cleaned
        // sequence but different flanking amino acids: when the group holds more than one
        // distinct full peptide sequence, merge them into a single surrogate result.
        bool hasDifferentFlankingResidues =
            candidates.Count > 1 &&
            candidates.Select(pr => pr.PeptideSequence).Distinct().Skip(1).Any();

        if (hasDifferentFlankingResidues)
        {
            // The surrogate carries the distinct union of the scans of all variants.
            List<SQTScan> allScans = new List<SQTScan>();
            foreach (PeptideResult pr in candidates)
            {
                allScans.AddRange(pr.MyScans);
            }
            allScans = allScans.Distinct().ToList();

            rebuiltPeptides.Add(new PeptideResult(kvp.Key, allScans));
        }
        else
        {
            // All entries share the same full sequence; the first one represents the group.
            rebuiltPeptides.Add(candidates[0]);
        }
    }

    MyPeptideList = rebuiltPeptides;

    Console.WriteLine(" Done building peptide list");
}
/// <summary>
/// Applies the Bayesian discriminant function at protein level: trains a target/decoy
/// discriminant on the proteins' input vectors, stores the score difference of the two
/// classes in each protein's <c>BayesianScore</c>, sorts the proteins by that score
/// (descending) and trims the tail of the list until the decoy-based FDR of the retained
/// proteins is at most <c>myParams.ProteinFDR</c>. Finally calls
/// <c>RebuildScansFromModifiedProteinList</c>.
/// </summary>
/// <remarks>
/// Decoys are the proteins whose locus starts with <c>myParams.LabeledDecoyTag</c>
/// (ordinal comparison). If no prefix of the sorted list satisfies the FDR, every protein
/// is removed.
/// </remarks>
/// <exception cref="System.ArgumentException">
/// Thrown when the trained model does not contain exactly two classes.
/// </exception>
public void BayesianCleaningAtProteinLevel()
{
    int minNoExamplesPerClass = 5;
    string decoyTag = myParams.LabeledDecoyTag;

    PatternTools.GaussianDiscriminant.Discriminant gd = new PatternTools.GaussianDiscriminant.Discriminant();

    List<int> dims = new List<int> { 0, 1, 2, 3, 4, 5 };

    // Build the training matrix: label -1 for decoys, +1 for everything else.
    int negativeClassExampleCounter = 0;
    foreach (MyProtein p in MyProteinList)
    {
        int label = 1;
        if (p.Locus.StartsWith(decoyTag, StringComparison.Ordinal))
        {
            label = -1;
            negativeClassExampleCounter++;
        }

        gd.MySparseMatrix.addRow(new sparseMatrixRow(label, dims, p.InputVector));
    }

    // Note from the original author: do NOT normalize the columns to the 0..1 range here
    // (NormalizeAllColumnsToRangeFrom0To1New); it greatly degrades the classifier.

    Console.WriteLine("Target examples for protein model = " + gd.MySparseMatrix.theMatrixInRows.FindAll(a => a.Lable == 1).Count);
    Console.WriteLine("Decoy examples for protein model = " + gd.MySparseMatrix.theMatrixInRows.FindAll(a => a.Lable == -1).Count);

    gd.Model(false, new List<int>(), minNoExamplesPerClass, true, false);

    if (gd.ClassLableClassPackageDic.Keys.Count != 2)
    {
        throw new System.ArgumentException("Not enough examples to generate protein classification model. \nNo available negative datapoints: " + negativeClassExampleCounter);
    }

    // Score every protein with the trained model.
    Parallel.ForEach(MyProteinList, r =>
    {
        // Per the original author, the result is ordered by class number.
        // NOTE(review): which index is target vs. decoy, and whether a higher Score is
        // better, depends on Classify - confirm before changing the sign of this difference.
        var results = gd.Classify(r.InputVector.ToArray());
        r.BayesianScore = results[0].Score - results[1].Score;
    });

    // Highest score first, so the trimming below removes from the low-score tail.
    MyProteinList.Sort((a, b) => b.BayesianScore.CompareTo(a.BayesianScore));

    // Counts of decoy / non-decoy proteins among the first cutOffValue entries. The decoy
    // count was already gathered while labelling; the loci have not changed since.
    int numberOfReverseProteins = negativeClassExampleCounter;
    int numberOfForwardProteins = MyProteinList.Count - negativeClassExampleCounter;

    // Now let's do the filtering: cutOffValue is the number of proteins kept. The FDR is
    // evaluated BEFORE discarding anything, so a list that already satisfies the threshold
    // is left untouched (previously the lowest-scoring protein was always removed).
    int cutOffValue = MyProteinList.Count;
    while (cutOffValue > 0)
    {
        // Calculate FDR over the proteins that would be kept.
        double fdr = (double)numberOfReverseProteins / ((double)numberOfForwardProteins + (double)numberOfReverseProteins);
        if (fdr <= myParams.ProteinFDR)
        {
            break;
        }

        // Drop the current lowest-scoring retained protein and update the counters.
        cutOffValue--;
        if (MyProteinList[cutOffValue].Locus.StartsWith(decoyTag, StringComparison.Ordinal))
        {
            numberOfReverseProteins--;
        }
        else
        {
            numberOfForwardProteins--;
        }
    }

    MyProteinList.RemoveRange(cutOffValue, MyProteinList.Count - cutOffValue);

    // Must call this method to correct for the removed proteins.
    RebuildScansFromModifiedProteinList();
}