/// <summary>
/// Reads the peptide file and writes a small tab-delimited summary of labeling
/// efficiency to "<paramref name="fileName"/>.labelingEfficiency".
/// </summary>
/// <param name="fileName">Peptide text file readable by <c>MascotPeptideTextFormat</c>.</param>
/// <returns>A single-element array containing the name of the summary file.</returns>
public override IEnumerable<string> Process(string fileName)
{
    var peptides = new MascotPeptideTextFormat().ReadFromFile(fileName);

    var seqs = (from p in peptides
                let pep = p.Peptide
                select PeptideUtils.GetMatchedSequence(pep.Sequence)).ToList();

    // A matched sequence whose first character is not a letter is counted as
    // N-terminal modified. The length guard avoids an IndexOutOfRangeException
    // when a matched sequence is empty.
    var nmod = seqs.Count(seq => seq.Length > 0 && !char.IsLetter(seq[0]));

    // Sequences containing the target amino acid, and the subset for which
    // IsFullModifiedK reports that every occurrence is modified.
    var kseqs = seqs.Count(seq => seq.Contains(modificationAminoacid));
    var kmod = seqs.Count(seq => seq.Contains(modificationAminoacid) && IsFullModifiedK(seq));

    var result = fileName + ".labelingEfficiency";
    using (StreamWriter sw = new StreamWriter(result))
    {
        sw.WriteLine("Total PSMs\t{0}", peptides.Count);
        sw.WriteLine("N-terminal modified PSMs\t{0}", nmod);
        sw.WriteLine("{0}-contained PSMs\t{1}", modificationAminoacid, kseqs);
        // The original used placeholder {2} with only two arguments, which made
        // this call throw FormatException; the count is argument index 1.
        sw.WriteLine("{0}-full-modified PSMs\t{1}", modificationAminoacid, kmod);
    }

    return new string[] { result };
}
/// <summary>
/// Keeps only the peptides whose protein references all match <c>regex</c>
/// (and that have at least one protein) and writes them to a ".snp.peptides" file.
/// </summary>
/// <param name="fileName">Peptide text file to filter.</param>
/// <returns>A single-element array containing the name of the filtered file.</returns>
public override IEnumerable<string> Process(string fileName)
{
    var format = new MascotPeptideTextFormat();
    var allPeptides = format.ReadFromFile(fileName);

    var snpOnly = allPeptides.FindAll(spectrum =>
    {
        var sawSnp = false;
        foreach (var protein in spectrum.Proteins)
        {
            if (!regex.Match(protein).Success)
            {
                // One non-matching protein is enough to reject the peptide.
                return false;
            }

            sawSnp = true;
        }

        return sawSnp;
    });

    var target = FileUtils.ChangeExtension(fileName, ".snp.peptides");
    format.WriteToFile(target, snpOnly);

    return new string[] { target };
}
/// <summary>
/// Reads spectra from the peptide file, groups them (by peptide when
/// <c>isSiteLevel</c> is set, otherwise by unique peptide) and rewrites the
/// description of the first entry of every group from the FASTA reference map.
/// </summary>
/// <param name="fileName">Peptide text file to read.</param>
/// <returns>The grouped identification result.</returns>
protected override IIdentifiedResult GetIdentifiedResult(string fileName)
{
    format = new MascotPeptideTextFormat();
    var spectra = format.ReadFromFile(fileName);

    IIdentifiedResult grouped;
    if (!isSiteLevel)
    {
        grouped = IdentifiedSpectrumUtils.BuildGroupByUniquePeptide(spectra);
    }
    else
    {
        grouped = IdentifiedSpectrumUtils.BuildGroupByPeptide(spectra);
    }

    var referenceMap = SequenceUtils.ReadAccessNumberReferenceMap(new FastaFormat(), this.fastaFile, this.parser);

    foreach (var group in grouped)
    {
        // The description holds '/'-separated protein names; each one is mapped
        // through the parser to a key of the reference map.
        var names = group[0].Description.Split('/');
        var references = names.Select(name => referenceMap[parser.GetValue(name)]).ToList();
        group[0].Description = references.Merge(" ! ");
    }

    return grouped;
}
/*
 * Disabled draft, kept for reference only (it does not compile as written).
 *
 * public IPeptideMassCalculator GetPeptideMassCalculator(MascotModificationItem dynamicModification)
 * {
 *   bool isMono = true;
 *
 *   var aas = new Aminoacids();
 *   staticModification.ForEach(m => aas[aas].ResetMass(aas[m].MonoMass + staticModifications[aa], aas[aa].AverageMass + staticModifications[aa]);
 *   }
 *
 *   var diff = new[] { '*', '#', '@', '^', '~', '$' };
 *   int i = 0;
 *   foreach (double mod in Diff_search_options.Values)
 *   {
 *     aas[diff[i++]].ResetMass(mod, mod);
 *   }
 *
 *   double nterm = isMono ? Atom.H.MonoMass : Atom.H.AverageMass;
 *   double cterm = isMono ? Atom.H.MonoMass + Atom.O.MonoMass : Atom.H.AverageMass + Atom.O.AverageMass;
 *
 *   if (this.term_diff_search_options.First != 0.0 || this.term_diff_search_options.Second != 0.0)
 *   {
 *     throw new Exception(
 *       "Term dynamic modification has not been implemented into this function, call author to fix it.");
 *   }
 *
 *   IPeptideMassCalculator result;
 *   if (isMono)
 *   {
 *     result = new MonoisotopicPeptideMassCalculator(aas, nterm, cterm);
 *   }
 *   else
 *   {
 *     result = new AveragePeptideMassCalculator(aas, nterm, cterm);
 *   }
 *
 *   return result;
 * }
 */

/// <summary>
/// Placeholder processor: reads the peptide file (so an unreadable file still
/// fails here) but produces no output files yet.
/// </summary>
/// <param name="fileName">Peptide text file to read.</param>
/// <returns>An empty sequence; no result files are generated.</returns>
public override IEnumerable<string> Process(string fileName)
{
    var format = new MascotPeptideTextFormat();
    format.ReadFromFile(fileName);

    // Return an empty collection instead of null so that callers enumerating
    // the result files do not hit a NullReferenceException.
    return new string[0];
}
/// <summary>
/// Filters peptides by q-value, rounds their mass difference to the nearest
/// integer, drops those with a zero difference and writes both the remaining
/// peptides and a count per rounded mass difference.
/// </summary>
/// <param name="fileName">Peptide text file to read.</param>
/// <returns>The peptide file name followed by the group-count file name.</returns>
public override IEnumerable<string> Process(string fileName)
{
    var format = new MascotPeptideTextFormat();
    var peptides = format.ReadFromFile(fileName);

    // Keep only peptides below the FDR threshold.
    peptides.RemoveAll(m => m.QValue >= fdr);

    foreach (var peptide in peptides)
    {
        peptide.TheoreticalMinusExperimentalMass = Math.Round(peptide.TheoreticalMinusExperimentalMass);
    }

    // After rounding, a zero difference means no integer mass shift.
    peptides.RemoveAll(m => m.TheoreticalMinusExperimentalMass == 0.0);

    var peptideFile = MyConvert.Format("{0}.fdr{1:0.000}.txt", fileName, fdr);
    format.WriteToFile(peptideFile, peptides);

    var byShift = peptides.GroupBy(m => m.TheoreticalMinusExperimentalMass).ToList();

    // Largest groups first.
    byShift.Sort((a, b) => b.Count().CompareTo(a.Count()));

    var groupFile = MyConvert.Format("{0}.fdr{1:0.000}.groups", fileName, fdr);
    using (StreamWriter writer = new StreamWriter(groupFile))
    {
        foreach (var shift in byShift)
        {
            // The stored value is theoretical minus experimental; the sign is
            // flipped for output.
            writer.WriteLine("{0:0}\t{1}", -shift.Key, shift.Count());
        }
    }

    return new string[] { peptideFile, groupFile };
}
/// <summary>
/// Checks that, per classification bin, keeping the top peptide after the
/// "unconflict" filter yields the same number of spectra as keeping the top
/// peptide directly on the merged spectra of two searches.
/// </summary>
public void TestSameEngineDifferentParameters()
{
    var co = new ClassificationOptions();
    co.ClassifyByCharge = true;
    co.ClassifyByMissCleavage = true;
    co.ClassifyByModification = true;
    co.ModifiedAminoacids = "STY";
    co.ClassifyByNumProteaseTermini = true;

    var s1 = new MascotPeptideTextFormat().ReadFromFile(TestContext.CurrentContext.TestDirectory + "/../../../data/deisotopic.peptides");
    IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(s1);
    foreach (var spectrum in s1)
    {
        spectrum.Tag = "deisotopic";
    }

    var s2 = new MascotPeptideTextFormat().ReadFromFile(TestContext.CurrentContext.TestDirectory + "/../../../data/deisotopic-top10.peptides");
    IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(s2);
    foreach (var spectrum in s2)
    {
        spectrum.Tag = "deisotopic-top";
    }

    var all = s1.Union(s2).ToList();

    // Path 1: keep the top peptide directly, then classify.
    var p1 = new List<IIdentifiedSpectrum>(all);
    IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(p1, new ScoreFunction());
    foreach (var spectrum in p1)
    {
        spectrum.ClassificationTag = "deisotopic/deisotopic-top";
    }
    var bin1 = co.BuildSpectrumBin(p1);

    // Path 2: keep unconflicting peptides, classify, then keep the top peptide per bin.
    var p2 = new List<IIdentifiedSpectrum>(all);
    IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters(p2, new ScoreFunction());
    foreach (var spectrum in p2)
    {
        spectrum.ClassificationTag = "deisotopic/deisotopic-top";
    }
    var bin2 = co.BuildSpectrumBin(p2);

    foreach (var m in bin2)
    {
        IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(m.Spectra, new ScoreFunction());

        var condition = m.Condition.ToString();
        var n = bin1.Find(a => a.Condition.ToString().Equals(condition));
        Assert.AreEqual(m.Spectra.Count, n.Spectra.Count);

        // Disabled diagnostics for investigating a mismatch between the two paths:
        //{
        //  if (m.Condition.ToString().Equals("deisotopic/deisotopic-top; Charge=2; MissCleavage=0; Modification=1; NumProteaseTermini=2"))
        //  {
        //    Assert.IsTrue(n.Spectra.Any(k => k.Query.FileScan.ShortFileName.Equals("20111128_CLi_v_4-2k_2mg_TiO2_iTRAQ,4992")));
        //  }
        //  var diff1 = m.Spectra.Except(n.Spectra).ToList();
        //  Console.WriteLine(m.Condition.ToString() + " : " + diff1.Count.ToString());
        //  diff1.ForEach(k =>
        //  {
        //    var lst = all.FindAll(l => l.Query.FileScan.LongFileName.Equals(k.Query.FileScan.LongFileName));
        //    lst.ForEach(q => Console.WriteLine(q.Query.FileScan.ShortFileName + "\t" + q.Tag + "\t" + q.Score.ToString() + "\t" + q.Sequence));
        //  });
        //}
    }
}
/// <summary>
/// Verifies that peptides read from the test file start without protein
/// entries and all have at least one after <c>FillProteinInformation</c>.
/// </summary>
public void TestFillProteinInformation()
{
    var peptideFile = TestContext.CurrentContext.TestDirectory + "/../../../data/Test.output.xml.FDR0.01.peptides";
    var proteinFile = TestContext.CurrentContext.TestDirectory + "/../../../data//Test.output.xml.FDR0.01.peptides.proteins";

    var peptides = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

    // Before filling: no peptide carries protein information.
    var noneHaveProteins = peptides.All(m => m.Peptide.Proteins.Count == 0);
    Assert.IsTrue(noneHaveProteins);

    IdentifiedSpectrumUtils.FillProteinInformation(peptides, proteinFile);

    // After filling: every peptide carries at least one protein.
    var allHaveProteins = peptides.All(m => m.Peptide.Proteins.Count > 0);
    Assert.IsTrue(allHaveProteins);
}
/// <summary>
/// Verifies the q-value computed for the first peptide of the filtered test
/// data set when ranking by expect value with a target-based FDR calculator.
/// </summary>
public void TestCalculateQValue()
{
    var dataFile = TestContext.CurrentContext.TestDirectory + "/../../../data/QTOF_Ecoli.LowRes.t.xml.peptides";
    var peptides = new MascotPeptideTextFormat().ReadFromFile(dataFile);

    // Drop weak matches and peptides shorter than six residues.
    peptides.RemoveAll(m => m.ExpectValue > 0.05 || m.Peptide.PureSequence.Length < 6);

    // A peptide is flagged as decoy when any of its protein names contains "REVERSE_".
    foreach (var peptide in peptides)
    {
        peptide.FromDecoy = peptide.Proteins.Any(l => l.Contains("REVERSE_"));
    }

    var scoreFunction = new ExpectValueFunction();
    var fdrCalculator = new TargetFalseDiscoveryRateCalculator();
    IdentifiedSpectrumUtils.CalculateQValue(peptides, scoreFunction, fdrCalculator);

    Assert.AreEqual(0.0267, peptides[0].QValue, 0.0001);
}
/// <summary>
/// Removes every peptide that has at least one protein name matching
/// <c>decoyReg</c> and writes the remainder to a ".target.peptides" file.
/// </summary>
/// <param name="fileName">Peptide text file to filter.</param>
/// <returns>A single-element array containing the name of the filtered file.</returns>
public override IEnumerable<string> Process(string fileName)
{
    var format = new MascotPeptideTextFormat();
    var peptides = format.ReadFromFile(fileName);

    peptides.RemoveAll(peptide =>
    {
        foreach (var protein in peptide.Proteins)
        {
            if (decoyReg.Match(protein).Success)
            {
                return true;
            }
        }

        return false;
    });

    var targetFile = FileUtils.ChangeExtension(fileName, ".target.peptides");
    format.WriteToFile(targetFile, peptides);

    return new string[] { targetFile };
}
/// <summary>
/// Reads the peptide file and stores, per experiment name, the mean of the
/// ppm values computed by <c>PrecursorUtils.mz2ppm</c> in <c>expPPMMap</c>,
/// ordered by experiment name descending.
/// </summary>
/// <param name="peptideFile">Peptide text file to read.</param>
protected override void PrepareBeforeProcessing(string peptideFile)
{
    Progress.SetMessage("Reading peptides from " + peptideFile);

    var peptides = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

    var byExperiment = peptides
        .GroupBy(m => m.Query.FileScan.Experimental)
        .ToDictionary(m => m.Key);

    expPPMMap = byExperiment
        .Select(exp => new
        {
            Name = exp.Key,
            Mean = Statistics.Mean(exp.Value.Select(pep => PrecursorUtils.mz2ppm(pep.TheoreticalMass, pep.TheoreticalMinusExperimentalMass)))
        })
        .OrderByDescending(item => item.Name)
        .Select(item => new Pair<string, double>(item.Name, item.Mean))
        .ToList();
}
/// <summary>
/// Builds a database consisting of the original database file followed by, for
/// every distinct identified peptide sequence, one entry per (position, amino
/// acid) single-residue insertion. Optionally a reversed entry is written
/// after each insertion entry.
/// </summary>
/// <returns>A single-element array containing the output file name.</returns>
public override IEnumerable<string> Process()
{
    var format = new MascotPeptideTextFormat();

    Progress.SetMessage("reading peptide-spectra-matches from " + options.PeptideFile + " ...");
    var spectra = format.ReadFromFile(options.PeptideFile);

    // One peptide per pure sequence; a later spectrum replaces an earlier one.
    var seqMap = new Dictionary<string, IIdentifiedPeptide>();
    foreach (var spec in spectra)
    {
        seqMap[spec.Peptide.PureSequence] = spec.Peptide;
    }

    // All visible amino acids except 'I', concatenated into a single string.
    var aas = new Aminoacids().GetVisibleAminoacids()
        .Where(c => c != 'I')
        .Select(c => c.ToString())
        .Merge("");

    var ff = new FastaFormat();

    Progress.SetMessage("inserting amino acid ...");
    using (var sw = new StreamWriter(options.OutputFile))
    {
        // Start with a verbatim copy of the source database.
        sw.WriteLine(File.ReadAllText(options.DatabaseFile));

        var reversed_index = 1000000;
        foreach (var seq in seqMap.Keys.OrderBy(m => m).ToArray())
        {
            // Insertion positions run from 0 to Length - 1, so nothing is
            // appended after the last residue.
            for (int pos = 0; pos < seq.Length; pos++)
            {
                foreach (char aa in aas)
                {
                    var inserted = seq.Insert(pos, aa.ToString());
                    var reference = string.Format("INS_{0}_{1}{2} Insertion of {3}", seq, pos, aa, seqMap[seq].Proteins.Merge("/"));
                    ff.WriteSequence(sw, new Sequence(reference, inserted));

                    if (!options.GenerateReversedPeptide)
                    {
                        continue;
                    }

                    var reversed = SequenceUtils.GetReversedSequence(inserted);
                    var reversedReference = string.Format("REVERSED_{0}", reversed_index++);
                    ff.WriteSequence(sw, new Sequence(reversedReference, reversed));
                }
            }
        }
    }

    return new[] { options.OutputFile };
}
/// <summary>
/// For every spectrum that has a ratio, creates a copy for each other raw file
/// paired with the spectrum's raw file. The copy's scan is set to the scan of
/// the observed envelope holding the maximum light or heavy intensity.
/// </summary>
/// <param name="spectra">Spectra to examine.</param>
/// <param name="detailDir">Directory passed to <c>GetRatioFile</c> to locate ratio files.</param>
/// <returns>The newly created spectra (also registered on their source spectrum).</returns>
private List<IIdentifiedSpectrum> DuplicateSpectrum(List<IIdentifiedSpectrum> spectra, string detailDir)
{
    var duplicates = new List<IIdentifiedSpectrum>();

    // Map every raw file name to the list of raw files it is paired with.
    var partners = new Dictionary<string, List<string>>();
    foreach (var group in rawpairs.Values)
    {
        foreach (var raw in group)
        {
            partners[raw] = group;
        }
    }

    var format = new MascotPeptideTextFormat();
    foreach (var spectrum in spectra)
    {
        if (!spectrum.HasRatio())
        {
            continue;
        }

        var silacFile = spectrum.GetRatioFile(detailDir);
        var silacResult = new SilacQuantificationSummaryItemXmlFormat().ReadFromFile(silacFile);

        // Scan of the first envelope that carries the overall maximum intensity.
        var maxIntensity = silacResult.ObservedEnvelopes.Max(m => Math.Max(m.LightIntensity, m.HeavyIntensity));
        var peakEnvelope = silacResult.ObservedEnvelopes.Find(m => m.LightIntensity == maxIntensity || m.HeavyIntensity == maxIntensity);
        var scan = peakEnvelope.Scan;

        // Serialize once; each duplicate is created by parsing this text again.
        var serialized = format.PeptideFormat.GetString(spectrum);
        var sourceRaw = spectrum.Query.FileScan.Experimental;

        foreach (var otherRaw in partners[sourceRaw])
        {
            if (otherRaw.Equals(sourceRaw))
            {
                continue;
            }

            var copy = format.PeptideFormat.ParseString(serialized);
            copy.Query.FileScan.Experimental = otherRaw;
            copy.Query.FileScan.FirstScan = scan;
            copy.Query.FileScan.LastScan = scan;
            copy.SetExtendedIdentification(true);

            duplicates.Add(copy);
            spectrum.AddDuplicatedSpectrum(copy);
        }
    }

    return duplicates;
}
/// <summary>
/// Annotates each peptide with the name of the first database sequence that
/// contains its pure sequence and writes the peptides, with an extra "MutDB"
/// column, to "<paramref name="fileName"/>.mutdb".
/// </summary>
/// <param name="fileName">Peptide text file to annotate.</param>
/// <returns>A single-element array containing the name of the annotated file.</returns>
public override IEnumerable<string> Process(string fileName)
{
    Progress.SetMessage("Reading sequences from " + database + " ...");
    var seqs = SequenceUtils.Read(new FastaFormat(), database);

    // Only sequences whose name contains "|#" and does not start with "rev_" are searched.
    seqs.RemoveAll(m => m.Name.StartsWith("rev_") || !m.Name.Contains("|#"));

    var format = new MascotPeptideTextFormat();

    Progress.SetMessage("Procesing peptides from " + Path.GetFileName(fileName) + " ...");
    var peptides = format.ReadFromFile(fileName);

    Progress.SetRange(0, peptides.Count);
    foreach (var peptide in peptides)
    {
        Progress.Increment(1);

        var pureSeq = peptide.Annotations["PureSequence"] as string;
        foreach (var seq in seqs)
        {
            if (!seq.SeqString.Contains(pureSeq))
            {
                continue;
            }

            // First hit wins.
            peptide.Annotations["MutDB"] = seq.Name;
            break;
        }
    }

    var result = fileName + ".mutdb";
    using (StreamWriter sw = new StreamWriter(result))
    {
        sw.WriteLine(format.PeptideFormat.GetHeader() + "\tMutDB");

        foreach (var peptide in peptides)
        {
            sw.Write(format.PeptideFormat.GetString(peptide));

            // Peptides without a hit still get the (empty) extra column.
            var suffix = peptide.Annotations.ContainsKey("MutDB")
                ? "\t" + peptide.Annotations["MutDB"]
                : "\t";
            sw.WriteLine(suffix);
        }
    }

    return new string[] { result };
}
/// <summary>
/// Splits the entries of <paramref name="curtype"/> into paired (more than one
/// group) and unpaired (exactly one group) results. Paired results are further
/// split into those whose top-scoring spectrum file name is unique
/// ("one2one") and the rest ("one2multiple"). Each category is written to its
/// own file.
/// </summary>
/// <returns>
/// The six generated file names: one2one, its peptide file, one2multiple, its
/// peptide file, unpaired, and its peptide file.
/// </returns>
private string[] DoStatistic(string fileName, Aminoacids aas, MascotPeptideTextFormat format, Dictionary<string, Sequence> proMap, IClassification<IIdentifiedPeptide> classification, string mutHeader, MascotPeptideTextFormat mutPepFormat, List<List<IGrouping<string, IIdentifiedPeptide>>> curtype, string curname)
{
    var prefix = fileName + curname;

    var pairedMut = curtype.Where(r => r.Count > 1).ToList();
    var byTopSpectrum = pairedMut.GroupBy(m => GetMaxScore(m[0]).Spectrum.Query.FileScan.LongFileName);

    // Paired results whose top spectrum is not shared with any other paired result.
    var pairedOne2OneMut = byTopSpectrum
        .Where(d => d.Count() == 1)
        .SelectMany(d => d)
        .ToList();
    var pairedOne2OneFile = prefix + ".paired.one2one.mut";
    var pairedOne2OnePeptideFile = OutputPairedResult(aas, format, proMap, classification, mutHeader, mutPepFormat, pairedOne2OneMut, pairedOne2OneFile);

    // Everything else that is paired, ordered by the top spectrum file name.
    var pairedOne2MultipleMut = pairedMut
        .Except(pairedOne2OneMut)
        .OrderBy(m => GetMaxScore(m[0]).Spectrum.Query.FileScan.LongFileName)
        .ToList();
    var pairedOne2MultipleFile = prefix + ".paired.one2multiple.mut";
    var pairedOne2MultiplePeptideFile = OutputPairedResult(aas, format, proMap, classification, mutHeader, mutPepFormat, pairedOne2MultipleMut, pairedOne2MultipleFile);

    var unpairedFile = prefix + ".unpaired.mut";
    var unpairedMut = curtype.Where(r => r.Count == 1).ToList();
    using (StreamWriter sw = new StreamWriter(unpairedFile))
    {
        sw.WriteLine("Index\t" + mutHeader + "\tSequence\tPepCount");

        for (int index = 0; index < unpairedMut.Count; index++)
        {
            var entry = unpairedMut[index];
            var topPeptide = GetMaxScore(entry[0]);

            // The index column is 1-based and prefixed with a literal '$'.
            sw.WriteLine("${0}\t{1}\t{2}\t{3}",
                index + 1,
                mutPepFormat.PeptideFormat.GetString(topPeptide.Spectrum),
                topPeptide.PureSequence,
                entry[0].Count());
        }
    }

    var unpairedPeptideFile = unpairedFile + ".peptides";
    SavePeptidesFile(unpairedMut, format, unpairedPeptideFile);

    return new string[]
    {
        pairedOne2OneFile,
        pairedOne2OnePeptideFile,
        pairedOne2MultipleFile,
        pairedOne2MultiplePeptideFile,
        unpairedFile,
        unpairedPeptideFile
    };
}
/// <summary>
/// Attaches quantification ratios to every peptide of the mutation file: once
/// for the mutated sequence (with I replaced by L, prefix "MUL_") and once for
/// the sequence stored in the "OriginalSequence" annotation (prefix "ORI_").
/// The result is written to "<paramref name="fileName"/>.quantification".
/// </summary>
/// <param name="fileName">Mutation peptide file to read.</param>
/// <returns>A single-element array containing the name of the output file.</returns>
/// <exception cref="Exception">The quantification file contains no entries.</exception>
public override IEnumerable<string> Process(string fileName)
{
    Progress.SetMessage("Reading mutation file ...");
    var format = new MascotPeptideTextFormat();
    var spectra = format.ReadFromFile(fileName);

    var quanFormat = new MascotResultTextFormat();
    quanFormat.Progress = this.Progress;

    Progress.SetMessage("Reading quantification file ...");
    var quantified = quanFormat.ReadFromFile(quantificationFile);
    if (quantified.Count == 0)
    {
        throw new Exception("No quantification found!");
    }

    foreach (var pep in spectra)
    {
        // Entries of the quantification result are matched by member name.
        var mutSeq = pep.Peptide.PureSequence.Replace('I', 'L');
        var mutHit = quantified.FirstOrDefault(m => m.Any(n => n.Name.Equals(mutSeq)));
        if (mutHit != null)
        {
            AddRatio(pep, mutHit, "MUL_");
        }

        var oriSeq = pep.Annotations["OriginalSequence"] as string;
        var oriHit = quantified.FirstOrDefault(m => m.Any(n => n.Name.Equals(oriSeq)));
        if (oriHit != null)
        {
            AddRatio(pep, oriHit, "ORI_");
        }
    }

    format.Initialize(spectra);

    var output = fileName + ".quantification";
    Progress.SetMessage("Writing peptide quantification file ...");
    format.WriteToFile(output, spectra);

    return new string[] { output };
}
/// <summary>
/// Chooses the reader for <c>options.InputFile</c> by inspecting its first
/// line: a header containing "PredictionRetentionTime" selects
/// <c>RetentionTimePredictionFormat</c>, anything else selects
/// <c>MascotPeptideTextFormat</c>.
/// </summary>
/// <returns>The reader matching the input file's header.</returns>
private IFileReader<List<IIdentifiedSpectrum>> GetPeptideReader()
{
    string header;
    using (var sr = new StreamReader(options.InputFile))
    {
        // ReadLine returns null when the file is empty.
        header = sr.ReadLine();
    }

    // An empty file has no header; fall back to the default peptide format
    // instead of failing with a NullReferenceException.
    if (header != null && header.Contains("PredictionRetentionTime"))
    {
        return new RetentionTimePredictionFormat();
    }

    return new MascotPeptideTextFormat();
}
/// <summary>
/// Writes the enabled protein groups of <paramref name="mr"/> to
/// <paramref name="proteinFile"/> and the enabled spectra of those groups to
/// the companion peptide file returned by <c>GetPeptideFileName</c>.
/// </summary>
/// <param name="proteinFile">Target protein file.</param>
/// <param name="mr">Identification result to write.</param>
public void WriteToFile(string proteinFile, IIdentifiedResult mr)
{
    // Protein section.
    var proteinWriter = new IdentifiedProteinTextWriter(GetProteinHeader());
    using (var proteinStream = new StreamWriter(proteinFile))
    {
        proteinStream.WriteLine("\tName\tDescription" + proteinWriter.GetHeader());

        foreach (IIdentifiedProteinGroup group in mr)
        {
            if (!group[0].IsEnabled(true))
            {
                continue;
            }

            // Unique peptide counts are based on enabled spectra only.
            group[0].InitUniquePeptideCount(mph => mph.Spectrum.IsEnabled(true));
            this.WriteFunction(proteinStream, group, proteinWriter);
        }
    }

    // Peptide section.
    string peptideFile = GetPeptideFileName(proteinFile);
    var peptideWriter = new MascotPeptideTextFormat(GetPeptideHeader());
    using (var peptideStream = new StreamWriter(peptideFile))
    {
        peptideStream.WriteLine(peptideWriter.PeptideFormat.GetHeader());

        foreach (IIdentifiedProteinGroup group in mr)
        {
            if (!group[0].IsEnabled(true))
            {
                continue;
            }

            foreach (IIdentifiedSpectrum spectrum in group[0].GetSpectra())
            {
                if (spectrum.IsEnabled(false))
                {
                    peptideStream.WriteLine(peptideWriter.PeptideFormat.GetString(spectrum));
                }
            }
        }
    }
}
/// <summary>
/// Compares the scores of spectra present in both peptide files and writes one
/// line per shared spectrum (sorted by long file name) with both scores and
/// their difference (second minus first).
/// </summary>
/// <param name="fileName">Output file to write.</param>
/// <returns>A single-element array containing <paramref name="fileName"/>.</returns>
public override IEnumerable<string> Process(string fileName)
{
    var first = new MascotPeptideTextFormat().ReadFromFile(peptideFile1).ToDictionary(m => m.Query.FileScan.LongFileName);
    var second = new MascotPeptideTextFormat().ReadFromFile(peptideFile2).ToDictionary(m => m.Query.FileScan.LongFileName);

    var shared = first.Keys.Intersect(second.Keys).ToList();
    shared.Sort();

    using (StreamWriter writer = new StreamWriter(fileName))
    {
        var name1 = Path.GetFileNameWithoutExtension(peptideFile1);
        var name2 = Path.GetFileNameWithoutExtension(peptideFile2);
        writer.WriteLine("FileScan\t" + name1 + "\t" + name2 + "\tDeltaScore");

        foreach (var key in shared)
        {
            var score1 = first[key].Score;
            var score2 = second[key].Score;
            writer.WriteLine("{0}\t{1:0.00}\t{2:0.00}\t{3:0.00}", key, score1, score2, score2 - score1);
        }
    }

    return new string[] { fileName };
}
/// <summary>
/// Converts the evidence entries of the input file into a peptide file with a
/// fixed column layout, after dropping entries without proteins and,
/// optionally, entries whose proteins contain "CON_".
/// </summary>
/// <returns>A single-element array containing the output file name.</returns>
public override IEnumerable<string> Process()
{
    var evidences = new MascotPeptideTextFormat().ReadFromFile(options.InputFile);

    // Entries that are not mapped to any protein usually come from the decoy
    // database; contaminants are dropped only when requested.
    evidences.RemoveAll(m =>
    {
        var proteins = m.Annotations["Proteins"] as string;
        if (string.IsNullOrWhiteSpace(proteins))
        {
            return true;
        }

        return options.RemoveContanimant && proteins.Contains("CON_");
    });

    foreach (var evidence in evidences)
    {
        ParseMaxQuantEvidencePeptide(evidence);
    }

    var outputFormat = new MascotPeptideTextFormat("\tFileScan\tSequence\tObs\tMH+\tDiff(MH+)\tDiffPPM\tCharge\tRank\tScore\tExpectValue\tReference\tMissCleavage\tModification\tMatchCount\tNumProteaseTermini");
    outputFormat.WriteToFile(options.OutputFile, evidences);

    return new string[] { options.OutputFile };
}
/// <summary>
/// Loads the peptides of the origin file, refreshes the peptide list view and
/// builds a chromatograph processor for the de-duplicated set of peptides.
/// </summary>
/// <returns>A processor for the chromatograph items derived from the peptides.</returns>
protected override IFileProcessor GetFileProcessor()
{
    format = new MascotPeptideTextFormat();
    peptides = format.ReadFromFile(base.GetOriginFile());

    if (bFirstLoad)
    {
        // On first load, every header column that is not already shown in the
        // list view is remembered as ignored (only if the view has columns).
        var headerColumns = format.PeptideFormat.GetHeader().Split('\t').ToList();
        var shownColumns = lvPeptides.GetColumnList().ConvertAll(m => m.Text);
        if (shownColumns.Count > 0)
        {
            this.peptideIgnoreKeys = headerColumns.Except(shownColumns).ToList();
        }

        bFirstLoad = false;
    }

    FillListViewColumns(this.lvPeptides, format.PeptideFormat.GetHeader(), this.peptideIgnoreKeys, this.peptideIgnoreKeyIndecies);

    UpdatePeptides();

    var chros = peptides.Select(p => SpectrumToChro(p)).ToList();

    // Remove earlier entries that duplicate a later one (same sequence, same
    // charge, m/z within 0.0001). Only one earlier duplicate is removed per
    // pass; the removal shifts the later entry down by one, so the next outer
    // iteration examines the same entry again.
    for (int later = chros.Count - 1; later >= 0; later--)
    {
        for (int earlier = later - 1; earlier >= 0; earlier--)
        {
            var sameSequence = chros[later].Sequence == chros[earlier].Sequence;
            if (sameSequence
                && (chros[later].Charge == chros[earlier].Charge)
                && (Math.Abs(chros[later].Mz - chros[earlier].Mz) < 0.0001))
            {
                chros.RemoveAt(earlier);
                break;
            }
        }
    }

    lvPeptides.SelectedIndexChanged -= lvPeptides_SelectedIndexChanged;

    var rawFiles = new List<string> { rawFile.FullName };
    return new ProteinChromatographProcessor(chros, rawFiles, new RawFileImpl(), ppmTolerance.Value, window.Value, false);
}
/// <summary>
/// Distributes the spectra of <paramref name="filename"/> over the entries of
/// <paramref name="map"/> according to each entry's filter, writes every
/// non-empty entry through its result writer and finally disposes all entries.
/// </summary>
/// <param name="filename">Peptide text file to read.</param>
/// <param name="result">Result list returned unchanged to the caller.</param>
/// <param name="map">Filter-to-entry map; all entries are disposed on exit.</param>
/// <returns><paramref name="result"/>.</returns>
protected override IEnumerable<string> DoProcess(string filename, List<string> result, Dictionary<IFilter<IIdentifiedSpectrum>, SpectrumEntry> map)
{
    try
    {
        var format = new MascotPeptideTextFormat();
        var spectra = format.ReadFromFile(filename);

        foreach (var pair in map)
        {
            IFilter<IIdentifiedSpectrum> filter = pair.Key;
            SpectrumEntry entry = pair.Value;

            foreach (IIdentifiedSpectrum spectrum in spectra)
            {
                if (!filter.Accept(spectrum))
                {
                    continue;
                }

                entry.Spectra.Add(spectrum);
            }

            if (entry.Spectra.Count <= 0)
            {
                continue;
            }

            entry.ResultWriter.WriteLine(format.PeptideFormat.GetHeader());
            foreach (var accepted in entry.Spectra)
            {
                entry.ResultWriter.WriteLine(format.PeptideFormat.GetString(accepted));
            }
        }

        return result;
    }
    finally
    {
        // Entries are released whether or not processing succeeded.
        foreach (SpectrumEntry entry in map.Values)
        {
            entry.Dispose();
        }
    }
}
/// <summary>
/// Merges Percolator output PSMs with the information of the matching input
/// PSMs, keeps one PSM per query, and writes both the full list and the list
/// filtered at q-value &lt; 0.01.
/// </summary>
/// <returns>
/// A single-element array with the unfiltered peptide file. The FDR-filtered
/// file is written as well but is not part of the returned list.
/// </returns>
public override IEnumerable<string> Process()
{
    var spectra = new PercolatorOutputXmlPsmReader().ReadFromFile(_options.PercolatorOutputFile);
    var inputspec = new PercolatorInputXmlPsmReader().ReadFromFile(_options.PercolatorInputFile);
    var scanMap = inputspec.ToDictionary(m => GetPsmId(m));

    // Copy query, mass and score information from the input PSM with the same id.
    foreach (var psm in spectra)
    {
        var source = scanMap[GetPsmId(psm)];

        psm.Query.QueryId = source.Query.QueryId;
        psm.Query.FileScan.FirstScan = psm.Query.QueryId;
        psm.Query.FileScan.LastScan = psm.Query.QueryId;
        psm.Query.Charge = source.Query.Charge;
        psm.ExperimentalMH = source.ExperimentalMH;
        psm.TheoreticalMH = source.TheoreticalMH;
        psm.NumMissedCleavages = source.NumMissedCleavages;
        psm.Score = source.Score;
    }

    var byQuery = spectra.GroupBy(m => m.Query.QueryId).ToList();

    var result = new List<IIdentifiedSpectrum>();
    foreach (var query in byQuery)
    {
        var ranked = query.OrderByDescending(m => m.SpScore).ToList();

        if (ranked.Count == 1)
        {
            result.Add(ranked[0]);
            continue;
        }

        var best = ranked[0];
        var second = ranked[1];

        if (second.SpScore < best.SpScore || best.FromDecoy)
        {
            // A clear winner, or a tie where the first entry is a decoy.
            result.Add(best);
        }
        else if (second.FromDecoy)
        {
            // Tie with a decoy in second place: the decoy is kept.
            result.Add(second);
        }
        else
        {
            // Tie between two non-decoys: both peptides are kept on one spectrum.
            best.AddPeptide(second.Peptide);
            result.Add(best);
        }
    }

    result.Sort((a, b) => b.SpScore.CompareTo(a.SpScore));

    var format = new MascotPeptideTextFormat("QueryId\tSpectrumId\tFileScan\tSequence\tCharge\tScore\tSvmScore\tMissCleavage\tQValue\tTheoreticalMH\tExperimentMH\tTarget/Decoy");

    var targetFile = _options.PercolatorOutputFile + ".peptides";
    format.WriteToFile(targetFile, result);

    var qvalueCalculator = new QValueCalculator(new PercolatorScoreFunction(), new TargetFalseDiscoveryRateCalculator());
    qvalueCalculator.CalculateQValue(result);
    result.RemoveAll(m => m.QValue >= 0.01);

    var target001file = FileUtils.ChangeExtension(targetFile, ".FDR0.01.peptides");
    format.WriteToFile(target001file, result);

    return new[] { targetFile };
}
/// <summary>
/// Builds the identification summary for one parameter file: obtains the
/// peptide list (built from the configuration or read from
/// <c>options.PeptideFile</c>), applies the contamination and one-hit-wonder
/// filters, optionally saves the peptide file, then builds proteins, protein
/// groups and the final result and writes the ".noredundant" file.
/// </summary>
/// <param name="parameterFile">Parameter file; also the base name of all outputs.</param>
/// <param name="result">Receives the names of the files that were written.</param>
/// <param name="conf">Summary options controlling filters and formats.</param>
private void RunCurrentParameter(string parameterFile, List<string> result, BuildSummaryOptions conf)
{
    IStringParser<string> acParser = conf.Database.GetAccessNumberParser();

    IIdentifiedProteinBuilder proteinBuilder = new IdentifiedProteinBuilder();
    IIdentifiedProteinGroupBuilder groupBuilder = new IdentifiedProteinGroupBuilder()
    {
        Progress = this.Progress
    };

    IdentifiedSpectrumBuilderResult isbr;
    List<IIdentifiedSpectrum> finalPeptides;
    if (!string.IsNullOrEmpty(options.PeptideFile))
    {
        // Peptides were supplied directly: read them and never rewrite the peptide file.
        Progress.SetMessage("Reading peptides from {0} ...", options.PeptideFile);
        finalPeptides = new MascotPeptideTextFormat().ReadFromFile(options.PeptideFile);
        conf.SavePeptidesFile = false;
        isbr = null;
    }
    else
    {
        // Parse from the configuration and build the spectrum list.
        IIdentifiedSpectrumBuilder spectrumBuilder = conf.GetSpectrumBuilder();
        var progressAware = spectrumBuilder as IProgress;
        if (progressAware != null)
        {
            progressAware.Progress = this.Progress;
        }

        isbr = spectrumBuilder.Build(parameterFile);
        finalPeptides = isbr.Spectra;
    }

    CalculateIsoelectricPoint(finalPeptides);

    // When contamination is removed through the protein description, it first
    // has to be removed at the peptide level.
    if (conf.Database.HasContaminationDescriptionFilter() && (conf.FalseDiscoveryRate.FdrLevel != FalseDiscoveryRateLevel.Protein))
    {
        Progress.SetMessage("Removing contamination by description ...");
        var notConGroupFilter = conf.Database.GetNotContaminationDescriptionFilter(Progress);

        var tempResultBuilder = new IdentifiedResultBuilder(null, null);

        // Rebuild the groups until no further spectra are dropped.
        bool changed;
        do
        {
            List<IIdentifiedProtein> proteins = proteinBuilder.Build(finalPeptides);
            List<IIdentifiedProteinGroup> groups = groupBuilder.Build(proteins);
            IIdentifiedResult tmpResult = tempResultBuilder.Build(groups);

            var notConSpectra = new HashSet<IIdentifiedSpectrum>();
            foreach (var group in tmpResult)
            {
                if (notConGroupFilter.Accept(group))
                {
                    notConSpectra.UnionWith(group[0].GetSpectra());
                }
            }

            changed = notConSpectra.Count != finalPeptides.Count;
            if (changed)
            {
                finalPeptides = notConSpectra.ToList();
            }
        } while (changed);
    }

    if (conf.FalseDiscoveryRate.FilterOneHitWonder && conf.FalseDiscoveryRate.MinOneHitWonderPeptideCount > 1)
    {
        Progress.SetMessage("Filtering single wonders ...");
        var proteinFilter = new IdentifiedProteinSingleWonderPeptideCountFilter(conf.FalseDiscoveryRate.MinOneHitWonderPeptideCount);

        List<IIdentifiedProtein> proteins = proteinBuilder.Build(finalPeptides);
        int oldProteinCount = proteins.Count;
        proteins.RemoveAll(l => !proteinFilter.Accept(l));

        // Only rebuild the peptide list when at least one protein was rejected.
        if (oldProteinCount != proteins.Count)
        {
            var newspectra = new HashSet<IIdentifiedSpectrum>();
            foreach (var protein in proteins)
            {
                newspectra.UnionWith(protein.GetSpectra());
            }

            finalPeptides = newspectra.ToList();
        }
    }

    //if (conf.SavePeptidesFile && !(conf.FalseDiscoveryRate.FilterOneHitWonder && conf.FalseDiscoveryRate.MinOneHitWonderPeptideCount > 1))
    if (conf.SavePeptidesFile)
    {
        if (conf.Database.RemovePeptideFromDecoyDB)
        {
            DecoyPeptideBuilder.AssignDecoy(finalPeptides, conf.GetDecoySpectrumFilter());
            finalPeptides.RemoveAll(m => m.FromDecoy);
        }

        finalPeptides.Sort();

        // Save the peptide file.
        IFileFormat<List<IIdentifiedSpectrum>> peptideFormat = conf.GetIdentifiedSpectrumFormat();
        string peptideFile = FileUtils.ChangeExtension(parameterFile, ".peptides");
        Progress.SetMessage("Writing peptides file...");
        peptideFormat.WriteToFile(peptideFile, finalPeptides);
        result.Add(peptideFile);

        if (!conf.FalseDiscoveryRate.FilterByFdr && conf.Database.DecoyPatternDefined)
        {
            WriteFdrFile(parameterFile, conf, finalPeptides);
        }

        Progress.SetMessage("Calculating precursor offset...");
        result.AddRange(new PrecursorOffsetCalculator(finalPeptides).Process(peptideFile));
    }

    // Build the protein list.
    Progress.SetMessage("Building protein...");
    List<IIdentifiedProtein> finalProteins = proteinBuilder.Build(finalPeptides);

    // Build the protein group list.
    Progress.SetMessage("Building protein group...");
    List<IIdentifiedProteinGroup> finalGroups = groupBuilder.Build(finalProteins);

    if (conf.Database.HasContaminationDescriptionFilter())
    {
        var notConGroupFilter = conf.Database.GetNotContaminationDescriptionFilter(Progress);

        // Walk backwards so removals do not disturb the remaining indices.
        for (int index = finalGroups.Count - 1; index >= 0; index--)
        {
            if (!notConGroupFilter.Accept(finalGroups[index]))
            {
                finalGroups.RemoveAt(index);
            }
        }
    }

    // Build the final identification result.
    var resultBuilder = conf.GetIdentifiedResultBuilder();
    resultBuilder.Progress = Progress;
    IIdentifiedResult finalResult = resultBuilder.Build(finalGroups);

    finalResult.BuildGroupIndex();

    if (conf.FalseDiscoveryRate.FilterByFdr)
    {
        var decoyGroupFilter = conf.GetDecoyGroupFilter();
        foreach (var group in finalResult)
        {
            group.FromDecoy = decoyGroupFilter.Accept(group);
            foreach (var protein in group)
            {
                protein.FromDecoy = group.FromDecoy;
            }
        }

        var decoyCount = finalResult.Count(l => l[0].FromDecoy);
        var targetCount = finalResult.Count(l => !l[0].FromDecoy);
        finalResult.ProteinFDR = conf.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator().Calculate(decoyCount, targetCount);
    }

    CalculateIsoelectricPoint(finalResult.GetProteins());

    if (isbr != null)
    {
        finalResult.PeptideFDR = isbr.PeptideFDR;
    }

    // Save the non-redundant protein list file.
    var resultFormat = conf.GetIdetifiedResultFormat(finalResult, this.Progress);
    string noredundantFile = FileUtils.ChangeExtension(parameterFile, ".noredundant");
    Progress.SetMessage("Writing noredundant file...");
    resultFormat.WriteToFile(noredundantFile, finalResult);
    result.Add(noredundantFile);

    Progress.SetMessage("Finished!");
}
/// <summary>
/// Reads spectra from a peptide text file, assigns proteins from the protein
/// annotation, merges adjacent entries that share the same title into one
/// spectrum, and derives the file/scan information from the title.
/// </summary>
/// <param name="fileName">Peptide text file to read.</param>
/// <returns>The merged list of spectra.</returns>
public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
{
    var result = new MascotPeptideTextFormat().ReadFromFile(fileName);

    FilterSpectra(result);

    UpdateModifications(result);

    foreach (var spectrum in result)
    {
        var proteins = (spectrum.Annotations[PROTEIN_KEY] as string).Split(';');
        spectrum.Peptide.AssignProteins(proteins);
        spectrum.Annotations.Remove(PROTEIN_KEY);

        spectrum.TheoreticalMass = spectrum.ExperimentalMass;
    }

    // Merge runs of adjacent entries with identical titles into the first
    // entry of the run; the index only advances once the next entry differs.
    var index = 0;
    while (index < result.Count - 1)
    {
        var currentTitle = result[index].Annotations[TITLE_KEY] as string;
        while (index < result.Count - 1)
        {
            var next = result[index + 1];
            var nextTitle = next.Annotations[TITLE_KEY] as string;
            if (!currentTitle.Equals(nextTitle))
            {
                index++;
                break;
            }

            // Peptides are moved over from last to first.
            for (int p = next.Peptides.Count - 1; p >= 0; p--)
            {
                result[index].AddPeptide(next.Peptides[p]);
            }

            result.RemoveAt(index + 1);
        }
    }

    foreach (var spectrum in result)
    {
        var title = spectrum.Annotations[TITLE_KEY] as string;
        spectrum.Annotations.Remove(TITLE_KEY);

        // The file/scan object is replaced by the parsed title, so the charge
        // is carried over explicitly.
        var charge = spectrum.Query.FileScan.Charge;
        spectrum.Query.FileScan = TitleParser.GetValue(title);
        spectrum.Query.FileScan.Charge = charge;

        if (string.IsNullOrEmpty(spectrum.Query.FileScan.Experimental))
        {
            spectrum.Query.FileScan.Experimental = Path.GetFileNameWithoutExtension(fileName);
        }

        // Only the part of the retention time annotation before "-" is parsed.
        var retention = spectrum.Annotations[RT_KEY] as string;
        if (!string.IsNullOrWhiteSpace(retention))
        {
            spectrum.Query.FileScan.RetentionTime = double.Parse(retention.StringBefore("-"));
        }

        spectrum.Annotations.Remove(RT_KEY);
    }

    return result;
}
/// <summary>
/// Calculates the deuterium enrichment per peptide: extracts chromatograph
/// profiles, runs the deuterium R template, copies the R results onto the
/// spectra, writes the annotated peptide file and finally writes a
/// peptide-by-time table of median enrichment values.
/// </summary>
/// <returns>A single-element array containing <c>options.OutputFile</c>.</returns>
public override IEnumerable<string> Process()
{
    // Extract chromatograph information.
    var chroOptions = new ChromatographProfileBuilderOptions();
    options.CopyProperties(chroOptions);
    chroOptions.InputFile = options.InputFile;
    chroOptions.OutputFile = options.BoundaryOutputFile;
    chroOptions.DrawImage = false;

    var builder = new ChromatographProfileBuilder(chroOptions);
    if (options.Overwrite || !File.Exists(options.BoundaryOutputFile))
    {
        Progress.SetMessage("Finding envelope ...");
        builder.Progress = this.Progress;
        builder.Process();
    }

    // Calculate deuterium enrichment for each peptide.
    if (options.Overwrite || !File.Exists(options.DeuteriumOutputFile))
    {
        Progress.SetMessage("Calculating deuterium ...");

        var deuteriumOptions = new RTemplateProcessorOptions()
        {
            InputFile = options.BoundaryOutputFile,
            OutputFile = options.DeuteriumOutputFile,
            RTemplate = DeuteriumR,
            RExecute = SystemUtils.GetRExecuteLocation(),
            CreateNoWindow = true
        };

        var outputImage = options.DrawImage ? "1" : "0";
        var excludeIsotopic0 = options.ExcludeIsotopic0 ? "1" : "0";
        deuteriumOptions.Parameters.Add("outputImage<-" + outputImage);
        deuteriumOptions.Parameters.Add("excludeIsotopic0<-" + excludeIsotopic0);

        new RTemplateProcessor(deuteriumOptions) { Progress = this.Progress }.Process();
    }

    // R results, keyed by the chromatograph file they were calculated from.
    var deuteriumMap = new AnnotationFormat().ReadFromFile(options.DeuteriumOutputFile).ToDictionary(m => m.Annotations["ChroFile"].ToString());

    // Read the old spectra and clear annotations from any previous run.
    var format = new MascotPeptideTextFormat();
    var spectra = format.ReadFromFile(options.InputFile);

    var staleKeys = new[]
    {
        "RetentionTime",
        "TheoreticalDeuterium",
        "ObservedDeuterium",
        "NumDeuteriumIncorporated",
        "NumExchangableHydrogen",
        "DeuteriumEnrichmentPercent"
    };
    foreach (var spec in spectra)
    {
        foreach (var key in staleKeys)
        {
            spec.Annotations.Remove(key);
        }
    }

    var calcSpectra = new List<IIdentifiedSpectrum>();
    var aas = new Aminoacids();
    foreach (var pep in spectra)
    {
        var filename = Path.GetFileNameWithoutExtension(builder.GetTargetFile(pep));
        if (!deuteriumMap.ContainsKey(filename))
        {
            // No R result for this peptide: it is left out of the output.
            continue;
        }

        var calculated = deuteriumMap[filename].Annotations;
        var numExchangeableHydrogens = aas.ExchangableHAtom(pep.Peptide.PureSequence);
        var numDeuteriumIncorporated = double.Parse(calculated["NumDeuteriumIncorporated"] as string);

        pep.Annotations["PeakRetentionTime"] = calculated["RetentionTime"];
        pep.Annotations["TheoreticalDeuterium"] = calculated["TheoreticalDeuterium"];
        pep.Annotations["ObservedDeuterium"] = calculated["ObservedDeuterium"];
        pep.Annotations["NumDeuteriumIncorporated"] = calculated["NumDeuteriumIncorporated"];
        pep.Annotations["NumExchangableHydrogen"] = numExchangeableHydrogens;
        pep.Annotations["DeuteriumEnrichmentPercent"] = numDeuteriumIncorporated / numExchangeableHydrogens;

        calcSpectra.Add(pep);
    }

    format.PeptideFormat.Headers += "\tPeakRetentionTime\tTheoreticalDeuterium\tObservedDeuterium\tNumDeuteriumIncorporated\tNumExchangableHydrogen\tDeuteriumEnrichmentPercent";
    format.NotExportSummary = true;
    format.WriteToFile(GetPeptideDeteriumFile(), calcSpectra);

    var specGroup = calcSpectra.GroupBy(m => m.Peptide.PureSequence).OrderBy(l => l.Key).ToList();

    var times = options.ExperimentalTimeMap.Values.Distinct().OrderBy(m => m).ToArray();

    using (var sw = new StreamWriter(options.OutputFile))
    {
        sw.WriteLine("Peptide\t{0}", times.Select(t => t.ToString()).Merge("\t"));

        foreach (var peptide in specGroup)
        {
            // Spectra of this peptide, bucketed by the time point of their experiment.
            var curSpectra = peptide
                .GroupBy(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental])
                .ToDictionary(l => l.Key, l => l.ToArray());

            if (options.PeptideInAllTimePointOnly && !times.All(l => curSpectra.ContainsKey(l)))
            {
                continue;
            }

            sw.Write(peptide.Key);

            foreach (var time in times)
            {
                if (!curSpectra.ContainsKey(time))
                {
                    sw.Write("\tNA");
                    continue;
                }

                var deps = curSpectra[time]
                    .Select(spec => double.Parse(spec.Annotations["DeuteriumEnrichmentPercent"].ToString()))
                    .ToArray();
                var depMedian = Statistics.Median(deps);
                sw.Write("\t{0:0.######}", depMedian);
            }

            sw.WriteLine();
        }
    }

    Progress.SetMessage("Peptide deuterium enrichment calculation finished ...");

    return new string[] { options.OutputFile };
}
/// <summary>
/// Protein-level deuterium enrichment pipeline. Runs the peptide-level calculator on the
/// spectra left after <c>RemoveAmbiguousSpectra()</c>, copies the resulting annotations
/// back onto the protein result, writes a protein-by-time table of median enrichment
/// values and finally runs the R template on that table.
/// </summary>
/// <returns>A single-element array containing <c>options.OutputFile</c>.</returns>
public override IEnumerable<string> Process()
{
    // Step 1: export the spectra of the protein result as a stand-alone peptide file.
    var proteinFormat = new MascotResultTextFormat();
    var proteinGroups = proteinFormat.ReadFromFile(options.InputFile);
    proteinGroups.RemoveAmbiguousSpectra();

    var allSpectra = proteinGroups.GetSpectra();

    // Annotations from any earlier run are dropped so they cannot leak into this one.
    var staleKeys = new[]
    {
        "TheoreticalDeuterium",
        "ObservedDeuterium",
        "NumDeuteriumIncorporated",
        "NumExchangableHydrogen",
        "DeuteriumEnrichmentPercent"
    };
    foreach (var spectrum in allSpectra)
    {
        foreach (var staleKey in staleKeys)
        {
            spectrum.Annotations.Remove(staleKey);
        }
    }

    var peptideFile = Path.ChangeExtension(options.InputFile, ".unique.peptides");
    var peptideFormat = new MascotPeptideTextFormat(proteinFormat.PeptideFormat.Headers);
    peptideFormat.WriteToFile(peptideFile, allSpectra);

    // Step 2: calculate deuterium enrichment at peptide level with a copy of our options.
    var pepOptions = new DeuteriumCalculatorOptions();
    options.CopyProperties(pepOptions);
    pepOptions.InputFile = peptideFile;
    pepOptions.OutputFile = peptideFile + ".tsv";

    var pepCalc = new PeptideDeuteriumCalculator(pepOptions) { Progress = this.Progress };
    pepCalc.Process();

    // Step 3: copy every annotation of a calculated spectrum onto the matching original
    // spectrum; the two are matched by the long file name of their file scan.
    var calculated = peptideFormat.ReadFromFile(pepCalc.GetPeptideDeteriumFile());
    var spectraByFile = allSpectra.ToDictionary(sp => sp.Query.FileScan.LongFileName);
    foreach (var calcSpectrum in calculated)
    {
        var target = spectraByFile[calcSpectrum.Query.FileScan.LongFileName];
        foreach (var annotation in calcSpectrum.Annotations)
        {
            target.Annotations[annotation.Key] = annotation.Value;
        }
    }

    // Step 4: remove peptides without a calculation result, then groups left empty.
    // Walking backwards keeps the remaining indices valid while removing.
    for (int index = proteinGroups.Count - 1; index >= 0; index--)
    {
        var currentGroup = proteinGroups[index];
        foreach (var member in currentGroup)
        {
            member.Peptides.RemoveAll(pepEntry => !pepEntry.Spectrum.Annotations.ContainsKey("DeuteriumEnrichmentPercent"));
        }

        if (currentGroup[0].Peptides.Count == 0)
        {
            proteinGroups.RemoveAt(index);
        }
    }

    proteinFormat.PeptideFormat = peptideFormat.PeptideFormat;
    var individualFile = Path.ChangeExtension(options.OutputFile, ".individual.tsv");
    proteinFormat.WriteToFile(individualFile, proteinGroups);

    // Step 5: write one row per protein group with the median enrichment per time point.
    var times = options.ExperimentalTimeMap.Values.Distinct().OrderBy(tp => tp).ToArray();
    var timeFile = Path.ChangeExtension(options.OutputFile, ".times.tsv");
    using (var writer = new StreamWriter(timeFile))
    {
        writer.WriteLine("Protein\t{0}", times.Select(tp => tp.ToString()).Merge("\t"));

        foreach (var proteinGroup in proteinGroups)
        {
            var groupSpectra = proteinGroup[0].GetSpectra();

            if (options.PeptideInAllTimePointOnly)
            {
                // Keep only the peptide sequences that were observed at every time point.
                var bySequence = groupSpectra.ToGroupDictionary(s => s.Peptide.PureSequence);
                groupSpectra.Clear();
                foreach (var sameSequence in bySequence.Values)
                {
                    var byTime = sameSequence.ToGroupDictionary(s => options.ExperimentalTimeMap[s.Query.FileScan.Experimental]);
                    if (times.All(tp => byTime.ContainsKey(tp)))
                    {
                        groupSpectra.AddRange(sameSequence);
                    }
                }
            }

            if (groupSpectra.Count == 0)
            {
                continue;
            }

            writer.Write(proteinGroup.Select(prot => prot.Name).Merge("/"));

            var spectraByTime = groupSpectra.ToGroupDictionary(s => options.ExperimentalTimeMap[s.Query.FileScan.Experimental]);
            foreach (var timePoint in times)
            {
                if (!spectraByTime.ContainsKey(timePoint))
                {
                    writer.Write("\tNA");
                    continue;
                }

                var enrichments = spectraByTime[timePoint]
                    .Select(s => double.Parse(s.Annotations["DeuteriumEnrichmentPercent"].ToString()))
                    .ToArray();
                var median = Statistics.Median(enrichments);
                writer.Write("\t{0:0.######}", median);
            }

            writer.WriteLine();
        }
    }

    // Step 6: hand the time table to the R template.
    Progress.SetMessage("Calculating ratio consistant ...");
    var rOptions = new RTemplateProcessorOptions();
    rOptions.InputFile = timeFile;
    rOptions.OutputFile = options.OutputFile;
    rOptions.RTemplate = RatioR;
    rOptions.RExecute = SystemUtils.GetRExecuteLocation();
    rOptions.CreateNoWindow = true;

    var rProcessor = new RTemplateProcessor(rOptions);
    rProcessor.Progress = this.Progress;
    rProcessor.Process();

    Progress.SetMessage("Finished ...");
    return new string[] { options.OutputFile };
}
/// <summary>
/// Loads the QTOF E. coli peptide file, filters it, flags decoys and checks the q-value
/// that <c>IdentifiedSpectrumUtils.CalculateQValue</c> assigns to the first spectrum.
/// </summary>
public void TestCalculateQValue()
{
    const string peptideFile = "../../../data/QTOF_Ecoli.LowRes.t.xml.peptides";
    var spectra = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

    // Discard spectra with an expect value above 0.05 or a pure sequence shorter than 6.
    spectra.RemoveAll(candidate => candidate.ExpectValue > 0.05 || candidate.Peptide.PureSequence.Length < 6);

    // A spectrum counts as decoy when any of its protein names contains "REVERSE_".
    foreach (var spectrum in spectra)
    {
        spectrum.FromDecoy = spectrum.Proteins.Any(name => name.Contains("REVERSE_"));
    }

    var scoreFunction = new ExpectValueFunction();
    var fdrCalculator = new TargetFalseDiscoveryRateCalculator();
    IdentifiedSpectrumUtils.CalculateQValue(spectra, scoreFunction, fdrCalculator);

    Assert.AreEqual(0.0267, spectra[0].QValue, 0.0001);
}
/// <summary>
/// Verifies that, for every classification bin, applying
/// <c>KeepUnconflictPeptidesFromSameEngineDifferentParameters</c> on the whole list and then
/// <c>KeepTopPeptideFromSameEngineDifferentParameters</c> per bin leaves the same number of
/// spectra as applying <c>KeepTopPeptideFromSameEngineDifferentParameters</c> on the whole list.
/// </summary>
public void TestSameEngineDifferentParameters()
{
    var classification = new ClassificationOptions
    {
        ClassifyByCharge = true,
        ClassifyByMissCleavage = true,
        ClassifyByModification = true,
        ModifiedAminoacids = "STY",
        ClassifyByNumProteaseTermini = true
    };

    // Two searches of the same data, each tagged with the name of its parameter set.
    var deisotopic = new MascotPeptideTextFormat().ReadFromFile(@"../../../data/deisotopic.peptides");
    IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(deisotopic);
    foreach (var spectrum in deisotopic)
    {
        spectrum.Tag = "deisotopic";
    }

    var top10 = new MascotPeptideTextFormat().ReadFromFile(@"../../../data/deisotopic-top10.peptides");
    IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(top10);
    foreach (var spectrum in top10)
    {
        spectrum.Tag = "deisotopic-top";
    }

    var combined = deisotopic.Union(top10).ToList();

    // Reference: keep the top peptide over the complete list, then bin.
    var topOnly = new List<IIdentifiedSpectrum>(combined);
    IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(topOnly, new ScoreFunction());
    foreach (var spectrum in topOnly)
    {
        spectrum.ClassificationTag = "deisotopic/deisotopic-top";
    }
    var referenceBins = classification.BuildSpectrumBin(topOnly);

    // Candidate: keep unconflicting peptides over the complete list, then bin.
    var unconflicted = new List<IIdentifiedSpectrum>(combined);
    IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters(unconflicted, new ScoreFunction());
    foreach (var spectrum in unconflicted)
    {
        spectrum.ClassificationTag = "deisotopic/deisotopic-top";
    }
    var candidateBins = classification.BuildSpectrumBin(unconflicted);

    // Per bin, reduce the candidate to its top peptides and compare sizes with the
    // reference bin that has the same condition text.
    foreach (var bin in candidateBins)
    {
        IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(bin.Spectra, new ScoreFunction());

        var counterpart = referenceBins.Find(other => other.Condition.ToString().Equals(bin.Condition.ToString()));
        Assert.AreEqual(bin.Spectra.Count, counterpart.Spectra.Count);
    }
}
/// <summary>
/// Builds an <c>IdentificationSummary</c> for a protein result file, using the sibling
/// ".peptides" file for spectrum/peptide statistics and the sibling ".param" file (when it
/// exists and enables FDR filtering) for the decoy filter and FDR calculator.
/// </summary>
/// <param name="proteinFile">Path of the protein result file; sibling files are derived from it by changing the extension.</param>
/// <param name="defaultDecoyPattern">Regular expression identifying decoy entries; used for spectrum/peptide target detection and as fallback protein decoy filter.</param>
/// <param name="defaultCalc">FDR calculator used when the ".param" file does not supply one.</param>
/// <returns>The populated summary. Sections whose source file does not exist are left at their defaults.</returns>
public static IdentificationSummary Parse(string proteinFile, string defaultDecoyPattern, IFalseDiscoveryRateCalculator defaultCalc)
{
    IdentificationSummary result = new IdentificationSummary();
    result.FileName = FileUtils.ChangeExtension(new FileInfo(proteinFile).Name, "");

    // NOTE(review): spectrum/peptide target detection always uses defaultDecoyPattern, even
    // when the .param file supplies its own decoy group filter - confirm this is intended.
    Regex decoyReg = new Regex(defaultDecoyPattern);

    IIdentifiedProteinGroupFilter decoyFilter = null;
    IFalseDiscoveryRateCalculator curCalc = null;

    // Prefer the decoy filter and calculator recorded with the result, if FDR filtering was on.
    var paramFile = FileUtils.ChangeExtension(proteinFile, ".param");
    if (File.Exists(paramFile))
    {
        BuildSummaryOptions options = BuildSummaryOptionsUtils.LoadFromFile(paramFile);
        if (options.FalseDiscoveryRate.FilterByFdr)
        {
            decoyFilter = options.GetDecoyGroupFilter();
            curCalc = options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
        }
    }

    if (decoyFilter == null)
    {
        decoyFilter = new IdentifiedProteinGroupNameRegexFilter(defaultDecoyPattern, false);
        curCalc = defaultCalc;
    }

    var peptideFile = FileUtils.ChangeExtension(proteinFile, ".peptides");
    if (File.Exists(peptideFile))
    {
        var peptides = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

        // The second argument of GetSpectraByNPT is 2 for the "full" and 1 for the "semi" subset.
        var fullSpectra = GetSpectraByNPT(peptides, 2);
        var fullTargetSpectra = GetTargetSpectra(decoyReg, fullSpectra);
        var semiSpectra = GetSpectraByNPT(peptides, 1);
        var semiTargetSpectra = GetTargetSpectra(decoyReg, semiSpectra);

        result.FullSpectrumCount = GetSpectrumCount(fullSpectra);
        result.FullTargetSpectrumCount = GetSpectrumCount(fullTargetSpectra);
        result.SemiSpectrumCount = GetSpectrumCount(semiSpectra);
        result.SemiTargetSpectrumCount = GetSpectrumCount(semiTargetSpectra);

        result.FullPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullSpectra);
        result.FullTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullTargetSpectra);
        result.SemiPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiSpectra);
        result.SemiTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiTargetSpectra);

        // Calculate(decoy count, target count); the decoy count is total minus target.
        result.FullSpectrumFdr = curCalc.Calculate(result.FullSpectrumCount - result.FullTargetSpectrumCount, result.FullTargetSpectrumCount);
        result.SemiSpectrumFdr = curCalc.Calculate(result.SemiSpectrumCount - result.SemiTargetSpectrumCount, result.SemiTargetSpectrumCount);
        result.FullPeptideFdr = curCalc.Calculate(result.FullPeptideCount - result.FullTargetPeptideCount, result.FullTargetPeptideCount);
        result.SemiPeptideFdr = curCalc.Calculate(result.SemiPeptideCount - result.SemiTargetPeptideCount, result.SemiTargetPeptideCount);
    }

    if (File.Exists(proteinFile))
    {
        var ir = new MascotResultTextFormat().ReadFromFile(proteinFile);
        ir.InitUniquePeptideCount();

        var u2proteins = (from p in ir where p[0].UniquePeptideCount > 1 select p).ToList();
        var u1proteins = (from p in ir where p[0].UniquePeptideCount == 1 select p).ToList();

        result.ProteinGroupCount = ir.Count;
        result.Unique2ProteinGroupCount = u2proteins.Count;

        // Use curCalc here as well: it is resolved together with decoyFilter above, so the
        // protein FDR follows the same calculator as the spectrum and peptide FDRs instead
        // of silently falling back to defaultCalc.
        int targetCount;
        result.Unique2ProteinFdr = CalculateProteinFdr(u2proteins, decoyFilter, curCalc, out targetCount);
        result.Unique2ProteinGroupTargetCount = targetCount;

        result.Unique1ProteinFdr = CalculateProteinFdr(u1proteins, decoyFilter, curCalc, out targetCount);
        result.Unique1ProteinGroupTargetCount = targetCount;
    }

    return result;
}
/// <summary>
/// Matches the peptides of <paramref name="fileName"/> whose "Number of Phospho (STY)"
/// annotation is "1" against the quantified spectra of <c>silacFile</c> and writes, for each
/// matched peptide, the log ratio of every MaxQuant item next to the negated log ratio of
/// the best-correlated SILAC spectrum of the same experiment to <c>fileName + ".match"</c>.
/// </summary>
/// <param name="fileName">Peptide file to read; the match table is written next to it.</param>
/// <returns>An empty array.</returns>
public override IEnumerable<string> Process(string fileName)
{
    const string phosphoCountKey = "Number of Phospho (STY)";

    var peps = new MascotPeptideTextFormat().ReadFromFile(fileName);

    // Convert.ToString copes with null or non-string annotation values; the former
    // "as string" cast yielded null for those and crashed on the following Equals call.
    peps.RemoveAll(m => !"1".Equals(Convert.ToString(m.Annotations[phosphoCountKey])));

    var silac = new MascotResultTextFormat().ReadFromFile(silacFile);
    var silacPeps = silac.GetSpectra();
    silacPeps.RemoveAll(m => m.GetQuantificationItem() == null || !m.GetQuantificationItem().HasRatio);

    // Group 1 of this pattern is matched against the MaxQuant item names below.
    Regex reg = new Regex(@"Cx_(.+)");

    // Key: pure sequence followed by the STY modification count, mirroring pureSeqKey below.
    var silacMap = silacPeps.ToGroupDictionary(m => m.Peptide.PureSequence + GetModificationCount(m.Peptide, "STY"));

    int found = 0;
    int missed = 0;

    var matchFile = fileName + ".match";
    using (StreamWriter sw = new StreamWriter(matchFile))
    {
        sw.Write("Sequence");

        // The column names come from the first peptide; with no peptide left after the
        // filter only the "Sequence" header is written instead of failing on peps[0].
        if (peps.Count > 0)
        {
            foreach (var mqi in peps[0].GetMaxQuantItemList())
            {
                sw.Write("\tm_" + mqi.Name);
                sw.Write("\ts_" + mqi.Name);
            }
        }
        sw.WriteLine();

        foreach (var p in peps)
        {
            var pureSeqKey = p.Peptide.PureSequence + p.Annotations[phosphoCountKey].ToString();
            if (silacMap.ContainsKey(pureSeqKey))
            {
                found++;
                Console.WriteLine("Find - " + pureSeqKey);

                var findPep = silacMap[pureSeqKey];
                var findPepMap = findPep.ToGroupDictionary(m => reg.Match(m.Query.FileScan.Experimental).Groups[1].Value);

                var mq = p.GetMaxQuantItemList();
                sw.Write(p.Peptide.PureSequence);
                foreach (var mqi in mq)
                {
                    // "m_" column: natural log of the MaxQuant ratio, empty when there is none.
                    if (string.IsNullOrEmpty(mqi.Ratio))
                    {
                        sw.Write("\t");
                    }
                    else
                    {
                        sw.Write("\t{0:0.00}", Math.Log(MyConvert.ToDouble(mqi.Ratio)));
                    }

                    // "s_" column: negated natural log of the ratio of the spectrum with the
                    // highest correlation, empty when the experiment has no SILAC spectrum.
                    if (!findPepMap.ContainsKey(mqi.Name))
                    {
                        sw.Write("\t");
                    }
                    else
                    {
                        var spectra = findPepMap[mqi.Name];
                        spectra.Sort((m1, m2) => m2.GetQuantificationItem().Correlation.CompareTo(m1.GetQuantificationItem().Correlation));
                        sw.Write("\t{0:0.00}", -Math.Log(spectra[0].GetQuantificationItem().Ratio));
                    }
                }
                sw.WriteLine();
            }
            else
            {
                missed++;
                Console.WriteLine("Missed - " + pureSeqKey);
            }
        }
    }

    Console.WriteLine("Found = {0}; Missed = {1}", found, missed);

    // NOTE(review): matchFile is written but not reported to the caller - confirm whether
    // callers expect it in the returned list.
    return new string[] { };
}
/// <summary>
/// Checks that spectra read from the peptide file carry no protein entries until
/// <c>IdentifiedSpectrumUtils.FillProteinInformation</c> is applied with the ".proteins" file.
/// </summary>
public void TestFillProteinInformation()
{
    var spectra = new MascotPeptideTextFormat().ReadFromFile("../../../data/Test.output.xml.FDR0.01.peptides");

    // Before filling: no peptide has any protein attached.
    bool allEmptyBefore = spectra.All(spectrum => spectrum.Peptide.Proteins.Count == 0);
    Assert.IsTrue(allEmptyBefore);

    IdentifiedSpectrumUtils.FillProteinInformation(spectra, "../../../data/Test.output.xml.FDR0.01.peptides.proteins");

    // After filling: every peptide has at least one protein.
    bool allFilledAfter = spectra.All(spectrum => spectrum.Peptide.Proteins.Count > 0);
    Assert.IsTrue(allFilledAfter);
}
/// <summary>
/// Predicts sequence variants by comparing candidate MS2 items from the raw files with the
/// spectral library. Depending on whether <c>options.MatchedFile</c> exists, either a
/// validation report is written, or the prediction table plus a FASTA file that contains
/// the original database followed by the predicted variant sequences.
/// </summary>
/// <returns>An array holding <c>options.OutputFile</c> and <c>options.OutputTableFile</c>.</returns>
public override IEnumerable<string> Process()
{
    var rawFilesByName = options.RawFiles.ToDictionary(raw => Path.GetFileNameWithoutExtension(raw));

    Progress.SetMessage("Reading library file ...");
    var libraryItems = new MS2ItemXmlFormat().ReadFromFile(options.LibraryFile);
    PreprocessingMS2ItemList(libraryItems);
    var libraryByCharge = libraryItems
        .GroupBy(ms2 => ms2.Charge)
        .ToDictionary(grp => grp.Key, grp => grp.ToList());

    // For each library item: the distinct, sorted characters of its peptide that have an
    // entry in the substitution delta-mass map.
    Progress.SetMessage("Building library sequence amino acid composition ...");
    foreach (var chargeItems in libraryByCharge.Values)
    {
        foreach (var item in chargeItems)
        {
            item.AminoacidCompsition = item.Peptide
                .Where(aa => options.SubstitutionDeltaMassMap.ContainsKey(aa))
                .Distinct()
                .OrderBy(aa => aa)
                .ToArray();
        }
    }

    // First-scan numbers of all library file scans, grouped by experiment name.
    var scansByExperiment = libraryItems
        .SelectMany(ms2 => ms2.FileScans)
        .ToList()
        .GroupBy(fs => fs.Experimental)
        .ToDictionary(byExp => byExp.Key, byExp => new HashSet<int>(byExp.Select(fs => fs.FirstScan)));

    // Scans of the optional peptides file are added to the same map.
    if (File.Exists(options.PeptidesFile))
    {
        Progress.SetMessage("Reading peptides file used for excluding scan ...");
        var identified = new MascotPeptideTextFormat().ReadFromFile(options.PeptidesFile);
        foreach (var pep in identified)
        {
            HashSet<int> scans;
            if (!scansByExperiment.TryGetValue(pep.Query.FileScan.Experimental, out scans))
            {
                scans = new HashSet<int>();
                scansByExperiment[pep.Query.FileScan.Experimental] = scans;
            }

            scans.Add(pep.Query.FileScan.FirstScan);
        }
    }

    Progress.SetMessage("Reading MS2/MS3 data ...");
    var candidates = GetCandidateMs2ItemList(rawFilesByName, scansByExperiment);
    PreprocessingMS2ItemList(candidates);

    Progress.SetMessage("Finding SAP ...");
    List<SapPredicted> predicted = new List<SapPredicted>();
    var minDeltaMass = options.SubstitutionDeltaMassMap.Values.Min(deltas => deltas.Min(d => d.DeltaMass));
    var maxDeltaMass = options.SubstitutionDeltaMassMap.Values.Max(deltas => deltas.Max(d => d.DeltaMass));

    Progress.SetRange(0, candidates.Count);
    Progress.Begin();

    FindCandidates(libraryByCharge, candidates, predicted, minDeltaMass, maxDeltaMass);

    // Per query scan keep either the single expected prediction of the best-ranked library
    // item, or all predictions of every library item that ties with the best-ranked one.
    var groupedByScan = predicted.ToGroupDictionary(sap => sap.Ms2.GetFileScans());
    predicted.Clear();
    foreach (var sameScan in groupedByScan.Values)
    {
        var ranked = sameScan.ToGroupDictionary(sap => sap.LibMs2).Values.ToList();
        ranked.Sort((left, right) => CompareSapPrecitedList(left, right));

        var best = ranked[0];
        var expected = best.FirstOrDefault(sap => sap.IsExpect);
        if (expected != null)
        {
            predicted.Add(expected);
            continue;
        }

        predicted.AddRange(best);
        int next = 1;
        while (next < ranked.Count && CompareSapPrecitedList(best, ranked[next]) == 0)
        {
            predicted.AddRange(ranked[next]);
            next++;
        }
    }

    if (File.Exists(options.MatchedFile))
    {
        new SapPredictedValidationWriter(options.MatchedFile).WriteToFile(options.OutputFile, predicted);
    }
    else
    {
        new SapPredictedWriter().WriteToFile(options.OutputTableFile, predicted);

        Progress.SetMessage("Generating SAP sequence ...");
        List<Sequence> variants = new List<Sequence>();
        foreach (var predict in predicted)
        {
            var seq = PeptideUtils.GetPureSequence(predict.LibMs2.Peptide);
            var variantType = predict.Target.TargetType;

            if (variantType == VariantType.SingleAminoacidPolymorphism)
            {
                // Substitute every occurrence of the source residue, one position at a time.
                for (int i = 0; i < seq.Length; i++)
                {
                    if (seq[i] != predict.Target.Source[0])
                    {
                        continue;
                    }

                    foreach (var replacement in predict.Target.Target)
                    {
                        // Substring(0, 0) is empty, so position 0 needs no special case.
                        string mutated = seq.Substring(0, i) + replacement + seq.Substring(i + 1);
                        var sapReference = string.Format("sp|SAP_{0}_{1}|{2}_{3}_{4}_{5}", mutated, variantType, seq, predict.Target.Source, i + 1, replacement);
                        variants.Add(new Sequence(sapReference, mutated));
                    }
                }

                continue;
            }

            foreach (var tseq in predict.Target.Target)
            {
                // The last placeholder receives the residues lost from, or added to, the
                // library sequence.
                string pattern;
                string fragment;
                if (variantType == VariantType.NTerminalLoss)
                {
                    pattern = "sp|SAP_{0}_{1}|{2}_loss_{3}";
                    fragment = seq.Substring(0, seq.Length - tseq.Length);
                }
                else if (variantType == VariantType.CTerminalLoss)
                {
                    pattern = "sp|SAP_{0}_{1}|{2}_loss_{3}";
                    fragment = seq.Substring(tseq.Length);
                }
                else if (variantType == VariantType.NTerminalExtension)
                {
                    pattern = "sp|SAP_{0}_{1}|{2}_ext_{3}";
                    fragment = tseq.Substring(0, tseq.Length - seq.Length);
                }
                else if (variantType == VariantType.CTerminalExtension)
                {
                    pattern = "sp|SAP_{0}_{1}|{2}_ext_{3}";
                    fragment = tseq.Substring(seq.Length);
                }
                else
                {
                    throw new Exception("I don't know how to deal with " + variantType.ToString());
                }

                string reference = string.Format(pattern, tseq, variantType, seq, fragment);
                variants.Add(new Sequence(reference, tseq));
            }
        }

        // One entry per distinct sequence string; the first occurrence wins.
        variants = variants
            .GroupBy(entry => entry.SeqString)
            .Select(sameSeq => sameSeq.First())
            .ToList();

        Progress.SetMessage("Reading database {0} ...", options.DatabaseFastaFile);
        var databases = SequenceUtils.Read(options.DatabaseFastaFile);

        Progress.SetMessage("Removing variant sequences which are already existed in database ...");
        variants.RemoveAll(entry => databases.Any(known => known.SeqString.Contains(entry.SeqString)));

        databases.AddRange(variants);

        Progress.SetMessage("Writing SAP sequence and original database to {0} ...", options.OutputFile);
        SequenceUtils.Write(new FastaFormat(), options.OutputFile, databases);
    }

    Progress.End();

    return new string[] { options.OutputFile, options.OutputTableFile };
}
/// <summary>
/// Creates a merger for the given source files and a default <c>MascotPeptideTextFormat</c>.
/// </summary>
/// <param name="sourceFiles">
/// Map from a string key to a list of strings, stored as-is.
/// NOTE(review): presumably a group name mapped to its peptide file paths - confirm against callers.
/// </param>
public MaxQuantPeptidesMerger(Dictionary<string, List<string>> sourceFiles)
{
    this.sourceFiles = sourceFiles;
    format = new MascotPeptideTextFormat();
}