/// <summary>
/// Exports a protein-to-peptide mapping table from the identification result and then runs
/// the ProteinQuantification.r template on the peptide-level quantification file.
/// </summary>
/// <returns>A one-element array holding the path of the protein quantification output file.</returns>
public override IEnumerable<string> Process() {
    var design = new IsobaricLabelingExperimentalDesign();
    design.LoadFromFile(options.ExpermentalDesignFile);

    string resultFileName = GetResultFilePrefix(options.ProteinFileName, design.GetReferenceNames(""));

    // Save the options used for this run next to the results.
    string paramFileName = Path.ChangeExtension(resultFileName, ".param");
    options.SaveToFile(paramFileName);

    Progress.SetMessage("Reading proteins...");
    IIdentifiedResult ir = new MascotResultTextFormat().ReadFromFile(options.ProteinFileName);

    // One row per (group, distinct pure peptide sequence); counts come from the first protein of the group.
    var proteinpeptidefile = string.Format("{0}.pro_pep.tsv", resultFileName);
    using (var sw = new StreamWriter(proteinpeptidefile)) {
        sw.WriteLine("Index\tPeptide\tProteins\tDescription\tPepCount\tUniquePepCount");
        foreach (var g in ir) {
            var peps = g.GetPeptides();
            var seqs = (from p in peps select p.Peptide.PureSequence).Distinct().OrderBy(m => m).ToArray();
            var proname = (from p in g select p.Name).Merge(" ! ");
            var description = (from p in g select p.Description).Merge(" ! ");
            foreach (var seq in seqs) {
                sw.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}", g.Index, seq, proname, description, g[0].PeptideCount, g[0].UniquePeptideCount);
            }
        }
    }

    Progress.SetMessage("Quantifing proteins...");
    var qoptions = new RTemplateProcessorOptions();
    qoptions.InputFile = options.QuanPeptideFileName;
    qoptions.OutputFile = resultFileName + ".quan." + options.PeptideToProteinMethod + ".tsv";
    // The format string has a single placeholder; the surplus, never-formatted argument was dropped.
    qoptions.RTemplate = string.Format("{0}/ProteinQuantification.r", FileUtils.GetTemplateDir());
    // Backslashes are converted to forward slashes before the path is embedded in the R assignment.
    qoptions.Parameters.Add(string.Format("proteinfile<-\"{0}\"", proteinpeptidefile.Replace("\\", "/")));
    qoptions.Parameters.Add(string.Format("method<-\"{0}\"", options.PeptideToProteinMethod));
    qoptions.Parameters.Add("pvalue<-0.01");
    qoptions.Parameters.Add("minFinalCount<-3");
    new RTemplateProcessor(qoptions).Process();

    Progress.SetMessage("Finished.");
    return new[] { qoptions.OutputFile };
}
/// <summary>
/// Reads an identification result, builds its correlation items and writes them,
/// tab-separated, to <c>fileName + ".corr"</c>.
/// </summary>
/// <param name="fileName">Identification result file to read.</param>
/// <returns>A one-element array holding the path of the written file.</returns>
public override IEnumerable<string> Process(string fileName) {
    var identified = new MascotResultTextFormat().ReadFromFile(fileName);
    ResultCorrelationItem correlations = BuildResult(identified);

    var outputFile = fileName + ".corr";
    using (var writer = new StreamWriter(outputFile)) {
        // Header: fixed columns, one column per classification title, then the correlation column.
        writer.Write("Index\tName");
        foreach (var classificationTitle in correlations.ClassificationTitles) {
            writer.Write("\t" + classificationTitle);
        }
        writer.WriteLine("\tCorrelation");

        // Body: the protein item first, followed by each of its peptide items.
        foreach (var entry in correlations) {
            writer.Write("{0}\t", entry.Index);
            PrintCorrelationItem(writer, entry.Protein);
            foreach (var peptideItem in entry.Peptides) {
                writer.Write("\t");
                PrintCorrelationItem(writer, peptideItem);
            }
        }
    }

    return new[] { outputFile };
}
/// <summary>
/// Removes every peptide whose matched sequence is rejected by <c>validator</c>, filters the
/// result down to peptides that still have a spectrum, and writes it to <c>fileName + ".Labeled"</c>.
/// </summary>
/// <param name="fileName">Identification result file to read.</param>
/// <returns>A one-element array holding the path of the written file.</returns>
public override IEnumerable<string> Process(string fileName) {
    var textFormat = new MascotResultTextFormat();
    IIdentifiedResult identified = textFormat.ReadFromFile(fileName);

    List<IIdentifiedSpectrum> allSpectra = identified.GetSpectra();
    foreach (IIdentifiedSpectrum spectrum in allSpectra) {
        // Iterate from the end so that removing an entry does not shift the indexes still to be visited.
        for (int index = spectrum.Peptides.Count - 1; index >= 0; index--) {
            IIdentifiedPeptide candidate = spectrum.Peptides[index];
            string matched = PeptideUtils.GetMatchedSequence(candidate.Sequence);
            if (validator.Validate(matched)) {
                continue;
            }

            spectrum.RemovePeptideAt(index);
            candidate.Spectrum = null;
        }
    }

    // Peptides detached above have a null spectrum and are filtered out here.
    identified.Filter(m => m.Spectrum != null);

    string outputFile = fileName + ".Labeled";
    textFormat.WriteToFile(outputFile, identified);
    return new[] { outputFile };
}
/// <summary>
/// Reads the non-empty, trimmed lines of <paramref name="fileName"/>, removes from the result
/// read from <c>sourceFile</c> every entry not accepted by <c>Accept</c>, and writes the
/// remainder to <c>fileName + ".noredundant"</c>.
/// </summary>
/// <param name="fileName">Text file whose non-blank lines are passed to <c>Accept</c>.</param>
/// <returns>A one-element array holding the path of the written file.</returns>
public override IEnumerable<string> Process(string fileName) {
    var acNumbers = File.ReadAllLines(fileName)
        .Select(line => line.Trim())
        .Where(trimmed => trimmed.Length > 0)
        .ToList();

    var textFormat = new MascotResultTextFormat { Progress = this.Progress };
    var identified = textFormat.ReadFromFile(sourceFile);

    // Walk backwards so RemoveAt never disturbs the indexes still to be checked.
    for (int index = identified.Count; index-- > 0;) {
        if (!Accept(acNumbers, identified[index])) {
            identified.RemoveAt(index);
        }
    }

    var outputFile = fileName + ".noredundant";
    textFormat.WriteToFile(outputFile, identified);
    return new[] { outputFile };
}
/// <summary>
/// Creates a new text format that reuses the peptide format of <c>format</c> and is then
/// initialized from the given result.
/// </summary>
/// <param name="ir">Result passed to <c>InitializeByResult</c>.</param>
/// <returns>The newly created format.</returns>
protected override MascotResultTextFormat GetFormat(IIdentifiedResult ir) {
    var initialized = new MascotResultTextFormat {
        PeptideFormat = format.PeptideFormat
    };
    initialized.InitializeByResult(ir);
    return initialized;
}
/// <summary>
/// Reads <c>datafile</c> and checks the quantification item of element [0][0] of the result.
/// </summary>
public void TestRead() {
    var identified = new MascotResultTextFormat().ReadFromFile(datafile);
    var quantification = identified[0][0].GetQuantificationItem();

    Assert.AreEqual(true, quantification.Enabled);
    Assert.AreEqual(8027277.1, quantification.ReferenceIntensity, 0.1);
    Assert.AreEqual(303918.6, quantification.SampleIntensity, 0.1);
    Assert.AreEqual(0.7830, quantification.Correlation, 0.0001);
    Assert.AreEqual(0.0379, quantification.Ratio, 0.0001);
}
/// <summary>
/// Reads an identification result and writes it using <c>MascotResultDtaselectFormat</c>
/// to <c>filename + ".dtaselect.txt"</c>.
/// </summary>
/// <param name="filename">Identification result file to read.</param>
/// <returns>A one-element array holding the path of the written file.</returns>
public override IEnumerable<string> Process(string filename) {
    var reader = new MascotResultTextFormat();
    IIdentifiedResult identified = reader.ReadFromFile(filename);

    string outputFile = filename + ".dtaselect.txt";
    var writer = new MascotResultDtaselectFormat();
    writer.WriteToFile(outputFile, identified);

    return new[] { outputFile };
}
/// <summary>
/// Reads an identification result and writes it to <c>fileName + ".tmp"</c> using a
/// <c>SequestResultTextFormat</c> built from fixed protein and peptide header lines.
/// </summary>
/// <param name="fileName">Identification result file to read.</param>
/// <returns>A one-element array holding the path of the written file.</returns>
public override IEnumerable<string> Process(string fileName) {
    var identified = new MascotResultTextFormat().ReadFromFile(fileName);

    string proteinHeader = "\tReference\tPepCount\tUniquePepCount\tCoverPercent\tMW\tPI";
    string peptideHeader = "\t\"File, Scan(s)\"\tSequence\tMH+\tDiff(MH+)\tCharge\tRank\tXC\tDeltaCn\tSp\tRSp\tIons\tReference\tDIFF_MODIFIED_CANDIDATE\tPI\tGroupCount\tProteinCount";
    var sequestFormat = new SequestResultTextFormat(proteinHeader, peptideHeader);

    var outputFile = fileName + ".tmp";
    sequestFormat.WriteToFile(outputFile, identified);
    return new[] { outputFile };
}
/// <summary>
/// Reads an identification result, applies <c>KeepDistinctPeptideOnly</c> to it and writes
/// the outcome to <c>fileName + ".distinct"</c>.
/// </summary>
/// <param name="fileName">Identification result file to read.</param>
/// <returns>A one-element array holding the path of the written file.</returns>
public override IEnumerable<string> Process(string fileName) {
    var textFormat = new MascotResultTextFormat();
    IIdentifiedResult identified = textFormat.ReadFromFile(fileName);

    KeepDistinctPeptideOnly(identified);

    string outputFile = fileName + ".distinct";
    textFormat.WriteToFile(outputFile, identified);
    return new[] { outputFile };
}
/// <summary>
/// Round-trips <c>datafile</c> through a default format, then checks that a format built from
/// an explicit protein header and initialized by the result exposes the same headers.
/// </summary>
public void TestWrite() {
    var sourceFormat = new MascotResultTextFormat();
    var identified = sourceFormat.ReadFromFile(datafile);
    sourceFormat.WriteToFile(@"../../../data/QuantificationItem.txt", identified);

    string peptideHeader = sourceFormat.PeptideFormat.GetHeader();
    var rebuiltFormat = new MascotResultTextFormat("\tReference\tPepCount\tUniquePepCount\tCoverPercent\tMW\tPI", peptideHeader);
    rebuiltFormat.InitializeByResult(identified);
    //rebuiltFormat.WriteToFile(@"../../../data/QuantificationItem2.txt", identified);

    Assert.AreEqual(sourceFormat.ProteinFormat.GetHeader(), rebuiltFormat.ProteinFormat.GetHeader());
    Assert.AreEqual(sourceFormat.PeptideFormat.GetHeader(), rebuiltFormat.PeptideFormat.GetHeader());
}
/// <summary>
/// Round-trips <c>datafile</c> through a default format (writing under the test directory),
/// then checks that a format built from an explicit protein header and initialized by the
/// result exposes the same headers.
/// </summary>
public void TestWrite() {
    var sourceFormat = new MascotResultTextFormat();
    var identified = sourceFormat.ReadFromFile(datafile);

    string outputPath = TestContext.CurrentContext.TestDirectory + "/../../../data//QuantificationItem.txt";
    sourceFormat.WriteToFile(outputPath, identified);

    string peptideHeader = sourceFormat.PeptideFormat.GetHeader();
    var rebuiltFormat = new MascotResultTextFormat("\tReference\tPepCount\tUniquePepCount\tCoverPercent\tMW\tPI", peptideHeader);
    rebuiltFormat.InitializeByResult(identified);
    //rebuiltFormat.WriteToFile(TestContext.CurrentContext.TestDirectory + "/../../../data//QuantificationItem2.txt", identified);

    Assert.AreEqual(sourceFormat.ProteinFormat.GetHeader(), rebuiltFormat.ProteinFormat.GetHeader());
    Assert.AreEqual(sourceFormat.PeptideFormat.GetHeader(), rebuiltFormat.PeptideFormat.GetHeader());
}
/// <summary>
/// Builds the SILAC quantification file processor from the current values of the UI fields.
/// </summary>
/// <returns>A configured <c>ExtendSilacQuantificationProteinFileProcessor</c>.</returns>
/// <exception cref="Exception">The selected search engine is neither MASCOT nor SEQUEST.</exception>
protected override IFileProcessor GetFileProcessor() {
    double ppmTolerance = precursorPPMTolerance.Value;

    // With no raw directory given, fall back to the directory of the origin file.
    string rawDirectory = rawDir.FullName == ""
        ? new FileInfo(GetOriginFile()).DirectoryName
        : rawDir.FullName;

    var engine = searchEngine.SelectedItem;
    IIdentifiedResultTextFormat fileFormat;
    if (engine == SearchEngineType.MASCOT) {
        fileFormat = new MascotResultTextFormat();
    } else if (engine == SearchEngineType.SEQUEST) {
        fileFormat = new SequestResultTextFormat();
    } else {
        throw new Exception(MyConvert.Format("Unsupported search engine {0}, contact with author.", searchEngine.SelectedItem));
    }

    var quantificationOption = new SilacQuantificationOption() {
        RawFormat = rawFormats.SelectedItem,
        RawDir = rawDirectory,
        SilacParamFile = silacFile.FullName,
        PPMTolerance = ppmTolerance,
        IgnoreModifications = ignoreModifications.Text,
        ProfileLength = _profileLength.Value
    };

    var processor = new ExtendSilacQuantificationProteinFileProcessor(
        quantificationOption,
        fileFormat,
        datasetClassification.GetClassificationSet(),
        rawPairClassification.GetClassificationSet());
    processor.MinPeptideRegressionCorrelation = minCorrelation.Value;
    return processor;
}
/// <summary>
/// Reads an identification result, runs <c>processor</c> on every protein group and writes
/// the result to <c>fileName + ".Unduplicated"</c>.
/// </summary>
/// <param name="fileName">Identification result file to read.</param>
/// <returns>A one-element array holding the path of the written file.</returns>
public override IEnumerable<string> Process(string fileName) {
    var textFormat = new MascotResultTextFormat();
    IIdentifiedResult identified = textFormat.ReadFromFile(fileName);

    foreach (IIdentifiedProteinGroup proteinGroup in identified) {
        processor.Process(proteinGroup);
    }

    string outputFile = fileName + ".Unduplicated";
    textFormat.WriteToFile(outputFile, identified);
    return new[] { outputFile };
}
/// <summary>
/// Annotates each peptide read from <paramref name="fileName"/> with ratios taken from the
/// quantification result in <c>quantificationFile</c>, then writes the peptides to
/// <c>fileName + ".quantification"</c>.
/// </summary>
/// <param name="fileName">Peptide file to read and annotate.</param>
/// <returns>A one-element array holding the path of the written file.</returns>
/// <exception cref="Exception">The quantification file contains no entries.</exception>
public override IEnumerable<string> Process(string fileName) {
    Progress.SetMessage("Reading mutation file ...");
    var peptideFormat = new MascotPeptideTextFormat();
    var peptideSpectra = peptideFormat.ReadFromFile(fileName);

    var quanFormat = new MascotResultTextFormat { Progress = this.Progress };
    Progress.SetMessage("Reading quantification file ...");
    var quantified = quanFormat.ReadFromFile(quantificationFile);
    if (quantified.Count == 0) {
        throw new Exception("No quantification found!");
    }

    foreach (var spectrum in peptideSpectra) {
        // The lookup key is the pure sequence with every 'I' replaced by 'L'.
        var mutatedSequence = spectrum.Peptide.PureSequence.Replace('I', 'L');
        var mutatedGroup = quantified.FirstOrDefault(grp => grp.Any(protein => protein.Name.Equals(mutatedSequence)));
        if (mutatedGroup != null) {
            AddRatio(spectrum, mutatedGroup, "MUL_");
        }

        // The original sequence comes from the "OriginalSequence" annotation and is used as-is.
        var originalSequence = spectrum.Annotations["OriginalSequence"] as string;
        var originalGroup = quantified.FirstOrDefault(grp => grp.Any(protein => protein.Name.Equals(originalSequence)));
        if (originalGroup != null) {
            AddRatio(spectrum, originalGroup, "ORI_");
        }
    }

    peptideFormat.Initialize(peptideSpectra);

    var outputFile = fileName + ".quantification";
    Progress.SetMessage("Writing peptide quantification file ...");
    peptideFormat.WriteToFile(outputFile, peptideSpectra);
    return new[] { outputFile };
}
/// <summary>
/// Reads a ProteinProphet XML file, drops entries whose probability is below
/// <c>min_probability</c>, sorts and re-indexes the rest, and writes them with the
/// ProteinProphet headers to the input path with its extension changed to "noredundant".
/// </summary>
/// <param name="fileName">ProteinProphet XML file to read.</param>
/// <returns>A one-element array holding the path of the written file.</returns>
public override IEnumerable<string> Process(string fileName) {
    var groups = new ProteinProphetXmlReader().ReadFromFile(fileName);

    // Walk backwards so RemoveAt never disturbs the indexes still to be checked.
    for (int index = groups.Count; index-- > 0;) {
        if (groups[index].Probability < min_probability) {
            groups.RemoveAt(index);
        }
    }

    groups.Sort();
    groups.BuildGroupIndex();

    var outputFile = FileUtils.ChangeExtension(fileName, "noredundant");
    var textFormat = new MascotResultTextFormat(
        MascotHeader.PROTEINPROPHET_PROTEIN_HEADER,
        MascotHeader.PROTEINPROPHET_PEPTIDE_HEADER);
    textFormat.WriteToFile(outputFile, groups);

    return new[] { outputFile };
}
/// <summary>
/// Writes only the protein lines of an identification result to the input path with its
/// extension changed to ".proteinonly". Each line is prefixed with "$groupIndex-position".
/// </summary>
/// <param name="sourceFile">Identification result file to read.</param>
/// <returns>A one-element array holding the path of the written file.</returns>
public override IEnumerable<string> Process(string sourceFile) {
    var textFormat = new MascotResultTextFormat { Progress = this.Progress };
    var identified = textFormat.ReadFromFile(sourceFile);

    var outputFile = FileUtils.ChangeExtension(sourceFile, ".proteinonly");
    using (var writer = new StreamWriter(outputFile)) {
        writer.WriteLine(textFormat.ProteinFormat.GetHeader());

        foreach (var proteinGroup in identified) {
            for (int position = 0; position < proteinGroup.Count; position++) {
                // The position within the group is written 1-based.
                string proteinLine = textFormat.ProteinFormat.GetString(proteinGroup[position]);
                writer.WriteLine("${0}-{1}{2}", proteinGroup.Index, position + 1, proteinLine);
            }
        }
    }

    return new[] { outputFile };
}
/// <summary>
/// Matches peptides carrying exactly one "Number of Phospho (STY)" annotation against SILAC
/// spectra that have a ratio, and writes the log ratios side by side to
/// <c>fileName + ".match"</c>. Found and missed keys are reported on the console.
/// </summary>
/// <param name="fileName">Peptide file to read.</param>
/// <returns>An empty array; the match file path is not returned.</returns>
public override IEnumerable<string> Process(string fileName) {
    var candidates = new MascotPeptideTextFormat().ReadFromFile(fileName);
    candidates.RemoveAll(m => !(m.Annotations["Number of Phospho (STY)"] as string).Equals("1"));

    var silacResult = new MascotResultTextFormat().ReadFromFile(silacFile);
    var silacSpectra = silacResult.GetSpectra();
    silacSpectra.RemoveAll(m => m.GetQuantificationItem() == null || !m.GetQuantificationItem().HasRatio);

    // Captures the part of the experimental name that follows "Cx_".
    Regex experimentPattern = new Regex(@"Cx_(.+)");

    // Key: pure sequence followed by the STY modification count.
    var silacBySequence = silacSpectra.ToGroupDictionary(m => m.Peptide.PureSequence + GetModificationCount(m.Peptide, "STY"));

    int found = 0;
    int missed = 0;

    var matchFile = fileName + ".match";
    using (var writer = new StreamWriter(matchFile)) {
        // Header: two columns (m_ and s_) per item of the first peptide.
        writer.Write("Sequence");
        foreach (var headerItem in candidates[0].GetMaxQuantItemList()) {
            writer.Write("\tm_" + headerItem.Name);
            writer.Write("\ts_" + headerItem.Name);
        }
        writer.WriteLine();

        foreach (var candidate in candidates) {
            var sequenceKey = candidate.Peptide.PureSequence + candidate.Annotations["Number of Phospho (STY)"].ToString();

            if (!silacBySequence.ContainsKey(sequenceKey)) {
                missed++;
                Console.WriteLine("Missed - " + sequenceKey);
                continue;
            }

            found++;
            Console.WriteLine("Find - " + sequenceKey);

            var matchedSpectra = silacBySequence[sequenceKey];
            var matchedByExperiment = matchedSpectra.ToGroupDictionary(m => experimentPattern.Match(m.Query.FileScan.Experimental).Groups[1].Value);

            writer.Write(candidate.Peptide.PureSequence);
            foreach (var quantItem in candidate.GetMaxQuantItemList()) {
                // First column: log of the item's own ratio, blank when it has none.
                if (string.IsNullOrEmpty(quantItem.Ratio)) {
                    writer.Write("\t");
                } else {
                    writer.Write("\t{0:0.00}", Math.Log(MyConvert.ToDouble(quantItem.Ratio)));
                }

                // Second column: negated log ratio of the SILAC spectrum with the highest correlation.
                if (matchedByExperiment.ContainsKey(quantItem.Name)) {
                    var experimentSpectra = matchedByExperiment[quantItem.Name];
                    experimentSpectra.Sort((left, right) => right.GetQuantificationItem().Correlation.CompareTo(left.GetQuantificationItem().Correlation));
                    writer.Write("\t{0:0.00}", -Math.Log(experimentSpectra[0].GetQuantificationItem().Ratio));
                } else {
                    writer.Write("\t");
                }
            }
            writer.WriteLine();
        }
    }

    Console.WriteLine("Found = {0}; Missed = {1}", found, missed);
    return new string[0];
}
/// <summary>
/// Builds an identification summary (spectrum, peptide and protein-group counts with their
/// false discovery rates) from a protein file and its sibling ".peptides" and ".param" files.
/// </summary>
/// <param name="proteinFile">Path of the protein file; sibling files are located by changing its extension.</param>
/// <param name="defaultDecoyPattern">Decoy regex used for spectra, and for protein groups when the param file supplies no FDR settings.</param>
/// <param name="defaultCalc">FDR calculator used when the param file supplies no FDR settings.</param>
/// <returns>The populated summary; sections whose input file is missing are left at their defaults.</returns>
public static IdentificationSummary Parse(string proteinFile, string defaultDecoyPattern, IFalseDiscoveryRateCalculator defaultCalc) {
    IdentificationSummary result = new IdentificationSummary();
    result.FileName = FileUtils.ChangeExtension(new FileInfo(proteinFile).Name, "");

    Regex decoyReg = new Regex(defaultDecoyPattern);
    IIdentifiedProteinGroupFilter decoyFilter = null;
    IFalseDiscoveryRateCalculator curCalc = null;

    // Prefer the decoy filter and FDR calculator recorded in the param file, when FDR filtering was enabled.
    var paramFile = FileUtils.ChangeExtension(proteinFile, ".param");
    if (File.Exists(paramFile)) {
        BuildSummaryOptions options = BuildSummaryOptionsUtils.LoadFromFile(paramFile);
        if (options.FalseDiscoveryRate.FilterByFdr) {
            decoyFilter = options.GetDecoyGroupFilter();
            curCalc = options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
        }
    }

    // Otherwise fall back to the caller-supplied defaults.
    if (decoyFilter == null) {
        decoyFilter = new IdentifiedProteinGroupNameRegexFilter(defaultDecoyPattern, false);
        curCalc = defaultCalc;
    }

    var peptideFile = FileUtils.ChangeExtension(proteinFile, ".peptides");
    if (File.Exists(peptideFile)) {
        var peptides = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

        var fullSpectra = GetSpectraByNPT(peptides, 2);
        var fullTargetSpectra = GetTargetSpectra(decoyReg, fullSpectra);
        var semiSpectra = GetSpectraByNPT(peptides, 1);
        var semiTargetSpectra = GetTargetSpectra(decoyReg, semiSpectra);

        result.FullSpectrumCount = GetSpectrumCount(fullSpectra);
        result.FullTargetSpectrumCount = GetSpectrumCount(fullTargetSpectra);
        result.SemiSpectrumCount = GetSpectrumCount(semiSpectra);
        result.SemiTargetSpectrumCount = GetSpectrumCount(semiTargetSpectra);

        result.FullPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullSpectra);
        result.FullTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullTargetSpectra);
        result.SemiPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiSpectra);
        result.SemiTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiTargetSpectra);

        // The calculator receives (total - target, target) for each category.
        result.FullSpectrumFdr = curCalc.Calculate(result.FullSpectrumCount - result.FullTargetSpectrumCount, result.FullTargetSpectrumCount);
        result.SemiSpectrumFdr = curCalc.Calculate(result.SemiSpectrumCount - result.SemiTargetSpectrumCount, result.SemiTargetSpectrumCount);
        result.FullPeptideFdr = curCalc.Calculate(result.FullPeptideCount - result.FullTargetPeptideCount, result.FullTargetPeptideCount);
        result.SemiPeptideFdr = curCalc.Calculate(result.SemiPeptideCount - result.SemiTargetPeptideCount, result.SemiTargetPeptideCount);
    }

    if (File.Exists(proteinFile)) {
        var ir = new MascotResultTextFormat().ReadFromFile(proteinFile);
        ir.InitUniquePeptideCount();

        // Groups are split by the unique peptide count of their first protein.
        var u2proteins = (from p in ir where p[0].UniquePeptideCount > 1 select p).ToList();
        var u1proteins = (from p in ir where p[0].UniquePeptideCount == 1 select p).ToList();

        result.ProteinGroupCount = ir.Count;
        result.Unique2ProteinGroupCount = u2proteins.Count;

        // Use the same calculator that was resolved together with decoyFilter (curCalc), so a
        // calculator configured in the param file also applies to the protein-level FDR.
        int targetCount;
        result.Unique2ProteinFdr = CalculateProteinFdr(u2proteins, decoyFilter, curCalc, out targetCount);
        result.Unique2ProteinGroupTargetCount = targetCount;

        result.Unique1ProteinFdr = CalculateProteinFdr(u1proteins, decoyFilter, curCalc, out targetCount);
        result.Unique1ProteinGroupTargetCount = targetCount;
    }

    return result;
}
/// <summary>
/// Renames proteins through <c>parser</c>, removes groups that share a name with an earlier
/// group, runs <c>calculator</c> and writes two tab-separated tables: spectrum counts for
/// every group (extension "count") and values plus counts for the groups passing the
/// minimum-count check (extension from <c>calculator.GetExtension()</c>).
/// </summary>
/// <param name="fileName">Identification result file to read.</param>
/// <returns>A one-element array holding the path of the value table; the count table path is not returned.</returns>
public override IEnumerable<string> Process(string fileName) {
    var textFormat = new MascotResultTextFormat();
    var identified = textFormat.ReadFromFile(fileName);

    // Replace each protein name by its parsed form whenever the parser recognizes it.
    foreach (var proteinGroup in identified) {
        foreach (var protein in proteinGroup) {
            string parsedName;
            if (this.parser.TryParse(protein.Name, out parsedName)) {
                protein.Name = parsedName;
            }
        }
    }

    // Drop a group when any earlier group has the same name; scanning from the end keeps indexes valid.
    for (int later = identified.Count - 1; later > 0; later--) {
        for (int earlier = later - 1; earlier >= 0; earlier--) {
            if (SameName(identified[later], identified[earlier])) {
                identified.RemoveAt(later);
                break;
            }
        }
    }

    calculator.Calculate(identified, expsMap);

    // Lazily evaluated: taken from the start of the list up to the first group failing the check.
    // NOTE(review): TakeWhile stops at the first failure; confirm a Where filter was not intended.
    var countPassed = identified.TakeWhile(grp => grp[0].GetLabelfreeResult().HasCountLargerThan(MinSpectrumCount - 1));

    var countFile = FileUtils.ChangeExtension(fileName, "count");
    using (var countWriter = new StreamWriter(countFile)) {
        countWriter.Write("Protein\tDescription");
        foreach (var experimentKey in expsMap.Keys) {
            countWriter.Write("\t" + experimentKey);
        }
        countWriter.WriteLine();

        foreach (var proteinGroup in identified) {
            var firstProtein = proteinGroup[0];
            countWriter.Write(firstProtein.Name + "\t" + firstProtein.Description);

            var labelfree = firstProtein.GetLabelfreeResult();
            foreach (var experimentKey in expsMap.Keys) {
                countWriter.Write("\t{0}", labelfree[experimentKey].Count);
            }
            countWriter.WriteLine();
        }
    }

    var valueFile = FileUtils.ChangeExtension(fileName, this.calculator.GetExtension());
    using (var valueWriter = new StreamWriter(valueFile)) {
        valueWriter.Write("Protein\tDescription");
        foreach (var experimentKey in expsMap.Keys) {
            valueWriter.Write("\t" + experimentKey);
        }
        foreach (var experimentKey in expsMap.Keys) {
            valueWriter.Write("\t" + experimentKey + "_count");
        }
        valueWriter.WriteLine();

        foreach (var proteinGroup in countPassed) {
            var firstProtein = proteinGroup[0];
            // Quotes and tabs inside the description are replaced by spaces in this table.
            string cleanedDescription = firstProtein.Description.Replace('\'', ' ').Replace('\t', ' ');
            valueWriter.Write(firstProtein.Name + "\t" + cleanedDescription);

            var labelfree = firstProtein.GetLabelfreeResult();
            foreach (var experimentKey in expsMap.Keys) {
                valueWriter.Write("\t{0:0.000000}", labelfree[experimentKey].Value);
            }
            foreach (var experimentKey in expsMap.Keys) {
                valueWriter.Write("\t{0}", labelfree[experimentKey].Count);
            }
            valueWriter.WriteLine();
        }
    }

    return new[] { valueFile };
}
/// <summary>
/// Runs O18 quantification driven by an option file: loads the protein result, builds a scan
/// envelope list around each identified spectrum in its raw file, runs the peptide processor
/// per envelope list, copies the outcome into spectrum annotations, calculates ratios through
/// the options' calculator and writes a ".O18summary" file.
/// </summary>
/// <param name="optionFile">Option file to load; the summary path is this path with the extension changed to ".O18summary".</param>
/// <returns>A one-element array holding the summary file path.</returns>
/// <exception cref="UserTerminatedException">Cancellation is pending on <c>Progress</c> while peptides are processed.</exception>
public override IEnumerable <string> Process(string optionFile) {
    this.options = O18QuantificationFileProcessorOptions.Load(optionFile);
    var calc = options.GetProteinRatioCalculator();

    // Make sure the directory receiving the per-peptide detail files exists.
    var detailDirectory = options.GetDetailDirectory();
    if (!Directory.Exists(detailDirectory)) {
        Directory.CreateDirectory(detailDirectory);
    }

    var format = new MascotResultTextFormat();
    IIdentifiedResult mr = format.ReadFromFile(options.ProteinFile);
    CheckRawFilename(mr, optionFile);

    Dictionary <string, List <IIdentifiedSpectrum> > filePepMap = GetFilePeptideMap(mr);
    // NOTE(review): pepResultMap is never read or written in this method.
    Dictionary <IIdentifiedPeptide, O18QuantificationSummaryItem> pepResultMap = new Dictionary <IIdentifiedPeptide, O18QuantificationSummaryItem>();

    foreach (string filename in filePepMap.Keys) {
        Progress.SetMessage("Processing " + filename);

        // A file literally named "Cmpd.raw" is replaced by the raw file looked up from the protein file.
        string rawFilename = filename;
        if (new FileInfo(filename).Name.Equals("Cmpd.raw")) {
            rawFilename = FindRawFileName(options.ProteinFile);
        }
        string experimental = FileUtils.ChangeExtension(new FileInfo(rawFilename).Name, "");

        using (CacheRawFile rawFile = new CacheRawFile(rawFilename)) {
            int firstScanNumber = rawFile.GetFirstSpectrumNumber();
            int lastScanNumber = rawFile.GetLastSpectrumNumber();
            List <IIdentifiedSpectrum> peps = filePepMap[filename];

            // peptideChargeMap: "sequence.charge.mass" key -> envelope lists found for that key.
            // pklMpMap: envelope list -> identified spectra assigned to it.
            Dictionary <string, DifferentRetentionTimeEnvelopes> peptideChargeMap = new Dictionary <string, DifferentRetentionTimeEnvelopes>();
            Dictionary <O18QuanEnvelopes, List <IIdentifiedSpectrum> > pklMpMap = new Dictionary <O18QuanEnvelopes, List <IIdentifiedSpectrum> >();

            Progress.SetRange(0, peps.Count);
            foreach (IIdentifiedSpectrum mphit in peps) {
                if (Progress.IsCancellationPending()) {
                    throw new UserTerminatedException();
                }
                Progress.Increment(1);

                IIdentifiedPeptide mp = mphit.Peptide;
                if (mp.Sequence.EndsWith("-")) {
                    // It cannot be O18 labelled, so skip it.
                    continue;
                }

                int startScan = mphit.Query.FileScan.FirstScan;
                double theoreticalMz = GetTheoretialO16Mz(gapO18O16, mphit);
                // Adding 0.5 before the int cast rounds the positive product to the nearest integer.
                int theoreticalMass = (int)(theoreticalMz * mphit.Query.Charge + 0.5);
                string sequenceCharge = PeptideUtils.GetPureSequence(mphit.Sequence) + "." + mphit.Query.Charge + "." + theoreticalMass;
                if (!peptideChargeMap.ContainsKey(sequenceCharge)) {
                    peptideChargeMap.Add(sequenceCharge, new DifferentRetentionTimeEnvelopes());
                }

                // First pass: attach this hit to every existing envelope list whose scan range contains startScan.
                bool bFound = false;
                DifferentRetentionTimeEnvelopes pkls = peptideChargeMap[sequenceCharge];
                foreach (var pklList in pkls) {
                    if (pklList.Count == 0) {
                        continue;
                    }
                    if (pklList[0].Scan > startScan) {
                        continue;
                    }
                    if (pklList[pklList.Count - 1].Scan < startScan) {
                        continue;
                    }

                    pklMpMap[pklList].Add(mphit);
                    bFound = true;

                    // Flag the envelope just before the first one whose first scan time is past startScan;
                    // when none is past it, flag the last envelope.
                    bool findIdentified = false;
                    for (int i = 1; i < pklList.Count; i++) {
                        if (pklList[i].ScanTimes[0].Scan > startScan) {
                            pklList[i - 1].IsIdentified = true;
                            findIdentified = true;
                            break;
                        }
                    }
                    if (!findIdentified) {
                        pklList[pklList.Count - 1].IsIdentified = true;
                    }
                }
                if (bFound) {
                    continue;
                }

                // No existing list covers this hit: build a new envelope list around startScan.
                double mzTolerance = PrecursorUtils.ppm2mz(theoreticalMz, options.PPMTolerance);
                O18QuanEnvelopes envelopes = new O18QuanEnvelopes();
                bool bFirst = true;
                int count = 0;

                // Backward: from startScan down to the first scan, using only scans whose MS level is 1.
                for (int scan = startScan; scan >= firstScanNumber; scan--) {
                    if (1 == rawFile.GetMsLevel(scan)) {
                        O18QuanEnvelope envelope = GetCorrespondingEnvelope(rawFile, theoreticalMz, mphit.Query.Charge, mzTolerance, scan);
                        // At most one invalid scan is tolerated: on the second consecutive invalid
                        // envelope, the previously inserted one (index 0) is removed and the scan stops.
                        if (!IsValidEnvelope(envelope, mphit.Charge)) {
                            if (count > 0) {
                                envelopes.RemoveAt(0);
                                break;
                            } else {
                                count++;
                            }
                        } else {
                            count = 0;
                        }

                        // The first envelope that is kept is flagged as the identified one.
                        if (bFirst) {
                            envelope.IsIdentified = true;
                            bFirst = false;
                        }
                        envelopes.Insert(0, envelope);
                    }
                }
                if (envelopes.Count == 0) {
                    // If the identified scan has no quantification information, ignore it.
                    continue;
                }

                count = 0;
                // Forward: from the scan after startScan up to the last scan, with the same tolerance rule.
                for (int scan = startScan + 1; scan <= lastScanNumber; scan++) {
                    if (1 == rawFile.GetMsLevel(scan)) {
                        var envelope = GetCorrespondingEnvelope(rawFile, theoreticalMz, mphit.Query.Charge, mzTolerance, scan);
                        // At most one invalid scan is tolerated: on the second consecutive invalid
                        // envelope, the previously appended one (last index) is removed and the scan stops.
                        if (!IsValidEnvelope(envelope, mphit.Charge)) {
                            if (count > 0) {
                                envelopes.RemoveAt(envelopes.Count - 1);
                                break;
                            } else {
                                count = 1;
                            }
                        } else {
                            count = 0;
                        }
                        envelopes.Add(envelope);
                    }
                }
                if (envelopes.Count == 0) {
                    continue;
                }

                string scanCurr = envelopes.GetScanRange();

                // Check the scan list again: reuse an existing list that has exactly the same scan range.
                bFound = false;
                foreach (var pklList in pkls) {
                    if (pklList.Count == 0) {
                        continue;
                    }
                    string scanOld = pklList.GetScanRange();
                    if (scanOld.Equals(scanCurr)) {
                        pklMpMap[pklList].Add(mphit);
                        bFound = true;
                        break;
                    }
                }
                if (bFound) {
                    continue;
                }

                // Register the new envelope list with this hit as its first spectrum.
                pkls.Add(envelopes);
                pklMpMap.Add(envelopes, new List <IIdentifiedSpectrum>());
                pklMpMap[envelopes].Add(mphit);
            }

            // NOTE(review): the prefix is joined with a hard-coded backslash, while relativeFile below uses Path.Combine.
            var detailFilePrefix = options.GetDetailDirectory() + "\\" + new FileInfo(options.ProteinFile).Name;

            // Quantify every non-empty envelope list and annotate the spectra assigned to it.
            foreach (string sequenceCharge in peptideChargeMap.Keys) {
                DifferentRetentionTimeEnvelopes pkls = peptideChargeMap[sequenceCharge];
                foreach (var envelopes in pkls) {
                    if (0 == envelopes.Count) {
                        continue;
                    }

                    List <IIdentifiedSpectrum> mps = pklMpMap[envelopes];
                    // Here the tolerance is derived from the observed m/z of the first assigned spectrum.
                    double mzTolerance = PrecursorUtils.ppm2mz(mps[0].Query.ObservedMz, options.PPMTolerance);

                    // NOTE(review): if the scan-percentage getters return integers, "/ 100" truncates - confirm their type.
                    O18QuantificationPeptideProcessor processor = new O18QuantificationPeptideProcessor(fileFormat, options.IsPostDigestionLabelling, rawFilename, PeptideUtils.GetPureSequence(mps[0].Sequence), options.PurityOfO18Water, envelopes, mzTolerance, "", options.GetScanPercentageStart() / 100, options.GetScanPercentageEnd() / 100);
                    processor.TheoreticalMz = GetTheoretialO16Mz(gapO18O16, mps[0]);
                    processor.Charge = mps[0].Charge;
                    processor.SoftwareVersion = options.SoftwareVersion;

                    // Detail file name: prefix.experimental.sequence.charge.scanRange.O18
                    var resultFilename = MyConvert.Format("{0}.{1}.{2}.{3}.{4}.O18", detailFilePrefix, experimental, PeptideUtils.GetPureSequence(mps[0].Sequence), mps[0].Charge, envelopes.GetScanRange());
                    processor.Process(resultFilename);

                    // Read the detail file back to obtain the summary item.
                    O18QuantificationSummaryItem item = fileFormat.ReadFromFile(resultFilename);
                    int maxScoreItemIndex = FindMaxScoreItemIndex(mps);

                    // The stored path is the detail directory's last segment combined with the detail file name.
                    var relativeFile = Path.Combine(Path.GetFileName(options.GetDetailDirectory()), Path.GetFileName(resultFilename));

                    // The spectrum at maxScoreItemIndex gets the primary assignment; the others get the duplication assignment.
                    for (int i = 0; i < mps.Count; i++) {
                        if (maxScoreItemIndex == i) {
                            item.AssignToAnnotation(mps[i], relativeFile);
                        } else {
                            item.AssignDuplicationToAnnotation(mps[i], relativeFile);
                        }
                    }
                }
            }
        }
    }

    // Spectra without a scan-count annotation get "-"; each spectrum is enabled only if the calculator finds a ratio.
    List <IIdentifiedSpectrum> peptides = mr.GetSpectra();
    foreach (IIdentifiedSpectrum mphit in peptides) {
        if (!mphit.Annotations.ContainsKey(O18QuantificationConstants.O18_RATIO_SCANCOUNT)) {
            mphit.Annotations[O18QuantificationConstants.O18_RATIO_SCANCOUNT] = "-";
        }
        mphit.SetEnabled(calc.HasPeptideRatio(mphit));
    }

    calc.Calculate(mr, m => true);

    string resultFile = FileUtils.ChangeExtension(optionFile, ".O18summary");
    format.InitializeByResult(mr);
    format.ProteinFormat = format.ProteinFormat.GetLineFormat(O18QuantificationConstants.O18_EXPORT_PROTEIN_HEADER);
    format.WriteToFile(resultFile, mr);

    Progress.SetMessage("Finished, result was saved to " + resultFile);
    return(new[] { resultFile });
}
/// <summary>
/// Calculates deuterium enrichment per peptide, copies the resulting annotations back onto
/// the proteins' spectra, writes the per-protein ".individual.tsv" and ".times.tsv" files and
/// finally runs the <c>RatioR</c> template on the times table.
/// </summary>
/// <returns>A one-element array holding <c>options.OutputFile</c>.</returns>
public override IEnumerable<string> Process() {
    // Prepare the unique peptide file.
    var proteinFormat = new MascotResultTextFormat();
    var proteinGroups = proteinFormat.ReadFromFile(options.InputFile);
    proteinGroups.RemoveAmbiguousSpectra();

    // Remove these annotations before recalculating.
    string[] staleAnnotations = {
        "TheoreticalDeuterium",
        "ObservedDeuterium",
        "NumDeuteriumIncorporated",
        "NumExchangableHydrogen",
        "DeuteriumEnrichmentPercent"
    };
    var allSpectra = proteinGroups.GetSpectra();
    foreach (var spectrum in allSpectra) {
        foreach (var annotationKey in staleAnnotations) {
            spectrum.Annotations.Remove(annotationKey);
        }
    }

    var peptideFile = Path.ChangeExtension(options.InputFile, ".unique.peptides");
    var peptideFormat = new MascotPeptideTextFormat(proteinFormat.PeptideFormat.Headers);
    peptideFormat.WriteToFile(peptideFile, allSpectra);

    // Calculate deuterium enrichment at peptide level.
    var pepOptions = new DeuteriumCalculatorOptions();
    options.CopyProperties(pepOptions);
    pepOptions.InputFile = peptideFile;
    pepOptions.OutputFile = peptideFile + ".tsv";

    var pepCalc = new PeptideDeuteriumCalculator(pepOptions) { Progress = this.Progress };
    pepCalc.Process();

    // Copy annotations from the calculated peptides to the original spectra, matched by long file name.
    var calculatedSpectra = peptideFormat.ReadFromFile(pepCalc.GetPeptideDeteriumFile());
    var originalByScan = allSpectra.ToDictionary(m => m.Query.FileScan.LongFileName);
    foreach (var calculated in calculatedSpectra) {
        var original = originalByScan[calculated.Query.FileScan.LongFileName];
        foreach (var annotation in calculated.Annotations) {
            original.Annotations[annotation.Key] = annotation.Value;
        }
    }

    // Remove peptides without a calculation result, then groups whose first protein has no peptides left.
    for (int index = proteinGroups.Count; index-- > 0;) {
        var proteinGroup = proteinGroups[index];
        foreach (var protein in proteinGroup) {
            protein.Peptides.RemoveAll(l => !l.Spectrum.Annotations.ContainsKey("DeuteriumEnrichmentPercent"));
        }
        if (proteinGroup[0].Peptides.Count == 0) {
            proteinGroups.RemoveAt(index);
        }
    }

    proteinFormat.PeptideFormat = peptideFormat.PeptideFormat;
    var noredundantFile = Path.ChangeExtension(options.OutputFile, ".individual.tsv");
    proteinFormat.WriteToFile(noredundantFile, proteinGroups);

    // One column per distinct experimental time, in ascending order.
    var times = options.ExperimentalTimeMap.Values.Distinct().OrderBy(m => m).ToArray();
    var timeFile = Path.ChangeExtension(options.OutputFile, ".times.tsv");
    using (var writer = new StreamWriter(timeFile)) {
        writer.WriteLine("Protein\t{0}", times.Select(t => t.ToString()).Merge("\t"));

        foreach (var proteinGroup in proteinGroups) {
            var groupSpectra = proteinGroup[0].GetSpectra();

            if (options.PeptideInAllTimePointOnly) {
                // Keep only the sequences that were observed at every time point.
                var bySequence = groupSpectra.ToGroupDictionary(l => l.Peptide.PureSequence);
                groupSpectra.Clear();
                foreach (var sequenceSpectra in bySequence.Values) {
                    var sequenceByTime = sequenceSpectra.ToGroupDictionary(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]);
                    if (times.All(time => sequenceByTime.ContainsKey(time))) {
                        groupSpectra.AddRange(sequenceSpectra);
                    }
                }
            }

            if (groupSpectra.Count == 0) {
                continue;
            }

            writer.Write(proteinGroup.Select(p => p.Name).Merge("/"));

            // Per time point: the median enrichment percent, or "NA" when no spectrum exists.
            var spectraByTime = groupSpectra.ToGroupDictionary(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]);
            foreach (var time in times) {
                if (!spectraByTime.ContainsKey(time)) {
                    writer.Write("\tNA");
                    continue;
                }

                var enrichments = spectraByTime[time]
                    .Select(spec => double.Parse(spec.Annotations["DeuteriumEnrichmentPercent"].ToString()))
                    .ToArray();
                writer.Write("\t{0:0.######}", Statistics.Median(enrichments));
            }
            writer.WriteLine();
        }
    }

    Progress.SetMessage("Calculating ratio consistant ...");
    var deuteriumOptions = new RTemplateProcessorOptions() {
        InputFile = timeFile,
        OutputFile = options.OutputFile,
        RTemplate = RatioR,
        RExecute = SystemUtils.GetRExecuteLocation(),
        CreateNoWindow = true
    };
    var templateProcessor = new RTemplateProcessor(deuteriumOptions) { Progress = this.Progress };
    templateProcessor.Process();

    Progress.SetMessage("Finished ...");
    return new[] { options.OutputFile };
}
/// <summary>
/// Replaces <c>format</c> with a fresh <c>MascotResultTextFormat</c> and uses it to read the given file.
/// </summary>
/// <param name="fileName">Identification result file to read.</param>
/// <returns>The result read from <paramref name="fileName"/>.</returns>
protected override IIdentifiedResult GetIdentifiedResult(string fileName) {
    format = new MascotResultTextFormat();
    var identified = format.ReadFromFile(fileName);
    return identified;
}