/// <summary>
/// Quantifies the identified spectra of one raw file and writes one ".silac"
/// detail file per quantified envelope group into <paramref name="detailDir"/>.
/// </summary>
/// <remarks>
/// Phase 1 walks every spectrum, clears its previous annotation and either marks it
/// with a status string ("OUT_OF_RANGE", "NOT_SILAC", "WRONG_IDENTIFICATION",
/// "NO_PROFILE") or attaches it to a <see cref="SilacEnvelopes"/> built from the full
/// scans surrounding the identification. Phase 2 turns every non-empty envelope into
/// a <see cref="SilacQuantificationSummaryItem"/>, writes it to disk and assigns the
/// result to the spectra of that envelope. Finally InitializeRatioEnabled() is called
/// on every spectrum.
/// </remarks>
/// <param name="rawFileName">Raw file opened through the reader for the duration of phase 1.</param>
/// <param name="spectra">Identified spectra to quantify; their annotations are modified in place.</param>
/// <param name="detailDir">Directory receiving the detail files; created when it does not exist.</param>
/// <exception cref="UserTerminatedException">Thrown when the progress callback reports a pending cancellation.</exception>
public void Quantify(string rawFileName, List<IIdentifiedSpectrum> spectra, string detailDir)
{
    if (!Directory.Exists(detailDir))
    {
        Directory.CreateDirectory(detailDir);
    }

    // Key -> all envelopes (at different retention times) found for that key.
    var spectrumKeyMap = new Dictionary<string, DifferentRetentionTimeEnvelopes>();

    // Envelope -> spectra whose identified full scan lies inside that envelope.
    var envelopeSpectrumGroup = new Dictionary<SilacEnvelopes, List<IIdentifiedSpectrum>>();

    double precursorPPM = GetPrecursorPPM(spectra);

    try
    {
        _rawReader.Open(rawFileName);

        int firstScanNumber = _rawReader.GetFirstSpectrumNumber();
        int lastScanNumber = _rawReader.GetLastSpectrumNumber();

        Progress.SetRange(1, spectra.Count);
        int pepCount = 0;

        for (int s = 0; s < spectra.Count; s++)
        {
            IIdentifiedSpectrum spectrum = spectra[s];
            SilacQuantificationSummaryItem.ClearAnnotation(spectrum);

            if (Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            int startScan = spectrum.Query.FileScan.FirstScan;
            if (startScan > lastScanNumber)
            {
                spectrum.GetOrCreateQuantificationItem().RatioStr = "OUT_OF_RANGE";
                continue;
            }

            Progress.SetPosition(pepCount++);

            string seq = GetMatchSequence(spectrum);
            IPeptideInfo peptideInfo = new IdentifiedPeptideInfo(seq, spectrum.TheoreticalMH, spectrum.Query.Charge);
            SilacCompoundInfo sci = GetSilacCompoundInfo(peptideInfo);

            // Skip when the theoretical m/z of the light and heavy ions are identical.
            if (!sci.IsSilacData())
            {
                spectrum.GetOrCreateQuantificationItem().RatioStr = "NOT_SILAC";
                continue;
            }

            // Skip when the theoretical light/heavy m/z does not agree with the observed m/z.
            if (!sci.IsMzEquals(spectrum.ObservedMz, MAX_DELTA_MZ))
            {
                ValidateModifications(seq);
                spectrum.GetOrCreateQuantificationItem().RatioStr = "WRONG_IDENTIFICATION";
                continue;
            }

            // Skip when no corresponding full scan can be found.
            int identifiedFullScan = _rawReader.FindPreviousFullScan(startScan, firstScanNumber);
            if (-1 == identifiedFullScan)
            {
                spectrum.GetOrCreateQuantificationItem().RatioStr = "NO_PROFILE";
                continue;
            }

            DifferentRetentionTimeEnvelopes pkls = FindEnvelopes(spectrumKeyMap, spectrum, sci);
            SilacEnvelopes envelope = pkls.FindSilacEnvelope(identifiedFullScan);

            // The scan is already covered by an envelope that has been quantified:
            // just register the spectrum with that envelope.
            if (envelope != null)
            {
                envelope.SetScanIdentified(identifiedFullScan, spectrum.IsExtendedIdentification());
                envelopeSpectrumGroup[envelope].Add(spectrum);
                continue;
            }

            // Extract the quantification information of this spectrum from the raw file.
            int maxIndex = Math.Min(option.ProfileLength - 1, pkls.LightProfile.FindMaxIndex());
            double mzTolerance = PrecursorUtils.ppm2mz(sci.Light.Mz, option.PPMTolerance);

            // Skip when the full scan does not contain the corresponding ions. (With a wrong
            // identification or with extended quantification the pair may not be found.)
            SilacPeakListPair splp = GetLightHeavyPeakList(_rawReader, sci, maxIndex, mzTolerance, identifiedFullScan);
            if (null == splp)
            {
                spectrum.GetOrCreateQuantificationItem().RatioStr = "NO_PROFILE";
                continue;
            }

            splp.IsIdentified = true;
            splp.IsExtendedIdentification = spectrum.IsExtendedIdentification();

            SilacEnvelopes envelopes = new SilacEnvelopes();
            envelopes.Add(splp);

            // Walk backwards through the full scans until a scan yields no peak pair.
            int fullScan = identifiedFullScan;
            int scanNumber = 0;
            while ((fullScan = _rawReader.FindPreviousFullScan(fullScan - 1, firstScanNumber)) != -1)
            {
                if (_rawReader.IsBadDataScan(fullScan))
                {
                    continue;
                }

                scanNumber++;
                var item = GetLightHeavyPeakList(_rawReader, sci, maxIndex, mzTolerance, fullScan, scanNumber <= MinScanNumber);
                if (null == item)
                {
                    break;
                }

                envelopes.Add(item);
            }

            // The backward walk appended scans in descending order; restore scan order
            // before appending the forward scans.
            envelopes.Reverse();

            // Walk forwards through the full scans until a scan yields no peak pair.
            fullScan = identifiedFullScan;
            scanNumber = 0;
            while ((fullScan = _rawReader.FindNextFullScan(fullScan + 1, lastScanNumber)) != -1)
            {
                if (_rawReader.IsBadDataScan(fullScan))
                {
                    continue;
                }

                scanNumber++;
                var item = GetLightHeavyPeakList(_rawReader, sci, maxIndex, mzTolerance, fullScan, scanNumber <= MinScanNumber);
                if (null == item)
                {
                    break;
                }

                envelopes.Add(item);
            }

            // Compute the light and heavy ion abundance for every scan.
            envelopes.ForEach(m => m.CalculateIntensity(pkls.LightProfile, pkls.HeavyProfile));

            pkls.Add(envelopes);

            envelopeSpectrumGroup.Add(envelopes, new List<IIdentifiedSpectrum>());
            envelopeSpectrumGroup[envelopes].Add(spectrum);
        }
    }
    finally
    {
        _rawReader.Close();
    }

    foreach (DifferentRetentionTimeEnvelopes pkls in spectrumKeyMap.Values)
    {
        foreach (SilacEnvelopes envelopes in pkls)
        {
            if (0 == envelopes.Count)
            {
                continue;
            }

            List<IIdentifiedSpectrum> mps = envelopeSpectrumGroup[envelopes];

            string scanStr = GetScanRange(envelopes);
            string resultFilename = detailDir + "\\" + mps[0].Query.FileScan.Experimental + "." + PeptideUtils.GetPureSequence(mps[0].Sequence) + "." + mps[0].Query.Charge + scanStr + ".silac";

            IPeptideInfo peptideInfo = new IdentifiedPeptideInfo(mps[0].GetMatchSequence(), mps[0].TheoreticalMH, mps[0].Query.Charge);
            SilacCompoundInfo sci = GetSilacCompoundInfo(peptideInfo);

            SilacQuantificationSummaryItem item = new SilacQuantificationSummaryItem(sci.Light.IsSample);
            item.RawFilename = rawFileName;
            item.SoftwareVersion = this.SoftwareVersion;
            item.PeptideSequence = mps[0].Sequence;
            item.Charge = mps[0].Charge;
            item.LightAtomComposition = sci.Light.Composition.ToString();
            item.HeavyAtomComposition = sci.Heavy.Composition.ToString();
            item.LightProfile = pkls.LightProfile;
            item.HeavyProfile = pkls.HeavyProfile;
            item.ObservedEnvelopes = envelopes;

            item.ValidateScans(sci, precursorPPM);
            item.Smoothing();
            item.CalculateRatio();

            new SilacQuantificationSummaryItemXmlFormat().WriteToFile(resultFilename, item);

            // The spectrum picked by FindMaxScoreItemIndex receives the result itself;
            // every other spectrum of the envelope is annotated as a duplication.
            int maxScoreItemIndex = FindMaxScoreItemIndex(mps);
            for (int i = 0; i < mps.Count; i++)
            {
                if (maxScoreItemIndex == i)
                {
                    item.AssignToAnnotation(mps[i], resultFilename);
                }
                else
                {
                    item.AssignDuplicationToAnnotation(mps[i], resultFilename);
                }
            }
        }
    }

    foreach (IIdentifiedSpectrum mph in spectra)
    {
        mph.InitializeRatioEnabled();
    }
}
/// <summary>
/// Extracts a chromatograph profile for every identified peptide from its raw file,
/// writes the profiles and a ".chros.tsv" index next to the output file, then runs
/// the boundary R template on that index.
/// </summary>
/// <remarks>
/// Both stages are skipped when their result file already exists and
/// <c>options.Overwrite</c> is false. Raw files are processed in parallel, one
/// work item per raw file.
/// </remarks>
/// <returns>An array containing only <c>options.OutputFile</c>.</returns>
/// <exception cref="Exception">
/// Thrown when spectra reference a raw file missing from <c>options.RawFiles</c>, or
/// when no chromatograph profile was detected.
/// </exception>
/// <exception cref="UserTerminatedException">
/// Thrown inside the parallel loop when the progress callback reports a pending cancellation.
/// </exception>
public override IEnumerable<string> Process()
{
    var boundaryInput = Path.ChangeExtension(options.OutputFile, ".chros.tsv");

    if (!File.Exists(boundaryInput) || options.Overwrite)
    {
        var format = GetPeptideReader();
        var spectra = format.ReadFromFile(options.InputFile);

        // Both maps are keyed by the lower-cased experimental name so they can be joined.
        var peptideMap = spectra.ToGroupDictionary(m => m.Query.FileScan.Experimental.ToLower());
        var rawfiles = options.RawFiles.ToDictionary(m => RawFileFactory.GetExperimental(m).ToLower());
        var rententionWindow = options.MaximumRetentionTimeWindow;

        var missed = peptideMap.Keys.Except(rawfiles.Keys).ToArray();
        if (missed.Length > 0)
        {
            throw new Exception(string.Format("Cannot find raw file of {0} in file list", missed.Merge("/")));
        }

        var optionThreadCount = options.ThreadCount == 0 ? Environment.ProcessorCount : options.ThreadCount;

        // ParallelOptions rejects a degree of 0, which Math.Min yields when there is no
        // spectrum group at all. Use 1 in that case so the empty run reaches the
        // "Cannot find chromotograph!" error below instead of an ArgumentOutOfRangeException.
        var parallelDegree = Math.Min(optionThreadCount, peptideMap.Count);
        var option = new ParallelOptions()
        {
            MaxDegreeOfParallelism = parallelDegree == 0 ? 1 : parallelDegree,
        };

        // One (experimental name, pending profiles) entry per raw file.
        var chroMap = new List<Tuple<string, List<ChromatographProfile>>>();
        foreach (var raw in peptideMap)
        {
            var peptides = raw.Value;
            var waitingPeaks = new List<ChromatographProfile>();
            foreach (var peptide in peptides)
            {
                var chro = new ChromatographProfile()
                {
                    Experimental = peptide.Query.FileScan.Experimental,
                    IdentifiedScan = peptide.Query.FileScan.FirstScan,
                    IdentifiedRetentionTime = peptide.Query.FileScan.RetentionTime,
                    ObservedMz = peptide.GetPrecursorMz(),
                    TheoreticalMz = peptide.GetTheoreticalMz(),
                    Charge = peptide.Query.Charge,
                    Sequence = peptide.Peptide.PureSequence,
                    FileName = GetTargetFile(peptide),
                    SubFileName = GetTargetSubFile(peptide)
                };
                chro.InitializeIsotopicIons(options.MzTolerancePPM, options.MinimumIsotopicPercentage);
                waitingPeaks.Add(chro);
            }

            chroMap.Add(new Tuple<string, List<ChromatographProfile>>(raw.Key, waitingPeaks));
        }

        // Collects the main profile of every peptide across all worker threads.
        ConcurrentBag<ChromatographProfile> detected = new ConcurrentBag<ChromatographProfile>();

        Parallel.ForEach(chroMap, option, raw =>
        {
            var rawFileName = raw.Item1;
            var waitingPeaks = raw.Item2;

            List<FullMS> fullMSList = new List<FullMS>();

            Progress.SetMessage("Reading full ms list from " + rawfiles[rawFileName] + "...");

            using (var rawReader = new CacheRawFile(RawFileFactory.GetRawFileReader(rawfiles[rawFileName])))
            {
                // Index every MS level 1 scan of the raw file.
                var firstScan = rawReader.GetFirstSpectrumNumber();
                var lastScan = rawReader.GetLastSpectrumNumber();
                for (int scan = firstScan; scan <= lastScan; scan++)
                {
                    var mslevel = rawReader.GetMsLevel(scan);
                    if (mslevel == 1)
                    {
                        fullMSList.Add(new FullMS()
                        {
                            Scan = scan,
                            RetentionTime = rawReader.ScanToRetentionTime(scan),
                            Peaks = null
                        });
                    }
                }

                // Profiles that only carry a retention time get a scan number derived from
                // the last full scan (from index 1 on) that is not later than that time.
                foreach (var chro in waitingPeaks)
                {
                    if (chro.IdentifiedScan == 0 && chro.IdentifiedRetentionTime > 0)
                    {
                        for (int i = 1; i < fullMSList.Count; i++)
                        {
                            if (chro.IdentifiedRetentionTime < fullMSList[i].RetentionTime)
                            {
                                break;
                            }

                            chro.IdentifiedScan = fullMSList[i].Scan + 1;
                        }
                    }
                }

                var chroGroups = waitingPeaks.GroupBy(p => p.GetPeptideId());
                foreach (var chroGroup in chroGroups)
                {
                    List<ChromatographProfile> profileChros = new List<ChromatographProfile>();

                    foreach (var chro in chroGroup.OrderBy(m => m.IdentifiedScan))
                    {
                        // Master scan: the last full scan whose number does not exceed the
                        // identified scan (index 0 when none qualifies).
                        var masterScanIndex = 0;
                        for (int i = 1; i < fullMSList.Count; i++)
                        {
                            if (chro.IdentifiedScan < fullMSList[i].Scan)
                            {
                                break;
                            }

                            masterScanIndex = i;
                        }

                        var masterScan = fullMSList[masterScanIndex].Scan;
                        var masterRetentionTime = fullMSList[masterScanIndex].RetentionTime;

                        // When an earlier profile of this peptide already contains the master
                        // scan, flag that scan as identified instead of extracting it again.
                        bool bExist = false;
                        foreach (var profileChro in profileChros)
                        {
                            foreach (var pkl in profileChro.Profiles)
                            {
                                if (pkl.Scan == masterScan)
                                {
                                    pkl.Identified = true;
                                    bExist = true;
                                    break;
                                }
                            }

                            if (bExist)
                            {
                                break;
                            }
                        }

                        if (bExist)
                        {
                            continue;
                        }

                        // Allow one missed scan: the walk stops at the second scan for which
                        // AddEnvelope returns false.
                        int naCount = 2;
                        for (int scanIndex = masterScanIndex; scanIndex >= 0; scanIndex--)
                        {
                            if (Progress.IsCancellationPending())
                            {
                                throw new UserTerminatedException();
                            }

                            var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                            if (masterRetentionTime - curRetentionTime > rententionWindow)
                            {
                                break;
                            }

                            if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                            {
                                naCount--;
                                if (naCount == 0)
                                {
                                    break;
                                }

                                continue;
                            }

                            if (scanIndex == masterScanIndex)
                            {
                                chro.Profiles.Last().Identified = true;
                            }
                        }

                        // The backward walk collected scans in descending order.
                        chro.Profiles.Reverse();

                        naCount = 2;
                        for (int scanIndex = masterScanIndex + 1; scanIndex < fullMSList.Count; scanIndex++)
                        {
                            if (Progress.IsCancellationPending())
                            {
                                throw new UserTerminatedException();
                            }

                            var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                            if (curRetentionTime - masterRetentionTime > rententionWindow)
                            {
                                break;
                            }

                            if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                            {
                                naCount--;
                                if (naCount == 0)
                                {
                                    break;
                                }

                                continue;
                            }
                        }

                        profileChros.Add(chro);
                    }

                    profileChros.RemoveAll(l => l.Profiles.Count < options.MinimumScanCount);

                    // Longest profile first: it becomes the main profile of the peptide.
                    profileChros.Sort((m1, m2) => m2.Profiles.Count.CompareTo(m1.Profiles.Count));

                    bool bMain = true;
                    foreach (var chro in profileChros)
                    {
                        var filename = bMain ? chro.FileName : chro.SubFileName;
                        if (bMain)
                        {
                            detected.Add(chro);
                        }

                        bMain = false;

                        new ChromatographProfileTextWriter().WriteToFile(filename, chro);
                        new ChromatographProfileXmlFormat().WriteToFile(filename + ".xml", chro);
                    }
                }
            }
        });

        var chroList = new List<ChromatographProfile>(detected);
        chroList.Sort((m1, m2) => m1.FileName.CompareTo(m2.FileName));

        if (chroList.Count == 0)
        {
            throw new Exception("Cannot find chromotograph!");
        }

        using (var sw = new StreamWriter(boundaryInput))
        {
            sw.WriteLine("ChroDirectory\tChroFile\tSample\tPeptideId\tTheoreticalMz\tCharge\tIdentifiedScan");
            foreach (var chro in chroList)
            {
                // The file is consumed by R, so numbers must not be formatted with the
                // current culture (which may use a decimal comma).
                sw.WriteLine(string.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}",
                    Path.GetDirectoryName(chro.FileName).Replace("\\", "/"),
                    Path.GetFileNameWithoutExtension(chro.FileName),
                    chro.Experimental,
                    chro.GetPeptideId(),
                    chro.TheoreticalMz,
                    chro.Charge,
                    chro.IdentifiedScan));
            }
        }
    }

    if (!File.Exists(options.OutputFile) || options.Overwrite)
    {
        Progress.SetMessage("Finding boundaries ...");

        var boundaryOptions = new RTemplateProcessorOptions()
        {
            InputFile = boundaryInput,
            OutputFile = options.OutputFile,
            RTemplate = BoundaryR,
            RExecute = ExternalProgramConfig.GetExternalProgram("R"),
            CreateNoWindow = true
        };

        boundaryOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));

        // Emitted as an R assignment, hence formatted with the invariant culture.
        boundaryOptions.Parameters.Add(string.Format(
            System.Globalization.CultureInfo.InvariantCulture,
            "maximumProfileDistance<-{0}",
            options.MaximumProfileDistance));

        new RTemplateProcessor(boundaryOptions) { Progress = this.Progress }.Process();
    }

    return new string[] { options.OutputFile };
}