コード例 #1
0
        public void Quantify(string rawFileName, List <IIdentifiedSpectrum> spectra, string detailDir)
        {
            if (!Directory.Exists(detailDir))
            {
                Directory.CreateDirectory(detailDir);
            }

            var experimental = RawFileFactory.GetExperimental(rawFileName);

            Dictionary <string, DifferentRetentionTimeEnvelopes> spectrumKeyMap = new Dictionary <string, DifferentRetentionTimeEnvelopes>();

            Dictionary <SilacEnvelopes, List <IIdentifiedSpectrum> > envelopeSpectrumGroup = new Dictionary <SilacEnvelopes, List <IIdentifiedSpectrum> >();

            double precursorPPM = GetPrecursorPPM(spectra);

            try
            {
                _rawReader.Open(rawFileName);

                int firstScanNumber = _rawReader.GetFirstSpectrumNumber();
                int lastScanNumber  = _rawReader.GetLastSpectrumNumber();

                Progress.SetRange(1, spectra.Count);
                int pepCount = 0;

                for (int s = 0; s < spectra.Count; s++)
                {
                    Console.WriteLine(s);
                    IIdentifiedSpectrum spectrum = spectra[s];

                    SilacQuantificationSummaryItem.ClearAnnotation(spectrum);

                    if (Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    int startScan = spectrum.Query.FileScan.FirstScan;
                    if (startScan > lastScanNumber)
                    {
                        spectrum.GetOrCreateQuantificationItem().RatioStr = "OUT_OF_RANGE";
                        continue;
                    }

                    Progress.SetPosition(pepCount++);

                    IIdentifiedPeptide sp = spectrum.Peptide;

                    string seq = GetMatchSequence(spectrum);

                    IPeptideInfo peptideInfo = new IdentifiedPeptideInfo(seq, spectrum.TheoreticalMH, spectrum.Query.Charge);

                    SilacCompoundInfo sci = GetSilacCompoundInfo(peptideInfo);

                    //如果轻重离子理论质荷比一样,忽略
                    if (!sci.IsSilacData())
                    {
                        spectrum.GetOrCreateQuantificationItem().RatioStr = "NOT_SILAC";
                        continue;
                    }

                    //如果轻重离子理论质荷比与观测值不一致,忽略
                    if (!sci.IsMzEquals(spectrum.ObservedMz, MAX_DELTA_MZ))
                    {
                        ValidateModifications(seq);

                        spectrum.GetOrCreateQuantificationItem().RatioStr = "WRONG_IDENTIFICATION";
                        continue;
                    }

                    //如果没有找到相应的FullScan,忽略
                    int identifiedFullScan = _rawReader.FindPreviousFullScan(startScan, firstScanNumber);
                    if (-1 == identifiedFullScan)
                    {
                        spectrum.GetOrCreateQuantificationItem().RatioStr = "NO_PROFILE";
                        continue;
                    }

                    DifferentRetentionTimeEnvelopes pkls = FindEnvelopes(spectrumKeyMap, spectrum, sci);

                    SilacEnvelopes envelope = pkls.FindSilacEnvelope(identifiedFullScan);

                    //如果该scan被包含在已经被定量的结果中,忽略
                    if (envelope != null)
                    {
                        envelope.SetScanIdentified(identifiedFullScan, spectrum.IsExtendedIdentification());
                        envelopeSpectrumGroup[envelope].Add(spectrum);
                        continue;
                    }

                    //从原始文件中找出该spectrum的定量信息
                    int maxIndex = Math.Min(option.ProfileLength - 1, pkls.LightProfile.FindMaxIndex());

                    double mzTolerance = PrecursorUtils.ppm2mz(sci.Light.Mz, option.PPMTolerance);

                    //如果FullScan没有相应的离子,忽略。(鉴定错误或者扩展定量时候,会出现找不到pair的现象)
                    SilacPeakListPair splp = GetLightHeavyPeakList(_rawReader, sci, maxIndex, mzTolerance, identifiedFullScan);
                    if (null == splp)
                    {
                        spectrum.GetOrCreateQuantificationItem().RatioStr = "NO_PROFILE";
                        continue;
                    }

                    splp.IsIdentified             = true;
                    splp.IsExtendedIdentification = spectrum.IsExtendedIdentification();

                    SilacEnvelopes envelopes = new SilacEnvelopes();
                    envelopes.Add(splp);

                    //向前查找定量信息
                    int fullScan   = identifiedFullScan;
                    int scanNumber = 0;
                    while ((fullScan = _rawReader.FindPreviousFullScan(fullScan - 1, firstScanNumber)) != -1)
                    {
                        if (_rawReader.IsBadDataScan(fullScan))
                        {
                            continue;
                        }
                        scanNumber++;
                        var item = GetLightHeavyPeakList(_rawReader, sci, maxIndex, mzTolerance, fullScan, scanNumber <= MinScanNumber);
                        if (null == item)
                        {
                            break;
                        }

                        envelopes.Add(item);
                    }
                    envelopes.Reverse();

                    //向后查找定量信息
                    fullScan   = identifiedFullScan;
                    scanNumber = 0;
                    while ((fullScan = _rawReader.FindNextFullScan(fullScan + 1, lastScanNumber)) != -1)
                    {
                        if (_rawReader.IsBadDataScan(fullScan))
                        {
                            continue;
                        }
                        scanNumber++;
                        var item = GetLightHeavyPeakList(_rawReader, sci, maxIndex, mzTolerance, fullScan, scanNumber <= MinScanNumber);
                        if (null == item)
                        {
                            break;
                        }

                        envelopes.Add(item);
                    }

                    //对每个scan计算轻重的离子丰度
                    envelopes.ForEach(m => m.CalculateIntensity(pkls.LightProfile, pkls.HeavyProfile));

                    pkls.Add(envelopes);

                    envelopeSpectrumGroup.Add(envelopes, new List <IIdentifiedSpectrum>());
                    envelopeSpectrumGroup[envelopes].Add(spectrum);
                }
            }
            finally
            {
                _rawReader.Close();
            }

            foreach (string key in spectrumKeyMap.Keys)
            {
                DifferentRetentionTimeEnvelopes pkls = spectrumKeyMap[key];

                foreach (SilacEnvelopes envelopes in pkls)
                {
                    if (0 == envelopes.Count)
                    {
                        continue;
                    }

                    List <IIdentifiedSpectrum> mps = envelopeSpectrumGroup[envelopes];

                    double mzTolerance = PrecursorUtils.ppm2mz(mps[0].Query.ObservedMz, option.PPMTolerance);

                    string scanStr = GetScanRange(envelopes);

                    string resultFilename = detailDir + "\\" + mps[0].Query.FileScan.Experimental + "." + PeptideUtils.GetPureSequence(mps[0].Sequence) + "." + mps[0].Query.Charge + scanStr + ".silac";

                    IPeptideInfo      peptideInfo = new IdentifiedPeptideInfo(mps[0].GetMatchSequence(), mps[0].TheoreticalMH, mps[0].Query.Charge);
                    SilacCompoundInfo sci         = GetSilacCompoundInfo(peptideInfo);

                    SilacQuantificationSummaryItem item = new SilacQuantificationSummaryItem(sci.Light.IsSample);
                    item.RawFilename          = rawFileName;
                    item.SoftwareVersion      = this.SoftwareVersion;
                    item.PeptideSequence      = mps[0].Sequence;
                    item.Charge               = mps[0].Charge;
                    item.LightAtomComposition = sci.Light.Composition.ToString();
                    item.HeavyAtomComposition = sci.Heavy.Composition.ToString();
                    item.LightProfile         = pkls.LightProfile;
                    item.HeavyProfile         = pkls.HeavyProfile;
                    item.ObservedEnvelopes    = envelopes;

                    item.ValidateScans(sci, precursorPPM);

                    item.Smoothing();
                    item.CalculateRatio();

                    new SilacQuantificationSummaryItemXmlFormat().WriteToFile(resultFilename, item);

                    int maxScoreItemIndex = FindMaxScoreItemIndex(mps);

                    for (int i = 0; i < mps.Count; i++)
                    {
                        if (maxScoreItemIndex == i)
                        {
                            item.AssignToAnnotation(mps[i], resultFilename);
                        }
                        else
                        {
                            item.AssignDuplicationToAnnotation(mps[i], resultFilename);
                        }
                    }
                }
            }

            foreach (IIdentifiedSpectrum mph in spectra)
            {
                mph.InitializeRatioEnabled();
            }
        }
        public override IEnumerable <string> Process()
        {
            var boundaryInput = Path.ChangeExtension(options.OutputFile, ".chros.tsv");

            if (!File.Exists(boundaryInput) || options.Overwrite)
            {
                var format           = GetPeptideReader();
                var spectra          = format.ReadFromFile(options.InputFile);
                var peptideMap       = spectra.ToGroupDictionary(m => m.Query.FileScan.Experimental.ToLower());
                var rawfiles         = options.RawFiles.ToDictionary(m => RawFileFactory.GetExperimental(m).ToLower());
                var rententionWindow = options.MaximumRetentionTimeWindow;

                var missed = peptideMap.Keys.Except(rawfiles.Keys).ToArray();
                if (missed.Length > 0)
                {
                    throw new Exception(string.Format("Cannot find raw file of {0} in file list", missed.Merge("/")));
                }

                var optionThreadCount = options.ThreadCount == 0 ? Environment.ProcessorCount : options.ThreadCount;
                var option            = new ParallelOptions()
                {
                    MaxDegreeOfParallelism = Math.Min(optionThreadCount, peptideMap.Count),
                };

                var chroMap = new List <Tuple <string, List <ChromatographProfile> > >();
                foreach (var raw in peptideMap)
                {
                    var peptides = raw.Value;

                    var waitingPeaks = new List <ChromatographProfile>();
                    foreach (var peptide in peptides)
                    {
                        var chro = new ChromatographProfile()
                        {
                            Experimental            = peptide.Query.FileScan.Experimental,
                            IdentifiedScan          = peptide.Query.FileScan.FirstScan,
                            IdentifiedRetentionTime = peptide.Query.FileScan.RetentionTime,
                            ObservedMz    = peptide.GetPrecursorMz(),
                            TheoreticalMz = peptide.GetTheoreticalMz(),
                            Charge        = peptide.Query.Charge,
                            Sequence      = peptide.Peptide.PureSequence,
                            FileName      = GetTargetFile(peptide),
                            SubFileName   = GetTargetSubFile(peptide)
                        };
                        chro.InitializeIsotopicIons(options.MzTolerancePPM, options.MinimumIsotopicPercentage);
                        waitingPeaks.Add(chro);
                    }

                    chroMap.Add(new Tuple <string, List <ChromatographProfile> >(raw.Key, waitingPeaks));
                }

                ConcurrentBag <ChromatographProfile> detected = new ConcurrentBag <ChromatographProfile>();

                Parallel.ForEach(chroMap, option, raw =>
                {
                    var rawFileName  = raw.Item1;
                    var waitingPeaks = raw.Item2;

                    Dictionary <string, List <ChromatographProfile> > resultMap = new Dictionary <string, List <ChromatographProfile> >();

                    List <FullMS> fullMSList = new List <FullMS>();
                    Progress.SetMessage("Reading full ms list from " + rawfiles[rawFileName] + "...");
                    using (var rawReader = new CacheRawFile(RawFileFactory.GetRawFileReader(rawfiles[rawFileName])))
                    {
                        var firstScan = rawReader.GetFirstSpectrumNumber();
                        var lastScan  = rawReader.GetLastSpectrumNumber();
                        for (int scan = firstScan; scan <= lastScan; scan++)
                        {
                            var mslevel = rawReader.GetMsLevel(scan);
                            if (mslevel == 1)
                            {
                                fullMSList.Add(new FullMS()
                                {
                                    Scan          = scan,
                                    RetentionTime = rawReader.ScanToRetentionTime(scan),
                                    Peaks         = null
                                });
                            }
                        }

                        foreach (var chro in waitingPeaks)
                        {
                            if (chro.IdentifiedScan == 0 && chro.IdentifiedRetentionTime > 0)
                            {
                                for (int i = 1; i < fullMSList.Count; i++)
                                {
                                    if (chro.IdentifiedRetentionTime < fullMSList[i].RetentionTime)
                                    {
                                        break;
                                    }
                                    chro.IdentifiedScan = fullMSList[i].Scan + 1;
                                }
                            }
                        }

                        var chroGroups = waitingPeaks.GroupBy(chro => chro.GetPeptideId());
                        foreach (var chroGroup in chroGroups)
                        {
                            List <ChromatographProfile> profileChros = new List <ChromatographProfile>();
                            foreach (var chro in chroGroup.OrderBy(m => m.IdentifiedScan))
                            {
                                var masterScanIndex = 0;
                                for (int i = 1; i < fullMSList.Count; i++)
                                {
                                    if (chro.IdentifiedScan < fullMSList[i].Scan)
                                    {
                                        break;
                                    }
                                    masterScanIndex = i;
                                }
                                var masterScan          = fullMSList[masterScanIndex].Scan;
                                var masterRetentionTime = fullMSList[masterScanIndex].RetentionTime;

                                bool bExist = false;
                                foreach (var profileChro in profileChros)
                                {
                                    foreach (var pkl in profileChro.Profiles)
                                    {
                                        if (pkl.Scan == fullMSList[masterScanIndex].Scan)
                                        {
                                            pkl.Identified = true;
                                            bExist         = true;
                                            break;
                                        }
                                    }

                                    if (bExist)
                                    {
                                        break;
                                    }
                                }

                                if (bExist)
                                {
                                    continue;
                                }

                                //Progress.SetMessage("Processing {0} : {1:0.#####} : {2} : {3}", chro.Sequence, chro.ObservedMz, chro.IdentifiedScan, Path.GetFileName(chro.FileName));

                                //allow one missed scan
                                int naCount = 2;
                                for (int scanIndex = masterScanIndex; scanIndex >= 0; scanIndex--)
                                {
                                    if (Progress.IsCancellationPending())
                                    {
                                        throw new UserTerminatedException();
                                    }

                                    var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                                    if (masterRetentionTime - curRetentionTime > rententionWindow)
                                    {
                                        break;
                                    }

                                    if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                                    {
                                        naCount--;
                                        if (naCount == 0)
                                        {
                                            break;
                                        }
                                        else
                                        {
                                            continue;
                                        }
                                    }

                                    if (scanIndex == masterScanIndex)
                                    {
                                        chro.Profiles.Last().Identified = true;
                                    }
                                }
                                chro.Profiles.Reverse();

                                naCount = 2;
                                for (int scanIndex = masterScanIndex + 1; scanIndex < fullMSList.Count; scanIndex++)
                                {
                                    if (Progress.IsCancellationPending())
                                    {
                                        throw new UserTerminatedException();
                                    }

                                    var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                                    if (curRetentionTime - masterRetentionTime > rententionWindow)
                                    {
                                        break;
                                    }

                                    if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                                    {
                                        naCount--;
                                        if (naCount == 0)
                                        {
                                            break;
                                        }
                                        else
                                        {
                                            continue;
                                        }
                                    }
                                }

                                profileChros.Add(chro);
                            }

                            profileChros.RemoveAll(l => l.Profiles.Count < options.MinimumScanCount);
                            profileChros.Sort((m1, m2) => m2.Profiles.Count.CompareTo(m1.Profiles.Count));

                            bool bMain = true;
                            foreach (var chro in profileChros)
                            {
                                var filename = bMain ? chro.FileName : chro.SubFileName;
                                if (bMain)
                                {
                                    detected.Add(chro);
                                }

                                bMain = false;

                                new ChromatographProfileTextWriter().WriteToFile(filename, chro);
                                new ChromatographProfileXmlFormat().WriteToFile(filename + ".xml", chro);
                            }
                        }
                    }
                }
                                 );

                var chroList = new List <ChromatographProfile>(detected);
                chroList.Sort((m1, m2) => m1.FileName.CompareTo(m2.FileName));

                if (chroList.Count == 0)
                {
                    throw new Exception("Cannot find chromotograph!");
                }

                using (var sw = new StreamWriter(boundaryInput))
                {
                    sw.WriteLine("ChroDirectory\tChroFile\tSample\tPeptideId\tTheoreticalMz\tCharge\tIdentifiedScan");
                    foreach (var chro in chroList)
                    {
                        sw.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}",
                                     Path.GetDirectoryName(chro.FileName).Replace("\\", "/"),
                                     Path.GetFileNameWithoutExtension(chro.FileName),
                                     chro.Experimental,
                                     chro.GetPeptideId(),
                                     chro.TheoreticalMz,
                                     chro.Charge,
                                     chro.IdentifiedScan);
                    }
                }
            }

            if (!File.Exists(options.OutputFile) || options.Overwrite)
            {
                Progress.SetMessage("Finding boundaries ...");
                var boundaryOptions = new RTemplateProcessorOptions()
                {
                    InputFile      = boundaryInput,
                    OutputFile     = options.OutputFile,
                    RTemplate      = BoundaryR,
                    RExecute       = ExternalProgramConfig.GetExternalProgram("R"),
                    CreateNoWindow = true
                };
                boundaryOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));
                boundaryOptions.Parameters.Add("maximumProfileDistance<-" + options.MaximumProfileDistance.ToString());
                new RTemplateProcessor(boundaryOptions)
                {
                    Progress = this.Progress
                }.Process();
            }

            //if (options.DrawImage)
            //{
            //  Progress.SetMessage("Drawing images ...");

            //  var imageOptions = new RTemplateProcessorOptions()
            //  {
            //    InputFile = options.OutputFile,
            //    OutputFile = Path.ChangeExtension(options.OutputFile, ".image"),
            //    RTemplate = ImageR,
            //    RExecute = SystemUtils.GetRExecuteLocation(),
            //    CreateNoWindow = true,
            //    NoResultFile = true
            //  };
            //  new RTemplateProcessor(imageOptions) { Progress = this.Progress }.Process();
            //}

            return(new string[] { options.OutputFile });
        }