/// <summary>
/// Searches <paramref name="curPeaks"/> for the isotopic envelope of <paramref name="chro"/>
/// and appends it to <c>chro.Profiles</c> when it passes both acceptance checks.
/// </summary>
/// <param name="chro">Profile whose isotopic ions drive the search and which receives the envelope.</param>
/// <param name="curPeaks">Peak list that is searched for the envelope.</param>
/// <returns>
/// <c>true</c> when the envelope was appended; <c>false</c> when it holds fewer than
/// <c>options.ProfileLength</c> entries or its profile correlation is below
/// <c>options.MinimumCorrelation</c>.
/// </returns>
private bool AddEnvelope(ChromatographProfile chro, PeakList <Peak> curPeaks)
        {
            ChromatographProfileScan found = FindEnvelope(curPeaks, chro.IsotopicIons, options.MzTolerancePPM);

            // The length check comes first so the correlation is only computed for
            // envelopes that are long enough.
            bool rejected = found.Count < options.ProfileLength ||
                            found.CalculateProfileCorrelation(chro.IsotopicIons) < options.MinimumCorrelation;

            if (rejected)
            {
                return false;
            }

            chro.Profiles.Add(found);
            return true;
        }
        /// <summary>
        /// Reads the full MS spectrum at <paramref name="scanIndex"/>, asks <c>profileFinder</c> for the
        /// isotopic envelope of <paramref name="chro"/> in it and appends the envelope to
        /// <c>chro.Profiles</c> when it is accepted.
        /// </summary>
        /// <param name="chro">Profile that receives the envelope.</param>
        /// <param name="rawReader">Raw file reader handed to <c>ReadFullMS</c>.</param>
        /// <param name="fullMSList">Full MS scan list handed to <c>ReadFullMS</c>.</param>
        /// <param name="scanIndex">Index into <paramref name="fullMSList"/> of the scan to inspect.</param>
        /// <returns>
        /// <c>true</c> when an envelope was appended; <c>false</c> when the finder found none or,
        /// with a positive <c>options.MinimumProfileCorrelation</c>, the envelope's correlation is below it.
        /// </returns>
        private bool AddEnvelope(ChromatographProfile chro, CacheRawFile rawReader, List <FullMS> fullMSList, int scanIndex)
        {
            ChromatographProfileScan found  = null;
            PeakList <Peak> spectrum        = ReadFullMS(rawReader, fullMSList, scanIndex);

            bool located = profileFinder.Find(spectrum, chro, options.MzTolerancePPM, options.MinimumProfileLength, ref found);
            if (!located)
            {
                return false;
            }

            // A non-positive threshold switches the correlation filter off entirely.
            if (options.MinimumProfileCorrelation > 0)
            {
                var correlation = found.CalculateProfileCorrelation(chro.IsotopicIntensities);
                if (correlation < options.MinimumProfileCorrelation)
                {
                    return false;
                }
            }

            chro.Profiles.Add(found);
            return true;
        }
 /// <summary>
 /// Picks one peak from every candidate list in <paramref name="envelope"/>: the one at the
 /// index reported by <c>FindMaxIndex()</c> for that list.
 /// </summary>
 /// <param name="chro">Profile being resolved; not used by this strategy.</param>
 /// <param name="envelope">Candidate peak lists, one list per entry.</param>
 /// <returns>A new list with one selected peak per candidate list, in the same order.</returns>
 protected override List <Peak> Resolve(ChromatographProfile chro, List <List <Peak> > envelope)
 {
     // NOTE(review): FindMaxIndex presumably returns the index of the most intense peak - confirm.
     return envelope
            .Select(candidates => candidates[candidates.FindMaxIndex()])
            .ToList();
 }
Esempio n. 4
0
 /// <summary>
 /// Resolves the candidate peaks by delegating to
 /// <c>Isotopic.InterfernceOptimization.ResolveByKullbackLeiblerDistance</c>, passing the
 /// profile's isotopic intensities and the candidate lists.
 /// </summary>
 /// <param name="chro">Profile that supplies <c>IsotopicIntensities</c>.</param>
 /// <param name="envelope">Candidate peak lists to resolve.</param>
 /// <returns>The list produced by the Kullback-Leibler based resolver.</returns>
 protected override List <Peak> Resolve(ChromatographProfile chro, List <List <Peak> > envelope)
 {
     // A Pearson-correlation variant (ResolveByPearsonCorrelation) was previously used here
     // and is kept only as a note; the Kullback-Leibler variant is the active one.
     var expectedIntensities = chro.IsotopicIntensities;

     return Isotopic.InterfernceOptimization.ResolveByKullbackLeiblerDistance(expectedIntensities, envelope);
 }
        /// <summary>
        /// Builds chromatograph profiles for every identified peptide from the matching raw files,
        /// writes them to disk (text and XML) and then runs the boundary-detection R template.
        /// </summary>
        /// <remarks>
        /// Raw files are processed in parallel, one raw file per iteration. Exceptions thrown inside
        /// the parallel body (including <c>UserTerminatedException</c> on cancellation) are surfaced
        /// by <c>Parallel.ForEach</c> wrapped in an <c>AggregateException</c>.
        /// </remarks>
        /// <returns>A single-element array containing <c>options.OutputFile</c>.</returns>
        /// <exception cref="Exception">
        /// A peptide refers to an experiment with no matching <c>*.raw</c> file under
        /// <c>options.RawDirectory</c>, or (inside the parallel body) a raw file contains no MS1 scan.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            var format          = new MascotPeptideTextFormat();
            var spectra         = format.ReadFromFile(options.InputFile);
            var peptideMap      = spectra.ToGroupDictionary(m => m.Query.FileScan.Experimental.ToLower());
            var rawfiles        = Directory.GetFiles(options.RawDirectory, "*.raw", SearchOption.AllDirectories).ToDictionary(m => Path.GetFileNameWithoutExtension(m).ToLower());
            var retentionWindow = options.RetentionTimeWindow;

            var missed = peptideMap.Keys.Except(rawfiles.Keys).ToArray();

            if (missed.Length > 0)
            {
                throw new Exception(string.Format("Cannot find raw file of {0} in directory {1}", missed.Merge("/"), options.RawDirectory));
            }

            var option = new ParallelOptions()
            {
                // ParallelOptions rejects 0, so an empty peptide map must still yield at least 1.
                MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, Math.Max(1, peptideMap.Count)),
            };

            Parallel.ForEach(peptideMap, option, raw =>
            {
                var peptides = raw.Value;
                var rawFile  = rawfiles[raw.Key];

                Progress.SetMessage("Preparing isotopic for " + raw.Key + " ...");
                var waitingPeaks = new List <ChromatographProfile>();
                foreach (var peptide in peptides)
                {
                    string file = GetTargetFile(peptide);
                    var chro    = new ChromatographProfile()
                    {
                        Experimental   = peptide.Query.FileScan.Experimental,
                        IdentifiedScan = peptide.Query.FileScan.FirstScan,
                        ObservedMz     = peptide.GetPrecursorMz(),
                        TheoreticalMz  = peptide.GetTheoreticalMz(),
                        Charge         = peptide.Query.Charge,
                        Sequence       = peptide.Peptide.PureSequence,
                        FileName       = Path.GetFileName(file)
                    };
                    chro.InitializeIsotopicIons(options.MzTolerancePPM);
                    waitingPeaks.Add(chro);
                }

                if (waitingPeaks.Count == 0)
                {
                    // Nothing to extract for this raw file; leave this parallel iteration.
                    return;
                }

                List <FullMS> fullMSList = new List <FullMS>();
                Progress.SetMessage("Reading full ms list from " + rawFile + "...");
                using (var rawReader = new CacheRawFile(RawFileFactory.GetRawFileReader(rawFile)))
                {
                    // Index every MS1 scan; Peaks stays null here and is passed on to AddEnvelope as is.
                    var firstScan = rawReader.GetFirstSpectrumNumber();
                    var lastScan  = rawReader.GetLastSpectrumNumber();
                    for (int scan = firstScan; scan <= lastScan; scan++)
                    {
                        var mslevel = rawReader.GetMsLevel(scan);
                        if (mslevel == 1)
                        {
                            fullMSList.Add(new FullMS()
                            {
                                Scan          = scan,
                                RetentionTime = rawReader.ScanToRetentionTime(scan),
                                Peaks         = null
                            });
                        }
                    }

                    if (fullMSList.Count == 0)
                    {
                        // Without this check the lookup of the master scan below fails with an
                        // index-out-of-range error that does not name the offending file.
                        throw new Exception(string.Format("Cannot find any full MS scan in raw file {0}", rawFile));
                    }

                    var chroGroups = waitingPeaks.GroupBy(chro => string.Format("{0}_{1:0.0000}", chro.Sequence, chro.TheoreticalMz));
                    foreach (var chroGroup in chroGroups)
                    {
                        List <ChromatographProfile> profileChros = new List <ChromatographProfile>();
                        foreach (var chro in chroGroup.OrderBy(m => m.IdentifiedScan))
                        {
                            // Master scan = last MS1 scan whose number is not above the identified scan
                            // (index 0 when the identified scan precedes the second MS1 scan).
                            var masterScanIndex = 0;
                            for (int i = 1; i < fullMSList.Count; i++)
                            {
                                if (chro.IdentifiedScan < fullMSList[i].Scan)
                                {
                                    break;
                                }
                                masterScanIndex = i;
                            }
                            var masterScan          = fullMSList[masterScanIndex].Scan;
                            var masterRetentionTime = fullMSList[masterScanIndex].RetentionTime;

                            // If an already extracted profile of this peptide covers the master scan,
                            // only flag that scan as identified and skip a second extraction.
                            bool bExist = false;
                            foreach (var profileChro in profileChros)
                            {
                                foreach (var pkl in profileChro.Profiles)
                                {
                                    if (pkl.Scan == masterScan)
                                    {
                                        pkl.Identified = true;
                                        bExist         = true;
                                        break;
                                    }
                                }

                                if (bExist)
                                {
                                    break;
                                }
                            }

                            if (bExist)
                            {
                                continue;
                            }

                            Progress.SetMessage("Processing {0} : {1:0.#####} : {2} : {3}", chro.Sequence, chro.ObservedMz, chro.IdentifiedScan, Path.GetFileName(chro.FileName));

                            // Walk backwards from the master scan until the retention window is left
                            // or a scan yields no acceptable envelope.
                            for (int scanIndex = masterScanIndex; scanIndex >= 0; scanIndex--)
                            {
                                if (Progress.IsCancellationPending())
                                {
                                    throw new UserTerminatedException();
                                }

                                var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                                if (masterRetentionTime - curRetentionTime > retentionWindow)
                                {
                                    break;
                                }

                                if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                                {
                                    break;
                                }

                                if (scanIndex == masterScanIndex)
                                {
                                    chro.Profiles.Last().Identified = true;
                                }
                            }
                            // Profiles were collected newest-first; restore ascending scan order.
                            chro.Profiles.Reverse();

                            // Walk forwards from the scan after the master scan under the same stop rules.
                            for (int scanIndex = masterScanIndex + 1; scanIndex < fullMSList.Count; scanIndex++)
                            {
                                if (Progress.IsCancellationPending())
                                {
                                    throw new UserTerminatedException();
                                }

                                var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                                if (curRetentionTime - masterRetentionTime > retentionWindow)
                                {
                                    break;
                                }

                                if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                                {
                                    break;
                                }
                            }

                            profileChros.Add(chro);
                        }

                        // Drop short profiles, then order by profile length (longest first).
                        profileChros.RemoveAll(l => l.Profiles.Count < options.MinimumScanCount);
                        profileChros.Sort((m1, m2) => m2.Profiles.Count.CompareTo(m1.Profiles.Count));

                        // The longest profile goes to the target directory; all others go to the
                        // sub directory with ".sub" inserted before the file extension.
                        bool bMain = true;
                        foreach (var chro in profileChros)
                        {
                            string filename;
                            if (bMain)
                            {
                                filename = Path.Combine(GetTargetDirectory(chro.Experimental), chro.FileName);
                            }
                            else
                            {
                                filename = Path.Combine(GetTargetSubDirectory(chro.Experimental), Path.ChangeExtension(chro.FileName, ".sub" + Path.GetExtension(chro.FileName)));
                            }
                            bMain = false;

                            new ChromatographProfileTextWriter().WriteToFile(filename, chro);
                            new ChromatographProfileXmlFormat().WriteToFile(filename + ".xml", chro);
                        }
                    }
                }
            }
                             );

            Progress.SetMessage("Finding boundaries ...");
            var boundaryOptions = new RTemplateProcessorOptions()
            {
                InputFile      = targetDir,
                OutputFile     = options.OutputFile,
                RTemplate      = BoundaryR,
                RExecute       = SystemUtils.GetRExecuteLocation(),
                CreateNoWindow = true
            };

            new RTemplateProcessor(boundaryOptions)
            {
                Progress = this.Progress
            }.Process();

            return(new string[] { options.OutputFile });
        }
        /// <summary>
        /// Reads the full MS spectrum at <paramref name="scanIndex"/> via <c>ReadFullMS</c> and forwards it
        /// to the peak-list overload of <c>AddEnvelope</c>.
        /// </summary>
        /// <param name="chro">Profile that receives the envelope.</param>
        /// <param name="rawReader">Raw file reader handed to <c>ReadFullMS</c>.</param>
        /// <param name="fullMSList">Full MS scan list handed to <c>ReadFullMS</c>.</param>
        /// <param name="scanIndex">Index into <paramref name="fullMSList"/> of the scan to inspect.</param>
        /// <returns>The result of the peak-list overload for the spectrum that was read.</returns>
        private bool AddEnvelope(ChromatographProfile chro, CacheRawFile rawReader, List <FullMS> fullMSList, int scanIndex)
        {
            PeakList <Peak> spectrum = ReadFullMS(rawReader, fullMSList, scanIndex);
            bool added               = AddEnvelope(chro, spectrum);

            return added;
        }
        /// <summary>
        /// Builds chromatograph profiles for every identified peptide, writes them to disk, records the
        /// main profile of each peptide in a <c>.chros.tsv</c> index next to the output file and then
        /// runs the boundary-detection R template on that index.
        /// </summary>
        /// <remarks>
        /// Both stages are skipped when their result file already exists and <c>options.Overwrite</c> is
        /// not set. Raw files are processed in parallel; exceptions thrown inside the parallel body
        /// (including <c>UserTerminatedException</c> on cancellation) are surfaced by
        /// <c>Parallel.ForEach</c> wrapped in an <c>AggregateException</c>. Numbers handed to R (the TSV
        /// index and the template parameters) are formatted with the invariant culture so that they
        /// always use '.' as decimal separator.
        /// </remarks>
        /// <returns>A single-element array containing <c>options.OutputFile</c>.</returns>
        /// <exception cref="Exception">
        /// A peptide refers to an experiment that is not in <c>options.RawFiles</c>, no chromatograph
        /// could be detected at all, or (inside the parallel body) a raw file contains no MS1 scan.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            var boundaryInput = Path.ChangeExtension(options.OutputFile, ".chros.tsv");

            if (!File.Exists(boundaryInput) || options.Overwrite)
            {
                var format          = GetPeptideReader();
                var spectra         = format.ReadFromFile(options.InputFile);
                var peptideMap      = spectra.ToGroupDictionary(m => m.Query.FileScan.Experimental.ToLower());
                var rawfiles        = options.RawFiles.ToDictionary(m => RawFileFactory.GetExperimental(m).ToLower());
                var retentionWindow = options.MaximumRetentionTimeWindow;

                var missed = peptideMap.Keys.Except(rawfiles.Keys).ToArray();
                if (missed.Length > 0)
                {
                    throw new Exception(string.Format("Cannot find raw file of {0} in file list", missed.Merge("/")));
                }

                // ThreadCount == 0 means "use all processors".
                var optionThreadCount = options.ThreadCount == 0 ? Environment.ProcessorCount : options.ThreadCount;
                var option            = new ParallelOptions()
                {
                    // ParallelOptions rejects 0, so an empty peptide map must not drive the value to 0.
                    // Only the count is clamped, which keeps a configured ThreadCount of -1 (unlimited) intact.
                    MaxDegreeOfParallelism = Math.Min(optionThreadCount, Math.Max(1, peptideMap.Count)),
                };

                // Prepare the profiles sequentially; only the raw file scanning runs in parallel.
                var chroMap = new List <Tuple <string, List <ChromatographProfile> > >();
                foreach (var raw in peptideMap)
                {
                    var peptides = raw.Value;

                    var waitingPeaks = new List <ChromatographProfile>();
                    foreach (var peptide in peptides)
                    {
                        var chro = new ChromatographProfile()
                        {
                            Experimental            = peptide.Query.FileScan.Experimental,
                            IdentifiedScan          = peptide.Query.FileScan.FirstScan,
                            IdentifiedRetentionTime = peptide.Query.FileScan.RetentionTime,
                            ObservedMz    = peptide.GetPrecursorMz(),
                            TheoreticalMz = peptide.GetTheoreticalMz(),
                            Charge        = peptide.Query.Charge,
                            Sequence      = peptide.Peptide.PureSequence,
                            FileName      = GetTargetFile(peptide),
                            SubFileName   = GetTargetSubFile(peptide)
                        };
                        chro.InitializeIsotopicIons(options.MzTolerancePPM, options.MinimumIsotopicPercentage);
                        waitingPeaks.Add(chro);
                    }

                    chroMap.Add(new Tuple <string, List <ChromatographProfile> >(raw.Key, waitingPeaks));
                }

                ConcurrentBag <ChromatographProfile> detected = new ConcurrentBag <ChromatographProfile>();

                Parallel.ForEach(chroMap, option, raw =>
                {
                    var rawFileName  = raw.Item1;
                    var waitingPeaks = raw.Item2;
                    var rawFile      = rawfiles[rawFileName];

                    List <FullMS> fullMSList = new List <FullMS>();
                    Progress.SetMessage("Reading full ms list from " + rawFile + "...");
                    using (var rawReader = new CacheRawFile(RawFileFactory.GetRawFileReader(rawFile)))
                    {
                        // Index every MS1 scan; Peaks stays null here and is passed on to AddEnvelope as is.
                        var firstScan = rawReader.GetFirstSpectrumNumber();
                        var lastScan  = rawReader.GetLastSpectrumNumber();
                        for (int scan = firstScan; scan <= lastScan; scan++)
                        {
                            var mslevel = rawReader.GetMsLevel(scan);
                            if (mslevel == 1)
                            {
                                fullMSList.Add(new FullMS()
                                {
                                    Scan          = scan,
                                    RetentionTime = rawReader.ScanToRetentionTime(scan),
                                    Peaks         = null
                                });
                            }
                        }

                        if (fullMSList.Count == 0 && waitingPeaks.Count > 0)
                        {
                            // Without this check the lookup of the master scan below fails with an
                            // index-out-of-range error that does not name the offending file.
                            throw new Exception(string.Format("Cannot find any full MS scan in raw file {0}", rawFile));
                        }

                        // Peptides that carry only a retention time get a scan number derived from
                        // the last MS1 scan (from index 1 on) that is not later than that time.
                        foreach (var chro in waitingPeaks)
                        {
                            if (chro.IdentifiedScan == 0 && chro.IdentifiedRetentionTime > 0)
                            {
                                for (int i = 1; i < fullMSList.Count; i++)
                                {
                                    if (chro.IdentifiedRetentionTime < fullMSList[i].RetentionTime)
                                    {
                                        break;
                                    }
                                    chro.IdentifiedScan = fullMSList[i].Scan + 1;
                                }
                            }
                        }

                        var chroGroups = waitingPeaks.GroupBy(chro => chro.GetPeptideId());
                        foreach (var chroGroup in chroGroups)
                        {
                            List <ChromatographProfile> profileChros = new List <ChromatographProfile>();
                            foreach (var chro in chroGroup.OrderBy(m => m.IdentifiedScan))
                            {
                                // Master scan = last MS1 scan whose number is not above the identified scan
                                // (index 0 when the identified scan precedes the second MS1 scan).
                                var masterScanIndex = 0;
                                for (int i = 1; i < fullMSList.Count; i++)
                                {
                                    if (chro.IdentifiedScan < fullMSList[i].Scan)
                                    {
                                        break;
                                    }
                                    masterScanIndex = i;
                                }
                                var masterScan          = fullMSList[masterScanIndex].Scan;
                                var masterRetentionTime = fullMSList[masterScanIndex].RetentionTime;

                                // If an already extracted profile of this peptide covers the master scan,
                                // only flag that scan as identified and skip a second extraction.
                                bool bExist = false;
                                foreach (var profileChro in profileChros)
                                {
                                    foreach (var pkl in profileChro.Profiles)
                                    {
                                        if (pkl.Scan == masterScan)
                                        {
                                            pkl.Identified = true;
                                            bExist         = true;
                                            break;
                                        }
                                    }

                                    if (bExist)
                                    {
                                        break;
                                    }
                                }

                                if (bExist)
                                {
                                    continue;
                                }

                                // Allow one missed scan per direction: the walk stops at the second
                                // scan without an acceptable envelope (the counter is not reset on success).
                                int naCount = 2;
                                for (int scanIndex = masterScanIndex; scanIndex >= 0; scanIndex--)
                                {
                                    if (Progress.IsCancellationPending())
                                    {
                                        throw new UserTerminatedException();
                                    }

                                    var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                                    if (masterRetentionTime - curRetentionTime > retentionWindow)
                                    {
                                        break;
                                    }

                                    if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                                    {
                                        naCount--;
                                        if (naCount == 0)
                                        {
                                            break;
                                        }
                                        else
                                        {
                                            continue;
                                        }
                                    }

                                    if (scanIndex == masterScanIndex)
                                    {
                                        chro.Profiles.Last().Identified = true;
                                    }
                                }
                                // Profiles were collected newest-first; restore ascending scan order.
                                chro.Profiles.Reverse();

                                naCount = 2;
                                for (int scanIndex = masterScanIndex + 1; scanIndex < fullMSList.Count; scanIndex++)
                                {
                                    if (Progress.IsCancellationPending())
                                    {
                                        throw new UserTerminatedException();
                                    }

                                    var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                                    if (curRetentionTime - masterRetentionTime > retentionWindow)
                                    {
                                        break;
                                    }

                                    if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                                    {
                                        naCount--;
                                        if (naCount == 0)
                                        {
                                            break;
                                        }
                                        else
                                        {
                                            continue;
                                        }
                                    }
                                }

                                profileChros.Add(chro);
                            }

                            // Drop short profiles, then order by profile length (longest first).
                            profileChros.RemoveAll(l => l.Profiles.Count < options.MinimumScanCount);
                            profileChros.Sort((m1, m2) => m2.Profiles.Count.CompareTo(m1.Profiles.Count));

                            // The longest profile is the main one: it is written to FileName and listed
                            // in the boundary index; every other profile is written to SubFileName.
                            bool bMain = true;
                            foreach (var chro in profileChros)
                            {
                                var filename = bMain ? chro.FileName : chro.SubFileName;
                                if (bMain)
                                {
                                    detected.Add(chro);
                                }

                                bMain = false;

                                new ChromatographProfileTextWriter().WriteToFile(filename, chro);
                                new ChromatographProfileXmlFormat().WriteToFile(filename + ".xml", chro);
                            }
                        }
                    }
                }
                                 );

                // ConcurrentBag has no defined order; sort so the index file is reproducible.
                var chroList = new List <ChromatographProfile>(detected);
                chroList.Sort((m1, m2) => m1.FileName.CompareTo(m2.FileName));

                if (chroList.Count == 0)
                {
                    throw new Exception("Cannot find chromotograph!");
                }

                using (var sw = new StreamWriter(boundaryInput))
                {
                    sw.WriteLine("ChroDirectory\tChroFile\tSample\tPeptideId\tTheoreticalMz\tCharge\tIdentifiedScan");
                    foreach (var chro in chroList)
                    {
                        // Invariant culture: the file is read by R, which expects '.' decimals.
                        sw.WriteLine(string.Format(System.Globalization.CultureInfo.InvariantCulture,
                                                   "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}",
                                                   Path.GetDirectoryName(chro.FileName).Replace("\\", "/"),
                                                   Path.GetFileNameWithoutExtension(chro.FileName),
                                                   chro.Experimental,
                                                   chro.GetPeptideId(),
                                                   chro.TheoreticalMz,
                                                   chro.Charge,
                                                   chro.IdentifiedScan));
                    }
                }
            }

            if (!File.Exists(options.OutputFile) || options.Overwrite)
            {
                Progress.SetMessage("Finding boundaries ...");
                var boundaryOptions = new RTemplateProcessorOptions()
                {
                    InputFile      = boundaryInput,
                    OutputFile     = options.OutputFile,
                    RTemplate      = BoundaryR,
                    RExecute       = ExternalProgramConfig.GetExternalProgram("R"),
                    CreateNoWindow = true
                };
                boundaryOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));
                // The value becomes R source text, so it must not use a culture-specific decimal comma.
                boundaryOptions.Parameters.Add(string.Format(System.Globalization.CultureInfo.InvariantCulture, "maximumProfileDistance<-{0}", options.MaximumProfileDistance));
                new RTemplateProcessor(boundaryOptions)
                {
                    Progress = this.Progress
                }.Process();
            }

            return(new string[] { options.OutputFile });
        }
 /// <summary>
 /// Reduces the candidate peaks gathered for a profile to a single list of resolved peaks.
 /// Concrete strategies are supplied by overrides.
 /// </summary>
 /// <param name="chro">Profile the candidate peaks were collected for.</param>
 /// <param name="envelope">Candidate peak lists; each inner list holds the candidates for one isotopic ion.</param>
 /// <returns>
 /// The resolved peaks.
 /// NOTE(review): the Find method in this listing uses a peak's position in the returned list as the
 /// index of its isotopic ion - confirm that every override returns one peak per inner list, in order.
 /// </returns>
 protected abstract List <Peak> Resolve(ChromatographProfile chro, List <List <Peak> > envelope);
        /// <summary>
        /// Looks for the isotopic envelope of <paramref name="chro"/> in the spectrum <paramref name="ms1"/>
        /// and, when enough isotopic ions are matched, builds the scan entry describing it.
        /// </summary>
        /// <param name="ms1">Spectrum to search; also supplies the scan number and retention time of the result.</param>
        /// <param name="chro">Profile whose <c>IsotopicIons</c> define the m/z windows to match.</param>
        /// <param name="mzTolerancePPM">Not read by this method; the windows come from the isotopic ions themselves.</param>
        /// <param name="minimumProfileLength">Minimum number of peaks in range and of matched isotopic ions.</param>
        /// <param name="result">
        /// Replaced by a new scan entry on success. Matched peaks are added to the entry itself and every
        /// other peak in range goes to its <c>RawPeaks</c>. Left untouched when <c>false</c> is returned.
        /// </param>
        /// <returns>
        /// <c>false</c> when fewer than <paramref name="minimumProfileLength"/> peaks lie in the overall m/z
        /// range, or fewer than that many leading isotopic ions have at least one peak in their window;
        /// otherwise <c>true</c>.
        /// </returns>
        public bool Find(PeakList <Peak> ms1, ChromatographProfile chro, double mzTolerancePPM, int minimumProfileLength, ref ChromatographProfileScan result)
        {
            // Restrict the spectrum to the span between the first ion's lower bound and the last ion's upper bound.
            var lowerBound = chro.IsotopicIons[0].MinimumMzWithinTolerance;
            var upperBound = chro.IsotopicIons[chro.IsotopicIons.Length - 1].MaximumMzWithinTolerance;
            var candidates = ms1.GetRange(lowerBound, upperBound);

            if (candidates.Count < minimumProfileLength)
            {
                return false;
            }

            // Single forward pass: the cursor is shared by all ions, so a peak is considered at most once.
            var envelope = new List <List <Peak> >();
            int cursor   = 0;

            foreach (var ion in chro.IsotopicIons)
            {
                var matched = new List <Peak>();
                for (; cursor < candidates.Count; cursor++)
                {
                    var candidate = candidates[cursor];
                    if (candidate.Mz < ion.MinimumMzWithinTolerance)
                    {
                        // Below this ion's window: skip it.
                        continue;
                    }

                    if (candidate.Mz > ion.MaximumMzWithinTolerance)
                    {
                        // Above the window: keep the cursor here for the next ion.
                        break;
                    }

                    matched.Add(candidate);
                }

                // The envelope has to be contiguous; stop at the first ion without any peak.
                if (matched.Count == 0)
                {
                    break;
                }

                envelope.Add(matched);
            }

            if (envelope.Count < minimumProfileLength)
            {
                return false;
            }

            result               = new ChromatographProfileScan();
            result.Scan          = ms1.FirstScan;
            result.RetentionTime = ms1.ScanTimes[0].RetentionTime;
            result.RawPeaks      = new List <Peak>();

            List <Peak> resolved = Resolve(chro, envelope);

            foreach (var candidate in candidates)
            {
                // Position in the resolved list doubles as the index of the isotopic ion.
                int ionIndex = resolved.IndexOf(candidate);
                if (ionIndex == -1)
                {
                    result.RawPeaks.Add(candidate);
                    continue;
                }

                var matchedIon = chro.IsotopicIons[ionIndex];
                var scanPeak   = new ChromatographProfileScanPeak()
                {
                    Isotopic    = ionIndex + 1,
                    Mz          = candidate.Mz,
                    Intensity   = candidate.Intensity,
                    Noise       = candidate.Noise,
                    PPMDistance = PrecursorUtils.mz2ppm(candidate.Mz, candidate.Mz - matchedIon.Mz)
                };
                result.Add(scanPeak);
            }

            return true;
        }