Example #1
0
        /// <summary>
        /// Groups the given spectra by raw file and runs a
        /// <c>SilacQuantificationFileBuilder</c> over each group in turn.
        /// </summary>
        /// <param name="detailDir">Directory passed through to the builder's <c>Quantify</c> call.</param>
        /// <param name="querys">Identified spectra; grouped via <c>IdentifiedSpectrumUtils.GetRawPeptideMap</c>.</param>
        /// <exception cref="UserTerminatedException">
        /// Thrown when <c>Progress.IsCancellationPending()</c> reports true at the start of an iteration.
        /// </exception>
        private void DoQuantify(string detailDir, List <IIdentifiedSpectrum> querys)
        {
            Dictionary <string, List <IIdentifiedSpectrum> > peptidesByFile = IdentifiedSpectrumUtils.GetRawPeptideMap(querys);

            int totalFiles = peptidesByFile.Count;
            int fileIndex  = 0;

            foreach (KeyValuePair <string, List <IIdentifiedSpectrum> > entry in peptidesByFile)
            {
                fileIndex++;

                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }

                // The map key is the experimental name; the configured raw format turns it into a file path.
                string rawFilename = option.RawFormat.GetRawFile(option.RawDir, entry.Key);

                Progress.SetMessage(MyConvert.Format("{0}/{1} : Processing {2} ...", fileIndex, totalFiles, rawFilename));

                var fileBuilder = new SilacQuantificationFileBuilder(option)
                {
                    Progress        = this.Progress,
                    SoftwareVersion = this.SoftwareVersion,
                    MinScanNumber   = this.MinScanNumber
                };

                // Order the peptides in place by their IsExtendedIdentification() value (ascending CompareTo).
                List <IIdentifiedSpectrum> filePeptides = entry.Value;
                filePeptides.Sort((left, right) => left.IsExtendedIdentification().CompareTo(right.IsExtendedIdentification()));

                fileBuilder.Quantify(rawFilename, filePeptides, detailDir);
            }
        }
Example #2
0
        /// <summary>
        /// Reads spectra from a Mascot peptide text file, groups them, and rewrites the
        /// description of each group's first entry using references looked up from the FASTA file.
        /// </summary>
        /// <param name="fileName">Peptide text file to read.</param>
        /// <returns>
        /// The grouped result: grouped by peptide when <c>isSiteLevel</c> is set,
        /// otherwise grouped by unique peptide.
        /// </returns>
        protected override IIdentifiedResult GetIdentifiedResult(string fileName)
        {
            format = new MascotPeptideTextFormat();
            var spectra = format.ReadFromFile(fileName);

            IIdentifiedResult grouped;
            if (!isSiteLevel)
            {
                grouped = IdentifiedSpectrumUtils.BuildGroupByUniquePeptide(spectra);
            }
            else
            {
                grouped = IdentifiedSpectrumUtils.BuildGroupByPeptide(spectra);
            }

            var referenceByAccession = SequenceUtils.ReadAccessNumberReferenceMap(new FastaFormat(), this.fastaFile, this.parser);

            foreach (var proteinGroup in grouped)
            {
                // The first entry's description holds '/'-separated protein names; each one is
                // reduced to its access number by the parser and replaced by the mapped reference.
                // A name whose access number is missing from the map makes the indexer throw.
                var leading    = proteinGroup[0];
                var references = leading.Description
                                 .Split('/')
                                 .Select(name => referenceByAccession[parser.GetValue(name)])
                                 .ToList();
                leading.Description = references.Merge(" ! ");
            }
            return(grouped);
        }
Example #3
0
        /// <summary>
        /// Gets the list of spectra that remain after filtering, once entries for the same
        /// spectrum assumed to be different charge states have been removed.
        /// </summary>
        /// <returns>
        /// The list obtained from <c>GetOptimalSpectra()</c> after it has been passed through
        /// <c>IdentifiedSpectrumUtils.FilterSameSpectrumWithDifferentCharge</c>.
        /// </returns>
        public List <IIdentifiedSpectrum> GetUnconflictedOptimalSpectra()
        {
            var optimalSpectra = GetOptimalSpectra();

            // The utility's return value (if any) is not used; the same list instance is returned.
            IdentifiedSpectrumUtils.FilterSameSpectrumWithDifferentCharge(optimalSpectra);

            return(optimalSpectra);
        }
Example #4
0
        /// <summary>
        /// Chooses the precursor PPM tolerance for a set of spectra.
        /// </summary>
        /// <param name="spectra">Spectra from which the delta-precursor-PPM statistics are computed.</param>
        /// <returns>
        /// Three times the <c>StdDev</c> of the delta precursor PPM accumulator when there are at least
        /// five spectra and that value is neither infinite nor NaN; otherwise <c>option.PPMTolerance</c>.
        /// </returns>
        private double GetPrecursorPPM(List <IIdentifiedSpectrum> spectra)
        {
            const int minSpectraForEstimate = 5;
            const int stdDevMultiplier      = 3;

            double configuredTolerance = option.PPMTolerance;

            // Too few spectra: keep the configured tolerance.
            if (spectra.Count < minSpectraForEstimate)
            {
                return(configuredTolerance);
            }

            var deltaStats = IdentifiedSpectrumUtils.GetDeltaPrecursorPPMAccumulator(spectra);
            var spread     = deltaStats.StdDev;

            // An unusable deviation also falls back to the configured tolerance.
            if (double.IsInfinity(spread) || double.IsNaN(spread))
            {
                return(configuredTolerance);
            }

            return(spread * stdDevMultiplier);
        }
Example #5
0
        /// <summary>
        /// Writes the spectra to <paramref name="filename"/> (header line followed by one line per
        /// spectrum), optionally appends a summary section, and then writes a companion
        /// "<paramref name="filename"/>.enzyme" file through <c>ProteaseFile</c>.
        /// </summary>
        /// <param name="filename">Target peptide file.</param>
        /// <param name="t">Spectra to write.</param>
        /// <remarks>
        /// The summary section is skipped when <c>NotExportSummary</c> is set. The per-tag table is
        /// only written when more than one distinct <c>Tag</c> value is present.
        /// </remarks>
        public override void WriteToFile(string filename, List <IIdentifiedSpectrum> t)
        {
            using (var sw = new StreamWriter(filename))
            {
                sw.WriteLine(PeptideFormat.GetHeader());
                foreach (IIdentifiedSpectrum mph in t)
                {
                    sw.WriteLine(PeptideFormat.GetString(mph));
                }

                if (!NotExportSummary)
                {
                    sw.WriteLine();
                    sw.WriteLine("----- summary -----");
                    var totalCount       = IdentifiedSpectrumUtils.GetSpectrumCount(t);
                    var totalUniqueCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(t);
                    sw.WriteLine("Total spectra: " + totalCount);
                    sw.WriteLine("Total peptides: " + totalUniqueCount);

                    var tags = (from s in t
                                select s.Tag).Distinct().ToList();
                    if (tags.Count > 1)
                    {
                        tags.Sort();

                        sw.WriteLine();
                        sw.WriteLine("Tag\tSpectra\tPeptides");
                        sw.WriteLine("All\t{0}\t{1}", totalCount, totalUniqueCount);

                        foreach (var tag in tags)
                        {
                            // Materialize the per-tag subset once. Both counters below consume it,
                            // and a deferred query would re-filter the whole list for each of them.
                            // The variable stays typed as IEnumerable so the same counter overloads
                            // are selected as before.
                            IEnumerable <IIdentifiedSpectrum> tagspectra = t.Where(s => s.Tag == tag).ToList();

                            sw.WriteLine("{0}\t{1}\t{2}", tag, IdentifiedSpectrumUtils.GetSpectrumCount(tagspectra), IdentifiedSpectrumUtils.GetUniquePeptideCount(tagspectra));
                        }
                    }
                }
            }

            // Written after the peptide file has been closed.
            string enzymeFile = filename + ".enzyme";

            new ProteaseFile().Write(enzymeFile, t);
        }
        /// <summary>
        /// Reads a Sequest peptide text file, groups its peptides by raw file name, and runs the
        /// single- or multiple-source extraction for each raw file in sorted name order.
        /// </summary>
        /// <param name="peptidesFilename">Peptide text file to read.</param>
        /// <returns>Always an empty list.</returns>
        /// <exception cref="UserTerminatedException">Thrown when cancellation is pending at the start of an iteration.</exception>
        /// <exception cref="Exception">Thrown when <c>filePathMap</c> has no entry for a raw file name.</exception>
        public override IEnumerable <string> Process(string peptidesFilename)
        {
            List <IIdentifiedSpectrum> peptides = new SequestPeptideTextFormat().ReadFromFile(peptidesFilename);

            Dictionary <string, List <IIdentifiedSpectrum> > peptidesByRaw = IdentifiedSpectrumUtils.GetRawPeptideMap(peptides);
            int rawCount = peptidesByRaw.Count;

            Progress.SetRange(1, 1, rawCount);

            List <string> sortedRaws = new List <string>(peptidesByRaw.Keys);
            sortedRaws.Sort();

            for (int index = 0; index < sortedRaws.Count; index++)
            {
                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }

                string raw = sortedRaws[index];
                List <IIdentifiedSpectrum> rawPeptides = peptidesByRaw[raw];

                // The position is reported zero-based, while the message shows the one-based ordinal.
                Progress.SetPosition(1, index);
                Progress.SetMessage(1, MyConvert.Format("{0}/{1}, Extracting {2} for {3} peptides ...", index + 1, rawCount, raw, rawPeptides.Count));

                if (!filePathMap.ContainsKey(raw))
                {
                    throw new Exception("Cannot find raw dtas/outs file for " + raw);
                }

                var sourcePaths = filePathMap[raw];
                if (sourcePaths.Count == 1)
                {
                    ExtractSingleRaw(raw, sourcePaths[0], rawPeptides);
                }
                else
                {
                    ExtractMultipleRaw(raw, sourcePaths, rawPeptides);
                }
            }

            Progress.SetPosition(1, sortedRaws.Count);

            return(new List <string>());
        }
        /// <summary>
        /// Post-processes freshly parsed spectra in place: removes ambiguous assignments, applies the
        /// optional precursor-tolerance and contamination filters, optionally keeps the top peptide
        /// across search parameters, resets proteins via the access number parser, and sorts the
        /// peptides of every spectrum.
        /// </summary>
        /// <param name="result">Spectrum list that is modified in place.</param>
        protected void DoAfterParse(List <IIdentifiedSpectrum> result)
        {
            Progress.SetMessage("Remove same spectrum matched with different peptides ...");
            IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(result);

            // Dynamic precursor filter: built from the high-confidence peptides, then applied to all.
            if (Options.FilterByPrecursor && Options.FilterByPrecursorDynamicTolerance)
            {
                Progress.SetMessage(MyConvert.Format("Filtering by precursor mass tolerance ...", result.Count));
                List <IIdentifiedSpectrum> confidentPeptides = Options.SearchEngine.GetFactory().GetHighConfidentPeptides(result);
                var precursorFilter = new DynamicPrecursorPPMFilter(Options.PrecursorPPMTolerance, Options.FilterByPrecursorIsotopic).GetFilter(confidentPeptides);
                result.RemoveAll(spectrum => !precursorFilter.Accept(spectrum));
            }

            // Contamination filter: spectra ACCEPTED by the filter are the ones removed.
            if (Options.Parent != null && Options.Parent.Database.RemoveContamination)
            {
                var contaminationFilter = Options.Parent.Database.GetContaminationNameFilter();

                if (contaminationFilter != null)
                {
                    Progress.SetMessage(MyConvert.Format("Filtering by contamination name : {0}", contaminationFilter.ToString()));
                    result.RemoveAll(spectrum => contaminationFilter.Accept(spectrum));
                }
            }

            if (Options.SearchedByDifferentParameters)
            {
                Progress.SetMessage(MyConvert.Format("Merging same spectrum from different search parameters ...", result.Count));
                IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(result, Options.ScoreFunction);

                // Release the capacity freed by the merge, then force a collection.
                result.TrimExcess();
                GC.Collect();
                GC.WaitForPendingFinalizers();
                Progress.SetMessage(MyConvert.Format("Total {0} peptides passed minimum tolerance criteria.", result.Count));
            }

            // NOTE(review): Options.Parent is null-checked for the contamination filter above but is
            // dereferenced unconditionally here. Confirm whether Parent can be null at this point.
            Progress.SetMessage("Parsing protein access number ...");
            IdentifiedSpectrumUtils.ResetProteinByAccessNumberParser(result, Options.Parent.Database.GetAccessNumberParser());
            Progress.SetMessage("Parsing protein access number finished.");

            Progress.SetMessage("Sorting peptide sequence ...");
            result.ForEach(spectrum => spectrum.SortPeptides());
            Progress.SetMessage("Sorting peptide sequence finished.");
        }
Example #8
0
        /// <summary>
        /// Writes three files: the peptide list to <paramref name="filename"/>, a summary to
        /// "<paramref name="filename"/>.summary", and enzyme information to
        /// "<paramref name="filename"/>.enzyme".
        /// </summary>
        /// <param name="filename">Base output path.</param>
        /// <param name="spectra">Spectra to write.</param>
        public void WriteToFile(string filename, List <IIdentifiedSpectrum> spectra)
        {
            // Peptide file: header first, then one formatted line per spectrum.
            using (var peptideWriter = new StreamWriter(filename))
            {
                peptideWriter.WriteLine(PeptideFormat.GetHeader());
                for (int i = 0; i < spectra.Count; i++)
                {
                    peptideWriter.WriteLine(PeptideFormat.GetString(spectra[i]));
                }
            }

            // Summary file: spectrum count and unique peptide count.
            string summaryFile = filename + ".summary";
            using (var summaryWriter = new StreamWriter(summaryFile))
            {
                var spectrumCount      = IdentifiedSpectrumUtils.GetSpectrumCount(spectra);
                var uniquePeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(spectra);
                WriteSummary(summaryWriter, spectrumCount, uniquePeptideCount);
            }

            new ProteaseFile().Write(filename + ".enzyme", spectra);
        }
        /// <summary>
        /// Concatenates the spectra of every file in <c>sourceFiles</c> into
        /// <paramref name="targetFilename"/> and appends a summary with the total spectrum count and
        /// the number of unique peptides.
        /// </summary>
        /// <param name="targetFilename">File that receives the merged output.</param>
        /// <returns>A one-element array containing <paramref name="targetFilename"/>.</returns>
        /// <remarks>
        /// The header and line format are taken from <c>format.PeptideFormat</c> as it stands after the
        /// first source file has been read, and are reused for all later files.
        /// </remarks>
        public override IEnumerable <string> Process(string targetFilename)
        {
            using (StreamWriter sw = new StreamWriter(targetFilename))
            {
                var uniquePeptides = new HashSet <string>();
                int spectrumTotal  = 0;
                LineFormat <IIdentifiedSpectrum> lineFormat = null;

                Progress.SetRange(1, sourceFiles.Length);

                for (int fileIndex = 0; fileIndex < sourceFiles.Length; fileIndex++)
                {
                    string sourceFile = sourceFiles[fileIndex];
                    Progress.SetMessage("Processing " + sourceFile + " ...");

                    var spectra = format.ReadFromFile(sourceFile);

                    spectrumTotal += spectra.Count;
                    uniquePeptides.UnionWith(IdentifiedSpectrumUtils.GetUniquePeptide(spectra));

                    // Only the first file contributes the header line.
                    if (fileIndex == 0)
                    {
                        lineFormat = format.PeptideFormat;
                        sw.WriteLine(lineFormat.GetHeader());
                    }

                    foreach (var spectrum in spectra)
                    {
                        sw.WriteLine(lineFormat.GetString(spectrum));
                    }

                    Progress.SetPosition(fileIndex + 1);
                }

                format.WriteSummary(sw, spectrumTotal, uniquePeptides.Count);

                Progress.End();
            }

            return(new[] { targetFilename });
        }
Example #10
0
        /// <summary>
        /// Reads a Sequest peptide text file and, for every charge state found in it, renders a
        /// Score / DeltaScore scatter plot to "<paramref name="filename"/>.&lt;charge&gt;.png".
        /// </summary>
        /// <param name="filename">Peptide text file to read; also the prefix of the image file names.</param>
        /// <returns>The paths of the PNG files that were written, one per charge state.</returns>
        /// <remarks>
        /// All GDI+ objects (bitmap, graphics, font, brushes) are disposed before the next charge
        /// state is processed, so handles do not accumulate across iterations or plotted points.
        /// </remarks>
        public IEnumerable <string> Process(string filename)
        {
            List <IIdentifiedSpectrum> sphs = new SequestPeptideTextFormat().ReadFromFile(filename);
            Dictionary <int, List <IIdentifiedSpectrum> > chargeSphMap = IdentifiedSpectrumUtils.GetChargeMap(sphs);
            var result = new List <string>();

            foreach (KeyValuePair <int, List <IIdentifiedSpectrum> > chargeEntry in chargeSphMap)
            {
                int charge = chargeEntry.Key;
                List <IIdentifiedSpectrum> sphList = chargeEntry.Value;

                // The Y axis is fixed to [0, 1]; the X axis runs to one unit past the best score.
                double maxDeltaScore = 1.0;

                double maxScore = 0.0;
                foreach (IIdentifiedSpectrum sph in sphList)
                {
                    if (sph.Score > maxScore)
                    {
                        maxScore = sph.Score;
                    }
                }
                maxScore += 1.0;

                string resultFilename = filename + "." + charge + ".png";

                using (var bmp = new Bitmap(this.width, this.height))
                using (Graphics g = Graphics.FromImage(bmp))
                using (var font = new Font("Times New Roman", 8))
                using (var backgroundBrush = new SolidBrush(Color.White))
                using (var textBrush = new SolidBrush(Color.Black))
                {
                    g.FillRectangle(backgroundBrush, new Rectangle(0, 0, this.width, this.height));
                    double fontHeight = font.GetHeight(g);

                    g.DrawString("Score", font, textBrush, GetX(maxScore, maxScore) - 20,
                                 GetY(maxDeltaScore, 0) + 10);
                    g.DrawString("DeltaScore", font, textBrush, GetX(maxScore, 0) - 20,
                                 GetY(maxDeltaScore, maxDeltaScore) - (int)fontHeight - 5);

                    DrawXScale(maxScore, maxDeltaScore, g, font);
                    DrawYScale(maxScore, maxDeltaScore, g, font);

                    // Distinct point colors are collected so the legend can list each one once.
                    var colors = new HashSet <Color>();
                    foreach (IIdentifiedSpectrum sph in sphList)
                    {
                        Color color = GetColor(sph);
                        colors.Add(color);  // HashSet.Add already ignores duplicates

                        int x = GetX(maxScore, sph.Score);
                        var y = (int)(this.height - sph.DeltaScore / maxDeltaScore * this.height - this.top);

                        // One short-lived brush per point, released immediately after drawing.
                        using (var pointBrush = new SolidBrush(color))
                        {
                            g.FillEllipse(pointBrush, new Rectangle(x - 1, y - 1, 3, 3));
                        }
                    }

                    DrawColorTitle(maxScore, maxDeltaScore, g, font, colors);

                    // Graphics.Save() only captures drawing state and its result was discarded,
                    // so the call was dropped; the image itself is persisted by Bitmap.Save.
                    bmp.Save(resultFilename, ImageFormat.Png);
                }

                result.Add(resultFilename);
            }
            return(result);
        }
Example #11
0
        /// <summary>
        /// Loads the peptides for every path in <c>options.PathNames</c> and returns them as one list.
        /// </summary>
        /// <returns>All peptides that passed <c>options.GetFilter()</c> (when it returns a filter).</returns>
        /// <remarks>
        /// For each data file, an existing peptide file (as named by <c>GetPeptideFile</c>) is read in
        /// preference to re-parsing the search result. Otherwise the data file is parsed and the peptide
        /// file is written for next time. Peptides read from an existing peptide file have missing
        /// protein, experimental-name and first-scan information filled in. Every kept peptide is
        /// tagged with <c>options.Name</c> and the search engine name.
        /// </remarks>
        /// <exception cref="UserTerminatedException">Thrown when cancellation is pending at the start of an iteration.</exception>
        /// <exception cref="Exception">
        /// Thrown when a peptide file carries no protein information and no ".protein" companion file exists.
        /// </exception>
        protected override List <IIdentifiedSpectrum> DoParse()
        {
            var peptideFormat = GetPeptideFormat();

            Progress.SetRange(0, options.PathNames.Count + 1);

            var result = new List <IIdentifiedSpectrum>();

            var spectrumFilter = options.GetFilter();

            // Single type test instead of "is" followed by "as" on every iteration.
            var titleOptions = options as AbstractTitleDatasetOptions;

            int stepCount = 0;

            foreach (string dataFile in options.PathNames)
            {
                stepCount++;

                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }

                var dataParser = GetParser(dataFile);

                dataParser.Progress = this.Progress;

                if (titleOptions != null)
                {
                    dataParser.TitleParser = titleOptions.GetTitleParser();
                }

                List <IIdentifiedSpectrum> curPeptides;

                string peptideFilename = GetPeptideFile(dataFile);
                if (new FileInfo(peptideFilename).Exists)
                {
                    Progress.SetMessage(MyConvert.Format("{0}/{1} : Reading peptides file {2}", stepCount, options.PathNames.Count, Path.GetFileName(peptideFilename)));
                    curPeptides = peptideFormat.ReadFromFile(peptideFilename);

                    // No peptide carries proteins: they must come from the ".protein" companion file.
                    if (curPeptides.All(m => m.Proteins.Count == 0))
                    {
                        var proteinFile = peptideFilename + ".protein";
                        if (File.Exists(proteinFile))
                        {
                            IdentifiedSpectrumUtils.FillProteinInformation(curPeptides, proteinFile);
                        }
                        else
                        {
                            throw new Exception(string.Format("No protein information in peptides file {0} and no corresponding protein file exists {1}",
                                                              peptideFilename,
                                                              proteinFile));
                        }
                    }

                    // No experimental name anywhere: fall back to the data file's base name.
                    if (curPeptides.All(m => string.IsNullOrEmpty(m.Query.FileScan.Experimental)))
                    {
                        // Computed once rather than once per peptide.
                        string experimental = Path.GetFileNameWithoutExtension(dataFile);
                        curPeptides.ForEach(m => m.Query.FileScan.Experimental = experimental);
                    }

                    // No first scan anywhere: derive it from the Id when every Id is a positive
                    // integer, otherwise from the query id when every query id is positive.
                    if (curPeptides.All(m => m.Query.FileScan.FirstScan == 0))
                    {
                        if (curPeptides.All(m =>
                        {
                            int value;
                            return(int.TryParse(m.Id, out value) && value > 0);
                        }))
                        {
                            curPeptides.ForEach(m => m.Query.FileScan.FirstScan = int.Parse(m.Id));
                        }
                        else if (curPeptides.All(m => m.Query.QueryId > 0))
                        {
                            curPeptides.ForEach(m => m.Query.FileScan.FirstScan = m.Query.QueryId);
                        }
                    }
                }
                else
                {
                    Progress.SetMessage(MyConvert.Format("{0}/{1} : Parsing {2} file {3}", stepCount, options.PathNames.Count, dataParser.Engine, dataFile));
                    curPeptides = dataParser.ReadFromFile(dataFile);

                    // The peptide file is written before filtering, so it holds every parsed peptide.
                    peptideFormat.WriteToFile(peptideFilename, curPeptides);
                }

                if (null != spectrumFilter)
                {
                    curPeptides.RemoveAll(m => !spectrumFilter.Accept(m));
                }

                curPeptides.ForEach(m =>
                {
                    m.Tag    = options.Name;
                    m.Engine = options.SearchEngine.ToString();
                });

                result.AddRange(curPeptides);

                // NOTE(review): the explicit collection after each file is kept from the original,
                // presumably to limit peak memory on large inputs. Confirm it is still needed.
                curPeptides = null;
                GC.Collect();
                GC.WaitForPendingFinalizers();
            }
            return(result);
        }
        /// <summary>
        /// Rebuilds this dataset list from <paramref name="dsOptions"/>: parses every dataset,
        /// optionally applies per-dataset FDR filtering, then resolves results from the same search
        /// engine and computes the overlap between datasets from different search engines.
        /// </summary>
        /// <param name="dsOptions">Per-dataset options plus the shared options (FDR, conflict handling).</param>
        /// <param name="progress">Receives progress messages and is handed to each dataset builder.</param>
        /// <param name="paramFile">
        /// Parameter file path; conflicting spectra, if any, are written next to it with the
        /// ".conflicted.peps" extension.
        /// </param>
        /// <exception cref="Exception">
        /// Thrown when FDR filtering is enabled and a dataset contains no spectrum flagged as decoy.
        /// </exception>
        public void InitFromOptions(DatasetListOptions dsOptions, IProgressCallback progress, string paramFile)
        {
            this.Clear();

            this.conflictFunc = dsOptions.Options.GetConflictFunc();

            var fdrOptions = dsOptions.Options.FalseDiscoveryRate;

            this.fdrCalc = fdrOptions.GetFalseDiscoveryRateCalculator();
            IFilter <IIdentifiedSpectrum> decoyFilter = null;

            if (fdrOptions.FilterByFdr)
            {
                decoyFilter = dsOptions.Options.GetDecoySpectrumFilter();
            }

            this.dsOptions = dsOptions;

            // Baselines taken after the first dataset; later datasets are measured against them
            // to estimate memory need and finish time.
            long     afterFirstMemory = 0;
            DateTime afterFirstTime   = DateTime.Now;
            var      totalCount       = dsOptions.Sum(l => l.PathNames.Count);
            var      usedCount        = 0;

            for (int i = 0; i < dsOptions.Count; i++)
            {
                var m       = dsOptions[i];
                var builder = m.GetBuilder();

                builder.Progress = progress;

                Dataset ds = new Dataset(m);

                // First, get all spectra that passed the fixed filter criteria.
                ds.Spectra = builder.ParseFromSearchResult();
                ds.PSMPassedFixedCriteriaCount = ds.Spectra.Count;

                if (fdrOptions.FilterByFdr)
                {
                    // Flag each spectrum according to whether it comes from the decoy database.
                    progress.SetMessage("Assigning decoy information...");
                    DecoyPeptideBuilder.AssignDecoy(ds.Spectra, decoyFilter);
                    var decoyCount = ds.Spectra.Count(l => l.FromDecoy);
                    if (decoyCount == 0)
                    {
                        throw new Exception(string.Format("No decoy protein found at dataset {0}, make sure the protein access number parser and the decoy pattern are correctly defined!", m.Name));
                    }

                    progress.SetMessage("{0} decoys out of {1} hits found", decoyCount, ds.Spectra.Count);

                    ds.BuildSpectrumBin();
                    ds.CalculateCurrentFdr();
                    ds.PushCurrentOptimalResults(string.Format("Before maximum peptide fdr {0}", fdrOptions.MaxPeptideFdr));

                    progress.SetMessage("Filtering by maximum peptide fdr {0} ...", fdrOptions.MaxPeptideFdr);
                    ds.FilterByFdr(fdrOptions.MaxPeptideFdr);
                    ds.Spectra = ds.GetUnconflictedOptimalSpectra();
                    ds.BuildSpectrumBin();
                    ds.CalculateCurrentFdr();
                    ds.PushCurrentOptimalResults(string.Format("After maximum peptide fdr {0}", fdrOptions.MaxPeptideFdr));
                }

                this.Add(ds);

                // Working set in megabytes. The Process object is disposable, so it is released
                // right after the reading instead of being left for the finalizer.
                long currMemory;
                using (var currentProcess = Process.GetCurrentProcess())
                {
                    currMemory = currentProcess.WorkingSet64 / (1024 * 1024);
                }

                if (i == 0)
                {
                    afterFirstMemory = currMemory;
                    afterFirstTime   = DateTime.Now;
                }
                else
                {
                    usedCount += m.PathNames.Count;

                    double averageCost   = (double)(currMemory - afterFirstMemory) / usedCount;
                    double estimatedCost = afterFirstMemory + averageCost * totalCount;

                    DateTime currTime    = DateTime.Now;
                    var      averageTime = currTime.Subtract(afterFirstTime).TotalMinutes / usedCount;
                    var      finishTime  = afterFirstTime.AddMinutes(averageTime * (totalCount - dsOptions[0].PathNames.Count));
                    progress.SetMessage("{0}/{1} datasets, cost {2}M, avg {3:0.0}M, need {4:0.0}M, will finish at {5:MM-dd HH:mm:ss}", (i + 1), dsOptions.Count, currMemory, averageCost, estimatedCost, finishTime);
                }
            }

            // Initialize the experimental list of every dataset.
            this.ForEach(dataset => dataset.InitExperimentals());

            if (dsOptions.Count > 1)
            {
                if (dsOptions.Options.KeepTopPeptideFromSameEngineButDifferentSearchParameters)
                {
                    // Merge/remove results obtained from the same search engine with different parameters.
                    ProcessDatasetFromSameEngine(progress, (peptides, score) => IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(peptides, score), false);
                }
                else
                {
                    ProcessDatasetFromSameEngine(progress, (peptides, score) => IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters(peptides, score), true);
                }

                // Initialize the overlap relationships between datasets searched by different search engines.
                this.OverlapBySearchEngine = FindOverlap((m1, m2) => m1.Options.SearchEngine != m2.Options.SearchEngine);


                // Initialize the datasets that have no overlap with any other dataset.
                var overlaps = new HashSet <Dataset>(from overlapGroup in OverlapBySearchEngine
                                                     from dataset in overlapGroup
                                                     select dataset);
                this.NoOverlaps = this.Where(dataset => !overlaps.Contains(dataset)).ToList();

                if (OverlapBySearchEngine.Count > 0 && fdrOptions.FilterByFdr)
                {
                    // Filter by the maximum FDR.
                    progress.SetMessage("Filtering PSMs by maximum fdr {0}, considering multiple engine overlap...", fdrOptions.MaxPeptideFdr);
                    var realFdr = this.FilterByFdr(fdrOptions.MaxPeptideFdr);
                    if (realFdr.ConflictSpectra.Count > 0)
                    {
                        new MascotPeptideTextFormat(UniformHeader.PEPTIDE_HEADER).WriteToFile(Path.ChangeExtension(paramFile, ".conflicted.peps"), realFdr.ConflictSpectra);
                    }

                    // Keep each dataset's spectra as the filtered result, for use in later iterations.
                    this.ForEach(dataset =>
                    {
                        dataset.Spectra = dataset.GetUnconflictedOptimalSpectra();
                    });
                }
            }
            else
            {
                this.NoOverlaps            = new List <Dataset>(this);
                this.OverlapBySearchEngine = new List <List <Dataset> >();
            }
        }