Exemplo n.º 1
0
        /// <summary>
        /// Reads the peptides in <paramref name="fileName"/>, discards those whose q-value is at or
        /// above <c>fdr</c>, rounds each remaining mass difference and discards the ones that round
        /// to zero. The survivors are written to one file and a per-mass-difference count table to
        /// a second file.
        /// </summary>
        /// <param name="fileName">Peptide text file to read; also used as the prefix of both output names.</param>
        /// <returns>The filtered peptide file followed by the group-count file.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var peptideFormat = new MascotPeptideTextFormat();
            var spectra       = peptideFormat.ReadFromFile(fileName);

            // Drop every identification whose q-value is not below the threshold.
            spectra.RemoveAll(s => s.QValue >= fdr);

            // Round the mass difference in place, then drop the spectra whose rounded value is zero.
            foreach (var s in spectra)
            {
                s.TheoreticalMinusExperimentalMass = Math.Round(s.TheoreticalMinusExperimentalMass);
            }
            spectra.RemoveAll(s => s.TheoreticalMinusExperimentalMass == 0.0);

            var filteredFile = MyConvert.Format("{0}.fdr{1:0.000}.txt", fileName, fdr);
            peptideFormat.WriteToFile(filteredFile, spectra);

            // One group per distinct rounded mass difference, largest group first.
            var massGroups = spectra.GroupBy(s => s.TheoreticalMinusExperimentalMass).ToList();
            massGroups.Sort((a, b) => b.Count().CompareTo(a.Count()));

            var groupFile = MyConvert.Format("{0}.fdr{1:0.000}.groups", fileName, fdr);

            using (var writer = new StreamWriter(groupFile))
            {
                foreach (var massGroup in massGroups)
                {
                    // The key is written with its sign flipped, followed by the group size.
                    writer.WriteLine("{0:0}\t{1}", -massGroup.Key, massGroup.Count());
                }
            }

            return(new string[] { filteredFile, groupFile });
        }
        /// <summary>
        /// Reads the peptides in <paramref name="fileName"/> and keeps only those that have at least
        /// one protein name and whose protein names all match <c>regex</c>. The kept peptides are
        /// written next to the input with the extension <c>.snp.peptides</c>.
        /// </summary>
        /// <param name="fileName">Peptide text file to read.</param>
        /// <returns>A single-element array holding the output file name.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var peptideFormat = new MascotPeptideTextFormat();
            var allPeptides   = peptideFormat.ReadFromFile(fileName);

            var keptPeptides = allPeptides.FindAll(spectrum =>
            {
                var matchedAny = false;
                foreach (var proteinName in spectrum.Proteins)
                {
                    if (!regex.Match(proteinName).Success)
                    {
                        // A single non-matching protein disqualifies the peptide.
                        return(false);
                    }
                    matchedAny = true;
                }

                // An empty protein list never qualifies.
                return(matchedAny);
            });

            var outputFile = FileUtils.ChangeExtension(fileName, ".snp.peptides");

            peptideFormat.WriteToFile(outputFile, keptPeptides);

            return(new string[] { outputFile });
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reads the peptides in <paramref name="fileName"/>, removes every peptide that has at least
        /// one protein name matching <c>decoyReg</c>, and writes the rest next to the input with the
        /// extension <c>.target.peptides</c>.
        /// </summary>
        /// <param name="fileName">Peptide text file to read.</param>
        /// <returns>A single-element array holding the output file name.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var peptideFormat = new MascotPeptideTextFormat();
            var spectra       = peptideFormat.ReadFromFile(fileName);

            spectra.RemoveAll(spectrum =>
            {
                foreach (var proteinName in spectrum.Proteins)
                {
                    if (decoyReg.Match(proteinName).Success)
                    {
                        return(true);
                    }
                }
                return(false);
            });

            var outputFile = FileUtils.ChangeExtension(fileName, ".target.peptides");

            peptideFormat.WriteToFile(outputFile, spectra);

            return(new string[] { outputFile });
        }
        /// <summary>
        /// Reads the peptides in <paramref name="fileName"/> and the protein groups in
        /// <c>quantificationFile</c>. For each peptide it looks up the first group containing an
        /// entry named after the peptide's sequence (with I replaced by L), and the first group
        /// containing an entry named after its "OriginalSequence" annotation. For each hit it calls
        /// <c>AddRatio</c> with the prefix "MUL_" or "ORI_" respectively.
        /// </summary>
        /// <param name="fileName">Peptide text file to read; the output is this name plus ".quantification".</param>
        /// <returns>A single-element array holding the output file name.</returns>
        /// <exception cref="Exception">The quantification file yields no entries.</exception>
        public override IEnumerable <string> Process(string fileName)
        {
            Progress.SetMessage("Reading mutation file ...");
            var peptideFormat = new MascotPeptideTextFormat();
            var peptides      = peptideFormat.ReadFromFile(fileName);

            var resultFormat = new MascotResultTextFormat()
            {
                Progress = this.Progress
            };

            Progress.SetMessage("Reading quantification file ...");
            var quanGroups = resultFormat.ReadFromFile(quantificationFile);

            if (quanGroups.Count == 0)
            {
                throw new Exception("No quantification found!");
            }

            foreach (var peptide in peptides)
            {
                // Mutated form: the lookup name is the pure sequence with I replaced by L.
                var mutatedName  = peptide.Peptide.PureSequence.Replace('I', 'L');
                var mutatedGroup = quanGroups.Where(g => g.Any(entry => entry.Name.Equals(mutatedName))).FirstOrDefault();

                if (mutatedGroup != null)
                {
                    AddRatio(peptide, mutatedGroup, "MUL_");
                }

                // Original form: the lookup name comes from the annotation, used as-is.
                var originalName  = peptide.Annotations["OriginalSequence"] as string;
                var originalGroup = quanGroups.Where(g => g.Any(entry => entry.Name.Equals(originalName))).FirstOrDefault();

                if (originalGroup != null)
                {
                    AddRatio(peptide, originalGroup, "ORI_");
                }
            }

            peptideFormat.Initialize(peptides);

            var outputFile = fileName + ".quantification";

            Progress.SetMessage("Writing peptide quantification file ...");
            peptideFormat.WriteToFile(outputFile, peptides);

            return(new string[] { outputFile });
        }
        /// <summary>
        /// Combines percolator output PSMs with the matching input PSMs (matched through
        /// <c>GetPsmId</c>), keeps one PSM per query id, and writes the result sorted by descending
        /// <c>SpScore</c> to "&lt;PercolatorOutputFile&gt;.peptides". Q-values are then recalculated and
        /// the PSMs with q-value below 0.01 are written to a second file.
        /// </summary>
        /// <returns>
        /// A single-element array holding the unfiltered peptide file. The 0.01-filtered file is
        /// written but is not part of the returned list.
        /// </returns>
        public override IEnumerable <string> Process()
        {
            var outputPsms = new PercolatorOutputXmlPsmReader().ReadFromFile(_options.PercolatorOutputFile);
            var inputPsms  = new PercolatorInputXmlPsmReader().ReadFromFile(_options.PercolatorInputFile);
            var inputById  = inputPsms.ToDictionary(m => GetPsmId(m));

            // Copy query and mass details from the input PSM onto the matching output PSM.
            foreach (var psm in outputPsms)
            {
                var source = inputById[GetPsmId(psm)];

                psm.Query.QueryId            = source.Query.QueryId;
                psm.Query.FileScan.FirstScan = psm.Query.QueryId;
                psm.Query.FileScan.LastScan  = psm.Query.QueryId;
                psm.Query.Charge             = source.Query.Charge;
                psm.ExperimentalMH           = source.ExperimentalMH;
                psm.TheoreticalMH            = source.TheoreticalMH;
                psm.NumMissedCleavages       = source.NumMissedCleavages;
                psm.Score = source.Score;
            }

            var selected = new List <IIdentifiedSpectrum>();

            foreach (var candidates in outputPsms.GroupBy(m => m.Query.QueryId).ToList())
            {
                if (candidates.Count() == 1)
                {
                    selected.Add(candidates.First());
                    continue;
                }

                var ranked   = candidates.OrderByDescending(m => m.SpScore).ToList();
                var top      = ranked[0];
                var runnerUp = ranked[1];

                if (runnerUp.SpScore < top.SpScore || top.FromDecoy)
                {
                    // Either a clear winner, or a tie in which the first entry is a decoy.
                    selected.Add(top);
                }
                else if (runnerUp.FromDecoy)
                {
                    // Tie in which only the second entry is a decoy.
                    selected.Add(runnerUp);
                }
                else
                {
                    // Tie between two non-decoy entries: attach the second peptide to the first.
                    top.AddPeptide(runnerUp.Peptide);
                    selected.Add(top);
                }
            }

            selected.Sort((a, b) => b.SpScore.CompareTo(a.SpScore));

            var peptideFormat = new MascotPeptideTextFormat("QueryId\tSpectrumId\tFileScan\tSequence\tCharge\tScore\tSvmScore\tMissCleavage\tQValue\tTheoreticalMH\tExperimentMH\tTarget/Decoy");

            var targetFile = _options.PercolatorOutputFile + ".peptides";

            peptideFormat.WriteToFile(targetFile, selected);

            var qvalueCalculator = new QValueCalculator(new PercolatorScoreFunction(), new TargetFalseDiscoveryRateCalculator());
            qvalueCalculator.CalculateQValue(selected);
            selected.RemoveAll(m => m.QValue >= 0.01);

            var filteredFile = FileUtils.ChangeExtension(targetFile, ".FDR0.01.peptides");

            peptideFormat.WriteToFile(filteredFile, selected);

            return(new[] { targetFile });
        }
Exemplo n.º 6
0
        /// <summary>
        /// Computes peptide-level deuterium enrichment. Runs the chromatograph profile builder and
        /// the deuterium R template when their output files are missing or <c>options.Overwrite</c>
        /// is set, then annotates the input peptides with the R results. It writes the annotated
        /// peptides to <c>GetPeptideDeteriumFile()</c> and a peptide-by-time table of median
        /// enrichment to <c>options.OutputFile</c>.
        /// </summary>
        /// <returns>A single-element array holding <c>options.OutputFile</c>.</returns>
        public override IEnumerable <string> Process()
        {
            // Step 1: chromatograph extraction, with image drawing switched off.
            var profileOptions = new ChromatographProfileBuilderOptions();

            options.CopyProperties(profileOptions);
            profileOptions.InputFile  = options.InputFile;
            profileOptions.OutputFile = options.BoundaryOutputFile;
            profileOptions.DrawImage  = false;
            var profileBuilder = new ChromatographProfileBuilder(profileOptions);

            if (!File.Exists(options.BoundaryOutputFile) || options.Overwrite)
            {
                Progress.SetMessage("Finding envelope ...");
                profileBuilder.Progress = this.Progress;
                profileBuilder.Process();
            }

            // Step 2: run the deuterium R template over the boundary file.
            if (!File.Exists(options.DeuteriumOutputFile) || options.Overwrite)
            {
                Progress.SetMessage("Calculating deuterium ...");

                var rOptions = new RTemplateProcessorOptions();
                rOptions.InputFile      = options.BoundaryOutputFile;
                rOptions.OutputFile     = options.DeuteriumOutputFile;
                rOptions.RTemplate      = DeuteriumR;
                rOptions.RExecute       = SystemUtils.GetRExecuteLocation();
                rOptions.CreateNoWindow = true;

                rOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));
                rOptions.Parameters.Add("excludeIsotopic0<-" + (options.ExcludeIsotopic0 ? "1" : "0"));

                var rProcessor = new RTemplateProcessor(rOptions);
                rProcessor.Progress = this.Progress;
                rProcessor.Process();
            }

            // R results keyed by the text of their "ChroFile" annotation.
            var deuteriumByChroFile = new AnnotationFormat().ReadFromFile(options.DeuteriumOutputFile).ToDictionary(m => m.Annotations["ChroFile"].ToString());

            // Step 3: reload the input peptides and strip annotations left by an earlier run.
            var format  = new MascotPeptideTextFormat();
            var spectra = format.ReadFromFile(options.InputFile);

            var staleKeys = new[]
            {
                "RetentionTime",
                "TheoreticalDeuterium",
                "ObservedDeuterium",
                "NumDeuteriumIncorporated",
                "NumExchangableHydrogen",
                "DeuteriumEnrichmentPercent"
            };

            foreach (var spec in spectra)
            {
                foreach (var staleKey in staleKeys)
                {
                    spec.Annotations.Remove(staleKey);
                }
            }

            var calcSpectra = new List <IIdentifiedSpectrum>();
            var aminoacids  = new Aminoacids();

            foreach (var pep in spectra)
            {
                var chroKey = Path.GetFileNameWithoutExtension(profileBuilder.GetTargetFile(pep));
                if (!deuteriumByChroFile.ContainsKey(chroKey))
                {
                    // No R result for this peptide: it is left out of every output.
                    continue;
                }

                var computed = deuteriumByChroFile[chroKey];

                var exchangeableHydrogens = aminoacids.ExchangableHAtom(pep.Peptide.PureSequence);
                var incorporated          = double.Parse(computed.Annotations["NumDeuteriumIncorporated"] as string);

                pep.Annotations["PeakRetentionTime"]          = computed.Annotations["RetentionTime"];
                pep.Annotations["TheoreticalDeuterium"]       = computed.Annotations["TheoreticalDeuterium"];
                pep.Annotations["ObservedDeuterium"]          = computed.Annotations["ObservedDeuterium"];
                pep.Annotations["NumDeuteriumIncorporated"]   = computed.Annotations["NumDeuteriumIncorporated"];
                pep.Annotations["NumExchangableHydrogen"]     = exchangeableHydrogens;
                pep.Annotations["DeuteriumEnrichmentPercent"] = incorporated / exchangeableHydrogens;

                calcSpectra.Add(pep);
            }

            format.PeptideFormat.Headers += "\tPeakRetentionTime\tTheoreticalDeuterium\tObservedDeuterium\tNumDeuteriumIncorporated\tNumExchangableHydrogen\tDeuteriumEnrichmentPercent";
            format.NotExportSummary       = true;
            format.WriteToFile(GetPeptideDeteriumFile(), calcSpectra);

            // Step 4: one row per pure sequence, one column per distinct experimental time.
            var bySequence = calcSpectra.GroupBy(m => m.Peptide.PureSequence).OrderBy(l => l.Key).ToList();

            var times = options.ExperimentalTimeMap.Values.Distinct().OrderBy(m => m).ToArray();

            using (var sw = new StreamWriter(options.OutputFile))
            {
                sw.WriteLine("Peptide\t{0}", times.Select(t => t.ToString()).Merge("\t"));

                foreach (var peptide in bySequence)
                {
                    var byTime = peptide.GroupBy(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]).ToDictionary(l => l.Key, l => l.ToArray());

                    // Optionally skip peptides that were not observed at every time point.
                    if (options.PeptideInAllTimePointOnly && !times.All(l => byTime.ContainsKey(l)))
                    {
                        continue;
                    }

                    sw.Write(peptide.Key);

                    foreach (var time in times)
                    {
                        IIdentifiedSpectrum[] atTime;
                        if (byTime.TryGetValue(time, out atTime))
                        {
                            var enrichments = atTime.Select(spec => double.Parse(spec.Annotations["DeuteriumEnrichmentPercent"].ToString())).ToArray();
                            var median      = Statistics.Median(enrichments);
                            sw.Write("\t{0:0.######}", median);
                        }
                        else
                        {
                            sw.Write("\tNA");
                        }
                    }
                    sw.WriteLine();
                }
            }

            Progress.SetMessage("Peptide deuterium enrichment calculation finished ...");

            return(new string[] { options.OutputFile });
        }
Exemplo n.º 7
0
        /// <summary>
        /// Computes protein-level deuterium enrichment. The protein groups in
        /// <c>options.InputFile</c> are reduced to their spectra, those spectra are run through a
        /// <c>PeptideDeuteriumCalculator</c>, and the resulting annotations are copied back onto the
        /// original spectra. Peptides without a "DeuteriumEnrichmentPercent" annotation are dropped.
        /// The method then writes an ".individual.tsv" protein file and a ".times.tsv"
        /// protein-by-time table of median enrichment, and runs the <c>RatioR</c> R template on that
        /// table to produce <c>options.OutputFile</c>.
        /// </summary>
        /// <returns>A single-element array holding <c>options.OutputFile</c>.</returns>
        public override IEnumerable <string> Process()
        {
            // Prepare the unique peptide file: read the protein groups, drop ambiguous spectra,
            // and collect the remaining spectra.
            var format   = new MascotResultTextFormat();
            var proteins = format.ReadFromFile(options.InputFile);

            proteins.RemoveAmbiguousSpectra();

            var spectra = proteins.GetSpectra();

            // Strip deuterium annotations that may already be present on the spectra.
            foreach (var spec in spectra)
            {
                spec.Annotations.Remove("TheoreticalDeuterium");
                spec.Annotations.Remove("ObservedDeuterium");
                spec.Annotations.Remove("NumDeuteriumIncorporated");
                spec.Annotations.Remove("NumExchangableHydrogen");
                spec.Annotations.Remove("DeuteriumEnrichmentPercent");
            }

            var peptideFile   = Path.ChangeExtension(options.InputFile, ".unique.peptides");
            var peptideFormat = new MascotPeptideTextFormat(format.PeptideFormat.Headers);

            peptideFormat.WriteToFile(peptideFile, spectra);

            // Calculate deuterium enrichment at peptide level, reusing this calculator's options
            // but pointing input/output at the peptide file written above.
            var pepOptions = new DeuteriumCalculatorOptions();

            options.CopyProperties(pepOptions);
            pepOptions.InputFile  = peptideFile;
            pepOptions.OutputFile = peptideFile + ".tsv";

            var pepCalc = new PeptideDeuteriumCalculator(pepOptions);

            pepCalc.Progress = this.Progress;
            pepCalc.Process();

            // Copy annotations from the calculated peptides back to the original spectra,
            // matching them by FileScan.LongFileName.
            var calcSpectra   = peptideFormat.ReadFromFile(pepCalc.GetPeptideDeteriumFile());
            var oldSpectraMap = spectra.ToDictionary(m => m.Query.FileScan.LongFileName);

            foreach (var calcSpec in calcSpectra)
            {
                var oldSpec = oldSpectraMap[calcSpec.Query.FileScan.LongFileName];
                foreach (var ann in calcSpec.Annotations)
                {
                    oldSpec.Annotations[ann.Key] = ann.Value;
                }
            }

            // Remove peptides that have no calculation result. Iterating from the end keeps the
            // remaining indices valid while groups are removed.
            for (int i = proteins.Count - 1; i >= 0; i--)
            {
                foreach (var protein in proteins[i])
                {
                    protein.Peptides.RemoveAll(l => !l.Spectrum.Annotations.ContainsKey("DeuteriumEnrichmentPercent"));
                }

                // Only the first protein of the group is inspected here.
                // NOTE(review): presumably all proteins in a group share the same peptides - confirm.
                if (proteins[i][0].Peptides.Count == 0)
                {
                    proteins.RemoveAt(i);
                }
            }

            format.PeptideFormat = peptideFormat.PeptideFormat;

            var noredundantFile = Path.ChangeExtension(options.OutputFile, ".individual.tsv");

            format.WriteToFile(noredundantFile, proteins);

            // Distinct experimental time points in ascending order; these are the table columns.
            var times    = options.ExperimentalTimeMap.Values.Distinct().OrderBy(m => m).ToArray();
            var timeFile = Path.ChangeExtension(options.OutputFile, ".times.tsv");

            using (var sw = new StreamWriter(timeFile))
            {
                sw.WriteLine("Protein\t{0}", (from t in times select t.ToString()).Merge("\t"));

                foreach (var protein in proteins)
                {
                    var curSpectra = protein[0].GetSpectra();
                    if (options.PeptideInAllTimePointOnly)
                    {
                        // Rebuild curSpectra from only those peptide sequences that have spectra
                        // at every time point.
                        // NOTE(review): assumes GetSpectra() returns a list that is safe to clear - confirm.
                        var curMap = curSpectra.ToGroupDictionary(l => l.Peptide.PureSequence);
                        curSpectra.Clear();
                        foreach (var peps in curMap.Values)
                        {
                            var pepMap = peps.ToGroupDictionary(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]);
                            if (times.All(time => pepMap.ContainsKey(time)))
                            {
                                curSpectra.AddRange(peps);
                            }
                        }
                    }

                    // Protein groups left without spectra get no row in the table.
                    if (curSpectra.Count == 0)
                    {
                        continue;
                    }

                    // Row label: the names of all proteins in the group joined by "/".
                    sw.Write((from p in protein select p.Name).Merge("/"));
                    var curTimeMap = curSpectra.ToGroupDictionary(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]);

                    foreach (var time in times)
                    {
                        if (curTimeMap.ContainsKey(time))
                        {
                            // Median enrichment over all spectra observed at this time point.
                            var deps      = (from spec in curTimeMap[time] select double.Parse(spec.Annotations["DeuteriumEnrichmentPercent"].ToString())).ToArray();
                            var depMedian = Statistics.Median(deps);
                            sw.Write("\t{0:0.######}", depMedian);
                        }
                        else
                        {
                            sw.Write("\tNA");
                        }
                    }
                    sw.WriteLine();
                }
            }

            // Run the RatioR template on the time table to produce the final output file.
            Progress.SetMessage("Calculating ratio consistant ...");
            var deuteriumOptions = new RTemplateProcessorOptions()
            {
                InputFile      = timeFile,
                OutputFile     = options.OutputFile,
                RTemplate      = RatioR,
                RExecute       = SystemUtils.GetRExecuteLocation(),
                CreateNoWindow = true
            };

            new RTemplateProcessor(deuteriumOptions)
            {
                Progress = this.Progress
            }.Process();

            Progress.SetMessage("Finished ...");

            return(new string[] { options.OutputFile });
        }
        /// <summary>
        /// Copies, from every file in <c>sourceFiles</c>, the sections whose scan matches a peptide
        /// in <paramref name="peptideFile"/> into per-result files named by
        /// <c>GetResultFilename</c>. For each result file that received at least one section, the
        /// matched peptides are also written to a sibling ".peptides" file. Peptides that were never
        /// matched are written to "&lt;peptideFile&gt;.missed".
        /// </summary>
        /// <param name="peptideFile">Peptide text file listing the identifications to extract.</param>
        /// <returns>
        /// The result files that received at least one section, followed by the ".missed" file when
        /// any peptide stayed unmatched. The ".peptides" files are written but not returned.
        /// </returns>
        /// <exception cref="UserTerminatedException">The progress callback reports a pending cancellation.</exception>
        public override IEnumerable <string> Process(string peptideFile)
        {
            Progress.SetMessage("Loading peptide file {0}...", peptideFile);

            var format   = new MascotPeptideTextFormat();
            var peptides = format.ReadFromFile(peptideFile);

            // Peptides still waiting for their section, keyed by scan. Entries are removed as
            // they are found, so whatever remains at the end was missed.
            var map = peptides.ToDictionary(p => GetScan(p.Query.FileScan));

            var pepMap = new Dictionary <string, List <IIdentifiedSpectrum> >();

            // The dot before "msm" is escaped so that it only matches a literal extension
            // separator; unescaped, it would match any character.
            Regex silac = new Regex(@"\.((?:iso|sil\d))_\d+\.msm");

            // Result file name -> writer. The writer stays null until the first section is
            // written, so result files with no matches are never created.
            Dictionary <string, StreamWriter> swmap = new Dictionary <string, StreamWriter>();

            try
            {
                int count = 0;
                foreach (var msmFile in sourceFiles)
                {
                    string resultFileName = GetResultFilename(silac, msmFile, peptideFile);
                    if (!swmap.ContainsKey(resultFileName))
                    {
                        swmap[resultFileName] = null;
                    }

                    count++;

                    Progress.SetMessage("Parsing {0}/{1} : {2} ...", count, sourceFiles.Length, msmFile);
                    using (var sr = new StreamReader(msmFile))
                    {
                        Progress.SetRange(0, sr.BaseStream.Length);
                        MascotGenericFormatSectionReader reader = new MascotGenericFormatSectionReader(sr);

                        // Stop reading this file early once every peptide has been found.
                        while (reader.HasNext() && map.Count > 0)
                        {
                            if (Progress.IsCancellationPending())
                            {
                                throw new UserTerminatedException();
                            }

                            string title = reader.GetNextTitle();
                            var    scan  = GetScan(parser.GetValue(title));
                            if (map.ContainsKey(scan))
                            {
                                var spectrum = map[scan];
                                var section  = reader.Next();

                                var sw = swmap[resultFileName];
                                if (sw == null)
                                {
                                    sw = new StreamWriter(resultFileName);
                                    swmap[resultFileName] = sw;

                                    pepMap[resultFileName] = new List <IIdentifiedSpectrum>();
                                }

                                section.ForEach(m => sw.WriteLine(m));
                                pepMap[resultFileName].Add(spectrum);

                                map.Remove(scan);
                            }
                            else
                            {
                                reader.SkipNext();
                            }

                            Progress.SetPosition(sr.BaseStream.Position);
                        }
                    }
                }
            }
            finally
            {
                // Close every writer that was opened, including when parsing was interrupted.
                foreach (var sw in swmap.Values)
                {
                    if (sw != null)
                    {
                        sw.Close();
                    }
                }
            }

            // Only result files that were actually created are reported.
            var result = new List <string>(from k in swmap
                                           where k.Value != null
                                           select k.Key);

            foreach (var pep in pepMap)
            {
                var pepFilename = FileUtils.ChangeExtension(pep.Key, ".peptides");
                format.WriteToFile(pepFilename, pep.Value);
            }

            if (map.Count > 0)
            {
                var missed = peptideFile + ".missed";
                result.Add(missed);
                format.WriteToFile(missed, map.Values.ToList());
            }

            return(result);
        }