/// <summary>
        /// Reads the peptide file and writes a small labeling-efficiency report to
        /// <c>fileName + ".labelingEfficiency"</c>.
        /// </summary>
        /// <remarks>
        /// The report contains four tab-separated rows: the total PSM count, the number of matched
        /// sequences whose first character is not a letter (reported as N-terminal modified), the
        /// number of sequences containing <c>modificationAminoacid</c>, and the number of those for
        /// which <c>IsFullModifiedK</c> returns true.
        /// </remarks>
        /// <param name="fileName">Peptide text file to read.</param>
        /// <returns>A single-element array holding the report file name.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var peptides = new MascotPeptideTextFormat().ReadFromFile(fileName);

            var seqs = peptides.Select(p => PeptideUtils.GetMatchedSequence(p.Peptide.Sequence)).ToList();

            // An empty matched sequence has no first character; count it as unmodified
            // instead of failing on seq[0].
            var nmod = seqs.Count(seq => seq.Length > 0 && !char.IsLetter(seq[0]));

            // Filter once, then derive both counts from the same candidate list.
            var containing = seqs.Where(seq => seq.Contains(modificationAminoacid)).ToList();
            var kseqs      = containing.Count;
            var kmod       = containing.Count(seq => IsFullModifiedK(seq));

            var result = fileName + ".labelingEfficiency";

            using (StreamWriter sw = new StreamWriter(result))
            {
                sw.WriteLine("Total PSMs\t{0}", peptides.Count);
                sw.WriteLine("N-terminal modified PSMs\t{0}", nmod);
                sw.WriteLine("{0}-contained PSMs\t{1}", modificationAminoacid, kseqs);
                // Only two arguments are supplied, so the count is placeholder {1};
                // the previous {2} raised a FormatException on every run.
                sw.WriteLine("{0}-full-modified PSMs\t{1}", modificationAminoacid, kmod);
            }

            return(new string[] { result });
        }
        /// <summary>
        /// Keeps the peptides that have at least one protein entry and whose protein entries all
        /// match <c>regex</c>, then writes them to a sibling file with the ".snp.peptides" extension.
        /// </summary>
        /// <param name="fileName">Peptide text file to read.</param>
        /// <returns>A single-element array holding the output file name.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var format   = new MascotPeptideTextFormat();
            var peptides = format.ReadFromFile(fileName);

            var resultpeptides = peptides.FindAll(m =>
            {
                var proteinCount = 0;
                var matchedCount = 0;
                foreach (var protein in m.Proteins)
                {
                    proteinCount++;
                    if (regex.Match(protein).Success)
                    {
                        matchedCount++;
                    }
                }

                // Accept only when something matched and nothing failed to match.
                return matchedCount > 0 && matchedCount == proteinCount;
            });

            var result = FileUtils.ChangeExtension(fileName, ".snp.peptides");

            format.WriteToFile(result, resultpeptides);

            return new string[] { result };
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reads the peptide file, groups the spectra (by peptide when <c>isSiteLevel</c> is set,
        /// otherwise by unique peptide) and rewrites the description of the first entry of every
        /// group using the access-number reference map read from the FASTA file.
        /// </summary>
        /// <param name="fileName">Peptide text file to read.</param>
        /// <returns>The grouped result with rewritten descriptions.</returns>
        protected override IIdentifiedResult GetIdentifiedResult(string fileName)
        {
            format = new MascotPeptideTextFormat();
            var spectra = format.ReadFromFile(fileName);

            IIdentifiedResult result;
            if (!isSiteLevel)
            {
                result = IdentifiedSpectrumUtils.BuildGroupByUniquePeptide(spectra);
            }
            else
            {
                result = IdentifiedSpectrumUtils.BuildGroupByPeptide(spectra);
            }

            var map = SequenceUtils.ReadAccessNumberReferenceMap(new FastaFormat(), this.fastaFile, this.parser);

            foreach (var group in result)
            {
                // The description holds '/'-separated protein names; each one is parsed
                // and replaced by its entry in the reference map.
                var head = group[0];
                head.Description = head.Description
                                   .Split('/')
                                   .Select(p => map[parser.GetValue(p)])
                                   .ToList()
                                   .Merge(" ! ");
            }

            return result;
        }
        /*
         * public IPeptideMassCalculator GetPeptideMassCalculator(MascotModificationItem dynamicModification)
         * {
         * bool isMono = true;
         *
         * var aas = new Aminoacids();
         * staticModification.ForEach(m => aas[aas].ResetMass(aas[m].MonoMass + staticModifications[aa], aas[aa].AverageMass + staticModifications[aa]);
         * }
         *
         * var diff = new[] { '*', '#', '@', '^', '~', '$' };
         * int i = 0;
         * foreach (double mod in Diff_search_options.Values)
         * {
         *  aas[diff[i++]].ResetMass(mod, mod);
         * }
         *
         * double nterm = isMono ? Atom.H.MonoMass : Atom.H.AverageMass;
         * double cterm = isMono ? Atom.H.MonoMass + Atom.O.MonoMass : Atom.H.AverageMass + Atom.O.AverageMass;
         *
         * if (this.term_diff_search_options.First != 0.0 || this.term_diff_search_options.Second != 0.0)
         * {
         *  throw new Exception(
         *    "Term dynamic modification has not been implemented into this function, call author to fix it.");
         * }
         *
         * IPeptideMassCalculator result;
         * if (isMono)
         * {
         *  result = new MonoisotopicPeptideMassCalculator(aas, nterm, cterm);
         * }
         * else
         * {
         *  result = new AveragePeptideMassCalculator(aas, nterm, cterm);
         * }
         *
         * return result;
         * }
         */
        /// <summary>
        /// Placeholder implementation: parses the peptide file and produces no output.
        /// </summary>
        /// <param name="fileName">Peptide text file to read.</param>
        /// <returns>Always <c>null</c>.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            // The parsed spectra are discarded; the read is kept so that any read
            // failure still surfaces to the caller.
            new MascotPeptideTextFormat().ReadFromFile(fileName);

            return null;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Filters peptides by <c>fdr</c>, rounds their theoretical-minus-experimental mass, drops
        /// the ones that round to zero, and writes both the remaining peptides and a per-mass-delta
        /// count table.
        /// </summary>
        /// <param name="fileName">Peptide text file to read; also the prefix of both output names.</param>
        /// <returns>The peptide file name followed by the group table file name.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var format   = new MascotPeptideTextFormat();
            var peptides = format.ReadFromFile(fileName);

            peptides.RemoveAll(m => m.QValue >= fdr);
            foreach (var peptide in peptides)
            {
                peptide.TheoreticalMinusExperimentalMass = Math.Round(peptide.TheoreticalMinusExperimentalMass);
            }
            peptides.RemoveAll(m => m.TheoreticalMinusExperimentalMass == 0.0);

            var peptideFile = MyConvert.Format("{0}.fdr{1:0.000}.txt", fileName, fdr);
            format.WriteToFile(peptideFile, peptides);

            // Largest groups first.
            var groups = peptides.GroupBy(m => m.TheoreticalMinusExperimentalMass).ToList();
            groups.Sort((left, right) => right.Count().CompareTo(left.Count()));

            var groupFile = MyConvert.Format("{0}.fdr{1:0.000}.groups", fileName, fdr);
            using (StreamWriter sw = new StreamWriter(groupFile))
            {
                foreach (var massGroup in groups)
                {
                    // The key is theoretical minus experimental; the sign is flipped on output.
                    sw.WriteLine("{0:0}\t{1}", -massGroup.Key, massGroup.Count());
                }
            }

            return new string[] { peptideFile, groupFile };
        }
Exemplo n.º 6
0
        /// <summary>
        /// Checks that, for every classification bin, keeping the top peptide directly yields the
        /// same number of spectra as first keeping unconflicted peptides and then keeping the top
        /// peptide inside each bin.
        /// </summary>
        public void TestSameEngineDifferentParameters()
        {
            var co = new ClassificationOptions
            {
                ClassifyByCharge             = true,
                ClassifyByMissCleavage       = true,
                ClassifyByModification       = true,
                ModifiedAminoacids           = "STY",
                ClassifyByNumProteaseTermini = true
            };

            var testDir = TestContext.CurrentContext.TestDirectory;

            var s1 = new MascotPeptideTextFormat().ReadFromFile(testDir + "/../../../data/deisotopic.peptides");
            IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(s1);
            s1.ForEach(m => m.Tag = "deisotopic");

            var s2 = new MascotPeptideTextFormat().ReadFromFile(testDir + "/../../../data/deisotopic-top10.peptides");
            IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(s2);
            s2.ForEach(m => m.Tag = "deisotopic-top");

            var all = s1.Union(s2).ToList();

            // Strategy 1: keep the top peptide over the merged list, then bin.
            var p1 = new List <IIdentifiedSpectrum>(all);
            IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(p1, new ScoreFunction());
            p1.ForEach(m => m.ClassificationTag = "deisotopic/deisotopic-top");
            var bin1 = co.BuildSpectrumBin(p1);

            // Strategy 2: keep unconflicted peptides, bin, then keep the top peptide per bin.
            var p2 = new List <IIdentifiedSpectrum>(all);
            IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters(p2, new ScoreFunction());
            p2.ForEach(m => m.ClassificationTag = "deisotopic/deisotopic-top");
            var bin2 = co.BuildSpectrumBin(p2);

            foreach (var candidate in bin2)
            {
                IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(candidate.Spectra, new ScoreFunction());

                // Bins are paired by the string form of their condition.
                var condition   = candidate.Condition.ToString();
                var counterpart = bin1.Find(a => a.Condition.ToString().Equals(condition));
                Assert.AreEqual(candidate.Spectra.Count, counterpart.Spectra.Count);
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Verifies that peptides read from the test file start without protein entries and that
        /// every one of them has at least one after <c>FillProteinInformation</c> runs.
        /// </summary>
        public void TestFillProteinInformation()
        {
            var testDir  = TestContext.CurrentContext.TestDirectory;
            var peptides = new MascotPeptideTextFormat().ReadFromFile(testDir + "/../../../data/Test.output.xml.FDR0.01.peptides");

            // Before: no peptide carries any protein.
            Assert.IsFalse(peptides.Any(m => m.Peptide.Proteins.Count != 0));

            IdentifiedSpectrumUtils.FillProteinInformation(peptides, testDir + "/../../../data//Test.output.xml.FDR0.01.peptides.proteins");

            // After: no peptide is left without a protein.
            Assert.IsFalse(peptides.Any(m => m.Peptide.Proteins.Count <= 0));
        }
Exemplo n.º 8
0
        /// <summary>
        /// Verifies the q-value computed for the first peptide of the test file after filtering by
        /// expect value and pure-sequence length and flagging decoys by protein name.
        /// </summary>
        public void TestCalculateQValue()
        {
            var peptideFile = TestContext.CurrentContext.TestDirectory + "/../../../data/QTOF_Ecoli.LowRes.t.xml.peptides";
            var peptides    = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

            peptides.RemoveAll(m => m.ExpectValue > 0.05 || m.Peptide.PureSequence.Length < 6);

            // A peptide is a decoy when any of its protein names contains "REVERSE_".
            foreach (var peptide in peptides)
            {
                peptide.FromDecoy = peptide.Proteins.Any(l => l.Contains("REVERSE_"));
            }

            var scoreFunction = new ExpectValueFunction();
            var fdrCalculator = new TargetFalseDiscoveryRateCalculator();
            IdentifiedSpectrumUtils.CalculateQValue(peptides, scoreFunction, fdrCalculator);

            Assert.AreEqual(0.0267, peptides[0].QValue, 0.0001);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Removes every peptide that has at least one protein name matching <c>decoyReg</c> and
        /// writes the remainder to a sibling file with the ".target.peptides" extension.
        /// </summary>
        /// <param name="fileName">Peptide text file to read.</param>
        /// <returns>A single-element array holding the output file name.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var format = new MascotPeptideTextFormat();

            var targets = format.ReadFromFile(fileName);
            targets.RemoveAll(peptide => peptide.Proteins.Any(name => decoyReg.Match(name).Success));

            var targetFile = FileUtils.ChangeExtension(fileName, ".target.peptides");
            format.WriteToFile(targetFile, targets);

            return new string[] { targetFile };
        }
        /// <summary>
        /// Reads the peptide file and fills <c>expPPMMap</c> with one (experiment, mean ppm) pair per
        /// experiment, ordered by experiment name descending.
        /// </summary>
        /// <param name="peptideFile">Peptide text file to read.</param>
        protected override void PrepareBeforeProcessing(string peptideFile)
        {
            Progress.SetMessage("Reading peptides from " + peptideFile);

            var peptides = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

            var byExperiment = peptides
                               .GroupBy(m => m.Query.FileScan.Experimental)
                               .ToDictionary(g => g.Key);

            // The mean is evaluated per experiment before the ordering is applied.
            expPPMMap = byExperiment
                        .Select(exp => new
                        {
                            Name = exp.Key,
                            Mean = Statistics.Mean(exp.Value.Select(pep => PrecursorUtils.mz2ppm(pep.TheoreticalMass, pep.TheoreticalMinusExperimentalMass)))
                        })
                        .OrderByDescending(item => item.Name)
                        .Select(item => new Pair <string, double>(item.Name, item.Mean))
                        .ToList();
        }
        /// <summary>
        /// Builds an insertion database: the output starts with the full text of the database file,
        /// followed by one FASTA entry for every identified pure sequence with one amino acid
        /// inserted at each position, optionally followed by the reversed form of that entry.
        /// </summary>
        /// <remarks>
        /// Insertion positions run from 0 to <c>Length - 1</c>, so nothing is appended after the
        /// last residue. The inserted residues are the visible amino acids except 'I'.
        /// </remarks>
        /// <returns>A single-element array holding <c>options.OutputFile</c>.</returns>
        public override IEnumerable <string> Process()
        {
            var format = new MascotPeptideTextFormat();

            Progress.SetMessage("reading peptide-spectra-matches from " + options.PeptideFile + " ...");
            var spectra = format.ReadFromFile(options.PeptideFile);

            // One peptide per pure sequence; a later spectrum overwrites an earlier one.
            var peptideBySequence = new Dictionary <string, IIdentifiedPeptide>();
            foreach (var spectrum in spectra)
            {
                peptideBySequence[spectrum.Peptide.PureSequence] = spectrum.Peptide;
            }

            var aas = new Aminoacids().GetVisibleAminoacids()
                      .Where(c => c != 'I')
                      .Select(c => c.ToString())
                      .Merge("");

            var fasta = new FastaFormat();

            Progress.SetMessage("inserting amino acid ...");
            using (var sw = new StreamWriter(options.OutputFile))
            {
                sw.WriteLine(File.ReadAllText(options.DatabaseFile));

                var reversed_index = 1000000;
                foreach (var seq in peptideBySequence.Keys.OrderBy(m => m).ToArray())
                {
                    for (int position = 0; position < seq.Length; position++)
                    {
                        foreach (var aa in aas)
                        {
                            var inserted  = seq.Insert(position, aa.ToString());
                            var reference = string.Format("INS_{0}_{1}{2} Insertion of {3}", seq, position, aa, peptideBySequence[seq].Proteins.Merge("/"));
                            fasta.WriteSequence(sw, new Sequence(reference, inserted));

                            if (!options.GenerateReversedPeptide)
                            {
                                continue;
                            }

                            var reversed          = SequenceUtils.GetReversedSequence(inserted);
                            var reversedReference = string.Format("REVERSED_{0}", reversed_index++);
                            fasta.WriteSequence(sw, new Sequence(reversedReference, reversed));
                        }
                    }
                }
            }

            return new[] { options.OutputFile };
        }
Exemplo n.º 12
0
        /// <summary>
        /// For every spectrum that has a ratio, creates a copy for each other raw file paired with
        /// its own raw file, positions the copy at the scan of the most intense observed envelope,
        /// marks it as an extended identification and attaches it to the source spectrum.
        /// </summary>
        /// <param name="spectra">Spectra to inspect.</param>
        /// <param name="detailDir">Directory passed to <c>GetRatioFile</c> to locate the ratio file.</param>
        /// <returns>All newly created spectra.</returns>
        private List <IIdentifiedSpectrum> DuplicateSpectrum(List <IIdentifiedSpectrum> spectra, string detailDir)
        {
            var duplicates = new List <IIdentifiedSpectrum>();

            // Map each raw file name to the list of raw files it is paired with.
            var pairedRaws = new Dictionary <string, List <string> >();
            foreach (var raws in rawpairs.Values)
            {
                foreach (var raw in raws)
                {
                    pairedRaws[raw] = raws;
                }
            }

            var format = new MascotPeptideTextFormat();

            foreach (var spectrum in spectra)
            {
                if (!spectrum.HasRatio())
                {
                    continue;
                }

                var silacFile    = spectrum.GetRatioFile(detailDir);
                var silacResult  = new SilacQuantificationSummaryItemXmlFormat().ReadFromFile(silacFile);
                var maxIntensity = silacResult.ObservedEnvelopes.Max(m => Math.Max(m.LightIntensity, m.HeavyIntensity));
                var scan         = silacResult.ObservedEnvelopes.Find(m => m.LightIntensity == maxIntensity || m.HeavyIntensity == maxIntensity).Scan;

                // Copies are made by serialising the spectrum and parsing it back.
                var serialized = format.PeptideFormat.GetString(spectrum);
                var sourceRaw  = spectrum.Query.FileScan.Experimental;

                foreach (var otherRaw in pairedRaws[sourceRaw])
                {
                    if (!otherRaw.Equals(sourceRaw))
                    {
                        var copy = format.PeptideFormat.ParseString(serialized);
                        copy.Query.FileScan.Experimental = otherRaw;
                        copy.Query.FileScan.FirstScan    = scan;
                        copy.Query.FileScan.LastScan     = scan;
                        copy.SetExtendedIdentification(true);

                        duplicates.Add(copy);
                        spectrum.AddDuplicatedSpectrum(copy);
                    }
                }
            }

            return duplicates;
        }
Exemplo n.º 13
0
        /// <summary>
        /// Annotates each peptide with the name of the first database sequence that contains its
        /// pure sequence and writes the peptides, with an extra "MutDB" column, to
        /// <c>fileName + ".mutdb"</c>.
        /// </summary>
        /// <remarks>
        /// Database sequences whose name starts with "rev_" or does not contain "|#" are ignored.
        /// </remarks>
        /// <param name="fileName">Peptide text file to read.</param>
        /// <returns>A single-element array holding the output file name.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            Progress.SetMessage("Reading sequences from " + database + " ...");
            var seqs = SequenceUtils.Read(new FastaFormat(), database);
            seqs.RemoveAll(m => m.Name.StartsWith("rev_") || !m.Name.Contains("|#"));

            var format = new MascotPeptideTextFormat();

            Progress.SetMessage("Procesing peptides from " + Path.GetFileName(fileName) + " ...");
            var peptides = format.ReadFromFile(fileName);

            Progress.SetRange(0, peptides.Count);
            foreach (var peptide in peptides)
            {
                Progress.Increment(1);

                var pureSeq = peptide.Annotations["PureSequence"] as string;

                // Only the first containing sequence is recorded.
                var hit = seqs.Find(seq => seq.SeqString.Contains(pureSeq));
                if (hit != null)
                {
                    peptide.Annotations["MutDB"] = hit.Name;
                }
            }

            var result = fileName + ".mutdb";

            using (StreamWriter sw = new StreamWriter(result))
            {
                sw.WriteLine(format.PeptideFormat.GetHeader() + "\tMutDB");
                foreach (var peptide in peptides)
                {
                    sw.Write(format.PeptideFormat.GetString(peptide));

                    // Peptides without a hit still get the (empty) extra column.
                    var mutdbColumn = peptide.Annotations.ContainsKey("MutDB")
                                      ? "\t" + peptide.Annotations["MutDB"]
                                      : "\t";
                    sw.WriteLine(mutdbColumn);
                }
            }

            return new string[] { result };
        }
Exemplo n.º 14
0
        /// <summary>
        /// Splits the entries of <paramref name="curtype"/> into paired one-to-one, paired
        /// one-to-multiple and unpaired sets and writes a result file plus a peptide file for each.
        /// </summary>
        /// <remarks>
        /// An entry is paired when it has more than one element and unpaired when it has exactly
        /// one. A paired entry is one-to-one when no other paired entry shares the long file name of
        /// the top-scoring spectrum of its first element.
        /// </remarks>
        /// <returns>
        /// The six output file names: one-to-one, one-to-one peptides, one-to-multiple,
        /// one-to-multiple peptides, unpaired, unpaired peptides.
        /// </returns>
        private string[] DoStatistic(string fileName, Aminoacids aas, MascotPeptideTextFormat format, Dictionary <string, Sequence> proMap, IClassification <IIdentifiedPeptide> classification, string mutHeader, MascotPeptideTextFormat mutPepFormat, List <List <IGrouping <string, IIdentifiedPeptide> > > curtype, string curname)
        {
            var outputPrefix = fileName + curname;

            var pairedMut = curtype.Where(r => r.Count > 1).ToList();

            var byTopFile = pairedMut.GroupBy(m => GetMaxScore(m[0]).Spectrum.Query.FileScan.LongFileName);

            var pairedOne2OneMut = byTopFile
                                   .Where(d => d.Count() == 1)
                                   .SelectMany(d => d)
                                   .ToList();

            var pairedOne2OneFile        = outputPrefix + ".paired.one2one.mut";
            var pairedOne2OnePeptideFile = OutputPairedResult(aas, format, proMap, classification, mutHeader, mutPepFormat, pairedOne2OneMut, pairedOne2OneFile);

            var pairedOne2MultipleMut = pairedMut
                                        .Except(pairedOne2OneMut)
                                        .OrderBy(m => GetMaxScore(m[0]).Spectrum.Query.FileScan.LongFileName)
                                        .ToList();
            var pairedOne2MultipleFile        = outputPrefix + ".paired.one2multiple.mut";
            var pairedOne2MultiplePeptideFile = OutputPairedResult(aas, format, proMap, classification, mutHeader, mutPepFormat, pairedOne2MultipleMut, pairedOne2MultipleFile);

            var unpairedFile = outputPrefix + ".unpaired.mut";
            var unpairedMut  = curtype.Where(r => r.Count == 1).ToList();

            using (StreamWriter sw = new StreamWriter(unpairedFile))
            {
                sw.WriteLine("Index\t" + mutHeader + "\tSequence\tPepCount");

                for (int i = 0; i < unpairedMut.Count; i++)
                {
                    var entry = unpairedMut[i];
                    var best  = GetMaxScore(entry[0]);

                    // The index column is 1-based and prefixed with '$'.
                    sw.WriteLine("${0}\t{1}\t{2}\t{3}", i + 1, mutPepFormat.PeptideFormat.GetString(best.Spectrum), best.PureSequence, entry[0].Count());
                }
            }

            var unpairedPeptideFile = unpairedFile + ".peptides";

            SavePeptidesFile(unpairedMut, format, unpairedPeptideFile);

            return new string[]
            {
                pairedOne2OneFile,
                pairedOne2OnePeptideFile,
                pairedOne2MultipleFile,
                pairedOne2MultiplePeptideFile,
                unpairedFile,
                unpairedPeptideFile
            };
        }
        /// <summary>
        /// Attaches quantification ratios to the peptides of a mutation file and writes the result
        /// to <c>fileName + ".quantification"</c>.
        /// </summary>
        /// <remarks>
        /// For each peptide the quantification result is searched twice: once for an entry named
        /// after the peptide's pure sequence with 'I' replaced by 'L' (ratio prefix "MUL_"), and once
        /// for an entry named after its "OriginalSequence" annotation (ratio prefix "ORI_").
        /// </remarks>
        /// <param name="fileName">Mutation peptide file to read.</param>
        /// <returns>A single-element array holding the output file name.</returns>
        /// <exception cref="Exception">The quantification file contains no entries.</exception>
        public override IEnumerable <string> Process(string fileName)
        {
            Progress.SetMessage("Reading mutation file ...");
            var format  = new MascotPeptideTextFormat();
            var spectra = format.ReadFromFile(fileName);

            var quanFormat = new MascotResultTextFormat
            {
                Progress = this.Progress
            };

            Progress.SetMessage("Reading quantification file ...");
            var ir = quanFormat.ReadFromFile(quantificationFile);
            if (ir.Count == 0)
            {
                throw new Exception("No quantification found!");
            }

            foreach (var pep in spectra)
            {
                // Mutated form first, original form second.
                var mutSeq     = pep.Peptide.PureSequence.Replace('I', 'L');
                var mutProtein = ir.FirstOrDefault(m => m.Any(n => n.Name.Equals(mutSeq)));
                if (mutProtein != null)
                {
                    AddRatio(pep, mutProtein, "MUL_");
                }

                var oriSeq     = pep.Annotations["OriginalSequence"] as string;
                var oriProtein = ir.FirstOrDefault(m => m.Any(n => n.Name.Equals(oriSeq)));
                if (oriProtein != null)
                {
                    AddRatio(pep, oriProtein, "ORI_");
                }
            }

            format.Initialize(spectra);

            var result = fileName + ".quantification";

            Progress.SetMessage("Writing peptide quantification file ...");
            format.WriteToFile(result, spectra);

            return new string[] { result };
        }
        /// <summary>
        /// Chooses the reader for <c>options.InputFile</c> by inspecting its first line.
        /// </summary>
        /// <returns>
        /// A <c>RetentionTimePredictionFormat</c> when the first line contains
        /// "PredictionRetentionTime"; otherwise a <c>MascotPeptideTextFormat</c>. An empty file has
        /// no first line and therefore also gets the <c>MascotPeptideTextFormat</c> reader.
        /// </returns>
        private IFileReader <List <IIdentifiedSpectrum> > GetPeptideReader()
        {
            string header;

            using (var sr = new StreamReader(options.InputFile))
            {
                // ReadLine returns null when the file is empty.
                header = sr.ReadLine();
            }

            if (header != null && header.Contains("PredictionRetentionTime"))
            {
                return new RetentionTimePredictionFormat();
            }

            return new MascotPeptideTextFormat();
        }
Exemplo n.º 17
0
        /// <summary>
        /// Writes the enabled protein groups to <paramref name="proteinFile"/> and the enabled
        /// spectra of those groups to the companion peptide file returned by
        /// <c>GetPeptideFileName</c>.
        /// </summary>
        /// <param name="proteinFile">Target protein file.</param>
        /// <param name="mr">Identified result whose groups are written.</param>
        public void WriteToFile(string proteinFile, IIdentifiedResult mr)
        {
            var proteinWriter = new IdentifiedProteinTextWriter(GetProteinHeader());

            using (var proteinStream = new StreamWriter(proteinFile))
            {
                proteinStream.WriteLine("\tName\tDescription" + proteinWriter.GetHeader());

                foreach (IIdentifiedProteinGroup mpg in mr)
                {
                    if (!mpg[0].IsEnabled(true))
                    {
                        continue;
                    }

                    // Refresh the unique peptide count from the enabled spectra before writing.
                    mpg[0].InitUniquePeptideCount(mph => mph.Spectrum.IsEnabled(true));
                    this.WriteFunction(proteinStream, mpg, proteinWriter);
                }
            }

            string peptideFile = GetPeptideFileName(proteinFile);

            var peptideWriter = new MascotPeptideTextFormat(GetPeptideHeader());

            using (var peptideStream = new StreamWriter(peptideFile))
            {
                peptideStream.WriteLine(peptideWriter.PeptideFormat.GetHeader());

                foreach (IIdentifiedProteinGroup mpg in mr)
                {
                    if (!mpg[0].IsEnabled(true))
                    {
                        continue;
                    }

                    foreach (IIdentifiedSpectrum mph in mpg[0].GetSpectra())
                    {
                        if (!mph.IsEnabled(false))
                        {
                            continue;
                        }

                        peptideStream.WriteLine(peptideWriter.PeptideFormat.GetString(mph));
                    }
                }
            }
        }
        /// <summary>
        /// Compares the scores of spectra present in both <c>peptideFile1</c> and
        /// <c>peptideFile2</c> and writes one row per shared spectrum to <paramref name="fileName"/>.
        /// </summary>
        /// <remarks>
        /// Spectra are keyed by long file name; the last column is the second file's score minus the
        /// first file's score.
        /// </remarks>
        /// <param name="fileName">Output file to write.</param>
        /// <returns>A single-element array holding <paramref name="fileName"/>.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var first  = new MascotPeptideTextFormat().ReadFromFile(peptideFile1).ToDictionary(m => m.Query.FileScan.LongFileName);
            var second = new MascotPeptideTextFormat().ReadFromFile(peptideFile2).ToDictionary(m => m.Query.FileScan.LongFileName);

            var shared = first.Keys.Intersect(second.Keys).ToList();
            shared.Sort();

            using (StreamWriter sw = new StreamWriter(fileName))
            {
                var firstName  = Path.GetFileNameWithoutExtension(peptideFile1);
                var secondName = Path.GetFileNameWithoutExtension(peptideFile2);
                sw.WriteLine("FileScan\t" + firstName + "\t" + secondName + "\tDeltaScore");

                foreach (var key in shared)
                {
                    var firstScore  = first[key].Score;
                    var secondScore = second[key].Score;
                    sw.WriteLine("{0}\t{1:0.00}\t{2:0.00}\t{3:0.00}", key, firstScore, secondScore, secondScore - firstScore);
                }
            }

            return new string[] { fileName };
        }
Exemplo n.º 19
0
        /// <summary>
        /// Converts the evidence entries of <c>options.InputFile</c> into a peptide text file with a
        /// fixed column set, written to <c>options.OutputFile</c>.
        /// </summary>
        /// <remarks>
        /// Entries with an empty "Proteins" annotation are dropped, as are entries whose "Proteins"
        /// annotation contains "CON_" when <c>options.RemoveContanimant</c> is set.
        /// </remarks>
        /// <returns>A single-element array holding <c>options.OutputFile</c>.</returns>
        public override IEnumerable <string> Process()
        {
            const string outputHeader = "\tFileScan\tSequence\tObs\tMH+\tDiff(MH+)\tDiffPPM\tCharge\tRank\tScore\tExpectValue\tReference\tMissCleavage\tModification\tMatchCount\tNumProteaseTermini";

            var evidences = new MascotPeptideTextFormat().ReadFromFile(options.InputFile);

            // PSMs that are not mapped to any protein usually come from the decoy database.
            evidences.RemoveAll(m => string.IsNullOrWhiteSpace(m.Annotations["Proteins"] as string));

            if (options.RemoveContanimant)
            {
                evidences.RemoveAll(m => (m.Annotations["Proteins"] as string).Contains("CON_"));
            }

            evidences.ForEach(spectrum => ParseMaxQuantEvidencePeptide(spectrum));

            var writer = new MascotPeptideTextFormat(outputHeader);
            writer.WriteToFile(options.OutputFile, evidences);

            return new string[] { options.OutputFile };
        }
        /// <summary>
        /// Loads the peptides of the origin file, refreshes the peptide list view and builds the
        /// chromatograph processor for the de-duplicated peptide entries.
        /// </summary>
        /// <returns>
        /// A <c>ProteinChromatographProcessor</c> created from the de-duplicated entries, the raw
        /// file name, the ppm tolerance and the window value.
        /// </returns>
        protected override IFileProcessor GetFileProcessor()
        {
            format   = new MascotPeptideTextFormat();
            peptides = format.ReadFromFile(base.GetOriginFile());

            if (bFirstLoad)
            {
                // On the first load, every file column that the list view does not already show is
                // recorded as ignored (only when the list view has columns at all).
                var allColumns = format.PeptideFormat.GetHeader().Split('\t').ToList();
                var lvColumns  = lvPeptides.GetColumnList().ConvertAll(m => m.Text);
                if (lvColumns.Count > 0)
                {
                    this.peptideIgnoreKeys = allColumns.Except(lvColumns).ToList();
                }

                bFirstLoad = false;
            }

            FillListViewColumns(this.lvPeptides, format.PeptideFormat.GetHeader(), this.peptideIgnoreKeys, this.peptideIgnoreKeyIndecies);

            UpdatePeptides();

            var chros = (from p in peptides
                         select SpectrumToChro(p)).ToList();

            // Remove duplicates: two entries are the same when sequence and charge are equal and
            // their m/z values differ by less than 0.0001. The earlier entry (index j) is removed.
            // After RemoveAt(j) the entry formerly at index i sits at i - 1, which is exactly the
            // index the outer loop visits next, so it is compared against the remaining earlier
            // entries again.
            for (int i = chros.Count - 1; i >= 0; i--)
            {
                for (int j = i - 1; j >= 0; j--)
                {
                    if ((chros[i].Sequence == chros[j].Sequence) && (chros[i].Charge == chros[j].Charge) && (Math.Abs(chros[i].Mz - chros[j].Mz) < 0.0001))
                    {
                        chros.RemoveAt(j);
                        break;
                    }
                }
            }

            // NOTE(review): the selection handler is detached here and not re-attached within this
            // method; presumably a caller subscribes it again later - confirm.
            lvPeptides.SelectedIndexChanged -= lvPeptides_SelectedIndexChanged;

            return(new ProteinChromatographProcessor(chros, new string[] { rawFile.FullName }.ToList(), new RawFileImpl(), ppmTolerance.Value, window.Value, false));
        }
        /// <summary>
        /// Reads the peptide file, distributes its spectra over the entries of
        /// <paramref name="map"/> according to each entry's filter, and writes every non-empty entry
        /// (header followed by its spectra) to that entry's result writer.
        /// </summary>
        /// <param name="filename">Peptide text file to read.</param>
        /// <param name="result">Value returned unchanged to the caller.</param>
        /// <param name="map">Filter-to-entry map; every entry is disposed before returning, even on failure.</param>
        /// <returns><paramref name="result"/>.</returns>
        protected override IEnumerable <string> DoProcess(string filename, List <string> result, Dictionary <IFilter <IIdentifiedSpectrum>, SpectrumEntry> map)
        {
            try
            {
                var format  = new MascotPeptideTextFormat();
                var spectra = format.ReadFromFile(filename);

                foreach (var pair in map)
                {
                    IFilter <IIdentifiedSpectrum> filter = pair.Key;
                    SpectrumEntry entry = pair.Value;

                    foreach (IIdentifiedSpectrum spectrum in spectra)
                    {
                        if (!filter.Accept(spectrum))
                        {
                            continue;
                        }

                        entry.Spectra.Add(spectrum);
                    }

                    if (entry.Spectra.Count == 0)
                    {
                        continue;
                    }

                    entry.ResultWriter.WriteLine(format.PeptideFormat.GetHeader());
                    foreach (var accepted in entry.Spectra)
                    {
                        entry.ResultWriter.WriteLine(format.PeptideFormat.GetString(accepted));
                    }
                }

                return result;
            }
            finally
            {
                // Entries own their writers; release them whether or not processing succeeded.
                foreach (SpectrumEntry entry in map.Values)
                {
                    entry.Dispose();
                }
            }
        }
        /// <summary>
        /// Merges the percolator output PSMs with details from the percolator input, keeps one PSM
        /// per query, and writes the full list plus a list restricted to q-values below 0.01.
        /// </summary>
        /// <remarks>
        /// When a query has several PSMs only the two with the highest <c>SpScore</c> are
        /// considered. If the top one scores strictly higher it is kept. Otherwise a decoy is
        /// preferred (the top one first), and when neither is a decoy the second peptide is added
        /// to the top PSM, which is then kept.
        /// </remarks>
        /// <returns>A single-element array holding the name of the unfiltered peptide file.</returns>
        public override IEnumerable <string> Process()
        {
            var spectra   = new PercolatorOutputXmlPsmReader().ReadFromFile(_options.PercolatorOutputFile);
            var inputspec = new PercolatorInputXmlPsmReader().ReadFromFile(_options.PercolatorInputFile);
            var scanMap   = inputspec.ToDictionary(m => GetPsmId(m));

            // Copy the query and mass details from the matching input PSM.
            foreach (var psm in spectra)
            {
                var source = scanMap[GetPsmId(psm)];

                psm.Query.QueryId            = source.Query.QueryId;
                psm.Query.FileScan.FirstScan = psm.Query.QueryId;
                psm.Query.FileScan.LastScan  = psm.Query.QueryId;
                psm.Query.Charge             = source.Query.Charge;
                psm.ExperimentalMH           = source.ExperimentalMH;
                psm.TheoreticalMH            = source.TheoreticalMH;
                psm.NumMissedCleavages       = source.NumMissedCleavages;
                psm.Score                    = source.Score;
            }

            var result = new List <IIdentifiedSpectrum>();

            foreach (var query in spectra.GroupBy(m => m.Query.QueryId).ToList())
            {
                if (query.Count() == 1)
                {
                    result.Add(query.First());
                    continue;
                }

                var ranked = query.OrderByDescending(m => m.SpScore).ToList();
                var best   = ranked[0];
                var second = ranked[1];

                if (second.SpScore < best.SpScore || best.FromDecoy)
                {
                    result.Add(best);
                }
                else if (second.FromDecoy)
                {
                    result.Add(second);
                }
                else
                {
                    best.AddPeptide(second.Peptide);
                    result.Add(best);
                }
            }

            result.Sort((m1, m2) => m2.SpScore.CompareTo(m1.SpScore));

            var format = new MascotPeptideTextFormat("QueryId\tSpectrumId\tFileScan\tSequence\tCharge\tScore\tSvmScore\tMissCleavage\tQValue\tTheoreticalMH\tExperimentMH\tTarget/Decoy");

            var targetFile = _options.PercolatorOutputFile + ".peptides";
            format.WriteToFile(targetFile, result);

            var qvalueCalculator = new QValueCalculator(new PercolatorScoreFunction(), new TargetFalseDiscoveryRateCalculator());
            qvalueCalculator.CalculateQValue(result);
            result.RemoveAll(m => m.QValue >= 0.01);

            // The filtered file is written, but only the unfiltered file name is returned.
            var target001file = FileUtils.ChangeExtension(targetFile, ".FDR0.01.peptides");
            format.WriteToFile(target001file, result);

            return new[] { targetFile };
        }
        /// <summary>
        /// Builds the identification result for one parameter file and writes the output files.
        /// The peptide spectra come either from the spectrum builder supplied by <paramref name="conf"/>
        /// or, when options.PeptideFile is set, from that peptides file. The spectra are then optionally
        /// filtered (contamination description, one-hit wonders, decoy removal), optionally written to a
        /// ".peptides" file, assembled into proteins and protein groups, and written to a ".noredundant" file.
        /// </summary>
        /// <param name="parameterFile">Passed to the spectrum builder; also the base name (extension replaced) of every output file.</param>
        /// <param name="result">Receives the paths of the files written by this method.</param>
        /// <param name="conf">Build options; note that SavePeptidesFile is set to false on it when peptides are read from options.PeptideFile.</param>
        private void RunCurrentParameter(string parameterFile, List <string> result, BuildSummaryOptions conf)
        {
            // NOTE(review): acParser is not referenced again in this method — confirm whether the call is still needed.
            IStringParser <string> acParser = conf.Database.GetAccessNumberParser();

            IIdentifiedProteinBuilder      proteinBuilder = new IdentifiedProteinBuilder();
            IIdentifiedProteinGroupBuilder groupBuilder   = new IdentifiedProteinGroupBuilder()
            {
                Progress = this.Progress
            };

            // Stays null when the peptides are read from a file; checked before PeptideFDR is copied below.
            IdentifiedSpectrumBuilderResult isbr;

            List <IIdentifiedSpectrum> finalPeptides;

            if (string.IsNullOrEmpty(options.PeptideFile))
            { // No peptide file given: parse from the configuration
              // and build the spectrum list.
                IIdentifiedSpectrumBuilder spectrumBuilder = conf.GetSpectrumBuilder();
                if (spectrumBuilder is IProgress)
                {
                    (spectrumBuilder as IProgress).Progress = this.Progress;
                }

                isbr          = spectrumBuilder.Build(parameterFile);
                finalPeptides = isbr.Spectra;
            }
            else
            {
                Progress.SetMessage("Reading peptides from {0} ...", options.PeptideFile);
                finalPeptides         = new MascotPeptideTextFormat().ReadFromFile(options.PeptideFile);
                // The peptides already come from a file, so the ".peptides" output step below is skipped.
                conf.SavePeptidesFile = false;
                isbr = null;
            }

            CalculateIsoelectricPoint(finalPeptides);

            // If contamination has to be removed via the protein description, it must first be removed at the peptide level.
            if (conf.Database.HasContaminationDescriptionFilter() && (conf.FalseDiscoveryRate.FdrLevel != FalseDiscoveryRateLevel.Protein))
            {
                Progress.SetMessage("Removing contamination by description ...");
                var notConGroupFilter = conf.Database.GetNotContaminationDescriptionFilter(Progress);

                var tempResultBuilder = new IdentifiedResultBuilder(null, null);
                // Rebuild proteins and groups from the surviving spectra until a pass removes nothing.
                while (true)
                {
                    List <IIdentifiedProtein>      proteins = proteinBuilder.Build(finalPeptides);
                    List <IIdentifiedProteinGroup> groups   = groupBuilder.Build(proteins);
                    IIdentifiedResult tmpResult             = tempResultBuilder.Build(groups);

                    // Spectra of the first protein of every group accepted by the filter.
                    HashSet <IIdentifiedSpectrum> notConSpectra = new HashSet <IIdentifiedSpectrum>();
                    foreach (var group in tmpResult)
                    {
                        if (notConGroupFilter.Accept(group))
                        {
                            notConSpectra.UnionWith(group[0].GetSpectra());
                        }
                    }

                    // Same count as before means this pass dropped no spectrum: done.
                    if (notConSpectra.Count == finalPeptides.Count)
                    {
                        break;
                    }
                    finalPeptides = notConSpectra.ToList();
                }
            }

            if (conf.FalseDiscoveryRate.FilterOneHitWonder && conf.FalseDiscoveryRate.MinOneHitWonderPeptideCount > 1)
            {
                Progress.SetMessage("Filtering single wonders ...");
                var proteinFilter = new IdentifiedProteinSingleWonderPeptideCountFilter(conf.FalseDiscoveryRate.MinOneHitWonderPeptideCount);
                List <IIdentifiedProtein> proteins = proteinBuilder.Build(finalPeptides);
                int oldProteinCount = proteins.Count;
                proteins.RemoveAll(l => !proteinFilter.Accept(l));
                // Only when at least one protein was rejected: keep just the spectra of the remaining proteins.
                if (oldProteinCount != proteins.Count)
                {
                    HashSet <IIdentifiedSpectrum> newspectra = new HashSet <IIdentifiedSpectrum>();
                    foreach (var protein in proteins)
                    {
                        newspectra.UnionWith(protein.GetSpectra());
                    }
                    finalPeptides = newspectra.ToList();
                }
            }

            //if (conf.SavePeptidesFile && !(conf.FalseDiscoveryRate.FilterOneHitWonder && conf.FalseDiscoveryRate.MinOneHitWonderPeptideCount > 1))
            if (conf.SavePeptidesFile)
            {
                if (conf.Database.RemovePeptideFromDecoyDB)
                {
                    DecoyPeptideBuilder.AssignDecoy(finalPeptides, conf.GetDecoySpectrumFilter());
                    // Iterate backwards so RemoveAt does not shift the indices still to be visited.
                    for (int i = finalPeptides.Count - 1; i >= 0; i--)
                    {
                        if (finalPeptides[i].FromDecoy)
                        {
                            finalPeptides.RemoveAt(i);
                        }
                    }
                }

                finalPeptides.Sort();

                // Save the peptides file.
                IFileFormat <List <IIdentifiedSpectrum> > peptideFormat = conf.GetIdentifiedSpectrumFormat();
                string peptideFile = FileUtils.ChangeExtension(parameterFile, ".peptides");
                Progress.SetMessage("Writing peptides file...");
                peptideFormat.WriteToFile(peptideFile, finalPeptides);
                result.Add(peptideFile);

                if (!conf.FalseDiscoveryRate.FilterByFdr && conf.Database.DecoyPatternDefined)
                {
                    WriteFdrFile(parameterFile, conf, finalPeptides);
                }

                Progress.SetMessage("Calculating precursor offset...");
                // Whatever the calculator returns (presumably the paths of the files it wrote) is reported too.
                result.AddRange(new PrecursorOffsetCalculator(finalPeptides).Process(peptideFile));
            }

            Progress.SetMessage("Building protein...");
            // Build the protein list.
            List <IIdentifiedProtein> finalProteins = proteinBuilder.Build(finalPeptides);

            Progress.SetMessage("Building protein group...");
            // Build the protein group list.
            List <IIdentifiedProteinGroup> finalGroups = groupBuilder.Build(finalProteins);

            if (conf.Database.HasContaminationDescriptionFilter())
            {
                var notConGroupFilter = conf.Database.GetNotContaminationDescriptionFilter(Progress);

                // Drop the groups rejected by the filter; backwards iteration keeps RemoveAt safe.
                for (int i = finalGroups.Count - 1; i >= 0; i--)
                {
                    if (!notConGroupFilter.Accept(finalGroups[i]))
                    {
                        finalGroups.RemoveAt(i);
                    }
                }
            }

            // Build the final identification result.
            var resultBuilder = conf.GetIdentifiedResultBuilder();

            resultBuilder.Progress = Progress;
            IIdentifiedResult finalResult = resultBuilder.Build(finalGroups);

            finalResult.BuildGroupIndex();

            if (conf.FalseDiscoveryRate.FilterByFdr)
            {
                // Flag each group, and every protein in it, as decoy or target.
                var decoyGroupFilter = conf.GetDecoyGroupFilter();
                foreach (var group in finalResult)
                {
                    group.FromDecoy = decoyGroupFilter.Accept(group);
                    foreach (var protein in group)
                    {
                        protein.FromDecoy = group.FromDecoy;
                    }
                }

                // Protein FDR from the number of decoy groups and the number of target groups.
                finalResult.ProteinFDR = conf.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator().Calculate(finalResult.Count(l => l[0].FromDecoy), finalResult.Count(l => !l[0].FromDecoy));
            }

            CalculateIsoelectricPoint(finalResult.GetProteins());
            if (isbr != null)
            {
                finalResult.PeptideFDR = isbr.PeptideFDR;
            }

            // Save the non-redundant protein list file.

            var resultFormat = conf.GetIdetifiedResultFormat(finalResult, this.Progress);

            string noredundantFile = FileUtils.ChangeExtension(parameterFile, ".noredundant");

            Progress.SetMessage("Writing noredundant file...");
            resultFormat.WriteToFile(noredundantFile, finalResult);
            result.Add(noredundantFile);

            Progress.SetMessage("Finished!");
        }
Exemplo n.º 24
0
        /// <summary>
        /// Reads identified spectra from a peptide text file, merges neighbouring entries that carry the
        /// same title annotation into one spectrum, and rebuilds each spectrum's scan information from
        /// its title. The protein, title and retention-time annotations are consumed and removed.
        /// </summary>
        /// <param name="fileName">Peptide text file to read; its base name is the fallback experiment name.</param>
        /// <returns>The filtered and merged spectrum list.</returns>
        public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            var spectra = new MascotPeptideTextFormat().ReadFromFile(fileName);

            FilterSpectra(spectra);
            UpdateModifications(spectra);

            // Move the protein names from the annotation onto the peptide.
            foreach (var spectrum in spectra)
            {
                spectrum.Peptide.AssignProteins((spectrum.Annotations[PROTEIN_KEY] as string).Split(';'));
                spectrum.Annotations.Remove(PROTEIN_KEY);
                spectrum.TheoreticalMass = spectrum.ExperimentalMass;
            }

            // Fold every run of adjacent entries with the same title into the first entry of the run.
            var index = 0;
            while (index < spectra.Count - 1)
            {
                var current = spectra[index];
                var next    = spectra[index + 1];

                var currentTitle = current.Annotations[TITLE_KEY] as string;
                var nextTitle    = next.Annotations[TITLE_KEY] as string;

                if (!currentTitle.Equals(nextTitle))
                {
                    index++;
                    continue;
                }

                // Walk backwards, as the list being read may shrink while peptides are handed over.
                for (int k = next.Peptides.Count - 1; k >= 0; k--)
                {
                    current.AddPeptide(next.Peptides[k]);
                }

                spectra.RemoveAt(index + 1);
            }

            // Replace the scan information by the one parsed from the title, keeping the charge.
            foreach (var spectrum in spectra)
            {
                var title = spectrum.Annotations[TITLE_KEY] as string;
                spectrum.Annotations.Remove(TITLE_KEY);

                var query  = spectrum.Query;
                var charge = query.FileScan.Charge;

                query.FileScan        = TitleParser.GetValue(title);
                query.FileScan.Charge = charge;

                if (string.IsNullOrEmpty(query.FileScan.Experimental))
                {
                    query.FileScan.Experimental = Path.GetFileNameWithoutExtension(fileName);
                }

                // Only the part before the first "-" of the annotation is parsed as the retention time.
                var retentionText = spectrum.Annotations[RT_KEY] as string;
                if (!string.IsNullOrWhiteSpace(retentionText))
                {
                    query.FileScan.RetentionTime = double.Parse(retentionText.StringBefore("-"));
                }

                spectrum.Annotations.Remove(RT_KEY);
            }

            return spectra;
        }
Exemplo n.º 25
0
        /// <summary>
        /// Calculates deuterium enrichment per peptide spectrum: builds the chromatograph boundary file,
        /// runs the deuterium R template on it, annotates the input spectra with the values read back,
        /// writes the annotated peptides, and finally writes a peptide-by-time table holding the median
        /// enrichment of each peptide sequence at each experimental time.
        /// </summary>
        /// <returns>An array containing only options.OutputFile.</returns>
        public override IEnumerable<string> Process()
        {
            // Extract chromatograph information
            var chroOptions = new ChromatographProfileBuilderOptions();

            options.CopyProperties(chroOptions);
            chroOptions.InputFile  = options.InputFile;
            chroOptions.OutputFile = options.BoundaryOutputFile;
            chroOptions.DrawImage  = false;
            var profileBuilder = new ChromatographProfileBuilder(chroOptions);

            bool buildBoundary = !File.Exists(options.BoundaryOutputFile) || options.Overwrite;
            if (buildBoundary)
            {
                Progress.SetMessage("Finding envelope ...");
                profileBuilder.Progress = this.Progress;
                profileBuilder.Process();
            }

            // Calculate deuterium enrichment for peptide
            bool buildDeuterium = !File.Exists(options.DeuteriumOutputFile) || options.Overwrite;
            if (buildDeuterium)
            {
                Progress.SetMessage("Calculating deuterium ...");
                var deuteriumOptions = new RTemplateProcessorOptions()
                {
                    InputFile      = options.BoundaryOutputFile,
                    OutputFile     = options.DeuteriumOutputFile,
                    RTemplate      = DeuteriumR,
                    RExecute       = SystemUtils.GetRExecuteLocation(),
                    CreateNoWindow = true
                };

                deuteriumOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));
                deuteriumOptions.Parameters.Add("excludeIsotopic0<-" + (options.ExcludeIsotopic0 ? "1" : "0"));

                var rProcessor = new RTemplateProcessor(deuteriumOptions);
                rProcessor.Progress = this.Progress;
                rProcessor.Process();
            }

            // Deuterium results keyed by their "ChroFile" annotation.
            var deuteriumMap = new AnnotationFormat().ReadFromFile(options.DeuteriumOutputFile).ToDictionary(m => m.Annotations["ChroFile"].ToString());

            // Read old spectra information
            var format  = new MascotPeptideTextFormat();
            var spectra = format.ReadFromFile(options.InputFile);

            // Clear annotations that may be left over from an earlier run.
            string[] staleKeys =
            {
                "RetentionTime",
                "TheoreticalDeuterium",
                "ObservedDeuterium",
                "NumDeuteriumIncorporated",
                "NumExchangableHydrogen",
                "DeuteriumEnrichmentPercent"
            };

            foreach (var spectrum in spectra)
            {
                foreach (var key in staleKeys)
                {
                    spectrum.Annotations.Remove(key);
                }
            }

            var calcSpectra = new List<IIdentifiedSpectrum>();
            var aminoacids  = new Aminoacids();

            foreach (var spectrum in spectra)
            {
                var chroName = Path.GetFileNameWithoutExtension(profileBuilder.GetTargetFile(spectrum));
                if (!deuteriumMap.ContainsKey(chroName))
                {
                    // No deuterium result for this spectrum: it is left out of the output.
                    continue;
                }

                var source = deuteriumMap[chroName];

                var exchangeableHydrogens = aminoacids.ExchangableHAtom(spectrum.Peptide.PureSequence);
                var incorporatedDeuterium = double.Parse(source.Annotations["NumDeuteriumIncorporated"] as string);

                spectrum.Annotations["PeakRetentionTime"]          = source.Annotations["RetentionTime"];
                spectrum.Annotations["TheoreticalDeuterium"]       = source.Annotations["TheoreticalDeuterium"];
                spectrum.Annotations["ObservedDeuterium"]          = source.Annotations["ObservedDeuterium"];
                spectrum.Annotations["NumDeuteriumIncorporated"]   = source.Annotations["NumDeuteriumIncorporated"];
                spectrum.Annotations["NumExchangableHydrogen"]     = exchangeableHydrogens;
                spectrum.Annotations["DeuteriumEnrichmentPercent"] = incorporatedDeuterium / exchangeableHydrogens;

                calcSpectra.Add(spectrum);
            }

            format.PeptideFormat.Headers += "\tPeakRetentionTime\tTheoreticalDeuterium\tObservedDeuterium\tNumDeuteriumIncorporated\tNumExchangableHydrogen\tDeuteriumEnrichmentPercent";
            format.NotExportSummary = true;
            format.WriteToFile(GetPeptideDeteriumFile(), calcSpectra);

            // One table row per pure sequence, one column per distinct experimental time.
            var sequenceGroups = calcSpectra.GroupBy(m => m.Peptide.PureSequence).OrderBy(g => g.Key).ToList();
            var times          = options.ExperimentalTimeMap.Values.Distinct().OrderBy(t => t).ToArray();

            using (var writer = new StreamWriter(options.OutputFile))
            {
                writer.WriteLine("Peptide\t{0}", times.Select(t => t.ToString()).Merge("\t"));

                foreach (var sequenceGroup in sequenceGroups)
                {
                    var byTime = sequenceGroup
                                 .GroupBy(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental])
                                 .ToDictionary(g => g.Key, g => g.ToArray());

                    if (options.PeptideInAllTimePointOnly && !times.All(t => byTime.ContainsKey(t)))
                    {
                        continue;
                    }

                    writer.Write(sequenceGroup.Key);

                    foreach (var time in times)
                    {
                        if (!byTime.ContainsKey(time))
                        {
                            writer.Write("\tNA");
                            continue;
                        }

                        var enrichments = byTime[time]
                                          .Select(s => double.Parse(s.Annotations["DeuteriumEnrichmentPercent"].ToString()))
                                          .ToArray();
                        var median = Statistics.Median(enrichments);
                        writer.Write("\t{0:0.######}", median);
                    }

                    writer.WriteLine();
                }
            }

            Progress.SetMessage("Peptide deuterium enrichment calculation finished ...");

            return new string[] { options.OutputFile };
        }
Exemplo n.º 26
0
        /// <summary>
        /// Calculates deuterium enrichment at protein level: writes the unambiguous spectra of the input
        /// result to a ".unique.peptides" file, runs the peptide-level calculator on it, copies the
        /// calculated annotations back, drops peptides and protein groups without a result, writes the
        /// per-protein files, and runs the ratio R template on the protein-by-time table.
        /// </summary>
        /// <returns>An array containing only options.OutputFile.</returns>
        public override IEnumerable<string> Process()
        {
            // Prepare unique peptide file
            var resultFormat = new MascotResultTextFormat();
            var proteins     = resultFormat.ReadFromFile(options.InputFile);

            proteins.RemoveAmbiguousSpectra();

            var spectra = proteins.GetSpectra();

            // Clear annotations that may be left over from an earlier run.
            string[] staleKeys =
            {
                "TheoreticalDeuterium",
                "ObservedDeuterium",
                "NumDeuteriumIncorporated",
                "NumExchangableHydrogen",
                "DeuteriumEnrichmentPercent"
            };

            foreach (var spectrum in spectra)
            {
                foreach (var key in staleKeys)
                {
                    spectrum.Annotations.Remove(key);
                }
            }

            var peptideFile   = Path.ChangeExtension(options.InputFile, ".unique.peptides");
            var peptideFormat = new MascotPeptideTextFormat(resultFormat.PeptideFormat.Headers);

            peptideFormat.WriteToFile(peptideFile, spectra);

            // Calculate deuterium enrichment at peptide level
            var pepOptions = new DeuteriumCalculatorOptions();

            options.CopyProperties(pepOptions);
            pepOptions.InputFile  = peptideFile;
            pepOptions.OutputFile = peptideFile + ".tsv";

            var peptideCalculator = new PeptideDeuteriumCalculator(pepOptions)
            {
                Progress = this.Progress
            };
            peptideCalculator.Process();

            // Copy annotations from the calculated peptides to the original ones, matched by long file name
            var calculated      = peptideFormat.ReadFromFile(peptideCalculator.GetPeptideDeteriumFile());
            var originalsByScan = spectra.ToDictionary(m => m.Query.FileScan.LongFileName);

            foreach (var calcSpectrum in calculated)
            {
                var original = originalsByScan[calcSpectrum.Query.FileScan.LongFileName];
                foreach (var annotation in calcSpectrum.Annotations)
                {
                    original.Annotations[annotation.Key] = annotation.Value;
                }
            }

            // Remove the peptides without a calculation result, then the groups left empty.
            // Backwards iteration keeps RemoveAt from shifting unvisited indices.
            for (int groupIndex = proteins.Count - 1; groupIndex >= 0; groupIndex--)
            {
                foreach (var protein in proteins[groupIndex])
                {
                    protein.Peptides.RemoveAll(p => !p.Spectrum.Annotations.ContainsKey("DeuteriumEnrichmentPercent"));
                }

                if (proteins[groupIndex][0].Peptides.Count == 0)
                {
                    proteins.RemoveAt(groupIndex);
                }
            }

            resultFormat.PeptideFormat = peptideFormat.PeptideFormat;

            var individualFile = Path.ChangeExtension(options.OutputFile, ".individual.tsv");

            resultFormat.WriteToFile(individualFile, proteins);

            var times    = options.ExperimentalTimeMap.Values.Distinct().OrderBy(t => t).ToArray();
            var timeFile = Path.ChangeExtension(options.OutputFile, ".times.tsv");

            using (var writer = new StreamWriter(timeFile))
            {
                writer.WriteLine("Protein\t{0}", times.Select(t => t.ToString()).Merge("\t"));

                foreach (var proteinGroup in proteins)
                {
                    var groupSpectra = proteinGroup[0].GetSpectra();

                    if (options.PeptideInAllTimePointOnly)
                    {
                        // Keep only the spectra of sequences observed at every time point.
                        var bySequence = groupSpectra.ToGroupDictionary(s => s.Peptide.PureSequence);
                        groupSpectra.Clear();
                        foreach (var sequenceSpectra in bySequence.Values)
                        {
                            var sequenceByTime = sequenceSpectra.ToGroupDictionary(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]);
                            if (!times.Any(time => !sequenceByTime.ContainsKey(time)))
                            {
                                groupSpectra.AddRange(sequenceSpectra);
                            }
                        }
                    }

                    if (groupSpectra.Count == 0)
                    {
                        continue;
                    }

                    writer.Write(proteinGroup.Select(p => p.Name).Merge("/"));
                    var byTime = groupSpectra.ToGroupDictionary(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]);

                    foreach (var time in times)
                    {
                        if (!byTime.ContainsKey(time))
                        {
                            writer.Write("\tNA");
                            continue;
                        }

                        var enrichments = byTime[time]
                                          .Select(s => double.Parse(s.Annotations["DeuteriumEnrichmentPercent"].ToString()))
                                          .ToArray();
                        var median = Statistics.Median(enrichments);
                        writer.Write("\t{0:0.######}", median);
                    }

                    writer.WriteLine();
                }
            }

            Progress.SetMessage("Calculating ratio consistant ...");
            var ratioOptions = new RTemplateProcessorOptions()
            {
                InputFile      = timeFile,
                OutputFile     = options.OutputFile,
                RTemplate      = RatioR,
                RExecute       = SystemUtils.GetRExecuteLocation(),
                CreateNoWindow = true
            };

            var rProcessor = new RTemplateProcessor(ratioOptions);
            rProcessor.Progress = this.Progress;
            rProcessor.Process();

            Progress.SetMessage("Finished ...");

            return new string[] { options.OutputFile };
        }
    /// <summary>
    /// Checks the q-value assigned to the first spectrum after filtering the sample peptides by
    /// expect value and sequence length and flagging decoys by the "REVERSE_" protein-name marker.
    /// </summary>
    public void TestCalculateQValue()
    {
      const string peptideFile = "../../../data/QTOF_Ecoli.LowRes.t.xml.peptides";

      var spectra = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

      // Discard hits with an expect value above 0.05 or fewer than six residues.
      spectra.RemoveAll(s => s.ExpectValue > 0.05 || s.Peptide.PureSequence.Length < 6);

      foreach (var spectrum in spectra)
      {
        spectrum.FromDecoy = spectrum.Proteins.Any(name => name.Contains("REVERSE_"));
      }

      var scoreFunction = new ExpectValueFunction();
      var fdrCalculator = new TargetFalseDiscoveryRateCalculator();
      IdentifiedSpectrumUtils.CalculateQValue(spectra, scoreFunction, fdrCalculator);

      Assert.AreEqual(0.0267, spectra[0].QValue, 0.0001);
    }
    /// <summary>
    /// Verifies that keeping the top peptide directly, and keeping the unconflicted peptides first and
    /// then the top peptide per bin, produce bins with the same spectrum counts for two result sets of
    /// the same engine run with different parameters.
    /// </summary>
    public void TestSameEngineDifferentParameters()
    {
      const string combinedTag = "deisotopic/deisotopic-top";

      var classification = new ClassificationOptions
      {
        ClassifyByCharge = true,
        ClassifyByMissCleavage = true,
        ClassifyByModification = true,
        ModifiedAminoacids = "STY",
        ClassifyByNumProteaseTermini = true
      };

      var deisotopic = new MascotPeptideTextFormat().ReadFromFile(@"../../../data/deisotopic.peptides");
      IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(deisotopic);
      foreach (var spectrum in deisotopic)
      {
        spectrum.Tag = "deisotopic";
      }

      var deisotopicTop = new MascotPeptideTextFormat().ReadFromFile(@"../../../data/deisotopic-top10.peptides");
      IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(deisotopicTop);
      foreach (var spectrum in deisotopicTop)
      {
        spectrum.Tag = "deisotopic-top";
      }

      var combined = deisotopic.Union(deisotopicTop).ToList();

      // Route 1: keep the top peptide over the whole combined list, then bin.
      var topOnly = new List<IIdentifiedSpectrum>(combined);
      IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(topOnly, new ScoreFunction());
      foreach (var spectrum in topOnly)
      {
        spectrum.ClassificationTag = combinedTag;
      }
      var expectedBins = classification.BuildSpectrumBin(topOnly);

      // Route 2: keep the unconflicted peptides, bin, then keep the top peptide inside each bin.
      var unconflicted = new List<IIdentifiedSpectrum>(combined);
      IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters(unconflicted, new ScoreFunction());
      foreach (var spectrum in unconflicted)
      {
        spectrum.ClassificationTag = combinedTag;
      }
      var actualBins = classification.BuildSpectrumBin(unconflicted);

      foreach (var actual in actualBins)
      {
        IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(actual.Spectra, new ScoreFunction());

        // Bins are paired by the text form of their classification condition.
        var expected = expectedBins.Find(b => b.Condition.ToString().Equals(actual.Condition.ToString()));
        Assert.AreEqual(actual.Spectra.Count, expected.Spectra.Count);
      }
    }
        /// <summary>
        /// Builds an identification summary for a protein result file and its sibling ".peptides" file.
        /// When a sibling ".param" file exists and has FDR filtering enabled, its decoy group filter and
        /// FDR calculator are used; otherwise the supplied defaults are used.
        /// </summary>
        /// <param name="proteinFile">Protein result file; the ".param" and ".peptides" paths are derived from it.</param>
        /// <param name="defaultDecoyPattern">Regex used to classify spectra as decoy, and to build the fallback protein-group decoy filter.</param>
        /// <param name="defaultCalc">FDR calculator used when the parameter file does not provide one.</param>
        /// <returns>The summary; counts belonging to a file that does not exist are left at their initial values.</returns>
        public static IdentificationSummary Parse(string proteinFile, string defaultDecoyPattern, IFalseDiscoveryRateCalculator defaultCalc)
        {
            IdentificationSummary result = new IdentificationSummary();

            result.FileName = FileUtils.ChangeExtension(new FileInfo(proteinFile).Name, "");

            Regex decoyReg = new Regex(defaultDecoyPattern);

            IIdentifiedProteinGroupFilter decoyFilter = null;
            IFalseDiscoveryRateCalculator curCalc     = null;

            var paramFile = FileUtils.ChangeExtension(proteinFile, ".param");

            if (File.Exists(paramFile))
            {
                BuildSummaryOptions options = BuildSummaryOptionsUtils.LoadFromFile(paramFile);
                if (options.FalseDiscoveryRate.FilterByFdr)
                {
                    decoyFilter = options.GetDecoyGroupFilter();
                    curCalc     = options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
                }
            }

            if (decoyFilter == null)
            {
                decoyFilter = new IdentifiedProteinGroupNameRegexFilter(defaultDecoyPattern, false);
                curCalc     = defaultCalc;
            }

            // Never continue without a calculator, even if the options returned none.
            if (curCalc == null)
            {
                curCalc = defaultCalc;
            }

            var peptideFile = FileUtils.ChangeExtension(proteinFile, ".peptides");

            if (File.Exists(peptideFile))
            {
                var peptides = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

                // The second argument of GetSpectraByNPT is 2 for the "full" set and 1 for the "semi" set.
                var fullSpectra       = GetSpectraByNPT(peptides, 2);
                var fullTargetSpectra = GetTargetSpectra(decoyReg, fullSpectra);
                var semiSpectra       = GetSpectraByNPT(peptides, 1);
                var semiTargetSpectra = GetTargetSpectra(decoyReg, semiSpectra);

                result.FullSpectrumCount       = GetSpectrumCount(fullSpectra);
                result.FullTargetSpectrumCount = GetSpectrumCount(fullTargetSpectra);
                result.SemiSpectrumCount       = GetSpectrumCount(semiSpectra);
                result.SemiTargetSpectrumCount = GetSpectrumCount(semiTargetSpectra);

                result.FullPeptideCount       = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullSpectra);
                result.FullTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullTargetSpectra);
                result.SemiPeptideCount       = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiSpectra);
                result.SemiTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiTargetSpectra);

                // Decoy count is derived as total minus target.
                result.FullSpectrumFdr = curCalc.Calculate(result.FullSpectrumCount - result.FullTargetSpectrumCount, result.FullTargetSpectrumCount);
                result.SemiSpectrumFdr = curCalc.Calculate(result.SemiSpectrumCount - result.SemiTargetSpectrumCount, result.SemiTargetSpectrumCount);
                result.FullPeptideFdr  = curCalc.Calculate(result.FullPeptideCount - result.FullTargetPeptideCount, result.FullTargetPeptideCount);
                result.SemiPeptideFdr  = curCalc.Calculate(result.SemiPeptideCount - result.SemiTargetPeptideCount, result.SemiTargetPeptideCount);
            }

            if (File.Exists(proteinFile))
            {
                var ir = new MascotResultTextFormat().ReadFromFile(proteinFile);
                ir.InitUniquePeptideCount();

                // Split the groups by the unique peptide count of their first protein.
                var u2proteins = ir.Where(g => g[0].UniquePeptideCount > 1).ToList();
                var u1proteins = ir.Where(g => g[0].UniquePeptideCount == 1).ToList();

                result.ProteinGroupCount        = ir.Count;
                result.Unique2ProteinGroupCount = u2proteins.Count;

                // Use the calculator selected together with decoyFilter, so the protein-level FDR
                // follows the same configuration as the decoy classification and the peptide-level FDR.
                int targetCount;
                result.Unique2ProteinFdr = CalculateProteinFdr(u2proteins, decoyFilter, curCalc, out targetCount);
                result.Unique2ProteinGroupTargetCount = targetCount;

                result.Unique1ProteinFdr = CalculateProteinFdr(u1proteins, decoyFilter, curCalc, out targetCount);
                result.Unique1ProteinGroupTargetCount = targetCount;
            }

            return result;
        }
        /// <summary>
        /// Matches peptides whose "Number of Phospho (STY)" annotation is "1" against the
        /// SILAC-quantified spectra read from silacFile, and writes the log ratios of both sources
        /// side by side to fileName + ".match". Found/missed peptides are reported on the console.
        /// </summary>
        /// <param name="fileName">Peptide text file to read; also the prefix of the match file.</param>
        /// <returns>An empty array.</returns>
        public override IEnumerable<string> Process(string fileName)
        {
            const string phosphoCountKey = "Number of Phospho (STY)";

            var peps = new MascotPeptideTextFormat().ReadFromFile(fileName);

            peps.RemoveAll(m => !(m.Annotations[phosphoCountKey] as string).Equals("1"));

            var silac     = new MascotResultTextFormat().ReadFromFile(silacFile);
            var silacPeps = silac.GetSpectra();

            // Only spectra that carry a quantification ratio can be compared.
            silacPeps.RemoveAll(m => m.GetQuantificationItem() == null || !m.GetQuantificationItem().HasRatio);

            // Group 1 of this pattern is matched against the quantification item names below.
            Regex reg      = new Regex(@"Cx_(.+)");
            var   silacMap = silacPeps.ToGroupDictionary(m => m.Peptide.PureSequence + GetModificationCount(m.Peptide, "STY"));

            int found  = 0;
            int missed = 0;

            var matchFile = fileName + ".match";

            using (StreamWriter sw = new StreamWriter(matchFile))
            {
                sw.Write("Sequence");

                // The column names come from the first peptide. Guard against an empty list, which
                // previously made peps[0] throw ArgumentOutOfRangeException.
                if (peps.Count > 0)
                {
                    foreach (var mqi in peps[0].GetMaxQuantItemList())
                    {
                        sw.Write("\tm_" + mqi.Name);
                        sw.Write("\ts_" + mqi.Name);
                    }
                }
                sw.WriteLine();

                foreach (var p in peps)
                {
                    // Same key shape as silacMap: pure sequence followed by the phospho count.
                    var pureSeqKey = p.Peptide.PureSequence + p.Annotations[phosphoCountKey].ToString();

                    if (!silacMap.ContainsKey(pureSeqKey))
                    {
                        missed++;
                        Console.WriteLine("Missed - " + pureSeqKey);
                        continue;
                    }

                    found++;
                    Console.WriteLine("Find - " + pureSeqKey);

                    var findPep    = silacMap[pureSeqKey];
                    var findPepMap = findPep.ToGroupDictionary(m => reg.Match(m.Query.FileScan.Experimental).Groups[1].Value);

                    sw.Write(p.Peptide.PureSequence);
                    foreach (var mqi in p.GetMaxQuantItemList())
                    {
                        if (string.IsNullOrEmpty(mqi.Ratio))
                        {
                            sw.Write("\t");
                        }
                        else
                        {
                            sw.Write("\t{0:0.00}", Math.Log(MyConvert.ToDouble(mqi.Ratio)));
                        }

                        if (!findPepMap.ContainsKey(mqi.Name))
                        {
                            sw.Write("\t");
                        }
                        else
                        {
                            // Report the spectrum with the highest correlation.
                            var spectra = findPepMap[mqi.Name];
                            spectra.Sort((m1, m2) => m2.GetQuantificationItem().Correlation.CompareTo(m1.GetQuantificationItem().Correlation));
                            // NOTE(review): the sign is flipped relative to the m_ column — presumably the
                            // two ratios are defined in opposite directions; confirm.
                            sw.Write("\t{0:0.00}", -Math.Log(spectra[0].GetQuantificationItem().Ratio));
                        }
                    }
                    sw.WriteLine();
                }
            }

            Console.WriteLine("Found = {0}; Missed = {1}", found, missed);

            // NOTE(review): matchFile is written but not returned — confirm whether callers expect it.
            return new string[] { };
        }
    /// <summary>
    /// Reads the test peptide file, asserts that no peptide carries protein entries yet,
    /// calls IdentifiedSpectrumUtils.FillProteinInformation with the matching proteins file
    /// and asserts that every peptide then has at least one protein entry.
    /// </summary>
    public void TestFillProteinInformation()
    {
      var spectra = new MascotPeptideTextFormat().ReadFromFile("../../../data/Test.output.xml.FDR0.01.peptides");

      // Before filling: there must be no spectrum whose peptide already lists a protein.
      var noneHasProteins = !spectra.Any(s => s.Peptide.Proteins.Count != 0);
      Assert.IsTrue(noneHasProteins);

      IdentifiedSpectrumUtils.FillProteinInformation(spectra, "../../../data/Test.output.xml.FDR0.01.peptides.proteins");

      // After filling: there must be no spectrum whose peptide is still without a protein.
      var allHaveProteins = !spectra.Any(s => s.Peptide.Proteins.Count <= 0);
      Assert.IsTrue(allHaveProteins);
    }
        /// <summary>
        /// Runs the SAP (single amino acid polymorphism / terminal variant) prediction:
        /// reads the spectral library, collects candidate MS2 items from the raw files,
        /// calls FindCandidates to fill the prediction list, keeps only the top-ranked
        /// predictions per spectrum, and writes the result.
        /// When options.MatchedFile exists, only a validation file is written to
        /// options.OutputFile; otherwise the prediction table is written to
        /// options.OutputTableFile and a FASTA file (original database plus the new
        /// variant sequences) is written to options.OutputFile.
        /// </summary>
        /// <returns>An array holding options.OutputFile and options.OutputTableFile.</returns>
        /// <exception cref="NotSupportedException">
        /// A prediction has a variant type that this method cannot turn into a sequence.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            var expRawfileMap = options.RawFiles.ToDictionary(m => Path.GetFileNameWithoutExtension(m));

            Progress.SetMessage("Reading library file ...");
            var liblist = new MS2ItemXmlFormat().ReadFromFile(options.LibraryFile);

            PreprocessingMS2ItemList(liblist);

            // Library items are looked up by charge.
            var lib = liblist.GroupBy(m => m.Charge).ToDictionary(m => m.Key, m => m.ToList());

            Progress.SetMessage("Building library sequence amino acid composition ...");
            // Per library item: the distinct, sorted characters of its peptide that have
            // an entry in the substitution delta-mass map.
            lib.ForEach(m => m.Value.ForEach(l => l.AminoacidCompsition = (from a in l.Peptide
                                                                           where options.SubstitutionDeltaMassMap.ContainsKey(a)
                                                                           select a).Distinct().OrderBy(k => k).ToArray()));

            // Experimental name -> first-scan numbers already referenced by the library.
            var expScanMap = (from p in liblist
                              from sq in p.FileScans
                              select sq).ToList().GroupBy(m => m.Experimental).ToDictionary(m => m.Key, m => new HashSet <int>(from l in m select l.FirstScan));

            if (File.Exists(options.PeptidesFile))
            {
                Progress.SetMessage("Reading peptides file used for excluding scan ...");
                var peptides = new MascotPeptideTextFormat().ReadFromFile(options.PeptidesFile);
                foreach (var pep in peptides)
                {
                    HashSet <int> scans;
                    if (!expScanMap.TryGetValue(pep.Query.FileScan.Experimental, out scans))
                    {
                        scans = new HashSet <int>();
                        expScanMap[pep.Query.FileScan.Experimental] = scans;
                    }
                    scans.Add(pep.Query.FileScan.FirstScan);
                }
            }

            Progress.SetMessage("Reading MS2/MS3 data ...");
            var result = GetCandidateMs2ItemList(expRawfileMap, expScanMap);

            PreprocessingMS2ItemList(result);

            //new MS2ItemXmlFormat().WriteToFile(options.OutputFile + ".xml", result);

            Progress.SetMessage("Finding SAP ...");
            List <SapPredicted> predicted = new List <SapPredicted>();

            var minDeltaMass = options.SubstitutionDeltaMassMap.Values.Min(l => l.Min(k => k.DeltaMass));
            var maxDeltaMass = options.SubstitutionDeltaMassMap.Values.Max(l => l.Max(k => k.DeltaMass));

            Progress.SetRange(0, result.Count);
            Progress.Begin();

            FindCandidates(lib, result, predicted, minDeltaMass, maxDeltaMass);

            KeepBestSapPredictions(predicted);

            if (File.Exists(options.MatchedFile))
            {
                new SapPredictedValidationWriter(options.MatchedFile).WriteToFile(options.OutputFile, predicted);
            }
            else
            {
                new SapPredictedWriter().WriteToFile(options.OutputTableFile, predicted);
                WriteSapDatabase(predicted);
            }

            Progress.End();

            // NOTE(review): OutputTableFile is returned even when the MatchedFile branch ran,
            // which does not visibly write it here - confirm callers tolerate that.
            return(new string[] { options.OutputFile, options.OutputTableFile });
        }

        /// <summary>
        /// Filters <paramref name="predicted"/> in place. Predictions are grouped by
        /// m.Ms2.GetFileScans(); inside each group they are grouped again by library item
        /// and those sub-groups are sorted with CompareSapPrecitedList. If the first
        /// sub-group contains an entry flagged IsExpect, only that entry is kept; otherwise
        /// the first sub-group and every following sub-group that compares equal to it are kept.
        /// </summary>
        private void KeepBestSapPredictions(List <SapPredicted> predicted)
        {
            var groups = predicted.ToGroupDictionary(m => m.Ms2.GetFileScans());

            predicted.Clear();
            foreach (var g in groups.Values)
            {
                var ranked = g.ToGroupDictionary(m => m.LibMs2).Values.ToList();
                ranked.Sort((m1, m2) => CompareSapPrecitedList(m1, m2));

                var best   = ranked[0];
                var expect = best.FirstOrDefault(m => m.IsExpect);
                if (expect != null)
                {
                    predicted.Add(expect);
                    continue;
                }

                predicted.AddRange(best);
                // The list is sorted, so ties with the best sub-group are contiguous.
                for (int i = 1; i < ranked.Count && CompareSapPrecitedList(best, ranked[i]) == 0; i++)
                {
                    predicted.AddRange(ranked[i]);
                }
            }
        }

        /// <summary>
        /// Builds the variant sequences for <paramref name="predicted"/>, drops duplicates and
        /// those already contained in a sequence of options.DatabaseFastaFile, and writes the
        /// database followed by the remaining variants to options.OutputFile in FASTA format.
        /// </summary>
        private void WriteSapDatabase(List <SapPredicted> predicted)
        {
            Progress.SetMessage("Generating SAP sequence ...");
            var predictedSeq = BuildSapSequences(predicted);

            // Keep the first entry for each distinct sequence string.
            predictedSeq = (from g in predictedSeq.GroupBy(m => m.SeqString)
                            select g.First()).ToList();

            Progress.SetMessage("Reading database {0} ...", options.DatabaseFastaFile);
            var databases = SequenceUtils.Read(options.DatabaseFastaFile);

            Progress.SetMessage("Removing variant sequences which are already existed in database ...");
            // A variant that is a substring of any database sequence is not new.
            predictedSeq.RemoveAll(m => databases.Any(db => db.SeqString.Contains(m.SeqString)));
            databases.AddRange(predictedSeq);

            Progress.SetMessage("Writing SAP sequence and original database to {0} ...", options.OutputFile);

            SequenceUtils.Write(new FastaFormat(), options.OutputFile, databases);
        }

        /// <summary>
        /// Creates one Sequence per predicted variant. For a single amino acid polymorphism,
        /// every position of the pure library sequence that equals the first character of the
        /// source is replaced by each target in turn; for the other variant types the target
        /// strings themselves are the sequences.
        /// </summary>
        /// <exception cref="NotSupportedException">A variant type is not handled.</exception>
        private static List <Sequence> BuildSapSequences(List <SapPredicted> predicted)
        {
            var sequences = new List <Sequence>();
            foreach (var predict in predicted)
            {
                var seq = PeptideUtils.GetPureSequence(predict.LibMs2.Peptide);
                if (predict.Target.TargetType == VariantType.SingleAminoacidPolymorphism)
                {
                    for (int i = 0; i < seq.Length; i++)
                    {
                        if (seq[i] != predict.Target.Source[0])
                        {
                            continue;
                        }

                        foreach (var t in predict.Target.Target)
                        {
                            // Substring(0, 0) is empty, so position 0 needs no special case.
                            var targetSeq = seq.Substring(0, i) + t + seq.Substring(i + 1);
                            var reference = string.Format("sp|SAP_{0}_{1}|{2}_{3}_{4}_{5}", targetSeq, predict.Target.TargetType, seq, predict.Target.Source, i + 1, t);
                            sequences.Add(new Sequence(reference, targetSeq));
                        }
                    }
                }
                else
                {
                    foreach (var tseq in predict.Target.Target)
                    {
                        var reference = BuildTerminalVariantReference(predict, seq, tseq);
                        sequences.Add(new Sequence(reference, tseq));
                    }
                }
            }

            return sequences;
        }

        /// <summary>
        /// Formats the FASTA reference of a terminal loss/extension variant. The trailing
        /// field is the part of the longer sequence that is missing from the shorter one.
        /// </summary>
        /// <param name="predict">Prediction whose Target.TargetType selects the format.</param>
        /// <param name="seq">Pure sequence of the library peptide.</param>
        /// <param name="tseq">Variant sequence.</param>
        /// <exception cref="NotSupportedException">The variant type is not a terminal loss or extension.</exception>
        private static string BuildTerminalVariantReference(SapPredicted predict, string seq, string tseq)
        {
            if (predict.Target.TargetType == VariantType.NTerminalLoss)
            {
                return string.Format("sp|SAP_{0}_{1}|{2}_loss_{3}", tseq, predict.Target.TargetType, seq, seq.Substring(0, seq.Length - tseq.Length));
            }

            if (predict.Target.TargetType == VariantType.CTerminalLoss)
            {
                return string.Format("sp|SAP_{0}_{1}|{2}_loss_{3}", tseq, predict.Target.TargetType, seq, seq.Substring(tseq.Length));
            }

            if (predict.Target.TargetType == VariantType.NTerminalExtension)
            {
                return string.Format("sp|SAP_{0}_{1}|{2}_ext_{3}", tseq, predict.Target.TargetType, seq, tseq.Substring(0, tseq.Length - seq.Length));
            }

            if (predict.Target.TargetType == VariantType.CTerminalExtension)
            {
                return string.Format("sp|SAP_{0}_{1}|{2}_ext_{3}", tseq, predict.Target.TargetType, seq, tseq.Substring(seq.Length));
            }

            throw new NotSupportedException("I don't know how to deal with " + predict.Target.TargetType.ToString());
        }
Exemplo n.º 33
0
 /// <summary>
 /// Creates a merger over the given source files and sets up a
 /// MascotPeptideTextFormat instance as its peptide format.
 /// </summary>
 /// <param name="sourceFiles">Map from a string key to a list of file names; stored as passed in.</param>
 public MaxQuantPeptidesMerger(Dictionary <string, List <string> > sourceFiles)
 {
     this.sourceFiles = sourceFiles;
     format           = new MascotPeptideTextFormat();
 }