Exemplo n.º 1
0
        /// <summary>
        /// Reads the peptides in <paramref name="fileName"/>, removes those whose QValue is at or above
        /// <c>fdr</c>, rounds each TheoreticalMinusExperimentalMass and removes the entries whose rounded
        /// value is zero. The remaining peptides are written to a ".txt" file and a table of
        /// (negated rounded mass difference, peptide count) is written to a ".groups" file.
        /// </summary>
        /// <param name="fileName">Peptide file readable by <c>MascotPeptideTextFormat</c>.</param>
        /// <returns>The filtered peptide file name followed by the groups file name.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var peptideFormat = new MascotPeptideTextFormat();
            var spectra = peptideFormat.ReadFromFile(fileName);

            // Discard identifications that do not pass the FDR threshold.
            spectra.RemoveAll(candidate => candidate.QValue >= fdr);

            // Round every mass difference, then drop the ones that round to zero.
            foreach (var spectrum in spectra)
            {
                spectrum.TheoreticalMinusExperimentalMass = Math.Round(spectrum.TheoreticalMinusExperimentalMass);
            }
            spectra.RemoveAll(candidate => candidate.TheoreticalMinusExperimentalMass == 0.0);

            var peptideFile = MyConvert.Format("{0}.fdr{1:0.000}.txt", fileName, fdr);
            peptideFormat.WriteToFile(peptideFile, spectra);

            // Largest groups first.
            var byMassDifference = spectra.GroupBy(item => item.TheoreticalMinusExperimentalMass).ToList();
            byMassDifference.Sort((left, right) => right.Count().CompareTo(left.Count()));

            var groupFile = MyConvert.Format("{0}.fdr{1:0.000}.groups", fileName, fdr);
            using (StreamWriter writer = new StreamWriter(groupFile))
            {
                foreach (var massGroup in byMassDifference)
                {
                    // The key is written with its sign flipped.
                    writer.WriteLine("{0:0}\t{1}", -massGroup.Key, massGroup.Count());
                }
            }

            return new string[] { peptideFile, groupFile };
        }
        /*
         * public IPeptideMassCalculator GetPeptideMassCalculator(MascotModificationItem dynamicModification)
         * {
         * bool isMono = true;
         *
         * var aas = new Aminoacids();
         * staticModification.ForEach(m => aas[aas].ResetMass(aas[m].MonoMass + staticModifications[aa], aas[aa].AverageMass + staticModifications[aa]);
         * }
         *
         * var diff = new[] { '*', '#', '@', '^', '~', '$' };
         * int i = 0;
         * foreach (double mod in Diff_search_options.Values)
         * {
         *  aas[diff[i++]].ResetMass(mod, mod);
         * }
         *
         * double nterm = isMono ? Atom.H.MonoMass : Atom.H.AverageMass;
         * double cterm = isMono ? Atom.H.MonoMass + Atom.O.MonoMass : Atom.H.AverageMass + Atom.O.AverageMass;
         *
         * if (this.term_diff_search_options.First != 0.0 || this.term_diff_search_options.Second != 0.0)
         * {
         *  throw new Exception(
         *    "Term dynamic modification has not been implemented into this function, call author to fix it.");
         * }
         *
         * IPeptideMassCalculator result;
         * if (isMono)
         * {
         *  result = new MonoisotopicPeptideMassCalculator(aas, nterm, cterm);
         * }
         * else
         * {
         *  result = new AveragePeptideMassCalculator(aas, nterm, cterm);
         * }
         *
         * return result;
         * }
         */
        /// <summary>
        /// Parses <paramref name="fileName"/> with <c>MascotPeptideTextFormat</c>. This implementation
        /// produces no output files.
        /// </summary>
        /// <param name="fileName">Peptide file to read.</param>
        /// <returns>An empty collection (never <c>null</c>), because no result file is written.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var format = new MascotPeptideTextFormat();

            // The parsed spectra are not used; the call is kept so unreadable input still fails here.
            format.ReadFromFile(fileName);

            // Return an empty collection instead of null so callers can enumerate the result safely.
            return(new string[0]);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reads the spectra in <paramref name="fileName"/>, groups them (by peptide when
        /// <c>isSiteLevel</c> is set, by unique peptide otherwise) and rewrites the description of the
        /// first entry of every group using the references read from <c>fastaFile</c>.
        /// </summary>
        /// <param name="fileName">Peptide file readable by <c>MascotPeptideTextFormat</c>.</param>
        /// <returns>The grouped result with rewritten descriptions.</returns>
        protected override IIdentifiedResult GetIdentifiedResult(string fileName)
        {
            format = new MascotPeptideTextFormat();
            var identified = format.ReadFromFile(fileName);

            IIdentifiedResult grouped;
            if (!isSiteLevel)
            {
                grouped = IdentifiedSpectrumUtils.BuildGroupByUniquePeptide(identified);
            }
            else
            {
                grouped = IdentifiedSpectrumUtils.BuildGroupByPeptide(identified);
            }

            var referenceByAccess = SequenceUtils.ReadAccessNumberReferenceMap(new FastaFormat(), this.fastaFile, this.parser);

            foreach (var proteinGroup in grouped)
            {
                // The description holds '/'-separated protein names; each one is parsed to a key,
                // looked up in the reference map, and the results are joined with " ! ".
                var names = proteinGroup[0].Description.Split('/');
                proteinGroup[0].Description = names
                                              .Select(name => referenceByAccess[parser.GetValue(name)])
                                              .ToList()
                                              .Merge(" ! ");
            }

            return grouped;
        }
        /// <summary>
        /// Keeps only the peptides that have at least one protein and whose proteins all match
        /// <c>regex</c>, and writes them next to the input with the ".snp.peptides" extension.
        /// </summary>
        /// <param name="fileName">Peptide file readable by <c>MascotPeptideTextFormat</c>.</param>
        /// <returns>A single-element array holding the output file name.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var peptideFormat = new MascotPeptideTextFormat();
            var allPeptides = peptideFormat.ReadFromFile(fileName);

            var snpOnly = allPeptides.FindAll(spectrum =>
            {
                var matchedAny = false;
                foreach (var protein in spectrum.Proteins)
                {
                    // One non-matching protein is enough to reject the peptide.
                    if (!regex.Match(protein).Success)
                    {
                        return false;
                    }
                    matchedAny = true;
                }

                // A peptide without any protein is rejected as well.
                return matchedAny;
            });

            var outputFile = FileUtils.ChangeExtension(fileName, ".snp.peptides");
            peptideFormat.WriteToFile(outputFile, snpOnly);

            return new string[] { outputFile };
        }
Exemplo n.º 5
0
        /// <summary>
        /// Removes every peptide that has at least one protein matching <c>decoyReg</c> and writes the
        /// remaining peptides next to the input with the ".target.peptides" extension.
        /// </summary>
        /// <param name="fileName">Peptide file readable by <c>MascotPeptideTextFormat</c>.</param>
        /// <returns>A single-element array holding the output file name.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            var peptideFormat = new MascotPeptideTextFormat();
            var spectra = peptideFormat.ReadFromFile(fileName);

            spectra.RemoveAll(spectrum =>
            {
                foreach (var protein in spectrum.Proteins)
                {
                    if (decoyReg.Match(protein).Success)
                    {
                        return true;
                    }
                }
                return false;
            });

            var outputFile = FileUtils.ChangeExtension(fileName, ".target.peptides");
            peptideFormat.WriteToFile(outputFile, spectra);

            return new string[] { outputFile };
        }
        /// <summary>
        /// Writes <c>options.OutputFile</c>: the content of <c>options.DatabaseFile</c> followed by one
        /// FASTA entry for every identified pure sequence, insertion position and visible amino acid
        /// (except 'I'). When <c>options.GenerateReversedPeptide</c> is set, each inserted sequence is
        /// followed by its reversed counterpart named "REVERSED_n".
        /// </summary>
        /// <returns>A single-element array holding <c>options.OutputFile</c>.</returns>
        public override IEnumerable <string> Process()
        {
            var peptideFormat = new MascotPeptideTextFormat();

            Progress.SetMessage("reading peptide-spectra-matches from " + options.PeptideFile + " ...");
            var peptideBySequence = new Dictionary <string, IIdentifiedPeptide>();
            foreach (var spectrum in peptideFormat.ReadFromFile(options.PeptideFile))
            {
                // Later spectra with the same pure sequence overwrite earlier ones.
                peptideBySequence[spectrum.Peptide.PureSequence] = spectrum.Peptide;
            }

            // Every visible amino acid except 'I', concatenated into one value.
            var insertable = new Aminoacids().GetVisibleAminoacids()
                                             .Where(c => c != 'I')
                                             .Select(c => c.ToString())
                                             .Merge("");

            var fasta = new FastaFormat();

            Progress.SetMessage("inserting amino acid ...");
            using (var writer = new StreamWriter(options.OutputFile))
            {
                // The original database goes first.
                writer.WriteLine(File.ReadAllText(options.DatabaseFile));

                var orderedSequences = peptideBySequence.Keys.OrderBy(key => key).ToArray();
                var reversedIndex = 1000000;

                foreach (var sequence in orderedSequences)
                {
                    // Positions 0 .. Length-1: the residue is inserted before each existing one.
                    for (var position = 0; position < sequence.Length; position++)
                    {
                        for (var k = 0; k < insertable.Length; k++)
                        {
                            var inserted = sequence.Insert(position, insertable[k].ToString());
                            var reference = string.Format("INS_{0}_{1}{2} Insertion of {3}", sequence, position, insertable[k], peptideBySequence[sequence].Proteins.Merge("/"));
                            fasta.WriteSequence(writer, new Sequence(reference, inserted));

                            if (!options.GenerateReversedPeptide)
                            {
                                continue;
                            }

                            var reversed = SequenceUtils.GetReversedSequence(inserted);
                            var reversedReference = string.Format("REVERSED_{0}", reversedIndex++);
                            fasta.WriteSequence(writer, new Sequence(reversedReference, reversed));
                        }
                    }
                }
            }

            return new[] { options.OutputFile };
        }
Exemplo n.º 7
0
        /// <summary>
        /// Reads the peptides in <paramref name="fileName"/>, reads the iTRAQ result stored in
        /// <c>rawFileName</c>, and matches the two through <c>ITraqItemUtils.MatchPeptideWithItraq</c>.
        /// </summary>
        /// <param name="fileName">Peptide file read with the <c>format</c> field.</param>
        /// <returns>The peptides read from <paramref name="fileName"/> after matching.</returns>
        protected List <IIdentifiedSpectrum> GetSpectra(string fileName)
        {
            Progress.SetMessage("Reading peptides ...");
            List <IIdentifiedSpectrum> peptides = format.ReadFromFile(fileName);

            Progress.SetMessage("Reading itraq ...");
            var itraqFormat = ITraqResultFileFormatFactory.GetXmlFormat();
            IsobaricResult isobaric = itraqFormat.ReadFromFile(rawFileName);

            Progress.SetMessage("Matching peptide and itraq ...");
            ITraqItemUtils.MatchPeptideWithItraq(isobaric, peptides);

            return peptides;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Annotates each peptide in <paramref name="fileName"/> with the name of the first sequence
        /// from <c>database</c> that contains its "PureSequence" annotation, then writes all peptides
        /// with an extra "MutDB" column to <paramref name="fileName"/> + ".mutdb".
        /// </summary>
        /// <param name="fileName">Peptide file readable by <c>MascotPeptideTextFormat</c>.</param>
        /// <returns>A single-element array holding the output file name.</returns>
        public override IEnumerable <string> Process(string fileName)
        {
            Progress.SetMessage("Reading sequences from " + database + " ...");
            var seqs = SequenceUtils.Read(new FastaFormat(), database);

            // Only entries whose name does not start with "rev_" and contains "|#" are searched.
            seqs.RemoveAll(m => m.Name.StartsWith("rev_") || !m.Name.Contains("|#"));

            var format = new MascotPeptideTextFormat();

            Progress.SetMessage("Procesing peptides from " + Path.GetFileName(fileName) + " ...");
            var peptides = format.ReadFromFile(fileName);

            Progress.SetRange(0, peptides.Count);
            foreach (var peptide in peptides)
            {
                Progress.Increment(1);

                var pureSeq = peptide.Annotations["PureSequence"] as string;

                // A null value would make String.Contains throw, and an empty one is "contained" in
                // every sequence, which would tag the peptide with the first database entry. Such
                // peptides are left without a MutDB annotation.
                if (string.IsNullOrEmpty(pureSeq))
                {
                    continue;
                }

                foreach (var seq in seqs)
                {
                    if (seq.SeqString.Contains(pureSeq))
                    {
                        // First hit wins.
                        peptide.Annotations["MutDB"] = seq.Name;
                        break;
                    }
                }
            }

            var result = fileName + ".mutdb";

            using (StreamWriter sw = new StreamWriter(result))
            {
                sw.WriteLine(format.PeptideFormat.GetHeader() + "\tMutDB");
                foreach (var peptide in peptides)
                {
                    sw.Write(format.PeptideFormat.GetString(peptide));

                    // Peptides without a hit get an empty MutDB column so every row has the same width.
                    if (peptide.Annotations.ContainsKey("MutDB"))
                    {
                        sw.WriteLine("\t" + peptide.Annotations["MutDB"]);
                    }
                    else
                    {
                        sw.WriteLine("\t");
                    }
                }
            }

            return(new string[] { result });
        }
Exemplo n.º 9
0
        /// <summary>
        /// Reads every file listed in <c>sourceFiles</c>, removes the spectra whose first MaxQuant item
        /// has an empty Ratio, and tags the remaining spectra with the key their file is listed under.
        /// </summary>
        /// <param name="targetFilename">Name returned to the caller as the result file.</param>
        /// <returns>A single-element array holding <paramref name="targetFilename"/>.</returns>
        public override IEnumerable <string> Process(string targetFilename)
        {
            foreach (var tag in sourceFiles.Keys)
            {
                foreach (var sourceFile in sourceFiles[tag])
                {
                    Progress.SetMessage("Processing " + sourceFile + " ...");

                    var loaded = format.ReadFromFile(sourceFile);
                    loaded.RemoveAll(candidate => candidate.GetMaxQuantItemList()[0].Ratio.Length == 0);
                    foreach (var spectrum in loaded)
                    {
                        spectrum.Tag = tag;
                    }
                }
            }

            // NOTE(review): the loaded spectra are not kept and nothing is written to targetFilename in
            // this method; the previous writer implementation is preserved below, disabled.

            //using (StreamWriter sw = new StreamWriter(targetFilename))
            //{
            //  HashSet<string> unique = new HashSet<string> ();
            //  int totalSpectrumCount = 0;

            //  Progress.SetRange(1, sourceFiles.Length);
            //  int count = 0;
            //  LineFormat<IIdentifiedSpectrum> pepFormat = null;
            //  foreach (string sourceFile in sourceFiles)
            //  {
            //    Progress.SetMessage("Processing " + sourceFile + " ...");

            //    var spectra = format.ReadFromFile(sourceFile);

            //    totalSpectrumCount += spectra.Count;
            //    unique.UnionWith(IdentifiedSpectrumUtils.GetUniquePeptide(spectra));

            //    if(count == 0){
            //      pepFormat = format.PeptideFormat;
            //      sw.WriteLine(pepFormat.GetHeader());
            //    }

            //    spectra.ForEach(m => sw.WriteLine(pepFormat.GetString(m)));

            //    count++;
            //    Progress.SetPosition(count);
            //  }

            //  format.WriteSummary(sw, totalSpectrumCount, unique.Count);

            //  Progress.End();
            //}

            return new[] { targetFilename };
        }
Exemplo n.º 10
0
        /// <summary>
        /// Reads the spectra in <c>option.SourceFileName</c>, filters them with <c>FilterSpectrum</c>,
        /// and stores in <c>calculationItems</c> one <c>CalculationItem</c> per distinct peptide
        /// sequence, holding all peptides that share that sequence.
        /// </summary>
        protected override void ParseToCalculationItems()
        {
            List <IIdentifiedSpectrum> loaded = format.ReadFromFile(option.SourceFileName);
            IEnumerable <IIdentifiedSpectrum> accepted = FilterSpectrum(loaded);

            calculationItems = accepted
                               .Select(spectrum => spectrum.Peptide)
                               .GroupBy(peptide => peptide.Sequence)
                               .Select(bySequence => new CalculationItem()
                               {
                                   Key = bySequence.Key,
                                   Peptides = bySequence
                               })
                               .ToList();
        }
        /// <summary>
        /// Adds quantification ratios to the peptides in <paramref name="fileName"/>. For each peptide
        /// the quantification result is searched for an entry named after the peptide's pure sequence
        /// (with 'I' replaced by 'L') and for one named after its "OriginalSequence" annotation; hits
        /// are passed to <c>AddRatio</c> with the "MUL_" and "ORI_" prefixes respectively. The result is
        /// written to <paramref name="fileName"/> + ".quantification".
        /// </summary>
        /// <param name="fileName">Peptide file readable by <c>MascotPeptideTextFormat</c>.</param>
        /// <returns>A single-element array holding the output file name.</returns>
        /// <exception cref="Exception">Thrown when the quantification file contains no entries.</exception>
        public override IEnumerable <string> Process(string fileName)
        {
            Progress.SetMessage("Reading mutation file ...");
            var peptideFormat = new MascotPeptideTextFormat();
            var mutationSpectra = peptideFormat.ReadFromFile(fileName);

            var quantificationFormat = new MascotResultTextFormat
            {
                Progress = this.Progress
            };
            Progress.SetMessage("Reading quantification file ...");
            var quantified = quantificationFormat.ReadFromFile(quantificationFile);

            if (quantified.Count == 0)
            {
                throw new Exception("No quantification found!");
            }

            foreach (var spectrum in mutationSpectra)
            {
                // Mutated form: looked up with I and L treated as the same residue.
                var mutatedSequence = spectrum.Peptide.PureSequence.Replace('I', 'L');
                var mutatedHit = quantified.FirstOrDefault(g => g.Any(p => p.Name.Equals(mutatedSequence)));
                if (mutatedHit != null)
                {
                    AddRatio(spectrum, mutatedHit, "MUL_");
                }

                // Original form: taken from the annotation written alongside the peptide.
                var originalSequence = spectrum.Annotations["OriginalSequence"] as string;
                var originalHit = quantified.FirstOrDefault(g => g.Any(p => p.Name.Equals(originalSequence)));
                if (originalHit != null)
                {
                    AddRatio(spectrum, originalHit, "ORI_");
                }
            }

            peptideFormat.Initialize(mutationSpectra);

            var outputFile = fileName + ".quantification";

            Progress.SetMessage("Writing peptide quantification file ...");
            peptideFormat.WriteToFile(outputFile, mutationSpectra);

            return new string[] { outputFile };
        }
        /// <summary>
        /// Reloads the peptides from the origin file, refreshes the peptide list view, and builds a
        /// <c>ProteinChromatographProcessor</c> over the de-duplicated chromatograph entries derived
        /// from those peptides.
        /// </summary>
        /// <returns>A processor configured with the current raw file, ppm tolerance and window.</returns>
        protected override IFileProcessor GetFileProcessor()
        {
            format   = new MascotPeptideTextFormat();
            peptides = format.ReadFromFile(base.GetOriginFile());

            // On the first load only: any header column not already present in the list view is
            // recorded as an ignored key (skipped when the list view has no columns yet).
            if (bFirstLoad)
            {
                var allColumns = format.PeptideFormat.GetHeader().Split('\t').ToList();
                var lvColumns  = lvPeptides.GetColumnList().ConvertAll(m => m.Text);
                if (lvColumns.Count > 0)
                {
                    this.peptideIgnoreKeys = allColumns.Except(lvColumns).ToList();
                }

                bFirstLoad = false;
            }

            FillListViewColumns(this.lvPeptides, format.PeptideFormat.GetHeader(), this.peptideIgnoreKeys, this.peptideIgnoreKeyIndecies);

            UpdatePeptides();

            var chros = (from p in peptides
                         select SpectrumToChro(p)).ToList();

            // Remove duplicates: two entries are considered equal when sequence and charge match and
            // their m/z values differ by less than 0.0001. On a match the EARLIER entry (index j) is
            // removed, which shifts entry i down by one; the outer i-- then lands on the same entry
            // again, so it keeps being compared against the remaining earlier entries.
            for (int i = chros.Count - 1; i >= 0; i--)
            {
                for (int j = i - 1; j >= 0; j--)
                {
                    if ((chros[i].Sequence == chros[j].Sequence) && (chros[i].Charge == chros[j].Charge) && (Math.Abs(chros[i].Mz - chros[j].Mz) < 0.0001))
                    {
                        chros.RemoveAt(j);
                        break;
                    }
                }
            }

            // Detach the selection handler. NOTE(review): no matching re-attach is visible in this
            // method; presumably it happens elsewhere - confirm.
            lvPeptides.SelectedIndexChanged -= lvPeptides_SelectedIndexChanged;

            return(new ProteinChromatographProcessor(chros, new string[] { rawFile.FullName }.ToList(), new RawFileImpl(), ppmTolerance.Value, window.Value, false));
        }
        /// <summary>
        /// Reads the spectra in <paramref name="filename"/> and, for every filter in
        /// <paramref name="map"/>, appends the accepted spectra to the entry paired with that filter.
        /// Entries that hold at least one spectrum get the header and all of their spectra written to
        /// their result writer. Every entry is disposed before the method returns, even on failure.
        /// </summary>
        /// <param name="filename">Peptide file readable by <c>MascotPeptideTextFormat</c>.</param>
        /// <param name="result">Collection handed back to the caller unchanged.</param>
        /// <param name="map">Filters and the entries that collect and write their accepted spectra.</param>
        /// <returns><paramref name="result"/>.</returns>
        protected override IEnumerable <string> DoProcess(string filename, List <string> result, Dictionary <IFilter <IIdentifiedSpectrum>, SpectrumEntry> map)
        {
            try
            {
                var peptideFormat = new MascotPeptideTextFormat();
                var loaded = peptideFormat.ReadFromFile(filename);

                foreach (var pair in map)
                {
                    var accept = pair.Key;
                    var target = pair.Value;

                    foreach (IIdentifiedSpectrum candidate in loaded)
                    {
                        if (accept.Accept(candidate))
                        {
                            target.Spectra.Add(candidate);
                        }
                    }

                    // Nothing collected for this filter: do not even write a header.
                    if (target.Spectra.Count <= 0)
                    {
                        continue;
                    }

                    target.ResultWriter.WriteLine(peptideFormat.PeptideFormat.GetHeader());
                    foreach (var collected in target.Spectra)
                    {
                        target.ResultWriter.WriteLine(peptideFormat.PeptideFormat.GetString(collected));
                    }
                }

                return result;
            }
            finally
            {
                foreach (SpectrumEntry owned in map.Values)
                {
                    owned.Dispose();
                }
            }
        }
Exemplo n.º 14
0
        /// <summary>
        /// Pairs mutated peptides with candidate original peptides. Spectra from
        /// <paramref name="fileName"/> are filtered by charge and against the pNovo result, split into
        /// mutation and non-mutation spectra, and every mutated sequence is matched against the
        /// non-mutation sequences according to the "type1"/"type2"/"type3" suffix of its first
        /// protein name. Statistics for each type are written through <c>DoStatistic</c>.
        /// </summary>
        /// <param name="fileName">Peptide file readable by <c>MascotPeptideTextFormat</c>.</param>
        /// <returns>The concatenated results of the three <c>DoStatistic</c> calls.</returns>
        /// <exception cref="Exception">
        /// Thrown when the first protein name of a mutated peptide does not end with "type1", "type2" or "type3".
        /// </exception>
        public override IEnumerable <string> Process(string fileName)
        {
            var aas = new Aminoacids();

            Progress.SetMessage("reading pNovo result from " + pNovoPeptideFile + " ...");
            var pNovoSpectra = new MascotPeptideTextFormat().ReadFromFile(pNovoPeptideFile);
            // LongFileName of the scan -> set of pure sequences pNovo reported for it.
            var pNovoMap     = new Dictionary <string, HashSet <string> >();

            foreach (var pep in pNovoSpectra)
            {
                var key = pep.Query.FileScan.LongFileName;
                if (!pNovoMap.ContainsKey(key))
                {
                    pNovoMap[key] = new HashSet <string>();
                }
                pNovoMap[key].UnionWith(from p in pep.Peptides select p.PureSequence);
            }

            var format = new MascotPeptideTextFormat();

            Progress.SetMessage("reading peptide-spectra-matches from " + fileName + " ...");
            var spectra = format.ReadFromFile(fileName);

            // Filter by charge state: keep only spectra whose charge is listed in `charges`.
            spectra.RemoveAll(m => !charges.Contains(m.Charge));
            // Peptides containing an uncertain amino acid (one whose Codes array is empty) are
            // simply ignored. Iterates backwards because peptides are removed by index.
            spectra.ForEach(m =>
            {
                for (int i = m.Peptides.Count - 1; i >= 0; i--)
                {
                    if (m.Peptides[i].PureSequence.Any(n => aas[n].Codes.Length == 0))
                    {
                        m.RemovePeptideAt(i);
                    }
                }
            });
            // Spectra left without any peptide are dropped.
            spectra.RemoveAll(m => m.Peptides.Count == 0);

            Progress.SetMessage("comparing peptide-spectra-matches with pNovo result...");
            // Is the mutation consistent with what pNovo determined? Mutation spectra are removed
            // when pNovo has no entry for the scan, or when none of their peptides (with I replaced
            // by L) appears in pNovo's sequences for that scan. Non-mutation spectra are always kept.
            spectra.RemoveAll(m =>
            {
                if (!IsMutationPeptide(m))
                {
                    return(false);
                }

                var key = m.Query.FileScan.LongFileName;
                if (!pNovoMap.ContainsKey(key))
                {
                    return(true);
                }

                var set = pNovoMap[key];
                return(!m.Peptides.Any(n => set.Contains(n.PureSequence.Replace('I', 'L'))));
            });

            // Spectra for which IsMutationPeptide is true, flattened to peptides and grouped by
            // pure sequence. NOTE: mutGroup is a deferred query and is enumerated more than once below.
            var mutSpectra  = spectra.FindAll(m => IsMutationPeptide(m)).ToList();
            var mutPeptides = (from s in mutSpectra
                               from p in s.Peptides
                               select p).ToList();
            var mutGroup = mutPeptides.GroupBy(m => m.PureSequence);

            // All remaining spectra (presumably the wild-type / unmutated ones - confirm), minus
            // those with any protein name matching mutationReg.
            var fromSpectra = spectra.Except(mutSpectra).ToList();

            fromSpectra.RemoveAll(m => m.Proteins.Any(n => mutationReg.Match(n).Success));
            var fromPeptides = (from s in fromSpectra
                                from p in s.Peptides
                                select p).ToList();
            // Sequence groups indexed by sequence length; 6 and 30 are the fallback length bounds
            // used when there is no such group at all.
            var fromGroup = fromPeptides.GroupBy(m => m.PureSequence).ToGroupDictionary(n => n.Key.Length);
            var minLength = fromGroup.Count == 0 ? 6 : fromGroup.Min(m => m.Key);
            var maxLength = fromGroup.Count == 0 ? 30 : fromGroup.Max(m => m.Key);

            // One list per mutation type; each element is a matched set whose first item is the
            // mutated sequence group, followed by the candidate original sequence groups.
            var type1 = new List <List <IGrouping <string, IIdentifiedPeptide> > >();
            var type2 = new List <List <IGrouping <string, IIdentifiedPeptide> > >();
            var type3 = new List <List <IGrouping <string, IIdentifiedPeptide> > >();

            Progress.SetRange(0, mutGroup.Count());
            Progress.SetPosition(0);
            Progress.SetMessage("finding mutation-original pairs ...");

            foreach (var mut in mutGroup)
            {
                var matched = new List <IGrouping <string, IIdentifiedPeptide> >();
                matched.Add(mut);
                Progress.Increment(1);

                // The mutation type is read from the suffix of the first protein name of the first peptide.
                var protein = mut.First().Proteins[0];

                List <List <IGrouping <string, IIdentifiedPeptide> > > type;
                if (protein.EndsWith("type3"))
                {
                    type = type3;
                    // Candidates are strictly longer sequences that start with the mutated
                    // sequence minus its last residue.
                    var mutseq = mut.Key.Substring(0, mut.Key.Length - 1);
                    for (int i = mut.Key.Length + 1; i <= maxLength; i++)
                    {
                        if (fromGroup.ContainsKey(i))
                        {
                            var others = fromGroup[i];
                            foreach (var o in others)
                            {
                                if (o.Key.StartsWith(mutseq))
                                {
                                    matched.Add(o);
                                }
                            }
                        }
                    }
                }
                else if (protein.EndsWith("type2"))
                {
                    type = type2;
                    // Candidates are strictly shorter sequences which, minus their last residue,
                    // are a prefix of the mutated sequence.
                    for (int i = minLength; i < mut.Key.Length; i++)
                    {
                        if (fromGroup.ContainsKey(i))
                        {
                            var others = fromGroup[i];
                            foreach (var o in others)
                            {
                                var oseq = o.Key.Substring(0, o.Key.Length - 1);
                                if (mut.Key.StartsWith(oseq))
                                {
                                    matched.Add(o);
                                }
                            }
                        }
                    }
                }
                else if (protein.EndsWith("type1"))
                {
                    type = type1;

                    // Candidates are sequences of the same length accepted by
                    // MutationUtils.IsMutationOneIL2 under the configured ignore flags.
                    if (fromGroup.ContainsKey(mut.Key.Length))
                    {
                        var oLength = fromGroup[mut.Key.Length];
                        foreach (var o in oLength)
                        {
                            int mutationSite = -1;
                            if (MutationUtils.IsMutationOneIL2(o.Key, mut.Key, ref mutationSite, IgnoreNtermMutation, IgnoreDeamidatedMutation, IgnoreMultipleNucleotideMutation))
                            {
                                matched.Add(o);
                            }
                        }
                    }
                }
                else
                {
                    throw new Exception("There is no mutation type information at protein name: " + protein + "\nIt should be like MUL_NHLGQK_type1, MUL_NHLGQK_type2 or MUL_NHLGQK_type3");
                }

                type.Add(matched);
            }

            // Only type1 is sorted: ascending by the number of matched groups, ties broken by the
            // size of the mutated group (largest first).
            type1.Sort((m1, m2) =>
            {
                var res = m1.Count.CompareTo(m2.Count);
                if (res == 0)
                {
                    res = m2[0].Count().CompareTo(m1[0].Count());
                }
                return(res);
            });

            Progress.SetMessage("reading protein sequences ...");
            var proteins = SequenceUtils.Read(new FastaFormat(), fastaFile);

            // Index proteins by the value parsed from their name, falling back to the full name
            // when parsing fails.
            var proMap = proteins.ToDictionary(m =>
            {
                string ac;
                if (acParser.TryParse(m.Name, out ac))
                {
                    return(ac);
                }
                else
                {
                    return(m.Name);
                }
            });

            var    classification = GetClassification();
            string mutHeader      = "FileScan\tMH+\tDiff(MH+)\tCharge\tRank\tScore\tExpectValue\tModification";
            var    mutPepFormat   = new MascotPeptideTextFormat(mutHeader);

            Progress.SetMessage("writing result ...");
            var result1 = DoStatistic(fileName, aas, format, proMap, classification, mutHeader, mutPepFormat, type1, ".type1");
            var result2 = DoStatistic(fileName, aas, format, proMap, classification, mutHeader, mutPepFormat, type2, ".type2");
            var result3 = DoStatistic(fileName, aas, format, proMap, classification, mutHeader, mutPepFormat, type3, ".type3");

            return(result1.Concat(result2).Concat(result3).ToArray());
        }
Exemplo n.º 15
0
        /// <summary>
        /// Builds an MS2 library with attached MS3 spectra. Peptides from <c>options.PeptideFile</c>
        /// are grouped by experiment; for each peptide the MS3 scans that directly follow its MS2
        /// scan in the matching raw file are collected. Items are then merged by peptide and charge,
        /// written uncombined to <c>options.OutputUncombinedFile</c>, their MS3 spectra are combined,
        /// terminal loss is initialized, and the final library is written to <c>options.OutputFile</c>.
        /// </summary>
        /// <returns><c>options.OutputFile</c> followed by <c>options.OutputUncombinedFile</c>.</returns>
        /// <exception cref="Exception">
        /// Thrown when an experiment found in the peptide file has no raw file in <c>options.RawFiles</c>.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            var format         = new MascotPeptideTextFormat();
            var expPeptidesMap = format.ReadFromFile(options.PeptideFile).GroupBy(m => m.Query.FileScan.Experimental).ToDictionary(m => m.Key, m => m.ToList());
            var expRawfileMap  = options.RawFiles.ToDictionary(m => Path.GetFileNameWithoutExtension(m));

            // Validate up front so no raw file is opened when the configuration is incomplete.
            foreach (var exp in expPeptidesMap.Keys)
            {
                if (!expRawfileMap.ContainsKey(exp))
                {
                    throw new Exception(string.Format("Raw file of {0} is not assigned in RawFiles.", exp));
                }
            }

            var ms2list = new List <MS2Item>();

            foreach (var exp in expPeptidesMap.Keys)
            {
                var rawfile  = expRawfileMap[exp];
                var peptides = expPeptidesMap[exp];

                using (var reader = RawFileFactory.GetRawFileReader(rawfile, false))
                {
                    var lastScan = reader.GetLastSpectrumNumber();

                    Progress.SetRange(0, peptides.Count);
                    Progress.SetMessage("Extracting MS2/MS3 information ...");
                    int count = 0;
                    foreach (var peptide in peptides)
                    {
                        count++;
                        Progress.SetPosition(count);

                        var ms2 = new MS2Item()
                        {
                            Peptide      = peptide.Peptide.Sequence,
                            Precursor    = peptide.GetPrecursorMz(),
                            Charge       = peptide.Query.Charge,
                            Modification = peptide.Modifications,
                            FileScans    = new SequestFilename[] { peptide.Query.FileScan }.ToList(),
                            Score        = peptide.Score,
                            ExpectValue  = peptide.ExpectValue,
                            Proteins     = peptide.GetProteins("/")
                        };

                        // Walk the scans that follow the MS2 scan and stop at the first one that is
                        // not MS3. The bound is inclusive so that an MS3 scan which is the very last
                        // spectrum of the raw file is not skipped.
                        for (int ms3scan = peptide.Query.FileScan.FirstScan + 1; ms3scan <= lastScan; ms3scan++)
                        {
                            var mslevel = reader.GetMsLevel(ms3scan);
                            if (mslevel != 3)
                            {
                                break;
                            }

                            var pkl = reader.GetPeakList(ms3scan);
                            if (pkl.Count == 0)
                            {
                                // Empty MS3 scans are skipped but do not end the run of MS3 scans.
                                continue;
                            }

                            var precursor = reader.GetPrecursorPeak(ms3scan);
                            pkl.PrecursorMZ = precursor.Mz;
                            ms2.MS3Spectra.Add(new MS3Item(pkl));
                        }

                        // Only MS2 items that actually own MS3 spectra enter the library.
                        if (ms2.MS3Spectra.Count > 0)
                        {
                            ms2list.Add(ms2);
                        }
                    }
                }
            }

            Progress.SetMessage("Merging MS2 by peptide and charge ...");

            var ms2group   = ms2list.GroupBy(m => string.Format("{0}:{1}", m.Peptide, m.Charge)).ToList();
            var ms2library = new List <MS2Item>();

            foreach (var g in ms2group)
            {
                if (g.Count() < options.MinIdentifiedSpectraPerPeptide)
                {
                    continue;
                }

                // The first item of the group becomes the merged item: mean precursor, best (max)
                // score, best (min) expect value, and the file scans / MS3 spectra of all members.
                var gitem = g.First();
                gitem.CombinedCount = g.Count();
                gitem.Precursor     = g.Average(m => m.Precursor);
                gitem.Score         = g.Max(m => m.Score);
                gitem.ExpectValue   = g.Min(m => m.ExpectValue);
                gitem.FileScans     = (from gg in g from fs in gg.FileScans select fs).ToList();
                foreach (var ms2 in g.Skip(1))
                {
                    gitem.MS3Spectra.AddRange(ms2.MS3Spectra);
                }

                ms2library.Add(gitem);
            }

            // Order by peptide, then by charge.
            ms2library.Sort((m1, m2) =>
            {
                var res = m1.Peptide.CompareTo(m2.Peptide);
                if (res == 0)
                {
                    res = m1.Charge.CompareTo(m2.Charge);
                }
                return(res);
            });

            // Snapshot taken before the MS3 spectra are combined.
            new MS2ItemXmlFormat().WriteToFile(options.OutputUncombinedFile, ms2library);

            Progress.SetMessage("Combing MS3 by precursor ...");

            var builder = new BestSpectrumTopSharedPeaksBuilder(options.FragmentPPMTolerance, options.MaxFragmentPeakCount);

            ms2library.ForEach(m => m.CombineMS3Spectra(builder, options.PrecursorPPMTolerance));

            Progress.SetMessage("Initialize terminal loss ...");
            var aas = options.GetAminoacids();

            ms2library.ForEach(l => l.InitTerminalLoss(aas, options.MaxTerminalLossLength, options.MinSequenceLength));

            new MS2ItemXmlFormat().WriteToFile(options.OutputFile, ms2library);

            Progress.End();

            return(new[] { options.OutputFile, options.OutputUncombinedFile });
        }
Exemplo n.º 16
0
        /// <summary>
        /// Calculates the deuterium enrichment of every peptide in <c>options.InputFile</c>
        /// and writes a peptide-by-time-point table of median enrichment values.
        /// </summary>
        /// <remarks>
        /// Steps: (1) build chromatograph profiles/boundaries unless the boundary file already
        /// exists and <c>options.Overwrite</c> is false; (2) run the <c>DeuteriumR</c> template
        /// under the same condition; (3) copy the calculated values onto the matching spectra and
        /// write them to <c>GetPeptideDeteriumFile()</c>; (4) write the median
        /// "DeuteriumEnrichmentPercent" per peptide sequence and time point to <c>options.OutputFile</c>.
        /// </remarks>
        /// <returns>A single-element array containing <c>options.OutputFile</c>.</returns>
        public override IEnumerable <string> Process()
        {
            //Extract chromatograph information
            var chroOptions = new ChromatographProfileBuilderOptions();

            options.CopyProperties(chroOptions);
            chroOptions.InputFile  = options.InputFile;
            chroOptions.OutputFile = options.BoundaryOutputFile;
            chroOptions.DrawImage  = false;
            var builder = new ChromatographProfileBuilder(chroOptions);

            if (!File.Exists(options.BoundaryOutputFile) || options.Overwrite)
            {
                Progress.SetMessage("Finding envelope ...");
                builder.Progress = this.Progress;
                builder.Process();
            }

            //Calculate deuterium enrichment for peptide
            if (!File.Exists(options.DeuteriumOutputFile) || options.Overwrite)
            {
                Progress.SetMessage("Calculating deuterium ...");
                var deuteriumOptions = new RTemplateProcessorOptions()
                {
                    InputFile      = options.BoundaryOutputFile,
                    OutputFile     = options.DeuteriumOutputFile,
                    RTemplate      = DeuteriumR,
                    RExecute       = SystemUtils.GetRExecuteLocation(),
                    CreateNoWindow = true
                };

                // Boolean options are handed to the R template as 1/0 assignments.
                deuteriumOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));
                deuteriumOptions.Parameters.Add("excludeIsotopic0<-" + (options.ExcludeIsotopic0 ? "1" : "0"));

                new RTemplateProcessor(deuteriumOptions)
                {
                    Progress = this.Progress
                }.Process();
            }

            // Calculated rows keyed by the value of their "ChroFile" annotation.
            var deuteriumMap = new AnnotationFormat().ReadFromFile(options.DeuteriumOutputFile).ToDictionary(m => m.Annotations["ChroFile"].ToString());

            //Read old spectra information
            var format  = new MascotPeptideTextFormat();
            var spectra = format.ReadFromFile(options.InputFile);

            // Drop annotations that may be left over from the input file before new values are assigned.
            foreach (var spec in spectra)
            {
                spec.Annotations.Remove("RetentionTime");
                spec.Annotations.Remove("TheoreticalDeuterium");
                spec.Annotations.Remove("ObservedDeuterium");
                spec.Annotations.Remove("NumDeuteriumIncorporated");
                spec.Annotations.Remove("NumExchangableHydrogen");
                spec.Annotations.Remove("DeuteriumEnrichmentPercent");
            }

            var calcSpectra = new List <IIdentifiedSpectrum>();
            var aas         = new Aminoacids();

            foreach (var pep in spectra)
            {
                // The map key is the extension-less name of the peptide's chromatograph target file.
                var filename = Path.GetFileNameWithoutExtension(builder.GetTargetFile(pep));
                if (!deuteriumMap.ContainsKey(filename))
                {
                    // No calculation result for this peptide: it is left out of the output.
                    continue;
                }

                // Look the calculated row up once instead of once per copied annotation.
                var calculated = deuteriumMap[filename];

                var numExchangeableHydrogens = aas.ExchangableHAtom(pep.Peptide.PureSequence);

                // ToString() instead of "as string": the cast yields null for any value that is
                // not a boxed string, which made double.Parse fail with ArgumentNullException.
                var numDeuteriumIncorporated = double.Parse(calculated.Annotations["NumDeuteriumIncorporated"].ToString());

                pep.Annotations["PeakRetentionTime"]          = calculated.Annotations["RetentionTime"];
                pep.Annotations["TheoreticalDeuterium"]       = calculated.Annotations["TheoreticalDeuterium"];
                pep.Annotations["ObservedDeuterium"]          = calculated.Annotations["ObservedDeuterium"];
                pep.Annotations["NumDeuteriumIncorporated"]   = calculated.Annotations["NumDeuteriumIncorporated"];
                pep.Annotations["NumExchangableHydrogen"]     = numExchangeableHydrogens;
                // NOTE(review): when the exchangeable hydrogen count is 0 this stores Infinity/NaN
                // rather than failing - confirm that is acceptable downstream.
                pep.Annotations["DeuteriumEnrichmentPercent"] = numDeuteriumIncorporated / numExchangeableHydrogens;

                calcSpectra.Add(pep);
            }

            // Append the new annotation columns to the header line and write the annotated peptides.
            format.PeptideFormat.Headers = format.PeptideFormat.Headers + "\tPeakRetentionTime\tTheoreticalDeuterium\tObservedDeuterium\tNumDeuteriumIncorporated\tNumExchangableHydrogen\tDeuteriumEnrichmentPercent";
            format.NotExportSummary      = true;
            format.WriteToFile(GetPeptideDeteriumFile(), calcSpectra);

            var specGroup = calcSpectra.GroupBy(m => m.Peptide.PureSequence).OrderBy(l => l.Key).ToList();

            // Distinct time points, ascending; they become the columns of the output table.
            var times = options.ExperimentalTimeMap.Values.Distinct().OrderBy(m => m).ToArray();

            using (var sw = new StreamWriter(options.OutputFile))
            {
                sw.WriteLine("Peptide\t{0}", (from t in times select t.ToString()).Merge("\t"));

                foreach (var peptide in specGroup)
                {
                    // Spectra of this peptide sequence grouped by the time point of their experiment.
                    var curSpectra = peptide.GroupBy(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]).ToDictionary(l => l.Key, l => l.ToArray());
                    if (options.PeptideInAllTimePointOnly && times.Any(l => !curSpectra.ContainsKey(l)))
                    {
                        continue;
                    }

                    sw.Write(peptide.Key);

                    foreach (var time in times)
                    {
                        if (curSpectra.ContainsKey(time))
                        {
                            var deps      = (from spec in curSpectra[time] select double.Parse(spec.Annotations["DeuteriumEnrichmentPercent"].ToString())).ToArray();
                            var depMedian = Statistics.Median(deps);
                            sw.Write("\t{0:0.######}", depMedian);
                        }
                        else
                        {
                            // No spectrum of this peptide at this time point.
                            sw.Write("\tNA");
                        }
                    }
                    sw.WriteLine();
                }
            }

            Progress.SetMessage("Peptide deuterium enrichment calculation finished ...");

            return(new string[] { options.OutputFile });
        }
Exemplo n.º 17
0
        /// <summary>
        /// Calculates deuterium enrichment at the protein level: delegates the peptide-level
        /// calculation to <c>PeptideDeuteriumCalculator</c>, copies the resulting annotations back
        /// onto the protein groups, writes a protein-by-time-point table of median enrichment and
        /// finally runs the <c>RatioR</c> template on that table.
        /// </summary>
        /// <returns>A single-element array containing <c>options.OutputFile</c>.</returns>
        public override IEnumerable <string> Process()
        {
            // Prepare the unique peptide file from the unambiguous spectra of the input.
            var resultFormat  = new MascotResultTextFormat();
            var proteinGroups = resultFormat.ReadFromFile(options.InputFile);

            proteinGroups.RemoveAmbiguousSpectra();

            var allSpectra = proteinGroups.GetSpectra();

            // Annotations removed up front so that only freshly calculated values remain afterwards.
            var staleKeys = new[]
            {
                "TheoreticalDeuterium",
                "ObservedDeuterium",
                "NumDeuteriumIncorporated",
                "NumExchangableHydrogen",
                "DeuteriumEnrichmentPercent"
            };

            foreach (var spectrum in allSpectra)
            {
                foreach (var staleKey in staleKeys)
                {
                    spectrum.Annotations.Remove(staleKey);
                }
            }

            var uniquePeptideFile = Path.ChangeExtension(options.InputFile, ".unique.peptides");
            var peptideFormat     = new MascotPeptideTextFormat(resultFormat.PeptideFormat.Headers);

            peptideFormat.WriteToFile(uniquePeptideFile, allSpectra);

            // Calculate deuterium enrichment at peptide level.
            var peptideOptions = new DeuteriumCalculatorOptions();

            options.CopyProperties(peptideOptions);
            peptideOptions.InputFile  = uniquePeptideFile;
            peptideOptions.OutputFile = uniquePeptideFile + ".tsv";

            var peptideCalculator = new PeptideDeuteriumCalculator(peptideOptions)
            {
                Progress = this.Progress
            };

            peptideCalculator.Process();

            // Copy annotations from each calculated peptide to the original spectrum with the same long file name.
            var calculatedSpectra = peptideFormat.ReadFromFile(peptideCalculator.GetPeptideDeteriumFile());
            var originalByName    = allSpectra.ToDictionary(m => m.Query.FileScan.LongFileName);

            foreach (var calculated in calculatedSpectra)
            {
                var original = originalByName[calculated.Query.FileScan.LongFileName];
                foreach (var annotation in calculated.Annotations)
                {
                    original.Annotations[annotation.Key] = annotation.Value;
                }
            }

            // Remove peptides without a calculation result, then groups left without peptides.
            // Walking backwards keeps the remaining indices valid while removing.
            for (int index = proteinGroups.Count - 1; index >= 0; index--)
            {
                var proteinGroup = proteinGroups[index];

                foreach (var member in proteinGroup)
                {
                    member.Peptides.RemoveAll(l => !l.Spectrum.Annotations.ContainsKey("DeuteriumEnrichmentPercent"));
                }

                if (proteinGroup[0].Peptides.Count == 0)
                {
                    proteinGroups.RemoveAt(index);
                }
            }

            resultFormat.PeptideFormat = peptideFormat.PeptideFormat;

            var individualFile = Path.ChangeExtension(options.OutputFile, ".individual.tsv");

            resultFormat.WriteToFile(individualFile, proteinGroups);

            // Distinct time points, ascending; they become the columns of the time table.
            var timePoints = options.ExperimentalTimeMap.Values.Distinct().OrderBy(m => m).ToArray();
            var timeFile   = Path.ChangeExtension(options.OutputFile, ".times.tsv");

            using (var writer = new StreamWriter(timeFile))
            {
                writer.WriteLine("Protein\t{0}", timePoints.Select(t => t.ToString()).Merge("\t"));

                foreach (var proteinGroup in proteinGroups)
                {
                    var groupSpectra = proteinGroup[0].GetSpectra();

                    if (options.PeptideInAllTimePointOnly)
                    {
                        // Keep only the spectra of peptide sequences that were observed at every time point.
                        var bySequence = groupSpectra.ToGroupDictionary(l => l.Peptide.PureSequence);
                        groupSpectra.Clear();

                        foreach (var sequenceSpectra in bySequence.Values)
                        {
                            var byTime = sequenceSpectra.ToGroupDictionary(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]);
                            bool missingTimePoint = timePoints.Any(t => !byTime.ContainsKey(t));
                            if (!missingTimePoint)
                            {
                                groupSpectra.AddRange(sequenceSpectra);
                            }
                        }
                    }

                    if (groupSpectra.Count == 0)
                    {
                        continue;
                    }

                    // Row label: the names of all proteins in the group joined by "/".
                    writer.Write(proteinGroup.Select(p => p.Name).Merge("/"));

                    var spectraByTime = groupSpectra.ToGroupDictionary(m => options.ExperimentalTimeMap[m.Query.FileScan.Experimental]);

                    foreach (var timePoint in timePoints)
                    {
                        if (!spectraByTime.ContainsKey(timePoint))
                        {
                            writer.Write("\tNA");
                            continue;
                        }

                        var enrichments = spectraByTime[timePoint]
                                          .Select(spec => double.Parse(spec.Annotations["DeuteriumEnrichmentPercent"].ToString()))
                                          .ToArray();
                        writer.Write("\t{0:0.######}", Statistics.Median(enrichments));
                    }

                    writer.WriteLine();
                }
            }

            Progress.SetMessage("Calculating ratio consistant ...");

            var ratioOptions = new RTemplateProcessorOptions()
            {
                InputFile      = timeFile,
                OutputFile     = options.OutputFile,
                RTemplate      = RatioR,
                RExecute       = SystemUtils.GetRExecuteLocation(),
                CreateNoWindow = true
            };

            var ratioProcessor = new RTemplateProcessor(ratioOptions)
            {
                Progress = this.Progress
            };
            ratioProcessor.Process();

            Progress.SetMessage("Finished ...");

            return(new string[] { options.OutputFile });
        }
        /// <summary>
        /// Scans every file in <c>sourceFiles</c> and copies the sections whose scan matches a
        /// peptide from <paramref name="peptideFile"/> into the result file chosen by
        /// <c>GetResultFilename</c>. For each result file the matched peptides are also written to
        /// a companion ".peptides" file.
        /// </summary>
        /// <param name="peptideFile">Peptide file read with <c>MascotPeptideTextFormat</c>.</param>
        /// <returns>
        /// The result files that received at least one section, followed by
        /// <c>peptideFile + ".missed"</c> when some peptides were never matched.
        /// </returns>
        public override IEnumerable <string> Process(string peptideFile)
        {
            Progress.SetMessage("Loading peptide file {0}...", peptideFile);

            var peptideFormat = new MascotPeptideTextFormat();

            // Peptides still waiting for their section, keyed by scan; an entry is removed once matched.
            var pendingByScan = peptideFormat.ReadFromFile(peptideFile).ToDictionary(p => GetScan(p.Query.FileScan));

            // Peptides matched so far, grouped by the result file their section was written to.
            var matchedByOutput = new Dictionary <string, List <IIdentifiedSpectrum> >();

            var silacPattern = new Regex(@"\.((?:iso|sil\d))_\d+.msm");

            // One entry per result file name; the writer stays null until the first section is written,
            // so no empty result files are created.
            var writers = new Dictionary <string, StreamWriter>();

            try
            {
                int fileNumber = 0;
                foreach (var sourceFile in sourceFiles)
                {
                    fileNumber++;

                    string outputName = GetResultFilename(silacPattern, sourceFile, peptideFile);
                    if (!writers.ContainsKey(outputName))
                    {
                        writers[outputName] = null;
                    }

                    Progress.SetMessage("Parsing {0}/{1} : {2} ...", fileNumber, sourceFiles.Length, sourceFile);
                    using (var input = new StreamReader(sourceFile))
                    {
                        Progress.SetRange(0, input.BaseStream.Length);
                        var sections = new MascotGenericFormatSectionReader(input);

                        // Stop early once every peptide has been matched.
                        while (sections.HasNext() && pendingByScan.Count > 0)
                        {
                            if (Progress.IsCancellationPending())
                            {
                                throw new UserTerminatedException();
                            }

                            string title = sections.GetNextTitle();
                            var    scan  = GetScan(parser.GetValue(title));

                            if (!pendingByScan.ContainsKey(scan))
                            {
                                sections.SkipNext();
                                Progress.SetPosition(input.BaseStream.Position);
                                continue;
                            }

                            var matchedSpectrum = pendingByScan[scan];
                            var sectionLines    = sections.Next();

                            StreamWriter writer = writers[outputName];
                            if (writer == null)
                            {
                                writer = new StreamWriter(outputName);
                                writers[outputName] = writer;

                                matchedByOutput[outputName] = new List <IIdentifiedSpectrum>();
                            }

                            foreach (var line in sectionLines)
                            {
                                writer.WriteLine(line);
                            }
                            matchedByOutput[outputName].Add(matchedSpectrum);

                            pendingByScan.Remove(scan);

                            Progress.SetPosition(input.BaseStream.Position);
                        }
                    }
                }
            }
            finally
            {
                // Close every writer that was actually opened, even when parsing failed or was cancelled.
                foreach (var openWriter in writers.Values.Where(w => w != null))
                {
                    openWriter.Close();
                }
            }

            var result = writers.Where(entry => entry.Value != null)
                                .Select(entry => entry.Key)
                                .ToList();

            foreach (var matched in matchedByOutput)
            {
                peptideFormat.WriteToFile(FileUtils.ChangeExtension(matched.Key, ".peptides"), matched.Value);
            }

            // Whatever is still pending was not found in any source file.
            if (pendingByScan.Count > 0)
            {
                var missedFile = peptideFile + ".missed";
                result.Add(missedFile);
                peptideFormat.WriteToFile(missedFile, pendingByScan.Values.ToList());
            }

            return(result);
        }
        /// <summary>
        /// Builds chromatograph profiles for every peptide in <c>options.InputFile</c> from the
        /// matching raw files in <c>options.RawDirectory</c>, writes them as text and XML files,
        /// and then runs the <c>BoundaryR</c> template over the target directory.
        /// </summary>
        /// <remarks>
        /// Peptides are grouped by experiment name and the groups are processed in parallel, one
        /// raw file per group. Within a raw file, peptides sharing sequence and theoretical m/z
        /// are handled together: a peptide whose full MS scan is already covered by an earlier
        /// profile of the same group only marks that scan as identified.
        /// </remarks>
        /// <returns>A single-element array containing <c>options.OutputFile</c>.</returns>
        /// <exception cref="Exception">A raw file for one of the experiments cannot be found.</exception>
        public override IEnumerable <string> Process()
        {
            var format          = new MascotPeptideTextFormat();
            var spectra         = format.ReadFromFile(options.InputFile);
            // Both maps are keyed by lower-cased names so that experiments can be matched to raw files.
            var peptideMap      = spectra.ToGroupDictionary(m => m.Query.FileScan.Experimental.ToLower());
            var rawfiles        = Directory.GetFiles(options.RawDirectory, "*.raw", SearchOption.AllDirectories).ToDictionary(m => Path.GetFileNameWithoutExtension(m).ToLower());
            var retentionWindow = options.RetentionTimeWindow;

            var missed = peptideMap.Keys.Except(rawfiles.Keys).ToArray();

            if (missed.Length > 0)
            {
                throw new Exception(string.Format("Cannot find raw file of {0} in directory {1}", missed.Merge("/"), options.RawDirectory));
            }

            var option = new ParallelOptions()
            {
                // MaxDegreeOfParallelism rejects 0, which Math.Min alone produced for an input
                // without spectra; clamp to at least 1.
                MaxDegreeOfParallelism = Math.Max(1, Math.Min(Environment.ProcessorCount, peptideMap.Count)),
            };

            Parallel.ForEach(peptideMap, option, raw =>
            {
                var peptides = raw.Value;

                Progress.SetMessage("Preparing isotopic for " + raw.Key + " ...");
                var waitingPeaks = new List <ChromatographProfile>();
                foreach (var peptide in peptides)
                {
                    string file = GetTargetFile(peptide);
                    var chro    = new ChromatographProfile()
                    {
                        Experimental   = peptide.Query.FileScan.Experimental,
                        IdentifiedScan = peptide.Query.FileScan.FirstScan,
                        ObservedMz     = peptide.GetPrecursorMz(),
                        TheoreticalMz  = peptide.GetTheoreticalMz(),
                        Charge         = peptide.Query.Charge,
                        Sequence       = peptide.Peptide.PureSequence,
                        FileName       = Path.GetFileName(file)
                    };
                    chro.InitializeIsotopicIons(options.MzTolerancePPM);
                    waitingPeaks.Add(chro);
                }

                if (waitingPeaks.Count == 0)
                {
                    return;
                }

                List <FullMS> fullMSList = new List <FullMS>();
                Progress.SetMessage("Reading full ms list from " + rawfiles[raw.Key] + "...");
                using (var rawReader = new CacheRawFile(RawFileFactory.GetRawFileReader(rawfiles[raw.Key])))
                {
                    // Index all MS level 1 scans of the raw file; peaks are not loaded here (Peaks = null).
                    var firstScan = rawReader.GetFirstSpectrumNumber();
                    var lastScan  = rawReader.GetLastSpectrumNumber();
                    for (int scan = firstScan; scan <= lastScan; scan++)
                    {
                        var mslevel = rawReader.GetMsLevel(scan);
                        if (mslevel == 1)
                        {
                            fullMSList.Add(new FullMS()
                            {
                                Scan          = scan,
                                RetentionTime = rawReader.ScanToRetentionTime(scan),
                                Peaks         = null
                            });
                        }
                    }

                    var chroGroups = waitingPeaks.GroupBy(chro => string.Format("{0}_{1:0.0000}", chro.Sequence, chro.TheoreticalMz));
                    foreach (var chroGroup in chroGroups)
                    {
                        List <ChromatographProfile> profileChros = new List <ChromatographProfile>();
                        foreach (var chro in chroGroup.OrderBy(m => m.IdentifiedScan))
                        {
                            // Master scan: the last full MS scan that does not come after the identified scan
                            // (index 0 when the identified scan precedes the second full MS scan).
                            // NOTE(review): fullMSList[masterScanIndex] below throws when the raw file
                            // contains no MS level 1 scan at all.
                            var masterScanIndex = 0;
                            for (int i = 1; i < fullMSList.Count; i++)
                            {
                                if (chro.IdentifiedScan < fullMSList[i].Scan)
                                {
                                    break;
                                }
                                masterScanIndex = i;
                            }
                            var masterScan          = fullMSList[masterScanIndex].Scan;
                            var masterRetentionTime = fullMSList[masterScanIndex].RetentionTime;

                            // If an earlier profile of this group already contains the master scan,
                            // flag that scan as identified and do not build another profile.
                            var covered = profileChros.SelectMany(profileChro => profileChro.Profiles)
                                                      .FirstOrDefault(pkl => pkl.Scan == masterScan);
                            if (covered != null)
                            {
                                covered.Identified = true;
                                continue;
                            }

                            Progress.SetMessage("Processing {0} : {1:0.#####} : {2} : {3}", chro.Sequence, chro.ObservedMz, chro.IdentifiedScan, Path.GetFileName(chro.FileName));

                            // Walk backwards from the master scan until the retention window is left
                            // or AddEnvelope reports false.
                            for (int scanIndex = masterScanIndex; scanIndex >= 0; scanIndex--)
                            {
                                if (Progress.IsCancellationPending())
                                {
                                    throw new UserTerminatedException();
                                }

                                var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                                if (masterRetentionTime - curRetentionTime > retentionWindow)
                                {
                                    break;
                                }

                                if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                                {
                                    break;
                                }

                                if (scanIndex == masterScanIndex)
                                {
                                    chro.Profiles.Last().Identified = true;
                                }
                            }
                            // Profiles were collected while walking backwards; reverse them before
                            // the forward walk appends the later scans.
                            chro.Profiles.Reverse();

                            // Walk forwards from the scan after the master scan under the same stop conditions.
                            for (int scanIndex = masterScanIndex + 1; scanIndex < fullMSList.Count; scanIndex++)
                            {
                                if (Progress.IsCancellationPending())
                                {
                                    throw new UserTerminatedException();
                                }

                                var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                                if (curRetentionTime - masterRetentionTime > retentionWindow)
                                {
                                    break;
                                }

                                if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                                {
                                    break;
                                }
                            }

                            profileChros.Add(chro);
                        }

                        // Discard profiles that are too short and put the one with the most scans first.
                        profileChros.RemoveAll(l => l.Profiles.Count < options.MinimumScanCount);
                        profileChros.Sort((m1, m2) => m2.Profiles.Count.CompareTo(m1.Profiles.Count));

                        // The first profile goes to the target directory; all others go to the
                        // sub directory with ".sub" inserted before the file extension.
                        bool bMain = true;
                        foreach (var chro in profileChros)
                        {
                            string filename;
                            if (bMain)
                            {
                                filename = Path.Combine(GetTargetDirectory(chro.Experimental), chro.FileName);
                            }
                            else
                            {
                                filename = Path.Combine(GetTargetSubDirectory(chro.Experimental), Path.ChangeExtension(chro.FileName, ".sub" + Path.GetExtension(chro.FileName)));
                            }
                            bMain = false;

                            new ChromatographProfileTextWriter().WriteToFile(filename, chro);
                            new ChromatographProfileXmlFormat().WriteToFile(filename + ".xml", chro);
                        }
                    }
                }
            });

            Progress.SetMessage("Finding boundaries ...");
            var boundaryOptions = new RTemplateProcessorOptions()
            {
                InputFile      = targetDir,
                OutputFile     = options.OutputFile,
                RTemplate      = BoundaryR,
                RExecute       = SystemUtils.GetRExecuteLocation(),
                CreateNoWindow = true
            };

            new RTemplateProcessor(boundaryOptions)
            {
                Progress = this.Progress
            }.Process();

            return(new string[] { options.OutputFile });
        }