/// <summary>
        /// Parses a pNovo result file into a flat list of identified spectra.
        ///
        /// The file is read as tab-separated lines. A line with at most five fields is
        /// treated as a spectrum header: field 0 is handed to the title parser, field 1 is
        /// the experimental MH, field 2 is the charge and the last field is the number of
        /// candidate sequences. Any longer line is treated as a candidate peptide of the
        /// most recently accepted spectrum header: field 0 is its rank, field 2 its score,
        /// field 9 its sequence and field 11 its theoretical MH.
        /// </summary>
        /// <param name="filename">pNovo result file to read</param>
        /// <param name="maxRank">Maximum rank of a candidate peptide to keep for a spectrum</param>
        /// <param name="minScore">Minimum score of a candidate peptide to keep for a spectrum</param>
        /// <returns>One identified spectrum per accepted candidate, in file order</returns>
        public List<IIdentifiedSpectrum> ParsePeptides(string filename, int maxRank, double minScore)
        {
            var result = new List<IIdentifiedSpectrum>();

            // State carried over from the latest spectrum header to its candidate lines.
            SequestFilename sf = null;
            int charge = 2;
            double expmh = 0;

            using (var sr = new StreamReader(filename))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // A blank (or whitespace-only) line would otherwise be classified as a
                    // spectrum header and fail in Convert.ToInt32(""), aborting the parse.
                    if (line.Trim().Length == 0)
                    {
                        continue;
                    }

                    var parts = line.Split('\t');
                    if (parts.Length <= 5)
                    {
                        // Spectrum header; headers announcing zero candidates are ignored
                        // and leave the carried-over state untouched.
                        var seqcount = Convert.ToInt32(parts.Last());
                        if (seqcount == 0)
                        {
                            continue;
                        }

                        sf = parser.GetValue(parts[0]);
                        expmh = MyConvert.ToDouble(parts[1]);
                        charge = Convert.ToInt32(parts[2]);
                        continue;
                    }

                    // Candidate peptide line.
                    int curIndex = Convert.ToInt32(parts[0]);
                    if (curIndex > maxRank)
                    {
                        continue;
                    }

                    var score = MyConvert.ToDouble(parts[2]);
                    if (score < minScore)
                    {
                        continue;
                    }

                    var curSpectrum = new IdentifiedSpectrum();
                    curSpectrum.Query.FileScan = sf;
                    curSpectrum.Query.Charge = charge;
                    curSpectrum.ExperimentalMH = expmh;
                    curSpectrum.Score = score;
                    result.Add(curSpectrum);

                    // NOTE(review): the peptide is never stored explicitly; presumably the
                    // IdentifiedPeptide constructor attaches it to curSpectrum - confirm.
                    IdentifiedPeptide pep = new IdentifiedPeptide(curSpectrum);
                    pep.Sequence = ModifySequence(parts[9]);
                    pep.Spectrum.TheoreticalMH = MyConvert.ToDouble(parts[11]);
                    pep.Spectrum.Rank = curIndex;
                }
            }

            return result;
        }
        /// <summary>
        /// Resolves a spectrum title by trying every registered title parser in list order
        /// and returning the first result that is produced without an exception.
        /// </summary>
        /// <param name="obj">Title text to parse</param>
        /// <returns>
        /// The result of the first parser that succeeds, or the result of
        /// MascotUtils.ParseTitle(obj, 2) when every parser throws.
        /// </returns>
        public SequestFilename GetValue(string obj)
        {
            int index = 0;
            while (index < parsers.Count)
            {
                ITitleParser candidate = parsers[index];
                try
                {
                    SequestFilename parsed = candidate.GetValue(obj);

                    // A parser that succeeds from further down the list is moved to the
                    // front so it is tried first next time; this reordering stops once
                    // moveCount has reached 10.
                    bool promote = index > 0 && moveCount < 10;
                    if (promote)
                    {
                        parsers.Remove(candidate);
                        parsers.Insert(0, candidate);
                        moveCount++;
                    }

                    return parsed;
                }
                catch (Exception)
                {
                    // Any failure here just means "try the next parser".
                }

                index++;
            }

            // No parser produced a result.
            return MascotUtils.ParseTitle(obj, 2);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds the SequestFilename for a peak list.
        /// </summary>
        /// <param name="pkl">Peak list whose annotations and precursor charge are read</param>
        /// <param name="mgfName">Name used as the first constructor argument when the peak list has no title annotation</param>
        /// <returns>
        /// The parsed title with its extension set to "dta" and its charge set to the
        /// precursor charge, or a name generated from <paramref name="mgfName"/> and an
        /// incrementing scan index when no title annotation is present.
        /// </returns>
        public SequestFilename GetFilename(PeakList<Peak> pkl, String mgfName)
        {
            // Untitled spectrum: number it with the running scan index, which is passed
            // as both the second and the third constructor argument.
            if (!pkl.Annotations.ContainsKey(MascotGenericFormatConstants.TITLE_TAG))
            {
                this.scanIndex++;
                return new SequestFilename(mgfName, this.scanIndex, this.scanIndex, pkl.PrecursorCharge, "dta");
            }

            var rawTitle = (String)pkl.Annotations[MascotGenericFormatConstants.TITLE_TAG];

            SequestFilename parsed = parser.GetValue(rawTitle);
            parsed.Extension = "dta";
            parsed.Charge = pkl.PrecursorCharge;
            return parsed;
        }
        /// <summary>
        /// Reads every peak list from a Mascot generic format file and indexes them by
        /// the first scan number parsed from each peak list's title annotation.
        /// While indexing, each peak list is completed from its parsed title: an empty
        /// Experimental and a zero FirstScan are filled in, and a positive parsed charge
        /// overrides PrecursorCharge.
        /// </summary>
        /// <param name="file">File to read</param>
        /// <param name="parser">Title parser applied to each peak list's title annotation</param>
        /// <returns>Peak lists keyed by the string form of the parsed first scan</returns>
        private static Dictionary<string, PeakList<Peak>> ReadPeakMap(string file, ITitleParser parser)
        {
            var mgfReader = new MascotGenericFormatReader<Peak>();
            List<PeakList<Peak>> peakLists = mgfReader.ReadFromFile(file);

            var byScan = new Dictionary<string, PeakList<Peak>>();
            foreach (var pkl in peakLists)
            {
                var title = pkl.Annotations[MascotGenericFormatConstants.TITLE_TAG] as string;
                var parsed = parser.GetValue(title);

                if (string.IsNullOrEmpty(pkl.Experimental))
                {
                    pkl.Experimental = parsed.Experimental;
                }

                if (pkl.FirstScan == 0)
                {
                    pkl.FirstScan = parsed.FirstScan;
                }

                if (parsed.Charge > 0)
                {
                    pkl.PrecursorCharge = parsed.Charge;
                }

                // The key always comes from the parsed title, even when the peak list
                // already carried its own FirstScan. Add throws on a repeated scan.
                byScan.Add(parsed.FirstScan.ToString(), pkl);
            }

            return byScan;
        }
        /// <summary>
        /// Reads all peak lists from a text file in which each spectrum is enclosed by a
        /// "peaklist start" line and a "peaklist end" line. Inside a section, leading
        /// lines that begin with a letter are read as key=value headers; the first
        /// non-blank line that does not begin with a letter, and every non-blank line
        /// after it, is read as a peak line.
        /// </summary>
        /// <param name="fileName">File to read</param>
        /// <returns>
        /// One peak list per section that has at least one header and one peak line.
        /// Peak lines with fewer than two tab-separated fields are ignored.
        /// </returns>
        public List<PeakList<Peak>> ReadFromFile(string fileName)
        {
            var peakLists = new List<PeakList<Peak>>();

            using (var input = new StreamReader(fileName))
            {
                Progress.SetRange(0, input.BaseStream.Length);

                // Both buffers are reused for every section.
                var headers = new Dictionary<string, string>();
                var peakLines = new List<string>();

                string line;
                while ((line = input.ReadLine()) != null)
                {
                    // Everything outside a section is skipped.
                    if (!line.Trim().Equals("peaklist start"))
                    {
                        continue;
                    }

                    Progress.SetPosition(StreamUtils.GetCharpos(input));

                    headers.Clear();
                    peakLines.Clear();

                    bool readingHeader = true;
                    while ((line = input.ReadLine()) != null)
                    {
                        var trimmed = line.Trim();
                        if (trimmed.Equals("peaklist end"))
                        {
                            break;
                        }

                        if (trimmed.Length == 0)
                        {
                            continue;
                        }

                        if (readingHeader && Char.IsLetter(trimmed[0]))
                        {
                            // key=value header; a later duplicate key overwrites the earlier one.
                            var separator = trimmed.IndexOf('=');
                            headers[trimmed.Substring(0, separator)] = trimmed.Substring(separator + 1);
                            continue;
                        }

                        // The first non-letter line ends the header part for good.
                        readingHeader = false;
                        peakLines.Add(trimmed);
                    }

                    if (headers.Count == 0 || peakLines.Count == 0)
                    {
                        continue;
                    }

                    var pkl = new PeakList<Peak>
                    {
                        PrecursorMZ = MyConvert.ToDouble(headers["mz"]),
                        PrecursorCharge = Convert.ToInt32(headers["charge"]),
                        MsLevel = 2,
                        ScanMode = headers["fragmentation"]
                    };

                    SequestFilename sf = parser.GetValue(headers["header"]);
                    pkl.ScanTimes.Add(new ScanTime(sf.FirstScan, 0.0));
                    pkl.Experimental = sf.Experimental;

                    peakLists.Add(pkl);

                    foreach (var peakLine in peakLines)
                    {
                        var fields = peakLine.Split('\t');
                        if (fields.Length <= 1)
                        {
                            continue;
                        }

                        pkl.Add(new Peak(MyConvert.ToDouble(fields[0]), MyConvert.ToDouble(fields[1])));
                    }
                }
            }

            return peakLists;
        }
        /// <summary>
        /// Extracts, from the files in sourceFiles, the spectrum sections that correspond to
        /// the identified spectra listed in a peptide file, and writes them to result files.
        /// For every result file that received at least one section, a companion file with
        /// the ".peptides" extension is written containing the matched spectra. Spectra that
        /// were not found in any source file are written to peptideFile + ".missed".
        /// </summary>
        /// <param name="peptideFile">Peptide file read with MascotPeptideTextFormat</param>
        /// <returns>
        /// The names of the result files that were actually created, followed by the
        /// ".missed" file name when some spectra were not found.
        /// </returns>
        public override IEnumerable <string> Process(string peptideFile)
        {
            Progress.SetMessage("Loading peptide file {0}...", peptideFile);

            var format   = new MascotPeptideTextFormat();
            var peptides = format.ReadFromFile(peptideFile);

            // Spectra still to be located, keyed by GetScan of their file/scan name.
            // ToDictionary throws if two spectra yield the same key.
            var map = peptides.ToDictionary(p => GetScan(p.Query.FileScan));

            // Spectra extracted so far, grouped by the result file they were written to.
            var pepMap = new Dictionary <string, List <IIdentifiedSpectrum> >();

            // NOTE(review): the '.' in front of "msm" is not escaped, so it matches any
            // character rather than only a literal dot - confirm this is intended.
            Regex silac = new Regex(@"\.((?:iso|sil\d))_\d+.msm");

            // One writer per result file. An entry stays null until the first section is
            // written, so result files that would be empty are never created.
            Dictionary <string, StreamWriter> swmap = new Dictionary <string, StreamWriter>();

            try
            {
                int count = 0;
                foreach (var msmFile in sourceFiles)
                {
                    // Several source files may map to the same result file name, in which
                    // case they share one writer.
                    string resultFileName = GetResultFilename(silac, msmFile, peptideFile);
                    if (!swmap.ContainsKey(resultFileName))
                    {
                        swmap[resultFileName] = null;
                    }

                    count++;

                    Progress.SetMessage("Parsing {0}/{1} : {2} ...", count, sourceFiles.Length, msmFile);
                    using (var sr = new StreamReader(msmFile))
                    {
                        Progress.SetRange(0, sr.BaseStream.Length);
                        MascotGenericFormatSectionReader reader = new MascotGenericFormatSectionReader(sr);
                        // Stop reading this file as soon as every spectrum has been found.
                        while (reader.HasNext() && map.Count > 0)
                        {
                            if (Progress.IsCancellationPending())
                            {
                                throw new UserTerminatedException();
                            }

                            // The title is inspected first; the section is then either
                            // consumed with Next() or discarded with SkipNext().
                            string title = reader.GetNextTitle();
                            var    scan  = GetScan(parser.GetValue(title));
                            if (map.ContainsKey(scan))
                            {
                                var spectrum = map[scan];
                                var section  = reader.Next();

                                // Create the writer (and its spectrum list) on first use.
                                var sw = swmap[resultFileName];
                                if (sw == null)
                                {
                                    sw = new StreamWriter(resultFileName);
                                    swmap[resultFileName] = sw;

                                    pepMap[resultFileName] = new List <IIdentifiedSpectrum>();
                                }

                                section.ForEach(m => sw.WriteLine(m));
                                pepMap[resultFileName].Add(spectrum);

                                // Each spectrum is extracted at most once.
                                map.Remove(scan);
                            }
                            else
                            {
                                reader.SkipNext();
                            }

                            Progress.SetPosition(sr.BaseStream.Position);
                        }
                    }
                }
            }
            finally
            {
                // Close every writer that was opened, also when parsing failed or was cancelled.
                foreach (var sw in swmap.Values)
                {
                    if (sw != null)
                    {
                        sw.Close();
                    }
                }
            }

            // Only result files that were actually created are reported.
            var result = new List <string>(from k in swmap
                                           where k.Value != null
                                           select k.Key);

            // Write the matched spectra next to each result file; these ".peptides" files
            // are not added to the returned list.
            foreach (var pep in pepMap)
            {
                var pepFilename = FileUtils.ChangeExtension(pep.Key, ".peptides");
                format.WriteToFile(pepFilename, pep.Value);
            }

            // Whatever is left in the map was not found in any source file.
            if (map.Count > 0)
            {
                var missed = peptideFile + ".missed";
                result.Add(missed);
                format.WriteToFile(missed, map.Values.ToList());
            }

            return(result);
        }
        /// <summary>
        /// Copies each file in sourceFiles into the target directory, adding a
        /// modified_peptide attribute to every modification_info element that lacks one.
        /// The attribute value is the match sequence of the spectrum named by the closest
        /// preceding spectrum_query element. When a source file already lives in the target
        /// directory it is rewritten in place through a temporary ".tmp" file.
        /// </summary>
        /// <param name="targetDir">Directory that receives the rewritten files</param>
        /// <returns>The full names of the files that were written</returns>
        public override IEnumerable<string> Process(string targetDir)
        {
            var result = new List<string>();

            targetDir = new DirectoryInfo(targetDir).FullName;
            foreach (var sourceFile in sourceFiles)
            {
                var sourceInfo = new FileInfo(sourceFile);

                // Compare directory names ordinally and case-insensitively instead of via
                // ToUpper(), whose result depends on the current culture.
                bool isSame = string.Equals(sourceInfo.DirectoryName, targetDir, System.StringComparison.OrdinalIgnoreCase);

                // Writing into the source directory goes through a temporary file that
                // replaces the source once it has been written completely.
                string targetFile = isSame
                    ? sourceFile + ".tmp"
                    : Path.Combine(targetDir, sourceInfo.Name);

                var mparser = new MascotPepXmlParser()
                {
                    TitleParser = parser
                };

                // Match sequence of every spectrum reported by the parser, keyed by short file name.
                var spectra = mparser.ReadFromFile(sourceFile).ToDictionary(m => m.Query.FileScan.ShortFileName, m => m.GetMatchSequence());

                // Sequence belonging to the most recently seen spectrum_query element.
                string sequence = string.Empty;

                using (StreamReader sr = new StreamReader(sourceFile))
                using (StreamWriter sw = new StreamWriter(targetFile))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        if (line.Contains("<spectrum_query"))
                        {
                            // The dictionary indexer throws KeyNotFoundException when the
                            // parser did not report this spectrum.
                            var query = line.StringAfter("spectrum=\"").StringBefore("\"");
                            var sf = parser.GetValue(query);
                            sequence = spectra[sf.ShortFileName];
                        }
                        else if (line.Contains("<modification_info") && !line.Contains("modified_peptide"))
                        {
                            // Insert the attribute right before the first '>' of the line.
                            line = line.StringBefore(">") + " modified_peptide=\"" + sequence + "\">" + line.StringAfter(">");
                        }

                        sw.WriteLine(line);
                    }
                }

                if (isSame)
                {
                    File.Delete(sourceFile);
                    File.Move(targetFile, sourceFile);
                    result.Add(sourceFile);
                }
                else
                {
                    result.Add(targetFile);
                }
            }

            return result;
        }