/// <summary>
        /// Collapses duplicate peptide entries across all proteins of <paramref name="result"/>.
        /// Two entries are treated as the same peptide when they share the query id of their
        /// spectrum and the same sequence text. The first occurrence is kept; every later
        /// occurrence is replaced in its protein's peptide list by the first one, and the
        /// later protein's name is added to the kept peptide.
        /// </summary>
        /// <param name="result">Result whose protein groups are scanned and updated in place.</param>
        private void MergePeptides(MascotResult result)
        {
            // Key: "<query id>_<sequence>"  ->  first peptide instance seen with that key.
            var firstSeen = new Dictionary<String, IIdentifiedPeptide>();

            foreach (IIdentifiedProteinGroup proteinGroup in result)
            {
                foreach (IIdentifiedProtein protein in proteinGroup)
                {
                    // Index-based loop because list slots are overwritten while walking the list.
                    for (int index = 0; index < protein.Peptides.Count; index++)
                    {
                        var candidate = protein.Peptides[index];
                        String key = candidate.Spectrum.Query.QueryId + "_" + candidate.Sequence;

                        IIdentifiedPeptide existing;
                        if (!firstSeen.TryGetValue(key, out existing))
                        {
                            // First time this query/sequence pair shows up: remember it.
                            firstSeen[key] = candidate;
                            continue;
                        }

                        // Already known: share the earlier instance and record this protein on it.
                        existing.AddProtein(protein.Name);
                        protein.Peptides[index] = existing;
                    }
                }
            }
        }
        /// <summary>
        /// Assigns a marker character to every collected modification name and rewrites each
        /// peptide sequence so that underline tags (<c>&lt;u&gt;</c>/<c>&lt;U&gt;</c>) are replaced by
        /// the tagged text followed by the marker character of the matching modification.
        /// Tags left over after that pass are stripped and replaced by a trailing <c>*</c>.
        /// </summary>
        /// <param name="result">Result whose spectra supply the peptides to rewrite in place.</param>
        private void RefineModification(MascotResult result)
        {
            char[] entrySeparators = { ';' };

            // Work on a sorted copy of the names so the dictionary values can be overwritten
            // while iterating, and so marker characters are handed out in sorted-name order.
            var sortedNames = new List<string>(this.modifications.Keys);
            sortedNames.Sort();

            // Marker characters are taken starting at index 1; index 0 is not used here.
            int markerIndex = 1;
            foreach (string name in sortedNames)
            {
                this.modifications[name] = ModificationConsts.MODIFICATION_CHAR[markerIndex];
                markerIndex++;
            }

            foreach (IIdentifiedSpectrum spectrum in result.GetSpectra())
            {
                foreach (IIdentifiedPeptide peptide in spectrum.Peptides)
                {
                    string modificationList = spectrum.Modifications;
                    if (modificationList == null)
                    {
                        // Nothing recorded for this spectrum: leave the sequence untouched.
                        continue;
                    }

                    string rewritten = peptide.Sequence;

                    foreach (string entry in modificationList.Split(entrySeparators, StringSplitOptions.RemoveEmptyEntries))
                    {
                        // Group 1 is used as the modification name, group 2 as the tagged text.
                        // NOTE(review): the match result is not checked; a non-matching entry gives
                        // an empty group 1, and the lookup below throws unless "" is a known name.
                        Match parsed = this.modificationReg.Match(entry.Trim());
                        char marker = this.modifications[parsed.Groups[1].Value];

                        string tagged = parsed.Groups[2].Value;
                        string replacement = tagged + marker;

                        rewritten = rewritten.Replace("<u>" + tagged + "</u>", replacement);
                        rewritten = rewritten.Replace("<U>" + tagged + "</U>", replacement);
                    }

                    // Any tag still present did not match a listed modification: drop the opening
                    // tag, turn the closing tag into '*', then undo a '*' that landed right after '.'.
                    rewritten = rewritten.Replace("<u>", "")
                                         .Replace("</u>", "*")
                                         .Replace("<U>", "")
                                         .Replace("</U>", "*")
                                         .Replace(".*", ".");

                    peptide.Sequence = rewritten;
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Reads protein groups from a tab-delimited text file into a new <c>MascotResult</c>.
        /// The leading lines are scanned for two header rows: a line starting with "Locus"
        /// configures the protein converters, and a line starting with "Unique" configures the
        /// peptide converters and ends the header scan. The remainder of the file is consumed
        /// group by group through <c>ReadNextProteinGroup</c>.
        /// </summary>
        /// <param name="proteinFile">Path of the file to read.</param>
        /// <returns>The result holding every protein group read from the file.</returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public IIdentifiedResult ReadFromFile(string proteinFile)
        {
            if (!File.Exists(proteinFile))
            {
                // Message text is unchanged; the path is also exposed through FileName.
                throw new FileNotFoundException("File not exist : " + proteinFile, proteinFile);
            }

            var result = new MascotResult();

            var peptideMap = new Dictionary<string, IIdentifiedSpectrum>();

            using (var filein = new StreamReader(proteinFile))
            {
                string lastLine;
                while ((lastLine = filein.ReadLine()) != null)
                {
                    // Header keywords are fixed file-format tokens, so compare them ordinally
                    // instead of with the current culture's linguistic rules.
                    if (lastLine.StartsWith("Locus", System.StringComparison.Ordinal))
                    {
                        this.proteinConverter = IdentifiedProteinPropertyConverterFactory.GetInstance().GetConverters(lastLine, '\t',
                                                                                                                      "Dtaselect");
                    }

                    if (lastLine.StartsWith("Unique", System.StringComparison.Ordinal))
                    {
                        this.peptideConverter = IdentifiedSpectrumPropertyConverterFactory.GetInstance().GetConverters(lastLine,
                                                                                                                       '\t',
                                                                                                                       "Dtaselect");
                        // The peptide header is the last header row; data follows.
                        break;
                    }
                }

                // The group reader receives the reader position plus a carried-over line;
                // start it with no carried-over line.
                IIdentifiedProteinGroup group;
                lastLine = null;
                while ((group = ReadNextProteinGroup(filein, peptideMap, ref lastLine)) != null)
                {
                    result.Add(group);
                }
            }

            return(result);
        }
Пример #4
0
        /// <summary>
        /// Builds a result containing two spectra, each carrying a different annotation key,
        /// and checks that <c>AnnotationUtils.GetAnnotationKeys</c> reports both keys.
        /// </summary>
        public void TestGetAnnotationKeys()
        {
            const string firstKey = "TEST1";
            const string secondKey = "TEST2";

            // First spectrum with its own annotation key.
            var firstSpectrum = new IdentifiedSpectrum();
            firstSpectrum.Annotations.Add(firstKey, null);
            firstSpectrum.Query.FileScan.Experimental = "EXP1";
            // The instance is discarded; the test later reads the peptide back through
            // firstSpectrum.Peptide (presumably attached by the constructor — verify).
            new IdentifiedPeptide(firstSpectrum);
            firstSpectrum.Peptide.Sequence = "SEQ1";

            // Second spectrum with a different annotation key.
            var secondSpectrum = new IdentifiedSpectrum();
            secondSpectrum.Annotations.Add(secondKey, null);
            secondSpectrum.Query.FileScan.Experimental = "EXP2";
            new IdentifiedPeptide(secondSpectrum);
            secondSpectrum.Peptide.Sequence = "SEQ2";

            // Wire the hierarchy bottom-up: peptides -> protein -> group -> result.
            var protein = new IdentifiedProtein();
            protein.Peptides.Add(firstSpectrum.Peptide);
            protein.Peptides.Add(secondSpectrum.Peptide);

            var proteinGroup = new IdentifiedProteinGroup();
            proteinGroup.Add(protein);

            var mascotResult = new MascotResult();
            mascotResult.Add(proteinGroup);

            List<string> collectedKeys = AnnotationUtils.GetAnnotationKeys(mascotResult.GetSpectra());

            Assert.AreEqual(2, collectedKeys.Count);
            Assert.IsTrue(collectedKeys.Contains(firstKey));
            Assert.IsTrue(collectedKeys.Contains(secondKey));
        }
        /// <summary>
        /// Parses the supplied report text into a <c>MascotResult</c>.
        /// Steps, in order: read the score/p-value thresholds and optional peak settings, build
        /// the spectrum filter used while parsing, create one protein group per protein match,
        /// attach "same set" proteins found in the text following each match, drop groups whose
        /// first protein has no peptides, then normalise modifications and merge duplicate peptides.
        /// </summary>
        /// <param name="fileContent">Full text of the report to parse.</param>
        /// <returns>The populated result.</returns>
        public MascotResult ParseContent(String fileContent)
        {
            var result = new MascotResult();

            // Reset the modification table; it is consumed later by RefineModification.
            this.modifications = new Dictionary<string, char>();

            Pair<int, double> pValueScore = ParsePValueScore(fileContent);

            result.PValueScore = pValueScore.First;
            result.PValue      = pValueScore.Second;

            try
            {
                result.PeakIsotopicType = ParsePeakIsotopicType(fileContent);
            }
            catch (ArgumentException)
            {
                // Deliberate best-effort: the property is simply left unassigned when the
                // parser rejects the content.
            }

            try
            {
                result.PeakTolerance = ParsePeakTolerance(fileContent);
            }
            catch (ArgumentException)
            {
                // Deliberate best-effort, same as above.
            }

            var filters = new List<IFilter<IIdentifiedSpectrum>>();

            if (this.filterByDefaultScoreAndPvalue)
            {
                filters.Add(new IdentifiedSpectrumScoreFilter(pValueScore.First));
                filters.Add(new IdentifiedSpectrumExpectValueFilter(pValueScore.Second));
            }

            filters.Add(new IdentifiedSpectrumRankFilter(1));
            if (null != this.defaultPeptideFilter)
            {
                filters.Add(this.defaultPeptideFilter);
            }

            this.currentPeptideFilter = new AndFilter<IIdentifiedSpectrum>(filters);

            // One group per protein match; remember where each match sits in the text so the
            // text between consecutive matches can be inspected afterwards.
            var offsets = new List<Offset>();

            Match proteinMatch = GetProteinRegex().Match(fileContent);

            while (proteinMatch.Success)
            {
                IdentifiedProtein protein = ParseProtein(proteinMatch.Groups[1].Value);
                var group = new IdentifiedProteinGroup();
                group.Add(protein);
                result.Add(group);
                offsets.Add(new Offset(proteinMatch.Index, proteinMatch.Index + proteinMatch.Length, group));
                proteinMatch = proteinMatch.NextMatch();
            }

            // The marker is a fixed token of the report, so search for it ordinally rather
            // than with culture-sensitive rules.
            int endIndex = fileContent.IndexOf("Peptide matches not assigned to protein hits", StringComparison.Ordinal);

            if (-1 == endIndex)
            {
                endIndex = fileContent.Length - 1;
            }

            for (int i = 0; i < offsets.Count; i++)
            {
                int start = offsets[i].End;
                int end   = i == offsets.Count - 1 ? endIndex : offsets[i + 1].Start;

                // 'end' is treated as inclusive. When the last protein match ends after the
                // section marker the span is negative and Substring would throw; there is no
                // trailing text to inspect in that case, so skip it.
                int length = end - start + 1;
                if (length <= 0)
                {
                    continue;
                }

                String redundant = fileContent.Substring(start, length);
                if (!redundant.Contains("Proteins matching the same set"))
                {
                    continue;
                }

                List<IdentifiedProtein> sameMatchProteins = ParseSameMatchProteins(redundant);

                foreach (IdentifiedProtein mp in sameMatchProteins)
                {
                    // Same-set proteins share the peptides of the group's first protein.
                    mp.Peptides.AddRange(offsets[i].Mpg[0].Peptides);
                    offsets[i].Mpg.Add(mp);
                }
            }

            // Walk backwards so removals do not shift the indices still to be visited.
            for (int i = result.Count - 1; i >= 0; i--)
            {
                if (0 == result[i][0].Peptides.Count)
                {
                    result.RemoveAt(i);
                }
            }

            RefineModification(result);

            MergePeptides(result);

            result.InitUniquePeptideCount();

            return(result);
        }