/// <summary>
/// Loads protein-spectrum matches from an identification result file by dispatching to the
/// reader that belongs to the given search tool.
/// </summary>
/// <param name="path">Path of the identification result file.</param>
/// <param name="tool">
/// Search tool that produced the file. When this is Unknown, the tool is inferred from the
/// file-name suffix: "IcTda.tsv" selects MsPathFinder, "MSAlign_ResultTable.txt" selects
/// MsAlign and "msgfdb_syn.txt" selects MsGfPlus.
/// </param>
/// <param name="maxPrsm">Forwarded to the selected reader as the cap on the number of matches.</param>
/// <returns>
/// The list produced by the selected reader, or null when the tool is still Unknown after
/// suffix inference or has no reader here.
/// </returns>
public List<ProteinSpectrumMatch> LoadIdentificationResult(string path, ProteinSpectrumMatch.SearchTool tool = ProteinSpectrumMatch.SearchTool.Unknown, int maxPrsm = int.MaxValue)
        {
            if (tool == ProteinSpectrumMatch.SearchTool.Unknown)
            {
                // The suffixes are fixed tokens, so they are compared ordinally; the single-argument
                // EndsWith overload would apply culture-sensitive rules of the current thread.
                if (path.EndsWith("IcTda.tsv", StringComparison.Ordinal))
                    tool = ProteinSpectrumMatch.SearchTool.MsPathFinder;
                else if (path.EndsWith("MSAlign_ResultTable.txt", StringComparison.Ordinal))
                    tool = ProteinSpectrumMatch.SearchTool.MsAlign;
                else if (path.EndsWith("msgfdb_syn.txt", StringComparison.Ordinal))
                    tool = ProteinSpectrumMatch.SearchTool.MsGfPlus;
            }

            switch (tool)
            {
                case ProteinSpectrumMatch.SearchTool.MsAlign:
                    return ReadMsAlignResult(path, maxPrsm);
                case ProteinSpectrumMatch.SearchTool.MsPathFinder:
                    return ReadMsPathFinderResult(path, maxPrsm);
                case ProteinSpectrumMatch.SearchTool.MsGfPlus:
                    return ReadMsGfPlusResult(path, maxPrsm);
                default:
                    // No reader for this tool: keep the existing contract of returning null.
                    return null;
            }
        }
        /// <summary>
        /// Reads an MS-Align result table and converts every row whose FDR does not exceed
        /// FdrCutoff into a ProteinSpectrumMatch.
        /// </summary>
        /// <param name="msAlignResultTablePath">Path of the MS-Align result table.</param>
        /// <param name="maxPrsm">
        /// Maximum number of matches to return; reading stops as soon as this many have been
        /// collected. A value of zero or less yields an empty list.
        /// </param>
        /// <returns>The accepted matches in file order.</returns>
        public List<ProteinSpectrumMatch> ReadMsAlignResult(string msAlignResultTablePath, int maxPrsm)
        {
            var parser = new TsvFileParser(msAlignResultTablePath);
            var prsmList = new List<ProteinSpectrumMatch>();

            // Parse numbers with the invariant culture so that '.' decimals and exponent notation
            // are read the same way regardless of the locale of the current thread.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Look every column up once instead of once per row.
            var peptideColumn = parser.GetData("Peptide");
            var scanColumn = parser.GetData("Scan(s)");
            var massColumn = parser.GetData("Precursor_mass");
            var proteinColumn = parser.GetData("Protein_name");
            var firstResidueColumn = parser.GetData("First_residue");
            var lastResidueColumn = parser.GetData("Last_residue");
            var scoreColumn = parser.GetData("#matched_fragment_ions");
            var chargeColumn = parser.GetData("Charge");
            var evalueColumn = parser.GetData("E-value");
            var fdrColumn = parser.GetData("FDR");

            // The cap is checked before a row is processed, so maxPrsm <= 0 returns no matches.
            for (var i = 0; i < parser.NumData && prsmList.Count < maxPrsm; i++)
            {
                var sequence = peptideColumn[i];
                var scanNum = int.Parse(scanColumn[i], invariant);
                var mass = double.Parse(massColumn[i], invariant);
                var protNameDesc = proteinColumn[i];

                // The protein field is split at its first space into name and description; when
                // there is no space, the whole field is used for both.
                var k = protNameDesc.IndexOf(' ');
                var protName = (k < 0) ? protNameDesc : protNameDesc.Substring(0, k);
                var protDesc = (k < 0) ? protNameDesc : protNameDesc.Substring(k + 1);

                var firstResId = int.Parse(firstResidueColumn[i], invariant);
                var lastResId = int.Parse(lastResidueColumn[i], invariant);
                var score = double.Parse(scoreColumn[i], invariant);
                var charge = int.Parse(chargeColumn[i], invariant);
                var evalue = double.Parse(evalueColumn[i], invariant);

                var fdr = double.Parse(fdrColumn[i], invariant);
                if (fdr > FdrCutoff) continue;

                var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsAlign)
                {
                    // The "Peptide" column supplies both the sequence and its display text.
                    SequenceText = sequence,
                    SpectralEvalue = evalue,
                };

                prsmList.Add(prsm);
            }

            return prsmList;
        }
Example #3
0
        /// <summary>
        /// Collapses a list of matches to at most one match per scan number, keeping the match
        /// with the smallest SpectralEvalue for each scan.
        /// </summary>
        /// <param name="targetList">Matches to merge; may be empty.</param>
        /// <returns>
        /// One match per distinct scan number, ordered by ascending scan number. When several
        /// matches of a scan share the smallest SpectralEvalue, the one that appears first in
        /// <paramref name="targetList"/> is kept.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="targetList"/> is null.</exception>
        private List<ProteinSpectrumMatch> MergePrsm(List<ProteinSpectrumMatch> targetList)
        {
            if (targetList == null) throw new ArgumentNullException("targetList");

            // Keyed by scan number rather than using an array of size (max scan + 1): this handles
            // an empty input, does not allocate for unused scan numbers, and the sorted keys give
            // the ascending-scan output order.
            var bestByScan = new SortedDictionary<int, ProteinSpectrumMatch>();

            foreach (var prsm in targetList)
            {
                ProteinSpectrumMatch current;

                // Strict '<' keeps the earlier match on ties.
                if (!bestByScan.TryGetValue(prsm.ScanNum, out current) || prsm.SpectralEvalue < current.SpectralEvalue)
                {
                    bestByScan[prsm.ScanNum] = prsm;
                }
            }

            return bestByScan.Values.ToList();
        }
        /// <summary>
        /// Reads an MS-GF+ result table and converts rows whose QValue does not exceed FdrCutoff
        /// into ProteinSpectrumMatch objects.
        /// </summary>
        /// <remarks>
        /// Within a run of consecutive rows that share a scan number only the first row is
        /// considered; the remaining rows of that run are skipped even if the first row is
        /// rejected by the QValue filter. The mass is computed from the "PrecursorMZ" and
        /// "Charge" columns as (mz - Constants.Proton) * charge. The protein description is left
        /// empty and both residue indices are set to 0.
        /// </remarks>
        /// <param name="msgfResultPath">Path of the MS-GF+ result table.</param>
        /// <param name="maxPrsm">
        /// Maximum number of matches to return; reading stops as soon as this many have been
        /// collected. A value of zero or less yields an empty list.
        /// </param>
        /// <returns>The accepted matches in file order.</returns>
        public List<ProteinSpectrumMatch> ReadMsGfPlusResult(string msgfResultPath, int maxPrsm)
        {
            var parser = new TsvFileParser(msgfResultPath);
            var prsmList = new List<ProteinSpectrumMatch>();
            var prevScanNum = -1;

            // Parse numbers with the invariant culture so that '.' decimals and exponent notation
            // are read the same way regardless of the locale of the current thread.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Look every column up once instead of once per row.
            var peptideColumn = parser.GetData("Peptide");
            var scanColumn = parser.GetData("Scan");
            var mzColumn = parser.GetData("PrecursorMZ");
            var proteinColumn = parser.GetData("Protein");
            var scoreColumn = parser.GetData("MSGFScore");
            var chargeColumn = parser.GetData("Charge");
            var qValueColumn = parser.GetData("QValue");

            // The cap is checked before a row is processed, so maxPrsm <= 0 returns no matches.
            for (var i = 0; i < parser.NumData && prsmList.Count < maxPrsm; i++)
            {
                var sequence = peptideColumn[i];
                var scanNum = int.Parse(scanColumn[i], invariant);

                // Only the first row of each run of equal scan numbers is examined.
                if (prevScanNum == scanNum) continue;
                prevScanNum = scanNum;

                var mz = double.Parse(mzColumn[i], invariant);
                var protName = proteinColumn[i];
                var protDesc = "";
                var score = double.Parse(scoreColumn[i], invariant);
                var charge = int.Parse(chargeColumn[i], invariant);

                // Filter before building the sequence so rejected rows skip that work.
                var fdr = double.Parse(qValueColumn[i], invariant);
                if (fdr > FdrCutoff) continue;

                var seq = Sequence.GetSequenceFromMsGfPlusPeptideStr(sequence);
                var sequenceText = GetSequenceText(seq);
                var mass = (mz - Constants.Proton) * charge;
                var firstResId = 0;
                var lastResId = 0;

                var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsGfPlus)
                {
                    SequenceText = sequenceText,
                };

                prsmList.Add(prsm);
            }

            return prsmList;
        }
        /// <summary>
        /// Reads an MSPathFinder result table and converts the rows that pass the score and
        /// QValue filters into ProteinSpectrumMatch objects.
        /// </summary>
        /// <remarks>
        /// The score is taken from the "#MatchedFragments" column, falling back to "Score" when
        /// that column is absent. The "QValue" and "SpecEValue" columns are optional: without
        /// "QValue" no FDR filtering is applied, and without "SpecEValue" the spectral E-value of
        /// every match is 0.
        /// </remarks>
        /// <param name="msPathFinderResultPath">Path of the MSPathFinder result table.</param>
        /// <param name="maxPrsm">
        /// Maximum number of matches to return; reading stops as soon as this many have been
        /// collected. A value of zero or less yields an empty list.
        /// </param>
        /// <param name="minScore">Rows with a score below this value are skipped.</param>
        /// <param name="maxScore">Rows with a score above this value are skipped.</param>
        /// <returns>The accepted matches in file order.</returns>
        public List<ProteinSpectrumMatch> ReadMsPathFinderResult(string msPathFinderResultPath, int maxPrsm, double minScore = 3, double maxScore = int.MaxValue)
        {
            var parser = new TsvFileParser(msPathFinderResultPath);
            var prsmList = new List<ProteinSpectrumMatch>();

            // Parse numbers with the invariant culture so that '.' decimals and exponent notation
            // are read the same way regardless of the locale of the current thread.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Columns that may be absent, depending on the file.
            var scoreColumn = parser.GetData("#MatchedFragments") ?? parser.GetData("Score");
            var qValColumn = parser.GetData("QValue");
            var evalueColumn = parser.GetData("SpecEValue");

            // Remaining columns, looked up once instead of once per row.
            var sequenceColumn = parser.GetData("Sequence");
            var scanColumn = parser.GetData("Scan");
            var massColumn = parser.GetData("Mass");
            var proteinNameColumn = parser.GetData("ProteinName");
            var proteinDescColumn = parser.GetData("ProteinDesc");
            var chargeColumn = parser.GetData("Charge");
            var startColumn = parser.GetData("Start");
            var endColumn = parser.GetData("End");
            var modColumn = parser.GetData("Modifications");
            var preColumn = parser.GetData("Pre");
            var postColumn = parser.GetData("Post");
            var proteinLengthColumn = parser.GetData("ProteinLength");

            // The cap is checked before a row is processed, so maxPrsm <= 0 returns no matches.
            for (var i = 0; i < parser.NumData && prsmList.Count < maxPrsm; i++)
            {
                var sequence = sequenceColumn[i];
                var scanNum = int.Parse(scanColumn[i], invariant);
                var mass = double.Parse(massColumn[i], invariant);
                var protName = proteinNameColumn[i];
                var protDesc = proteinDescColumn[i];
                var charge = int.Parse(chargeColumn[i], invariant);

                var firstResId = int.Parse(startColumn[i], invariant);
                var lastResId = int.Parse(endColumn[i], invariant);
                var score = double.Parse(scoreColumn[i], invariant);
                var mod = modColumn[i];
                var evalue = (evalueColumn != null) ? double.Parse(evalueColumn[i], invariant) : 0;

                var pre = preColumn[i];
                var post = postColumn[i];
                var proteinLen = int.Parse(proteinLengthColumn[i], invariant);

                if (score < minScore || score > maxScore) continue;

                if (qValColumn != null)
                {
                    var fdr = double.Parse(qValColumn[i], invariant);
                    if (fdr > FdrCutoff) continue;
                }

                var sequenceText = GetSequenceText(sequence, mod);

                var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsPathFinder)
                {
                    SequenceText = sequenceText,
                    Modifications = mod,
                    Pre = pre,
                    Post = post,
                    ProteinLength = proteinLen,
                    SpectralEvalue = evalue,
                };

                prsmList.Add(prsm);
            }

            return prsmList;
        }