Exemple #1
0
        private void Read(string ms1FtFileName)
        {
            var ftFileParser = new TsvFileParser(ms1FtFileName);
            var monoMassArr = ftFileParser.GetData("MonoMass").Select(Convert.ToDouble).ToArray();
            var minScanArray = ftFileParser.GetData("MinScan").Select(s => Convert.ToInt32(s)).ToArray();
            var maxScanArray = ftFileParser.GetData("MaxScan").Select(s => Convert.ToInt32(s)).ToArray();
            var repScanArray = ftFileParser.GetData("RepScan").Select(s => Convert.ToInt32(s)).ToArray();
            var minChargeArray = ftFileParser.GetData("MinCharge").Select(s => Convert.ToInt32(s)).ToArray();
            var maxChargeArray = ftFileParser.GetData("MaxCharge").Select(s => Convert.ToInt32(s)).ToArray();
            var scoreArray = ftFileParser.GetData("LikelihoodRatio").Select(Convert.ToDouble).ToArray();
            var featureCountFiltered = 0;

            for (var i = 0; i < monoMassArr.Length; i++)
            {
                //if (flagArray[i] == 0 && probArray[i] < _minProbability)  continue;
                if (scoreArray[i] < _minLikelihoodRatio) continue;
                featureCountFiltered++;
                var monoMass = monoMassArr[i];
                _lcMsChargeMap.SetMatches(monoMass, minScanArray[i], maxScanArray[i], repScanArray[i], minChargeArray[i], maxChargeArray[i]);
            }

            // NOTE: The DMS Analysis Manager looks for this statistic; do not change it
            Console.Write(@"{0}/{1} features loaded...", featureCountFiltered, monoMassArr.Length);
            _lcMsChargeMap.CreateMassToScanNumMap();
        }
Exemple #2
0
        public void GenerateVennDiagrams()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // DIA
            const string dir = @"H:\Research\EDRN\Ic\DIA_Replicate";

            const string rep1 = dir + @"\EDRN_Serum_07_DIA_1_01_13Nov13_Samwise_13-07-28_IcTda.tsv";
            //const string rep2 = dir + @"\EDRN_Serum_07_DIA_1_02_13Nov13_Samwise_13-07-28_IcTda.tsv";
            //const string rep3 = dir + @"\EDRN_Serum_07_DIA_1_03_13Nov13_Samwise_13-07-28_IcTda.tsv";
            const string rep4 = dir + @"\EDRN_Serum_07_DIA_1_04_13Nov13_Samwise_13-07-28_IcTda.tsv";
            //const string rep5 = dir + @"\EDRN_Serum_07_DIA_1_05_18Nov13_Samwise_13-07-28_IcTda.tsv";

            const string resultPath1 = rep1;
            const string resultPath2 = rep4;

            var result1 = new TsvFileParser(resultPath1);
            var result2 = new TsvFileParser(resultPath2);

            const double pepQValueThreshold = 0.01;
            var vennDiagram = new VennDiagram<string>(result1.GetPeptidesAboveQValueThreshold(pepQValueThreshold),
                                                      result2.GetPeptidesAboveQValueThreshold(pepQValueThreshold));
            Console.WriteLine("{0}\t{1}\t{2}",
                              vennDiagram.Set1Only.Count, // + vennDiagram.Intersection.Count,
                              vennDiagram.Intersection.Count,
                              vennDiagram.Set2Only.Count //+ vennDiagram.Intersection.Count
                              );
        }
Exemple #3
0
        private void Read(string isosFileName)
        {
            var icrToolsparser = new TsvFileParser(isosFileName, ',');
            var monoMassArr = icrToolsparser.GetData("monoisotopic_mw").Select(Convert.ToDouble).ToArray();
            var scanArray = icrToolsparser.GetData("scan_num").Select(s => Convert.ToInt32(s)).ToArray();
            var chargeArray = icrToolsparser.GetData("charge").Select(s => Convert.ToInt32(s)).ToArray();

            var fitStringArr = icrToolsparser.GetData("fit");
            var fitArray = fitStringArr == null ? null : icrToolsparser.GetData("fit").Select(Convert.ToDouble).ToArray();

            var featureCountFiltered = 0;

            var minMass = double.MaxValue;
            var maxMass = 0.0;
            for (var i = 0; i < monoMassArr.Length; i++)
            {
                if (fitArray != null && fitArray[i] > _fitScoreThreshold || chargeArray[i] <= 1) 
                    continue;

                featureCountFiltered++;

                var scan = scanArray[i];
                var monoMass = monoMassArr[i];
                if (minMass > monoMass) minMass = monoMass;
                if (maxMass < monoMass) maxMass = monoMass;

                var minScan = _run.GetPrevScanNum(scan, 1);
                var maxScan = _run.GetNextScanNum(scan, 1);
                _lcMsMatchMap.SetMatches(monoMass, minScan, maxScan);
            }

            Console.Write(@"{0}/{1} features loaded...", featureCountFiltered, monoMassArr.Length);

            _lcMsMatchMap.CreateSequenceMassToMs2ScansMap(_run, _massTolerance, minMass, maxMass);
        }
Exemple #4
0
        private void Rescore(string icResultFilePath, string outputFilePath)
        {
            var parser = new TsvFileParser(icResultFilePath);
            var sequences = parser.GetData("Sequence");
            var scanNums = parser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
            var charges = parser.GetData("Charge").Select(c => Convert.ToInt32(c)).ToArray();
            var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
            var modIndex = parser.GetHeaders().IndexOf("Modifications");

            var rows = parser.GetRows();
            var headers = parser.GetHeaders();

            using (var writer = new StreamWriter(outputFilePath))
            {
                writer.WriteLine("{0}\t{1}", string.Join("\t", headers), IcScores.GetScoreNames());
                for (var i = 0; i < parser.NumData; i++)
                {
                    var row = rows[i];
                    var seqStr = sequences[i];
                    var charge = charges[i];
                    var scanNum = scanNums[i];
                    var composition = compositions[i];

                    var scores = _topDownScorer.GetScores(AminoAcid.ProteinNTerm, seqStr, AminoAcid.ProteinCTerm, composition, charge, scanNum);

                    var token = row.Split('\t');
                    for (var j = 0; j < token.Length; j++)
                    {
                        if(j != modIndex) writer.Write(token[j]+"\t");
                        else writer.Write("["+scores.Modifications+"]"+"\t");
                    }
                    writer.WriteLine(scores);
                }
            }
        }
        public void TestVennDiagram()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string result1Path = @"C:\cygwin\home\kims336\Data\QCShewQE\NoMod_NTT1.tsv";
            const string result2Path = @"C:\cygwin\home\kims336\Data\QCShewQE\Ic_NTT1_Test\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28_IcTda.tsv";

            if (!File.Exists(result1Path))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, result1Path);
            }

            if (!File.Exists(result2Path))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, result2Path);
            }

            const double pepQValueThreshold = 0.01;
            var result1 = new TsvFileParser(result1Path);
            var result2 = new TsvFileParser(result2Path);

            var vennDiagram = new VennDiagram<string>(result1.GetPeptides(pepQValueThreshold),
                                                      result2.GetPeptides(pepQValueThreshold));

            var intersectionPeptides = vennDiagram.Intersection;
            Console.WriteLine(vennDiagram.Set1 + " " + vennDiagram.Set2);
            Console.WriteLine(vennDiagram.Set1 + " " + vennDiagram.Intersection + " " + vennDiagram.Set2Only);
        }
        public IList<SpectrumMatch> Read()
        {
            var specMatches = new List<SpectrumMatch>();
            var tsvFile = new TsvFileParser(_fileName);
            var precursorCharges = tsvFile.GetData(PrecursorChargeHeader);
            var scans = tsvFile.GetData(ScanHeader);

            var peptides = tsvFile.GetData(BottomUpPeptideHeader);
            if (scans == null)
                throw new FormatException();

            var pepQValues = tsvFile.GetData(PepQValueHeader);
            var formulas = tsvFile.GetData(FormulaHeader);

            var peptideSet = new HashSet<string>();

            for (int i = 0; i < peptides.Count; i++)
            {
                if (Convert.ToDouble(pepQValues[i]) > PepQValueThreshold || peptideSet.Contains(peptides[i])) continue;
                peptideSet.Add(peptides[i]);
                var scanNum = Convert.ToInt32(scans[i]);
//                    var spectrum = lcms.GetSpectrum(scanNum);
//                    var spec = spectrum as ProductSpectrum;
//                    if (spec == null || spec.ActivationMethod != Act) continue;
                int precursorCharge = Convert.ToInt32(precursorCharges[i]);
                specMatches.Add((formulas != null && formulas[i] != null)
                    ? new SpectrumMatch(peptides[i], DataFileFormat.IcBottomUp, _lcms, scanNum, precursorCharge, _decoy, formulas[i])
                    : new SpectrumMatch(peptides[i], DataFileFormat.IcBottomUp, _lcms, scanNum, precursorCharge, _decoy));
            }
            return specMatches;
        }
        public void TestReadingTmtResultFile()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);
           
            const string filePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSGFPlusResultTMT10.tsv";
            if (!File.Exists(filePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, filePath);
            }

            var parser = new TsvFileParser(filePath);
            var pepStrs = parser.GetData("Peptide");
            var formulaStrs = parser.GetData("Formula");

            Assert.True(pepStrs.Count == formulaStrs.Count);

            var peptides = pepStrs.Select(Sequence.GetSequenceFromMsGfPlusPeptideStr).ToList();
            var formulae = formulaStrs.Select(Composition.Parse).ToList();

            Assert.True(peptides.Count == formulae.Count);

            for (var i = 0; i < peptides.Count; i++)
            {
                Assert.True((peptides[i].Composition + Composition.H2O).Equals(formulae[i]));
            }
        }
Exemple #8
0
        public IList<SpectrumMatch> Read()
        {
            var specMatches = new List<SpectrumMatch>();
            var tsvFile = new TsvFileParser(_fileName);
            var precursorCharges = tsvFile.GetData(PrecursorChargeHeader);
            var scans = tsvFile.GetData(ScanHeader);

            var peptides = tsvFile.GetData(TopDownPeptideHeader);
            if (peptides != null)
            {
                var peptideSet = new HashSet<string>();
                const double filterThreshold = QValueThreshold;
                var filterValues = tsvFile.GetData(QValueHeader);

                var aset = new AminoAcidSet();

                for (int i = 0; i < peptides.Count; i++)
                {
                    if (Convert.ToDouble(filterValues[i]) > filterThreshold || peptideSet.Contains(peptides[i])) continue;
                    peptideSet.Add(peptides[i]);
                    var scanNum = Convert.ToInt32(scans[i]);
                    int precursorCharge = Convert.ToInt32(precursorCharges[i]);
                    specMatches.Add(new SpectrumMatch(new Sequence(peptides[i], aset), _lcms, scanNum, precursorCharge, _decoy));
                }
            }
            return specMatches;
        }
Exemple #9
0
        public void AddMostAbundantIsotopePeakIntensity()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string rawFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143.raw";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + rawFilePath);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

            const string resultFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143_IcTda.tsv";

            var parser = new TsvFileParser(resultFilePath);
            var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
            var scanNums = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
            var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
            var precursorIntensities = new double[parser.NumData];
            var tolerance = new Tolerance(10);
            for (var i = 0; i < parser.NumData; i++)
            {
                var scanNum = scanNums[i];
                var composition = compositions[i];
                var charge = charges[i];
                var precursorIon = new Ion(composition, charge);

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec = run.GetSpectrum(precursorScanNum);
                var isotopePeaks = precursorSpec.GetAllIsotopePeaks(precursorIon, tolerance, 0.1);
                if (isotopePeaks != null)
                {
                    var maxIntensity = 0.0;
                    for (var j = 0; j < isotopePeaks.Length; j++)
                    {
                        if (isotopePeaks[j] != null && isotopePeaks[j].Intensity > maxIntensity)
                            maxIntensity = isotopePeaks[j].Intensity;
                    }
                    precursorIntensities[i] = maxIntensity;
                }
            }

            // Writing
            const string newResultFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143_IcTdaWithIntensities.tsv";
            using (var writer = new StreamWriter(newResultFilePath))
            {
                writer.WriteLine(string.Join("\t", parser.GetHeaders())+"\t"+"PrecursorIntensity");
                for (var i = 0; i < parser.NumData; i++)
                {
                    writer.WriteLine(parser.GetRows()[i]+"\t"+precursorIntensities[i]);
                }
            }
            Console.WriteLine("Done");
        }
Exemple #10
0
        public void SummarizeAnilResults()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string resultFolder = @"H:\Research\Anil\Oct28";
            if (!Directory.Exists(resultFolder))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, resultFolder);
            }

            var actMethods = new[] { ActivationMethod.CID, ActivationMethod.ETD, ActivationMethod.HCD };

            Console.WriteLine("Data\tCID\t\tETD\t\tHCD\t");
            Console.WriteLine("\tNumId\tMaxMass\tNumId\tMaxMass\tNumId\tMaxMass");
            foreach (var rawFile in Directory.GetFiles(resultFolder, "*.raw"))
            {
                var datasetName = Path.GetFileNameWithoutExtension(rawFile);

                var resultFile = Path.Combine(Path.GetDirectoryName(rawFile), datasetName + "_IcTda.tsv");
                var numId = new Dictionary<ActivationMethod, int>();
                var maxMass = new Dictionary<ActivationMethod, double>();

                foreach (var actMethod in actMethods)
                {
                    numId[actMethod] = 0;
                    maxMass[actMethod] = 0.0;
                }

                var run = PbfLcMsRun.GetLcMsRun(rawFile);
                var tsvParser = new TsvFileParser(resultFile);
                var qValues = tsvParser.GetData("QValue").Select(Convert.ToDouble).ToArray();
                var scanNums = tsvParser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
                var masses = tsvParser.GetData("Mass").Select(Convert.ToDouble).ToArray();

                for (var i = 0; i < qValues.Length; i++)
                {
                    if (qValues[i] > 0.01) break;
                    var scanNum = scanNums[i];
                    var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
                    Assert.True(spec != null);
                    ++numId[spec.ActivationMethod];

                    var mass = masses[i];
                    if (mass > maxMass[spec.ActivationMethod]) maxMass[spec.ActivationMethod] = mass;
                }
                Console.Write(datasetName);
                foreach (var actMethod in actMethods)
                {
                    Console.Write("\t" + numId[actMethod]);
                    Console.Write("\t" + maxMass[actMethod]);
                }
                Console.WriteLine();
            }
        }
Exemple #11
0
        public void TestScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            var rawFile = @"\\protoapps\UserData\Jungkap\Joshua\testData\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf" ;
            var resultFile = @"\\protoapps\UserData\Jungkap\Joshua\IdResult\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

            if (!File.Exists(rawFile))
            {
                Console.WriteLine(@"Warning: Skipping test {0} since file not found: {1}", methodName, rawFile);
                return;
            }

            if (!File.Exists(resultFile))
            {
                Console.WriteLine(@"Warning: Skipping test {0} since file not found: {1}", methodName, resultFile);
                return;
            }

            var tsvParser = new TsvFileParser(resultFile);
            var tsvData = tsvParser.GetAllData();
            var ms2ScanNumbers = tsvData["Scan"];

            var run = PbfLcMsRun.GetLcMsRun(rawFile, 0, 0);

            for (int i = 0; i < 1; i++)
            {

                var scanNum = Int32.Parse(ms2ScanNumbers[i]);
                var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
                int tsvIndex = ms2ScanNumbers.FindIndex(x => Int32.Parse(x) == scanNum);

                var seqStr = tsvData["Sequence"].ElementAt(tsvIndex).Trim();
                var seqMod = tsvData["Modifications"].ElementAt(tsvIndex).Trim();
                var aaSet = new AminoAcidSet();
                var sequence = Sequence.CreateSequence(seqStr, seqMod, aaSet);
                Console.WriteLine(sequence.Count);
                var score = GetScoreTest(sequence, spectrum);
                Console.WriteLine(scanNum + ":" + score);

            }
        }
        public List<ProteinSpectrumMatch> ReadMsGfPlusResult(string msgfResultPath, int maxPrsm)
        {
            var parser = new TsvFileParser(msgfResultPath);
            var prsmList = new List<ProteinSpectrumMatch>();
            var prevScanNum = -1;

            for (var i = 0; i < parser.NumData; i++)
            {
                var sequence = parser.GetData("Peptide")[i];
                var scanNum = int.Parse(parser.GetData("Scan")[i]);

                if (prevScanNum == scanNum) continue;
                prevScanNum = scanNum;

                var mz = double.Parse(parser.GetData("PrecursorMZ")[i]);
                var protName = parser.GetData("Protein")[i];
                var protDesc = "";
                var score = double.Parse(parser.GetData("MSGFScore")[i]);
                var charge = int.Parse(parser.GetData("Charge")[i]);

                var seq = Sequence.GetSequenceFromMsGfPlusPeptideStr(sequence);
                var sequenceText = GetSequenceText(seq);
                var mass = (mz - Constants.Proton)*charge;
                var firstResId = 0;
                var lastResId = 0;
                var fdr = Double.Parse(parser.GetData("QValue")[i]);
                if (fdr > FdrCutoff) continue;

                var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsGfPlus)
                {
                    SequenceText = sequenceText,
                };

                prsmList.Add(prsm);

                if (prsmList.Count >= maxPrsm) break;
            }

            return prsmList;
        }
        public List<ProteinSpectrumMatch> ReadMsAlignResult(string msAlignResultTablePath, int maxPrsm)
        {
            var parser = new TsvFileParser(msAlignResultTablePath);
            var prsmList = new List<ProteinSpectrumMatch>();

            for (var i = 0; i < parser.NumData; i++)
            {
                var sequence = parser.GetData("Peptide")[i];
                var scanNum = int.Parse(parser.GetData("Scan(s)")[i]);
                var mass = double.Parse(parser.GetData("Precursor_mass")[i]);
                var protNameDesc = parser.GetData("Protein_name")[i];

                var k = protNameDesc.IndexOf(' ');
                var protName = (k < 0) ? protNameDesc : protNameDesc.Substring(0, k);
                var protDesc = (k < 0) ? protNameDesc : protNameDesc.Substring(k+1);
                
                var firstResId = int.Parse(parser.GetData("First_residue")[i]);
                var lastResId = int.Parse(parser.GetData("Last_residue")[i]);
                var score = double.Parse(parser.GetData("#matched_fragment_ions")[i]);
                var sequenceText = parser.GetData("Peptide")[i];
                var charge = int.Parse(parser.GetData("Charge")[i]);
                var evalue = double.Parse(parser.GetData("E-value")[i]);

                var fdr = Double.Parse(parser.GetData("FDR")[i]);
                if (fdr > FdrCutoff) continue;

                var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsAlign)
                {
                    SequenceText = sequenceText,
                    SpectralEvalue = evalue,
                };

                prsmList.Add(prsm);

                if (prsmList.Count >= maxPrsm) break;
            }

            return prsmList;
        }
Exemple #14
0
        public void TestClusterCentricSearch()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string pfResultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V4_JP_Len500\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
            if (!File.Exists(pfResultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pfResultFilePath);
            }

            var tsvReader = new TsvFileParser(pfResultFilePath);

            var ms2Scans = tsvReader.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
            var compositions = tsvReader.GetData("Composition").ToArray();
            var qValues = tsvReader.GetData("QValue").Select(Convert.ToDouble).ToArray();

            var compScanTable = new Dictionary<string, IList<int>>();
            for(var i=0; i<qValues.Length; i++)
            {
                var qValue = qValues[i];
                if (qValue > 0.01) break;
                IList<int> scanNums;
                if(compScanTable.TryGetValue(compositions[i], out scanNums))
                {
                    scanNums.Add(ms2Scans[i]);
                }
                else
                {
                    compScanTable.Add(compositions[i], new List<int> {ms2Scans[i]});
                }
            }

            Console.Write("NumCompositions: {0}", compScanTable.Keys.Count);

            //const string featureFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V4_JP_Len500\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

        }
Exemple #15
0
        private void Parse(string tagFilePath)
        {
            var tagParser = new TsvFileParser(tagFilePath);
            var scan = tagParser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
            var sequence = tagParser.GetData("SequenceTag").ToArray();
            var isPrefix = tagParser.GetData("IsPrefix").Select(s => s.Equals("1")).ToArray();
            var flankingMass = tagParser.GetData("FlankingMass").Select(Convert.ToDouble).ToArray();
            for (var i = 0; i < tagParser.NumData; i++)
            {
                if (sequence[i].Length < _minTagLength) continue;
                var tag = new SequenceTag(scan[i], sequence[i], isPrefix[i], flankingMass[i]);

                IList<SequenceTag> tagList;
                if (_scanToTags.TryGetValue(scan[i], out tagList))
                {
                    if (tagList.Count < _numTagsPerScan) tagList.Add(tag);
                }
                else
                {
                    _scanToTags.Add(scan[i], new List<SequenceTag> { tag });
                }
            }
        }
        public void ValidateIcResultsWithModifications()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1_Rescored.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var parser = new TsvFileParser(resultFilePath);
            var sequences = parser.GetData("Sequence");
            var modifications = parser.GetData("Modifications");
            var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
            var scanNums = parser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
            var aaSet = new AminoAcidSet();
            for (var i = 0; i < parser.NumData; i++)
            {
                var sequenceComp = aaSet.GetComposition(sequences[i]) + Composition.H2O;

                var modComposition = Composition.Zero;
                var modsStr = modifications[i].Substring(1, modifications[i].Length - 2);
                var mods = modsStr.Split(',');
                foreach(var modStr in mods)
                {
                    if (modStr.Length == 0) continue;
                    var modName = modStr.Split()[0];
                    var mod = Modification.Get(modName);
                    modComposition += mod.Composition;
                }

                var compFromSeqAndMods = sequenceComp + modComposition;
                Assert.True(compFromSeqAndMods.Equals(compositions[i]));
            }
        }
Exemple #17
0
        private void Rescore(string msAlignFilePath, string outputFilePath)
        {
            var parser = new TsvFileParser(msAlignFilePath);
            var sequences = parser.GetData("Peptide");
            var scanNums = parser.GetData("Scan(s)").Select(s => Convert.ToInt32(s)).ToArray();
            var charges = parser.GetData("Charge").Select(c => Convert.ToInt32(c)).ToArray();

            var rows = parser.GetRows();
            var headers = parser.GetHeaders();

            using (var writer = new StreamWriter(outputFilePath))
            {
                writer.WriteLine("{0}\t{1}", string.Join("\t", headers), IcScores.GetScoreNames());
                for (var i = 0; i < parser.NumData; i++)
                {
                    var row = rows[i];
                    var seqStr = SimpleStringProcessing.GetStringBetweenDots(sequences[i]);
                    if (seqStr == null || seqStr.Contains("(")) continue; //TODO: currently ignore ids with modifications

                    var composition = AASet.GetComposition(seqStr);
                    //var sequence = new Sequence(seqStr, AASet);
                    //if (sequence == null)
                    //{
                    //    Console.WriteLine("Ignore illegal sequence: {0}", seqStr);
                    //    continue;
                    //}
                    var charge = charges[i];
                    var scanNum = scanNums[i];

                    var scores = _topDownScorer.GetScores(AminoAcid.ProteinNTerm, seqStr, AminoAcid.ProteinCTerm, composition, charge, scanNum);
                    if (scores == null) continue;

                    writer.WriteLine("{0}\t{1}", row, scores);
                }
            }
        }
Exemple #18
0
        private bool Parse(string fileName)
        {
            var parser = new TsvFileParser(fileName);
            var scan = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
            var pre = parser.GetData("Pre").Where(s => s.Length == 1).Select(p => p[0]).ToArray();
            if (pre.Length != parser.NumData) return false;
            var sequence = parser.GetData("Sequence").ToArray();
            var post = parser.GetData("Post").Where(s => s.Length == 1).Select(p => p[0]).ToArray();
            if (post.Length != parser.NumData) return false;
            var mod = parser.GetData("Modifications").ToArray();
            var composition = parser.GetData("Composition").Select(Composition.Parse).ToArray();
            var proteinName = parser.GetData("ProteinName").ToArray();
            var proteinDesc = parser.GetData("ProteinDesc").ToArray();
            var proteinLength = parser.GetData("ProteinLength").Select(s => Convert.ToInt32(s)).ToArray();
            var start = parser.GetData("Start").Select(s => Convert.ToInt32(s)).ToArray();
            var end = parser.GetData("End").Select(s => Convert.ToInt32(s)).ToArray();
            var charge = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
            var mostAbundantIsotopeMz = parser.GetData("MostAbundantIsotopeMz").Select(Convert.ToDouble).ToArray();
            var mass = parser.GetData("Mass").Select(Convert.ToDouble).ToArray();
            var numMatchedFragment = parser.GetData("#MatchedFragments").Select(s => Convert.ToInt32(s)).ToArray();
            var qValue = parser.GetData("QValue").Select(Convert.ToDouble).ToArray();
            var pepQValue = parser.GetData("PepQValue").Select(Convert.ToDouble).ToArray();

            for (var i = 0; i < parser.NumData; i++)
            {
                var id = new MsPathFinderId(scan[i], pre[i], sequence[i], post[i], mod[i], 
                    composition[i], proteinName[i], proteinDesc[i], proteinLength[i],
                    start[i], end[i], charge[i], mostAbundantIsotopeMz[i], mass[i], 
                    numMatchedFragment[i], qValue[i], pepQValue[i])
                ;
                _idList.Add(id);

                if(!_scanNumToPrSm.ContainsKey(scan[i])) _scanNumToPrSm.Add(scan[i], id);
            }
            return true;
        }
Exemple #19
0
        public void CompareIpaIc()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string resultDir = @"D:\Research\Data\UW\QExactive\Ic_NTT2_03";
            if (!Directory.Exists(resultDir))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, resultDir);
            }

            var targetPeptides = new HashSet<string>();
            foreach (var icResultFilePath in Directory.GetFiles(resultDir, "*DIA*IcTarget.tsv"))
            {
                var icParser = new TsvFileParser(icResultFilePath);
                foreach (var peptide in icParser.GetData("Sequence")) targetPeptides.Add(peptide);
            }

            const string ipaResultPath = @"D:\Research\Data\UW\QExactive\DIA_All_Summary.tsv";
            if (!File.Exists(ipaResultPath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, methodName);
            }            

            var parser = new TsvFileParser(ipaResultPath);
            var ipaPeptides = parser.GetPeptides(0.005).Select(p => p.Replace("C+57.021", "C"));
            var ipaOnly = 0;
            var both = 0;
            foreach (var ipaPeptide in ipaPeptides)
            {
                if (targetPeptides.Contains(ipaPeptide)) ++both;
                else
                {
                    ++ipaOnly;
                    Console.WriteLine(ipaPeptide);
                }
            }

            Console.WriteLine("Both: {0}, IpaOnly: {1}, Sum: {2}", both, ipaOnly, both+ipaOnly);
        }
Exemple #20
0
        public string ProcessFile(string rawFile, string resultFile, string methodName)
        {
            if (!File.Exists(rawFile))
            {
                Console.WriteLine(@"Warning: Skipping test {0} since file not found: {1}", methodName, rawFile);
                return "\n";
            }

            if (!File.Exists(resultFile))
            {
                Console.WriteLine(@"Warning: Skipping test {0} since file not found: {1}", methodName, resultFile);
                return "\n";
            }

            var tsvParser = new TsvFileParser(resultFile);
            var headerList = tsvParser.GetHeaders();
            var tsvData = tsvParser.GetAllData();
            var ms2ScanNumbers = tsvData["Scan"];

            var run = PbfLcMsRun.GetLcMsRun(rawFile, 0, 0);
            
            var resultLine = "";
            for (int i = 0; i < ms2ScanNumbers.Count; i++)
            {

                var scanNum = Int32.Parse(ms2ScanNumbers[i]);
                var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
                int tsvIndex = ms2ScanNumbers.FindIndex(x => Int32.Parse(x) == scanNum);

                var qValue = Double.Parse(tsvData["QValue"].ElementAt(tsvIndex));
                if (qValue > 0.01) continue;

                var seqStr = tsvData["Sequence"].ElementAt(tsvIndex).Trim();
                var seqMod = tsvData["Modifications"].ElementAt(tsvIndex).Trim();
                var matchedFrags = tsvData["#MatchedFragments"].ElementAt(tsvIndex).Trim();
                var aaSet = new AminoAcidSet();
                var sequence = Sequence.CreateSequence(seqStr, seqMod, aaSet);
                var tol = new Tolerance(10);
                var sequenceFinder = new SequenceTagIndexFinder(tol, 1, 10);
                var results = sequenceFinder.GetLongestSequence(spectrum, sequence);
                resultLine += String.Format("{0},{1},{2},{3},{4},{5},{6},{7},{8},\n", scanNum, matchedFrags, seqStr, results.Item1, results.Item2,results.Item3,results.Item4,results.Item5,results.Item6);
            }
            return resultLine;
        }
        public void TestSequenceTag()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            //const string TestRawFile = @"D:\\Vlad_TopDown\\raw\\yufeng_column_test2.raw";
            //const string TestResultFile = @"D:\\Vlad_TopDown\\results\\yufeng_column_test2_IcTda.tsv";
            const string TestRawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
            const string TestResultFile = @"D:\MassSpecFiles\training\IdResult\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
            //const string TestRawFile = @"D:\MassSpecFiles\Lewy\Lewy_intact_01.raw";
            //const string TestResultFile = @"D:\MassSpecFiles\Lewy\Lewy_intact_01_IcTda.tsv";

            if (!File.Exists(TestRawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestRawFile);
            }

            if (!File.Exists(TestResultFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestResultFile);
            }

            // Configure amino acid set
            
            var aminoAcidList = new List<AminoAcid>();
            foreach (var aa in AminoAcid.StandardAminoAcidArr)
            {
                aminoAcidList.Add(aa);
                aminoAcidList.Add(new ModifiedAminoAcid(aa, Modification.Acetylation));
                aminoAcidList.Add(new ModifiedAminoAcid(aa, Modification.Oxidation));

            }


            //const int MaxTags = 100000;
            var tsvParser = new TsvFileParser(TestResultFile);
            var headerList = tsvParser.GetHeaders();
            var tsvData = tsvParser.GetAllData();
            var ms2ScanNumbers = tsvData["Scan"];
        
            var run = PbfLcMsRun.GetLcMsRun(TestRawFile);
            var nSpec = 0;
            var nHitSpec = 0;

            for (var i = 0; i < ms2ScanNumbers.Count; i++)
            //foreach(var scanNum in targetScans)
            {
                var scanNum = Int32.Parse(ms2ScanNumbers[i]);

                //if (scanNum != 4672) continue;
                
                var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;

                int tsvIndex = ms2ScanNumbers.FindIndex(x => Int32.Parse(x) == scanNum);
                var qValue = double.Parse(tsvData["QValue"].ElementAt(tsvIndex));
                if (qValue > 0.01) break;

                var seqStr = tsvData["Sequence"].ElementAt(tsvIndex).Trim();
                var modStr = tsvData["Modifications"].ElementAt(tsvIndex).Trim();
                var tolerance = new Tolerance(5);
                var tagFinder = new SequenceTagFinder(spectrum, tolerance, 5, 8, aminoAcidList.ToArray());
                var nTags = 0;
                var nHit = 0;

                var seqOjb = Sequence.CreateSequence(seqStr, modStr, new AminoAcidSet());
                var compWithoutH2O = seqOjb.Composition - Composition.H2O;

                //Console.WriteLine(compWithoutH2O.Mass);

                foreach (var seqTagStr in tagFinder.GetAllSequenceTagString())
                {
                    if (seqStr.Contains(seqTagStr.Sequence)) //|| seqStr.Contains(Reverse(tagStr)))
                    {

                        //var idx = seqStr.IndexOf(seqTagStr.Sequence);

                        //seqStr.Substring(0, idx)
                        /*var comp2 = seqOjb.GetComposition(0, idx);

                        Console.Write(comp2.Mass);
                        Console.Write("\t");

                        Console.Write(seqTagStr.FlankingMass);
                        Console.Write("\t");
                        Console.Write(seqTagStr.Sequence);
                        Console.Write("\t");
                        Console.Write(seqTagStr.IsPrefix);
                        Console.WriteLine("");
                        */
                        if (seqStr.Contains(seqTagStr.Sequence)) nHit++;
                    }
                    nTags++;                    
                }
                
                nSpec++;
                if (nHit > 0) nHitSpec++;

                Console.WriteLine(@"[{0}]seqLen = {1}: {2}/{3}", scanNum, seqStr.Length, nHit, nTags);
            }
            //var existingTags = tagFinder.ExtractExistingSequneceTags(sequence);
            Console.Write("{0}/{1}", nHitSpec, nSpec);
        }
        public void CountMatchedProteins()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const int minTagLength = 3;

            var scanToProtein = new Dictionary<int, string>();
            var idTag = new Dictionary<int, bool>();
            const string resultFilePath = @"H:\Research\ProMex\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var parser = new TsvFileParser(resultFilePath);
            var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
            var proteinNames = parser.GetData("ProteinName").ToArray();
            var qValues = parser.GetData("QValue").Select(Convert.ToDouble).ToArray();
            for (var i = 0; i < qValues.Length; i++)
            {
                if (qValues[i] > 0.01) break;
                scanToProtein.Add(scans[i], proteinNames[i]);
                idTag.Add(scans[i], false);
            }

            const string rawFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

//            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";
//            const string fastaFilePath =
//                @"D:\Research\Data\CommonContaminants\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta";
            var fastaDb = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            Console.WriteLine("Sequence length: {0}", fastaDb.GetSequence().Length);

            const string tagFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.seqtag";
            if (!File.Exists(tagFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFilePath);
            }

            var hist = new Dictionary<int, int>();
            
            var scanSet = new HashSet<int>();
            HashSet<string> proteinSetForThisScan = null;
            var prevScan = -1;
            var totalNumMatches = 0L;
            var isHeader = true;
            foreach (var line in File.ReadAllLines(tagFilePath))
            {
                if (isHeader)
                {
                    isHeader = false;
                    continue;
                }

                var token = line.Split('\t');
                if (token.Length < 3) continue;
                var scan = Convert.ToInt32(token[0]);
                var proteinId = scanToProtein.ContainsKey(scan) ? scanToProtein[scan] : null;

                if (scan != prevScan)
                {
                    if (proteinSetForThisScan != null)
                    {
                        var numMatches = proteinSetForThisScan.Count;
                        int numOcc;
                        if (hist.TryGetValue(numMatches, out numOcc)) hist[numMatches] = numOcc + 1;
                        else hist.Add(numMatches, 1);
                    }

                    prevScan = scan;
                    proteinSetForThisScan = new HashSet<string>();
                }

                scanSet.Add(scan);
                var tag = token[1];
                if (tag.Length < minTagLength) continue;

                if (proteinSetForThisScan == null) continue;

                var numMatchesForThisTag = 0;
                foreach (var matchedProtein in searchableDb.FindAllMatchedSequenceIndices(tag)
                    .Select(index => fastaDb.GetProteinName(index)))
                {
                    proteinSetForThisScan.Add(matchedProtein);
                    ++numMatchesForThisTag;

                    if (proteinId != null && matchedProtein.Equals(proteinId))
                    {
                        idTag[scan] = true;
                    }
                }
                totalNumMatches += numMatchesForThisTag;
//                if (numMatchesForThisTag > 10)
//                {
//                    Console.WriteLine("{0}\t{1}", tag, numMatchesForThisTag);
//                }
            }

            if (proteinSetForThisScan != null)
            {
                var numMatches = proteinSetForThisScan.Count;
                int numOcc;
                if (hist.TryGetValue(numMatches, out numOcc)) hist[numMatches] = numOcc + 1;
                else hist.Add(numMatches, 1);
            }
            
            Console.WriteLine("AvgNumMatches: {0}", totalNumMatches/(float)scanSet.Count);
            Console.WriteLine("Histogram:");
            foreach (var entry in hist.OrderBy(e => e.Key))
            {
                Console.WriteLine("{0}\t{1}", entry.Key, entry.Value);
            }

            Console.WriteLine("NumId: {0}", idTag.Count);
            Console.WriteLine("NumIdByTag: {0}", idTag.Select(e => e.Value).Count(v => v));
        }
Exemple #23
0
        public void FilteringEfficiencyQcShew()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();
            sw.Start();
            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";
            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
            sw.Stop();

            Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const int minPrecursorCharge = 3;
            const int maxPrecursorCharge = 30;
            const int tolerancePpm = 10;
            var tolerance = new Tolerance(tolerancePpm);
            sw.Reset();
            sw.Start();
            var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40);
            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);

            sw.Stop();

            Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

            ISequenceFilter ms1Filter = ms1BasedFilter;
            
            sw.Reset();
            sw.Start();
            const double minProteinMass = 3000.0;
            const double maxProteinMass = 30000.0;
            var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
            var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
            var numComparisons = 0L;
            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
                numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
            }
            sw.Stop();

            Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

            //const string prot =
            //    "ADVFHLGLTKAMLDGATLAIVPGDPERVKRIAELMDNATFLASHREYTSYLAYADGKPVVICSTGIGGPSTSIAVEELAQLGVNTFLRVGTTGAIQPHVNVGDVIVTQASVRLDGASLHFAPMEFPAVANFECTTAMVAACRDAGVEPHIGVTASSDTFYPGQERYDTVTGRVTRRFAGSMKEWQDMGVLNYEMESATLFTMCATQGWRAACVAGVIVNRTQQEIPDEATMKKTEVSAVSIVVAAAKKLLA";
            //var protMass = (new AminoAcidSet().GetComposition(prot) + Composition.H2O).Mass;
            //Console.WriteLine("************ScanNums: " + string.Join("\t", ms1Filter.GetMatchingMs2ScanNums(protMass)));

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv";
            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var tsvReader = new TsvFileParser(resultFilePath);
            var scanNums = tsvReader.GetData("Scan(s)");
            var charges = tsvReader.GetData("Charge");
            var scores = tsvReader.GetData("E-value");
            var sequences = tsvReader.GetData("Peptide");

            //const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_N30_C30.tsv";
            //var tsvReader = new TsvFileParser(resultFilePath);
            //var scanNums = tsvReader.GetData("ScanNum");
            //var charges = tsvReader.GetData("Charge");
            //var scores = tsvReader.GetData("Score");
            //var sequences = tsvReader.GetData("Sequence");

            var aaSet = new AminoAcidSet();

            var seqSet = new HashSet<string>();
            var allSeqSet = new HashSet<string>();
            var numUnfilteredSpecs = 0;
            var totalSpecs = 0;
            for (var i = 0; i < scores.Count; i++)
            {
                var score = Convert.ToDouble(scores[i]);
                if (score > 1E-4) continue;
                //if (score < 10) continue;

                var scanNum = Convert.ToInt32(scanNums[i]);
                var charge = Convert.ToInt32(charges[i]);

                var sequence = SimpleStringProcessing.GetStringBetweenDots(sequences[i]);
                if (sequence == null || sequence.Contains("(")) continue;
                //var sequence = sequences[i];
                var composition = aaSet.GetComposition(sequence) + Composition.H2O;

                var precursorIon = new Ion(composition, charge);
                var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
                var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid) continue;
                ++totalSpecs;

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec = run.GetSpectrum(precursorScanNum);
                var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec = run.GetSpectrum(nextScanNum);
                var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
                if (corr3 == 1)
                {
                    numUnfilteredSpecs++;
                    seqSet.Add(sequences[i]);
                }
                allSeqSet.Add(sequences[i]);

                var corrMax = new[] { corr1, corr2, corr3 }.Max();

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax);
            }

            Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
            Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
            Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
            Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Exemple #24
0
        public void TestMs1Filtering()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string resultFilePath =
            //    @"C:\cygwin\home\kims336\Data\TopDown\raw\CorrMatches_N30\SBEP_STM_001_02272012_Aragon.tsv";
                @"C:\cygwin\home\kims336\Data\TopDown\raw\CorrMatches_N30\SBEP_STM_001_02272012_Aragon.decoy.icresult";
            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }
                
            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\DataFiles\SBEP_STM_001_02272012_Aragon.raw";
            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

            //const int minPrecursorCharge = 3;
            //const int maxPrecursorCharge = 30;
            //const int tolerancePpm = 15;
            var tolerance = new Tolerance(15);

            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 0.7, 40);
            ////var ms1BasedFilter = new Ms1IsotopeTopKFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 20);
            //ISequenceFilter ms1Filter = ms1BasedFilter;

            var tsvReader = new TsvFileParser(resultFilePath);
            var compositions = tsvReader.GetData("Composition");
            var scanNums = tsvReader.GetData("ScanNum");
            var charges = tsvReader.GetData("Charge");
            var qValues = tsvReader.GetData("QValue");
            var scores = tsvReader.GetData("Score");

            //var sequences = tsvReader.GetData("Annotation");

            //var hist = new int[11];

            Console.WriteLine("ScanNum\tScore\tPrecursor\tNext\tSum\tNextIsotope\tLessCharge\tMoreCharge\tMax\tNumXicPeaks");
            for (var i = 0; i < compositions.Count; i++)
            {

                if (qValues != null)
                {
                    var qValue = Convert.ToDouble(qValues[i]);
                    if (qValue > 0.01) continue;
                }

                var scanNum = Convert.ToInt32(scanNums[i]);
                var composition = Composition.Parse(compositions[i]);
                var charge = Convert.ToInt32(charges[i]);

                var precursorIon = new Ion(composition, charge);
                var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
                var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid) continue;

                var score = Convert.ToDouble(scores[i]);

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec = run.GetSpectrum(precursorScanNum);
                var preIsotopeCorr = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec = run.GetSpectrum(nextScanNum);
                var nextIsotopeCorr = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var xicMostAbundant = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance, scanNum);

                var apexScanNum = xicMostAbundant.GetApexScanNum();
                if (apexScanNum < run.MinLcScan) apexScanNum = scanNum;
                //var sumSpec = run.GetSummedMs1Spectrum(apexScanNum);
//                var apexIsotopeCorr = sumSpec.GetCorrScore(precursorIon, tolerance, 0.1);
//                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;

                var xicNextIsotope = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz() + Constants.C13MinusC12/charge, tolerance, scanNum);

                var plusOneIsotopeCorr = xicMostAbundant.GetCorrelation(xicNextIsotope);

                var precursorIonChargeMinusOne = new Ion(composition, charge - 1);
                var xicChargeMinusOne = run.GetPrecursorExtractedIonChromatogram(precursorIonChargeMinusOne.GetMostAbundantIsotopeMz(), tolerance, scanNum);
                var chargeMinusOneCorr = xicMostAbundant.GetCorrelation(xicChargeMinusOne);

                var precursorIonChargePlusOne = new Ion(composition, charge + 1);
                var xicChargePlusOne = run.GetPrecursorExtractedIonChromatogram(precursorIonChargePlusOne.GetMostAbundantIsotopeMz(), tolerance, scanNum);
                var chargePlusOneCorr = xicMostAbundant.GetCorrelation(xicChargePlusOne);

                //var max = new[] {preIsotopeCorr, nextIsotopeCorr, apexIsotopeCorr, plusOneIsotopeCorr, chargeMinusOneCorr, chargePlusOneCorr}.Max();
                //Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}", 
                //    scanNum, score, preIsotopeCorr, nextIsotopeCorr, apexIsotopeCorr, plusOneIsotopeCorr, chargeMinusOneCorr, chargePlusOneCorr, max, xicMostAbundant.Count);
            }

            //Console.WriteLine("Histogram");
            //for (var i = 0; i < hist.Length; i++)
            //{
            //    Console.WriteLine("{0:f1}\t{1}", i / 10.0, hist[i]);
            //}
        }
Exemple #25
0
        public void FilteringEfficiency()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();
            sw.Start();
            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw";
            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
            sw.Stop();

            Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const int minPrecursorCharge = 3;
            const int maxPrecursorCharge = 30;
            const int tolerancePpm = 10;
            var tolerance = new Tolerance(tolerancePpm);
            sw.Reset();
            sw.Start();
            //var ms1BasedFilter = new Ms1BasedFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm);
//            
            //var ms1BasedFilter = new Ms1IsotopeTopKFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 20);
            //var ms1BasedFilter = new ProductScorerBasedOnDeconvolutedSpectra(run,
            //    minPrecursorCharge, maxPrecursorCharge,
            //    0, 0,
            //    600.0, 1800.0, new Tolerance(tolerancePpm), null);
            //ms1BasedFilter.CachePrecursorMatchesBinCentric();
            var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.5, 0.5, 0.5, 40);
            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);

            sw.Stop();

            Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

            ISequenceFilter ms1Filter = ms1BasedFilter;

            sw.Reset();
            sw.Start();
            const double minProteinMass = 3000.0;
            const double maxProteinMass = 30000.0;
            var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
            var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
            var numComparisons = 0L;
            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
                numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
            }
            sw.Stop();

            Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon_4PTMs.icresult";
            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var tsvReader = new TsvFileParser(resultFilePath);
            var compositions = tsvReader.GetData("Composition");
            var scanNums = tsvReader.GetData("ScanNum");
            var charges = tsvReader.GetData("Charge");
            var scores = tsvReader.GetData("Score");
            var qvalues = tsvReader.GetData("QValue");
            var sequences = tsvReader.GetData("Sequence");

            var sequenceCount = new Dictionary<string, int>();
            for (var i = 0; i < compositions.Count; i++)
            {
                if (qvalues != null)
                {
                    var qValue = Convert.ToDouble(qvalues[i]);
                    if (qValue > 0.01) continue;
                }
                else
                {
                    var score = Convert.ToDouble(scores[i]);
                    if (score < 13) continue;
                }
                var scanNum = Convert.ToInt32(scanNums[i]);
                var charge = Convert.ToInt32(charges[i]);
                var composition = Composition.Parse(compositions[i]);
                var precursorIon = new Ion(composition, charge);
                var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
                var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid) continue;

                var sequence = sequences[i];
                int count;
                if (sequenceCount.TryGetValue(sequence, out count)) sequenceCount[sequence] = count + 1;
                else sequenceCount[sequence] = 1;
            }
            //var sequences = tsvReader.GetData("Annotation");

            var seqSet = new HashSet<string>();
            var allSeqSet = new HashSet<string>();
            var numUnfilteredSpecs = 0;
            var totalSpecs = 0;
            for (var i = 0; i < compositions.Count; i++)
            {
                if (qvalues != null)
                {
                    var qValue = Convert.ToDouble(qvalues[i]);
                    if (qValue > 0.01) continue;
                }
                else
                {
                    var score = Convert.ToDouble(scores[i]);
                    if (score < 13) continue;
                }
                var scanNum = Convert.ToInt32(scanNums[i]);
                var charge = Convert.ToInt32(charges[i]);
                var composition = Composition.Parse(compositions[i]);
                var precursorIon = new Ion(composition, charge);
                var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
                var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid) continue;

                ++totalSpecs;

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec = run.GetSpectrum(precursorScanNum);
                var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec = run.GetSpectrum(nextScanNum);
                var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
                if (corr3 == 1)
                {
                    numUnfilteredSpecs++;
                    seqSet.Add(sequences[i]);
                }
                allSeqSet.Add(sequences[i]);

                //var xic = run.GetFullPrecursorIonExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance);
                ////xic.Display();
                //var apexScanNum = xic.GetNearestApexScanNum(run.GetPrecursorScanNum(scanNum), false);
                //var apexSpec = run.GetSpectrum(apexScanNum);
                //var corr3 = apexSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var corrMax = new[] { corr1, corr2, corr3 }.Max();

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax, sequenceCount[sequences[i]]);
            }

            Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
            Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons/(double)(maxBinNum-minBinNum+1));
            Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs/(double)totalSpecs, numUnfilteredSpecs, totalSpecs);
            Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count/(double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Exemple #26
0
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            //const string rawFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SpecFiles\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
            const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            // Configure amino acid set
            var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var searchModifications = new List<SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
            const double filteringWindowSize = 1.1;
            const int isotopeOffsetTolerance = 2;
            var tolerance = new Tolerance(10);
            const int minCharge = 1;
            const int maxCharge = 20;
            var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            var aminoAcidSet = new AminoAcidSet();
            //var scorer = new MatchedPeakPostScorer(tolerance, minCharge, maxCharge);
            var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            var fileExt = new string[] {"IcTarget", "IcDecoy"};
            foreach (var ext in fileExt)
            {
                var resultFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}.tsv", ext);
                var parser = new TsvFileParser(resultFileName);
                var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
                var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
                var protSequences = parser.GetData("Sequence").ToArray();
                var modStrs = parser.GetData("Modifications").ToArray();
                var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
                var protMass = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();
                var outputFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}_Rescored.tsv", ext);

                using (var writer = new StreamWriter(outputFileName))
                {
                    writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

                    var lines = new string[parser.NumData];
                    
                    //for (var i = 0; i < parser.NumData; i++)
                    Parallel.For(0, parser.NumData, i =>
                    {
                        var scan = scans[i];
                        var charge = charges[i];
                        var protSequence = protSequences[i];
                        var modStr = modStrs[i];
                        var sequence = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);
                        Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));
                        var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;
                        Assert.True(ms2Spec != null);
                        var scores = scorer.GetScores(sequence, charge, scan);

                        var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge,
                            isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

                        var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance,
                            comparer);
                        var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                        var gf = new GeneratingFunction(graph);
                        gf.ComputeGeneratingFunction();

                        var specEvalue = gf.GetSpectralEValue(scores.Score);

                        var rowStr = parser.GetRows()[i];
                        var items = rowStr.Split('\t').ToArray();
                        var newRowStr = string.Join("\t", items, 0, 15);

                        //writer.WriteLine("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
                        lock (lines)
                        {
                            lines[i] = string.Format("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);    
                        }
                        //Console.WriteLine("{0}\t{1}\t{2}", items[0], scores.Score, specEvalue);
                    });

                    foreach (var line in lines) writer.WriteLine(line);
                }
                Console.WriteLine("Done");
            }
        }
        private double[][] LoadTable(string fname)
        {
            if (!File.Exists(fname)) throw new FileNotFoundException("Missing score datafile: " + fname);
            
            var parser = new TsvFileParser(fname);
            var table = new double[_massBins.Length][];
            
            for (var i = 0; i < _massBins.Length; i++)
            {
                table[i] = new double[NumberOfBins];

                for (var k = 0; k < NumberOfBins; k++)
                {
                    var colData = parser.GetData(string.Format("{0}", k));
                    table[i][k] = double.Parse(colData[i]);
                }
            }
            return table;
        }
        public void TestTempCompRefLcMsFeatureAlign()
        {
            const string dataFolder = @"D:\MassSpecFiles\CompRef";
            const string fastaFilePath = @"D:\MassSpecFiles\CompRef\db\ID_003278_4B4B3CB1.fasta";
            var fastaDb = new FastaDatabase(fastaFilePath);
            fastaDb.Read();

            var fileEntries = Directory.GetFiles(dataFolder);

            var dataset = (from fileName in fileEntries where fileName.EndsWith("pbf") select Path.GetFileNameWithoutExtension(fileName)).ToList();
            dataset.Sort();

            for (var i = 0; i < dataset.Count; i++)
            {
                var writer =
                    new StreamWriter(string.Format(@"D:\MassSpecFiles\CompRef\MsPathFinderMerged\{0}_IcTda.tsv",
                        dataset[i]));
                
                writer.Write("Scan");
                writer.Write("\t");
                writer.Write("Sequence");
                writer.Write("\t");
                writer.Write("Modifications");
                writer.Write("\t");
                writer.Write("Mass");
                writer.Write("\t");
                writer.Write("ProteinName");
                writer.Write("\t");
                writer.Write("ProteinDesc");
                writer.Write("\t");
                writer.Write("Start");
                writer.Write("\t");
                writer.Write("End");
                writer.Write("\t");
                writer.Write("#MatchedFragments");
                writer.Write("\t");
                writer.Write("QValue");
                writer.Write("\n");


                var path1 = string.Format(@"D:\MassSpecFiles\CompRef\MsPathFinder\{0}_IcTda.tsv", dataset[i]);
                var parser1 = new TsvFileParser(path1);
                OutputMergedResult(writer, parser1, fastaDb);

                var path2 = string.Format(@"D:\MassSpecFiles\CompRef\seqtag\{0}_tagmatch.tsv", dataset[i]);
                var parser2 = new TsvFileParser(path2);
                OutputMergedResult(writer, parser2, fastaDb);
                writer.Close();
            }
        }
        private void OutputMergedResult(StreamWriter writer, TsvFileParser parser, FastaDatabase fastaDb)
        {
            var scoreColumn = parser.GetData("#MatchedFragments") ?? parser.GetData("Score");
            var qValColumn = parser.GetData("QValue");

            for (var i = 0; i < parser.NumData; i++)
            {
                var sequence = parser.GetData("Sequence")[i];
                var scanNum = int.Parse(parser.GetData("Scan")[i]);
                var mass = double.Parse(parser.GetData("Mass")[i]);
                var protName = parser.GetData("ProteinName")[i];
                var protDesc = fastaDb.GetProteinDescription(protName);
                
                var firstResId = int.Parse(parser.GetData("Start")[i]);
                var lastResId = int.Parse(parser.GetData("End")[i]);
                var score = double.Parse(scoreColumn[i]);
                var mod = parser.GetData("Modifications")[i];
                var qvalue = (qValColumn != null) ? qValColumn[i] : "0";

                writer.Write(scanNum);
                writer.Write("\t");
                writer.Write(sequence);
                writer.Write("\t");
                writer.Write(mod);
                writer.Write("\t");
                writer.Write(mass);
                writer.Write("\t");
                writer.Write(protName);
                writer.Write("\t");
                writer.Write(protDesc);
                writer.Write("\t");
                writer.Write(firstResId);
                writer.Write("\t");
                writer.Write(lastResId);
                writer.Write("\t");
                writer.Write(score);
                writer.Write("\t");
                writer.Write(qvalue);
                writer.Write("\n");
            }

        }
Exemple #30
0
        public void TestInitialScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string icResultPath = @"C:\cygwin\home\kims336\Data\QCShewQE\Ic_NTT2_03_NoMod_NoRescoring\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28_IcTarget.tsv";
            if (!File.Exists(icResultPath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, icResultPath);
            }

            var icParser = new TsvFileParser(icResultPath);
            var icScans = icParser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
            var icPeptides = icParser.GetData("Sequence");
            var icScore = icParser.GetData("Score").Select(s => Convert.ToInt32(s)).ToArray();
            var map = new Dictionary<string, int>();
            for (var i = 0; i < icParser.NumData; i++)
            {
                map.Add(icScans[i]+":"+icPeptides[i], icScore[i]);
            }

            const string msgfPlusResultPath = @"C:\cygwin\home\kims336\Data\QCShewQE\NoMod.tsv";
            if (!File.Exists(msgfPlusResultPath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, msgfPlusResultPath);
            }

            var msgfPlusResults = new MsGfResults(msgfPlusResultPath);
            var matches = msgfPlusResults.GetMatchesAtPsmFdr(0.01);
            //Console.WriteLine("NumMatches: {0}", matches.Count);
            Console.WriteLine("ScanNum\tPeptide\tSpecEValue\tIcScore");
            foreach (var match in matches)
            {
                var scanNum = match.ScanNum;
                var peptide = match.Peptide;
                var specEValue = match.SpecEValue;
                int score;
                if (!map.TryGetValue(scanNum + ":" + peptide, out score)) score = -1;
                Console.WriteLine("{0}\t{1}\t{2}\t{3}", scanNum, peptide, specEValue, score);
            }
        }