Beispiel #1
0
        public void GeneratePrmInfo(string resultFilePath, string outputFilePath)
        {
            Console.Write("Processing {0}", Path.GetFileName(resultFilePath));
            Console.Out.Flush();

            var rawFilePath =
                @"D:\Research\Data\EDRN\DDA\raw\" + Path.GetFileNameWithoutExtension(resultFilePath) + ".raw";
            var reader = new XCaliburReader(rawFilePath);
            var run    = InMemoryLcMsRun.GetLcMsRun(rawFilePath);

            var tolerance = new Tolerance(10, ToleranceUnit.Ppm);

            const string spikedInPeptideFile = @"D:\Research\Data\EDRN\SpikedPeptides.txt";
            var          spikedInPeptides    = File.ReadAllLines(spikedInPeptideFile);
            var          spikedInPepSet      = new HashSet <string>();

            foreach (var p in spikedInPeptides)
            {
                spikedInPepSet.Add(p);
            }
//            const string resultFilePath = @"D:\Research\Data\EDRN\DDA\Frac7_NTT2.tsv";
            //const string resultFilePath = @"D:\Research\Data\EDRN\DDA\Heavy\342865_EDRN_Serum_07_DDA_1_12Nov13_Samwise_13-07-28.tsv";
//            const string resultFilePath = @"D:\Research\Data\EDRN\DDA\NTT1_NoMod\342865_EDRN_Serum_07_DDA_1_12Nov13_Samwise_13-07-28.tsv";
            const double qValueThreshold = 0.01;

            var pepSet = new HashSet <string>();
            MsGfPlusHeaderInformation headerInfo = null;

            //var prefix = new HashSet<string>();
            //var suffix = new HashSet<string>();
            var numPeptides = 0;

            var prevScanNum = -1;

            using (var writer = new StreamWriter(outputFilePath))
            {
                writer.WriteLine("Peptide\tCharge\tMonoMz\tMostAbundantMz\tMs2ScanNum\tRtMs2\tRtApex\tRtStart\tRtEnd\tSpecEValue\tPepQValue");
                foreach (var line in File.ReadLines(resultFilePath))
                {
                    if (line.StartsWith("#"))
                    {
                        headerInfo = new MsGfPlusHeaderInformation(line);
                        continue;
                    }

                    var match = new MsGfMatch(line, headerInfo);

                    if (match.ScanNum == prevScanNum)
                    {
                        continue;
                    }
                    prevScanNum = match.ScanNum;

                    if (!match.IsValid || match.Protein.StartsWith(FastaDatabase.DecoyProteinPrefix))
                    {
                        continue;
                    }
                    if (match.PepQValue > qValueThreshold)
                    {
                        continue;
                    }
                    var peptide = match.Peptide.Replace("C+57.021", "C").Replace("K+8.014", "K").Replace("R+10.008", "R");

                    if (pepSet.Contains(peptide))
                    {
                        continue;
                    }
                    pepSet.Add(peptide);

                    if (spikedInPepSet.Contains(peptide))
                    {
                        var ion = new Ion(match.Formula, match.Charge);
                        var mostAbundantIonMz = ion.GetMostAbundantIsotopeMz();
                        var xic = run.GetPrecursorExtractedIonChromatogram(mostAbundantIonMz, tolerance, match.ScanNum);
                        if (xic.Count == 0)
                        {
                            continue;
                        }
                        var minScan = xic.Min().ScanNum;
                        var maxScan = xic.Max().ScanNum;
                        writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}",
                                         peptide,
                                         match.Charge,
                                         ion.GetMonoIsotopicMz(),
                                         mostAbundantIonMz,
                                         match.ScanNum,
                                         reader.RtFromScanNum(match.ScanNum),
                                         reader.RtFromScanNum(xic.GetApexScanNum()), // Rt apex
                                         reader.RtFromScanNum(minScan),              // Rt start
                                         reader.RtFromScanNum(maxScan),              // Rt end
                                         match.SpecEValue,
                                         match.PepQValue);
                        ++numPeptides;
                    }
                    //else
                    //{
                    //    foreach (var spikedInPeptide in spikedInPeptides)
                    //    {
                    //        if (spikedInPeptide.StartsWith(peptide)) prefix.Add(spikedInPeptide + "\t" + peptide + "\t" + match.ScanNum);
                    //        else if (spikedInPeptide.EndsWith(peptide)) suffix.Add(spikedInPeptide + "\t" + peptide + "\t" + match.ScanNum);
                    //    }
                    //}
                }
            }

            //Console.WriteLine("*********Prefix");
            //foreach(var p in prefix) Console.WriteLine(p);

            //Console.WriteLine("*********Suffix");
            //foreach (var p in suffix) Console.WriteLine(p);

            Console.WriteLine("\t{0}", numPeptides);
        }
Beispiel #2
0
        public void TestAbrfSpecCount()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string dir = @"D:\Research\Data\IPRG2014";

            if (!Directory.Exists(dir))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, dir);
            }

            const double qValueThreshold = 0.01;
            var          names           = new[] { "ENO1_YEAST", "ADH1_YEAST", "CYC_BOVIN", "ALBU_BOVIN" };
            var          accessions      = new[] { "P00924", "P00330", "P62894", "P02769" };

            //const string databaseFilePath = dir + @"\database\E_coli_K12_uniprot_reviewed_2013-01-31.revCat.fasta";
            const string resultDir = dir + @"\10ppm_TI0_NTT1";

            if (!Directory.Exists(resultDir))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, resultDir);
            }

            Console.WriteLine("Run\tTotal PSM\t" + string.Join("\t", names));

            foreach (var msgfResultFile in Directory.GetFiles(resultDir, "*.tsv"))
            {
                var fileId = msgfResultFile.Substring(msgfResultFile.LastIndexOf('_') + 1, 3);
                Console.Write(fileId);

                var totalPsm  = 0;
                var specCount = new int[accessions.Length];

                MsGfPlusHeaderInformation headerInfo = null;

                var prevScanNum = -1;
                foreach (var line in File.ReadLines(msgfResultFile))
                {
                    if (line.StartsWith("#"))
                    {
                        headerInfo = new MsGfPlusHeaderInformation(line);
                        continue;
                    }

                    var match = new MsGfMatch(line, headerInfo);

                    if (match.ScanNum == prevScanNum)
                    {
                        continue;
                    }
                    prevScanNum = match.ScanNum;

                    if (!match.IsValid || match.Protein.StartsWith(FastaDatabase.DecoyProteinPrefix))
                    {
                        continue;
                    }
                    if (match.QValue > qValueThreshold)
                    {
                        continue;
                    }

                    totalPsm++;

                    for (var i = 0; i < accessions.Length; i++)
                    {
                        if (match.Protein.StartsWith("sp|" + accessions[i]))
                        {
                            specCount[i]++;
                        }
                    }
                }

                Console.Write("\t" + totalPsm);
                for (var i = 0; i < accessions.Length; i++)
                {
                    Console.Write("\t{0}", specCount[i]);
                }
                Console.WriteLine();
            }
        }
Beispiel #3
0
        public void GenerateAbrfSpecCountAllProteins()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string dir = @"H:\Research\IPRG2015";

            if (!Directory.Exists(dir))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, dir);
            }

            const double qValueThreshold = 0.01;
            //var names = new[] { "ENO1_YEAST", "ADH1_YEAST", "CYC_BOVIN", "ALBU_BOVIN" };
            //var accessions = new[] { "P00924", "P00330", "P62894", "P02769" };

            const string resultDir = dir + @"\NTT1";

            if (!Directory.Exists(resultDir))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, resultDir);
            }

            var msgfResultFiles = Directory.GetFiles(resultDir, "*.tsv").ToArray();

            var specCount = new Dictionary <string, int[]>();  // protein name => array of counts

            for (var i = 0; i < msgfResultFiles.Length; i++)
            {
                var msgfResultFile = msgfResultFiles[i];

                MsGfPlusHeaderInformation headerInfo = null;

                var prevScanNum = -1;
                foreach (var line in File.ReadLines(msgfResultFile))
                {
                    if (line.StartsWith("#"))
                    {
                        headerInfo = new MsGfPlusHeaderInformation(line);
                        continue;
                    }

                    var match = new MsGfMatch(line, headerInfo);

                    if (match.ScanNum == prevScanNum)
                    {
                        continue;
                    }
                    prevScanNum = match.ScanNum;

                    if (!match.IsValid || match.Protein.StartsWith(FastaDatabase.DecoyProteinPrefix))
                    {
                        continue;
                    }
                    if (match.QValue > qValueThreshold)
                    {
                        continue;
                    }

                    var proteins = match.Protein.Split(';');
                    foreach (var protein in proteins)
                    {
                        var   proteinName = protein.Substring(0, protein.LastIndexOf("(pre=", StringComparison.Ordinal));
                        int[] countArr;
                        if (!specCount.TryGetValue(proteinName, out countArr))
                        {
                            specCount[proteinName] = new int[msgfResultFiles.Length];
                        }
                        specCount[proteinName][i]++;
                    }
                }
            }

            // Writing
            const string databaseFilePath = dir + @"\database\iPRG2015.fasta";
            var          database         = new FastaDatabase(databaseFilePath);

            database.Read();

            //            var spikeInAccessions = new[] { "STANDARD_Alpha-Casein", "STANDARD_Beta-Lactoglobulin", "STANDARD_Carbonic-Anhydrase", "P02769"};

            const string outputFilePath = dir + @"\SpecCountAllProteins.tsv";

            using (var writer = new StreamWriter(outputFilePath))
            {
                var fileIds = msgfResultFiles.Select(f => f.Substring(f.IndexOf("_sample", StringComparison.Ordinal) + 1,
                                                                      f.LastIndexOf('.') - f.IndexOf("_sample", StringComparison.Ordinal) - 1));
                writer.WriteLine("Protein\tLength\t" + string.Join("\t", fileIds) + "\tSpikeIn");
                foreach (var entry in specCount)
                {
                    var proteinId = entry.Key;
                    var length    = database.GetProteinLength(proteinId);
                    Assert.True(length > 0);
                    var counts = entry.Value;
                    Assert.True(counts.Length == msgfResultFiles.Length);
                    var spikeIn = 0;
                    //if (spikeInAccessions.Any(spikeInAccession => proteinId.StartsWith("sp|" + spikeInAccession)))
                    if (proteinId.StartsWith("sp|"))
                    {
                        spikeIn = 1;
                    }
                    writer.WriteLine("{0}\t{1}\t{2}\t{3}", proteinId, length, string.Join("\t", counts), spikeIn);
                }
            }
        }
Beispiel #4
0
        public void TestAbrfSpecCount()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string dir = @"D:\Research\Data\IPRG2014";
            if (!Directory.Exists(dir))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, dir);
            }

            const double qValueThreshold = 0.01;
            var names = new[] { "ENO1_YEAST", "ADH1_YEAST", "CYC_BOVIN", "ALBU_BOVIN" };
            var accessions = new[] { "P00924", "P00330", "P62894", "P02769" };

            //const string databaseFilePath = dir + @"\database\E_coli_K12_uniprot_reviewed_2013-01-31.revCat.fasta";
            const string resultDir = dir + @"\10ppm_TI0_NTT1";
            if (!Directory.Exists(resultDir))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, resultDir);
            }

            Console.WriteLine("Run\tTotal PSM\t" + string.Join("\t", names));

            foreach (var msgfResultFile in Directory.GetFiles(resultDir, "*.tsv"))
            {
                var fileId = msgfResultFile.Substring(msgfResultFile.LastIndexOf('_') + 1, 3);
                Console.Write(fileId);

                var totalPsm = 0;
                var specCount = new int[accessions.Length];

                MsGfPlusHeaderInformation headerInfo = null;

                var prevScanNum = -1;
                foreach (var line in File.ReadLines(msgfResultFile))
                {
                    if (line.StartsWith("#"))
                    {
                        headerInfo = new MsGfPlusHeaderInformation(line);
                        continue;
                    }

                    var match = new MsGfMatch(line, headerInfo);

                    if (match.ScanNum == prevScanNum) continue;
                    prevScanNum = match.ScanNum;

                    if (!match.IsValid || match.Protein.StartsWith(FastaDatabase.DecoyProteinPrefix)) continue;
                    if (match.QValue > qValueThreshold) continue;

                    totalPsm++;

                    for (var i = 0; i < accessions.Length; i++)
                    {
                        if (match.Protein.StartsWith("sp|" + accessions[i]))
                        {
                            specCount[i]++;
                        }
                    }
                }

                Console.Write("\t" + totalPsm);
                for (var i = 0; i < accessions.Length; i++)
                {
                    Console.Write("\t{0}", specCount[i]);
                }
                Console.WriteLine();
            }
        }
Beispiel #5
0
        public void GenerateAbrfSpecCountAllProteins()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string dir = @"H:\Research\IPRG2015";
            if (!Directory.Exists(dir))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, dir);
            }

            const double qValueThreshold = 0.01;
            //var names = new[] { "ENO1_YEAST", "ADH1_YEAST", "CYC_BOVIN", "ALBU_BOVIN" };
            //var accessions = new[] { "P00924", "P00330", "P62894", "P02769" };

            const string resultDir = dir + @"\NTT1";
            if (!Directory.Exists(resultDir))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, resultDir);
            }

            var msgfResultFiles = Directory.GetFiles(resultDir, "*.tsv").ToArray();

            var specCount = new Dictionary<string, int[]>();  // protein name => array of counts

            for (var i = 0; i < msgfResultFiles.Length; i++)
            {
                var msgfResultFile = msgfResultFiles[i];

                MsGfPlusHeaderInformation headerInfo = null;

                var prevScanNum = -1;
                foreach (var line in File.ReadLines(msgfResultFile))
                {
                    if (line.StartsWith("#"))
                    {
                        headerInfo = new MsGfPlusHeaderInformation(line);
                        continue;
                    }

                    var match = new MsGfMatch(line, headerInfo);

                    if (match.ScanNum == prevScanNum) continue;
                    prevScanNum = match.ScanNum;

                    if (!match.IsValid || match.Protein.StartsWith(FastaDatabase.DecoyProteinPrefix)) continue;
                    if (match.QValue > qValueThreshold) continue;

                    var proteins = match.Protein.Split(';');
                    foreach (var protein in proteins)
                    {
                        var proteinName = protein.Substring(0, protein.LastIndexOf("(pre=", StringComparison.Ordinal));
                        int[] countArr;
                        if (!specCount.TryGetValue(proteinName, out countArr)) specCount[proteinName] = new int[msgfResultFiles.Length];
                        specCount[proteinName][i]++;
                    }
                }
            }

            // Writing
            const string databaseFilePath = dir + @"\database\iPRG2015.fasta";
            var database = new FastaDatabase(databaseFilePath);
            database.Read();

            //            var spikeInAccessions = new[] { "STANDARD_Alpha-Casein", "STANDARD_Beta-Lactoglobulin", "STANDARD_Carbonic-Anhydrase", "P02769"};

            const string outputFilePath = dir + @"\SpecCountAllProteins.tsv";
            using (var writer = new StreamWriter(outputFilePath))
            {
                var fileIds = msgfResultFiles.Select(f => f.Substring(f.IndexOf("_sample", StringComparison.Ordinal) + 1,
                    f.LastIndexOf('.') - f.IndexOf("_sample", StringComparison.Ordinal) - 1));
                writer.WriteLine("Protein\tLength\t" + string.Join("\t", fileIds) + "\tSpikeIn");
                foreach (var entry in specCount)
                {
                    var proteinId = entry.Key;
                    var length = database.GetProteinLength(proteinId);
                    Assert.True(length > 0);
                    var counts = entry.Value;
                    Assert.True(counts.Length == msgfResultFiles.Length);
                    var spikeIn = 0;
                    //if (spikeInAccessions.Any(spikeInAccession => proteinId.StartsWith("sp|" + spikeInAccession)))
                    if (proteinId.StartsWith("sp|"))
                    {
                        spikeIn = 1;
                    }
                    writer.WriteLine("{0}\t{1}\t{2}\t{3}", proteinId, length, string.Join("\t", counts), spikeIn);
                }
            }
        }
Beispiel #6
0
        public void GeneratePrmInfo(string resultFilePath, string outputFilePath)
        {
            Console.Write("Processing {0}", Path.GetFileName(resultFilePath));
            Console.Out.Flush();

            var rawFilePath =
                @"D:\Research\Data\EDRN\DDA\raw\" + Path.GetFileNameWithoutExtension(resultFilePath) + ".raw";
            var reader = new XCaliburReader(rawFilePath);
            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath);

            var tolerance = new Tolerance(10, ToleranceUnit.Ppm);

            const string spikedInPeptideFile = @"D:\Research\Data\EDRN\SpikedPeptides.txt";
            var spikedInPeptides = File.ReadAllLines(spikedInPeptideFile);
            var spikedInPepSet = new HashSet<string>();
            foreach (var p in spikedInPeptides)
            {
                spikedInPepSet.Add(p);
            }
//            const string resultFilePath = @"D:\Research\Data\EDRN\DDA\Frac7_NTT2.tsv";
            //const string resultFilePath = @"D:\Research\Data\EDRN\DDA\Heavy\342865_EDRN_Serum_07_DDA_1_12Nov13_Samwise_13-07-28.tsv";
//            const string resultFilePath = @"D:\Research\Data\EDRN\DDA\NTT1_NoMod\342865_EDRN_Serum_07_DDA_1_12Nov13_Samwise_13-07-28.tsv";
            const double qValueThreshold = 0.01;

            var pepSet = new HashSet<string>();
            MsGfPlusHeaderInformation headerInfo = null;

            //var prefix = new HashSet<string>();
            //var suffix = new HashSet<string>();
            var numPeptides = 0;

            var prevScanNum = -1;
            using (var writer = new StreamWriter(outputFilePath))
            {
                writer.WriteLine("Peptide\tCharge\tMonoMz\tMostAbundantMz\tMs2ScanNum\tRtMs2\tRtApex\tRtStart\tRtEnd\tSpecEValue\tPepQValue");
                foreach (var line in File.ReadLines(resultFilePath))
                {
                    if (line.StartsWith("#"))
                    {
                        headerInfo = new MsGfPlusHeaderInformation(line);
                        continue;
                    }

                    var match = new MsGfMatch(line, headerInfo);

                    if (match.ScanNum == prevScanNum) continue;
                    prevScanNum = match.ScanNum;

                    if (!match.IsValid || match.Protein.StartsWith(FastaDatabase.DecoyProteinPrefix)) continue;
                    if (match.PepQValue > qValueThreshold) continue;
                    var peptide = match.Peptide.Replace("C+57.021", "C").Replace("K+8.014", "K").Replace("R+10.008", "R");

                    if (pepSet.Contains(peptide)) continue;
                    pepSet.Add(peptide);

                    if (spikedInPepSet.Contains(peptide))
                    {
                        var ion = new Ion(match.Formula, match.Charge);
                        var mostAbundantIonMz = ion.GetMostAbundantIsotopeMz();
                        var xic = run.GetPrecursorExtractedIonChromatogram(mostAbundantIonMz, tolerance, match.ScanNum);
                        if (xic.Count == 0) continue;
                        var minScan = xic.Min().ScanNum;
                        var maxScan = xic.Max().ScanNum;
                        writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}",
                            peptide,
                            match.Charge,
                            ion.GetMonoIsotopicMz(),
                            mostAbundantIonMz,
                            match.ScanNum,
                            reader.RtFromScanNum(match.ScanNum),
                            reader.RtFromScanNum(xic.GetApexScanNum()),    // Rt apex
                            reader.RtFromScanNum(minScan),                 // Rt start
                            reader.RtFromScanNum(maxScan),                 // Rt end
                            match.SpecEValue,
                            match.PepQValue);
                        ++numPeptides;
                    }
                    //else
                    //{
                    //    foreach (var spikedInPeptide in spikedInPeptides)
                    //    {
                    //        if (spikedInPeptide.StartsWith(peptide)) prefix.Add(spikedInPeptide + "\t" + peptide + "\t" + match.ScanNum);
                    //        else if (spikedInPeptide.EndsWith(peptide)) suffix.Add(spikedInPeptide + "\t" + peptide + "\t" + match.ScanNum);
                    //    }
                    //}
                }                
            }

            //Console.WriteLine("*********Prefix");
            //foreach(var p in prefix) Console.WriteLine(p);

            //Console.WriteLine("*********Suffix");
            //foreach (var p in suffix) Console.WriteLine(p);

            Console.WriteLine("\t{0}", numPeptides);
        }