Пример #1
0
        /// <summary>
        /// Re-scores every row of a tab-separated result file and writes the original
        /// columns, followed by the score columns, to <paramref name="outputFilePath"/>.
        /// </summary>
        /// <remarks>
        /// When the input has a "Modifications" column, its value is replaced in the output by
        /// the <c>Modifications</c> value of the computed scores, wrapped in square brackets.
        /// Rows for which the scorer returns <c>null</c> are omitted from the output.
        /// </remarks>
        /// <param name="icResultFilePath">
        /// Tab-separated input file; the "Sequence", "ScanNum", "Charge" and "Composition"
        /// columns are read from it.
        /// </param>
        /// <param name="outputFilePath">File the rescored table is written to.</param>
        private void Rescore(string icResultFilePath, string outputFilePath)
        {
            var parser = new TsvFileParser(icResultFilePath);
            var sequences = parser.GetData("Sequence");
            var scanNums = parser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
            var charges = parser.GetData("Charge").Select(c => Convert.ToInt32(c)).ToArray();
            var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();

            var rows = parser.GetRows();
            var headers = parser.GetHeaders();

            // Position of the column whose content is replaced; negative when the column is absent.
            var modIndex = headers.IndexOf("Modifications");

            using (var writer = new StreamWriter(outputFilePath))
            {
                writer.WriteLine("{0}\t{1}", string.Join("\t", headers), IcScores.GetScoreNames());
                for (var i = 0; i < parser.NumData; i++)
                {
                    var scores = _topDownScorer.GetScores(AminoAcid.ProteinNTerm, sequences[i], AminoAcid.ProteinCTerm, compositions[i], charges[i], scanNums[i]);

                    // Check before anything is written so that a failed scoring can never leave
                    // a half-written row behind.
                    // NOTE(review): assumes a null result means "this scan could not be scored" — confirm.
                    if (scores == null) continue;

                    var tokens = rows[i].Split('\t');
                    if (modIndex >= 0 && modIndex < tokens.Length)
                    {
                        tokens[modIndex] = "[" + scores.Modifications + "]";
                    }

                    // Emit the whole row in one call: columns, a tab, then the scores.
                    writer.WriteLine("{0}\t{1}", string.Join("\t", tokens), scores);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Reads an identification table, determines for every row the largest intensity among
        /// the isotope peaks found for the precursor ion in the precursor scan, and writes a copy
        /// of the table with an additional "PrecursorIntensity" column.
        /// </summary>
        /// <remarks>
        /// The test is ignored when the raw file does not exist. Rows for which no isotope
        /// peaks are returned keep an intensity of 0.
        /// </remarks>
        public void AddMostAbundantIsotopePeakIntensity()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string rawFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143.raw";
            const string resultFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143_IcTda.tsv";
            const string newResultFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143_IcTdaWithIntensities.tsv";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + rawFilePath);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

            // Load the columns needed to locate each precursor.
            var table = new TsvFileParser(resultFilePath);
            var compositionColumn = table.GetData("Composition").Select(Composition.Parse).ToArray();
            var scanColumn = table.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
            var chargeColumn = table.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();

            var intensityColumn = new double[table.NumData];
            var tol = new Tolerance(10);

            for (var row = 0; row < table.NumData; row++)
            {
                var ion = new Ion(compositionColumn[row], chargeColumn[row]);
                var spectrum = run.GetSpectrum(run.GetPrecursorScanNum(scanColumn[row]));

                // NOTE(review): the meaning of the 0.1 argument is not visible here — confirm against GetAllIsotopePeaks.
                var peaks = spectrum.GetAllIsotopePeaks(ion, tol, 0.1);
                if (peaks == null) continue;

                // Largest intensity among the non-null peaks; stays 0 when there is none.
                var best = 0.0;
                foreach (var peak in peaks)
                {
                    if (peak == null) continue;
                    if (peak.Intensity > best) best = peak.Intensity;
                }
                intensityColumn[row] = best;
            }

            // Write the original rows with the new column appended.
            using (var writer = new StreamWriter(newResultFilePath))
            {
                var headerLine = string.Join("\t", table.GetHeaders()) + "\t" + "PrecursorIntensity";
                writer.WriteLine(headerLine);

                for (var row = 0; row < table.NumData; row++)
                {
                    var line = table.GetRows()[row] + "\t" + intensityColumn[row];
                    writer.WriteLine(line);
                }
            }
            Console.WriteLine("Done");
        }
Пример #3
0
        /// <summary>
        /// Re-scores the rows of a tab-separated result file and writes each scored row,
        /// followed by its scores, to <paramref name="outputFilePath"/>.
        /// </summary>
        /// <remarks>
        /// A row is skipped when no sequence can be extracted from its "Peptide" value, when
        /// the extracted sequence contains "(", or when the scorer returns <c>null</c>.
        /// </remarks>
        /// <param name="msAlignFilePath">
        /// Tab-separated input file; the "Peptide", "Scan(s)" and "Charge" columns are read from it.
        /// </param>
        /// <param name="outputFilePath">File the rescored table is written to.</param>
        private void Rescore(string msAlignFilePath, string outputFilePath)
        {
            var table = new TsvFileParser(msAlignFilePath);
            var peptideColumn = table.GetData("Peptide");
            var scanColumn = table.GetData("Scan(s)").Select(s => Convert.ToInt32(s)).ToArray();
            var chargeColumn = table.GetData("Charge").Select(c => Convert.ToInt32(c)).ToArray();

            var rowLines = table.GetRows();
            var headerNames = table.GetHeaders();

            using (var writer = new StreamWriter(outputFilePath))
            {
                writer.WriteLine("{0}\t{1}", string.Join("\t", headerNames), IcScores.GetScoreNames());

                for (var rowIndex = 0; rowIndex < table.NumData; rowIndex++)
                {
                    var seqStr = SimpleStringProcessing.GetStringBetweenDots(peptideColumn[rowIndex]);

                    //TODO: currently ignore ids with modifications
                    var isScorable = seqStr != null && !seqStr.Contains("(");
                    if (!isScorable)
                    {
                        continue;
                    }

                    var composition = AASet.GetComposition(seqStr);
                    var scores = _topDownScorer.GetScores(
                        AminoAcid.ProteinNTerm,
                        seqStr,
                        AminoAcid.ProteinCTerm,
                        composition,
                        chargeColumn[rowIndex],
                        scanColumn[rowIndex]);

                    if (scores != null)
                    {
                        writer.WriteLine("{0}\t{1}", rowLines[rowIndex], scores);
                    }
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Rescores the "IcTarget" and "IcDecoy" result tables of one dataset. For every row a
        /// score is obtained from an <c>InformedTopDownScorer</c>, a generating function is built
        /// over a scoring graph of the deconvoluted MS2 spectrum, and the spectral E-value of the
        /// score is looked up. The first 15 input columns plus "Score" and "EValue" are written
        /// to a "_Rescored.tsv" file next to the input.
        /// </summary>
        /// <remarks>
        /// The test is ignored when the spectrum file does not exist. Rows are processed in
        /// parallel but written in their original order. Numbers are parsed and written with the
        /// invariant culture so the result does not depend on the regional settings of the machine.
        /// </remarks>
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // Alternative location of the same dataset on the shared test server:
            //   \\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SpecFiles\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw
            const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            // The tables are machine-written; never interpret them with the current UI culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Number of leading input columns copied unchanged into the rescored table.
            // string.Join throws if a header or row has fewer columns than this.
            const int numColumnsToKeep = 15;

            // Configure amino acid set
            var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var searchModifications = new List<SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
            const double filteringWindowSize = 1.1;
            const int isotopeOffsetTolerance = 2;
            var tolerance = new Tolerance(10);
            const int minCharge = 1;
            const int maxCharge = 20;
            var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);

            // The scorer and the sequence builder use a default amino acid set, while the
            // scoring graph uses the set configured with the search modifications above.
            var aminoAcidSet = new AminoAcidSet();
            var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            var fileExt = new[] {"IcTarget", "IcDecoy"};
            foreach (var ext in fileExt)
            {
                var resultFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}.tsv", ext);
                var parser = new TsvFileParser(resultFileName);
                var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s, invariant)).ToArray();
                var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s, invariant)).ToArray();
                var protSequences = parser.GetData("Sequence").ToArray();
                var modStrs = parser.GetData("Modifications").ToArray();
                var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
                var protMass = parser.GetData("Mass").Select(s => Convert.ToDouble(s, invariant)).ToArray();
                var outputFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}_Rescored.tsv", ext);

                using (var writer = new StreamWriter(outputFileName))
                {
                    writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, numColumnsToKeep) + "\tScore\tEValue");

                    // Fetched once; the parallel body only reads from it.
                    var rows = parser.GetRows();

                    // One slot per input row so the output keeps the input order even though
                    // the rows are scored in parallel.
                    var lines = new string[parser.NumData];

                    Parallel.For(0, parser.NumData, i =>
                    {
                        var scan = scans[i];
                        var charge = charges[i];
                        var sequence = Sequence.CreateSequence(protSequences[i], modStrs[i], aminoAcidSet);

                        // The "Composition" column is expected to equal the sequence composition plus one water.
                        Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));
                        var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;
                        Assert.True(ms2Spec != null);
                        var scores = scorer.GetScores(sequence, charge, scan);

                        var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge,
                            isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

                        var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance,
                            comparer);
                        var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                        var gf = new GeneratingFunction(graph);
                        gf.ComputeGeneratingFunction();

                        var specEvalue = gf.GetSpectralEValue(scores.Score);

                        var items = rows[i].Split('\t');
                        var newRowStr = string.Join("\t", items, 0, numColumnsToKeep);

                        // No lock needed: each iteration writes only its own array element, and
                        // the array is read only after Parallel.For has returned.
                        lines[i] = string.Format(invariant, "{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
                    });

                    foreach (var line in lines) writer.WriteLine(line);
                }
                Console.WriteLine("Done");
            }
        }
Пример #5
0
        /// <summary>
        /// Joins per-sample identification results with a Skyline transition table.
        /// </summary>
        /// <remarks>
        /// Step 1 reads every "*.tsv" file in the identification folder and keeps, per
        /// sample/peptide/nominal-mass/charge key, the probability and q-value of the entry with
        /// the highest probability. Step 2 reads the comma-separated Skyline table, drops rows
        /// whose peptide/nominal-mass pair was never identified, and writes the remaining rows
        /// (without their last two columns) followed by "Probability" and "QValue", using "NA"
        /// when the exact key has no identification.
        /// The test is ignored when the identification folder or the Skyline file is missing.
        /// Numbers are parsed and written with the invariant culture so the result does not
        /// depend on the regional settings of the machine.
        /// </remarks>
        public void CreatePeptideAbundanceTableWithSkyline()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // Both inputs are machine-written; never interpret them with the current UI culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Reading Henry's results
            var pepKeySet = new HashSet<string>();
            var resultDic = new Dictionary<string, Tuple<double, double>>();
            const string henryResultPath = @"H:\Research\IPRG2015\Henry_results\tsv";
            if (!Directory.Exists(henryResultPath))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, henryResultPath);
            }

            var aaSet = new AminoAcidSet();
            foreach (var resultFile in Directory.GetFiles(henryResultPath, "*.tsv"))
            {
                var fileName = Path.GetFileName(resultFile);
                if (fileName == null) continue;

                // The first two characters of the file name identify the sample.
                var sample = fileName.Substring(0, 2);
                Console.WriteLine("Processing {0}", sample);
                var tsvReader = new TsvFileParser(resultFile);
                var peptides = tsvReader.GetData("Peptide").ToArray();
                var charge = tsvReader.GetData("Charge").Select(c => Convert.ToInt32(c, invariant)).ToArray();
                var prob = tsvReader.GetData("Prob").Select(p => Convert.ToDouble(p, invariant)).ToArray();
                var qValue = tsvReader.GetData("QValue").Select(q => Convert.ToDouble(q, invariant)).ToArray();
                for (var i = 0; i < tsvReader.NumData; i++)
                {
                    var peptide = peptides[i];
                    var nominalMass = GetNominalMass(aaSet, peptide);
                    var pepKey = GetPeptide(peptide) + ":" + nominalMass;
                    var key = sample + ":" + pepKey + ":" + charge[i];
                    pepKeySet.Add(pepKey);

                    // Keep only the highest-probability entry per key.
                    Tuple<double, double> existingScores;
                    if (!resultDic.TryGetValue(key, out existingScores) || prob[i] > existingScores.Item1)
                    {
                        resultDic[key] = new Tuple<double, double>(prob[i], qValue[i]);
                    }
                }
            }

            const string skylineFilePath = @"H:\Research\IPRG2015\MySkyline\TransitionResults.csv";
            if (!File.Exists(skylineFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, skylineFilePath);
            }

            var skylineTable = new TsvFileParser(skylineFilePath, ',');

            // A replicate name is shortened to its 1st and 3rd characters so it can be matched
            // against the two-character sample prefix taken from the identification file names.
            // Throws if the name has fewer than three characters.
            Func<string, string> abbreviateReplicate = name => "" + name[0] + name[2];

            const string outputFilePath = @"H:\Research\IPRG2015\MySkyline\SkylineTransitionResultsWithScores3.tsv";
            using (var writer = new StreamWriter(outputFilePath))
            {
                var peptides = skylineTable.GetData("Peptide Sequence").ToArray();
                var samples = skylineTable.GetData("Replicate Name").Select(abbreviateReplicate).ToArray();
                var charges = skylineTable.GetData("Precursor Charge").Select(c => Convert.ToInt32(c, invariant)).ToArray();
                var precursorMzs = skylineTable.GetData("Precursor Mz").Select(m => Convert.ToDouble(m, invariant)).ToArray();

                var headers = skylineTable.GetHeaders();
                var rows = skylineTable.GetRows();

                // The last two Skyline columns are dropped from both the header and the rows.
                writer.WriteLine("{0}\tProbability\tQValue", string.Join("\t", headers.Take(headers.Count - 2)));
                for (var i = 0; i < skylineTable.NumData; i++)
                {
                    var charge = charges[i];
                    var nominalMass = (int)Math.Round(((precursorMzs[i] - Constants.Proton)*charge - Composition.H2O.Mass)*
                                      Constants.RescalingConstant);
                    var pepKey = peptides[i] + ":" + nominalMass;

                    // Skip peptides that were not identified in any sample.
                    if (!pepKeySet.Contains(pepKey)) continue;

                    var key = samples[i] + ":" + pepKey + ":" + charge;
                    var probText = "NA";
                    var qValueText = "NA";
                    Tuple<double, double> scores;
                    if (resultDic.TryGetValue(key, out scores))
                    {
                        probText = scores.Item1.ToString(invariant);
                        qValueText = scores.Item2.ToString(invariant);
                    }

                    var skylineData = rows[i].Split(',');
                    for (var j = 0; j < skylineData.Length - 2; j++)
                    {
                        // NOTE(review): column index 2 is assumed to be "Replicate Name" — confirm against the Skyline export.
                        var cell = j != 2 ? skylineData[j] : abbreviateReplicate(skylineData[j]);
                        writer.Write(cell + "\t");
                    }
                    writer.WriteLine("{0}\t{1}", probText, qValueText);
                }
            }
            Console.WriteLine("Done");
        }