コード例 #1
0
        public void TestReadingTmtResultFile()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);
           
            const string filePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSGFPlusResultTMT10.tsv";
            if (!File.Exists(filePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, filePath);
            }

            var parser = new TsvFileParser(filePath);
            var pepStrs = parser.GetData("Peptide");
            var formulaStrs = parser.GetData("Formula");

            Assert.True(pepStrs.Count == formulaStrs.Count);

            var peptides = pepStrs.Select(Sequence.GetSequenceFromMsGfPlusPeptideStr).ToList();
            var formulae = formulaStrs.Select(Composition.Parse).ToList();

            Assert.True(peptides.Count == formulae.Count);

            for (var i = 0; i < peptides.Count; i++)
            {
                Assert.True((peptides[i].Composition + Composition.H2O).Equals(formulae[i]));
            }
        }
コード例 #2
0
ファイル: IcRescorer.cs プロジェクト: javamng/GitHUB
        private void Rescore(string icResultFilePath, string outputFilePath)
        {
            var parser = new TsvFileParser(icResultFilePath);
            var sequences = parser.GetData("Sequence");
            var scanNums = parser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
            var charges = parser.GetData("Charge").Select(c => Convert.ToInt32(c)).ToArray();
            var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
            var modIndex = parser.GetHeaders().IndexOf("Modifications");

            var rows = parser.GetRows();
            var headers = parser.GetHeaders();

            using (var writer = new StreamWriter(outputFilePath))
            {
                writer.WriteLine("{0}\t{1}", string.Join("\t", headers), IcScores.GetScoreNames());
                for (var i = 0; i < parser.NumData; i++)
                {
                    var row = rows[i];
                    var seqStr = sequences[i];
                    var charge = charges[i];
                    var scanNum = scanNums[i];
                    var composition = compositions[i];

                    var scores = _topDownScorer.GetScores(AminoAcid.ProteinNTerm, seqStr, AminoAcid.ProteinCTerm, composition, charge, scanNum);

                    var token = row.Split('\t');
                    for (var j = 0; j < token.Length; j++)
                    {
                        if(j != modIndex) writer.Write(token[j]+"\t");
                        else writer.Write("["+scores.Modifications+"]"+"\t");
                    }
                    writer.WriteLine(scores);
                }
            }
        }
コード例 #3
0
ファイル: IsosFilter.cs プロジェクト: javamng/GitHUB
        private void Read(string isosFileName)
        {
            var icrToolsparser = new TsvFileParser(isosFileName, ',');
            var monoMassArr = icrToolsparser.GetData("monoisotopic_mw").Select(Convert.ToDouble).ToArray();
            var scanArray = icrToolsparser.GetData("scan_num").Select(s => Convert.ToInt32(s)).ToArray();
            var chargeArray = icrToolsparser.GetData("charge").Select(s => Convert.ToInt32(s)).ToArray();

            var fitStringArr = icrToolsparser.GetData("fit");
            var fitArray = fitStringArr == null ? null : icrToolsparser.GetData("fit").Select(Convert.ToDouble).ToArray();

            var featureCountFiltered = 0;

            var minMass = double.MaxValue;
            var maxMass = 0.0;
            for (var i = 0; i < monoMassArr.Length; i++)
            {
                if (fitArray != null && fitArray[i] > _fitScoreThreshold || chargeArray[i] <= 1) 
                    continue;

                featureCountFiltered++;

                var scan = scanArray[i];
                var monoMass = monoMassArr[i];
                if (minMass > monoMass) minMass = monoMass;
                if (maxMass < monoMass) maxMass = monoMass;

                var minScan = _run.GetPrevScanNum(scan, 1);
                var maxScan = _run.GetNextScanNum(scan, 1);
                _lcMsMatchMap.SetMatches(monoMass, minScan, maxScan);
            }

            Console.Write(@"{0}/{1} features loaded...", featureCountFiltered, monoMassArr.Length);

            _lcMsMatchMap.CreateSequenceMassToMs2ScansMap(_run, _massTolerance, minMass, maxMass);
        }
コード例 #4
0
ファイル: IcBottomUpTsvReader.cs プロジェクト: javamng/GitHUB
        public IList<SpectrumMatch> Read()
        {
            var specMatches = new List<SpectrumMatch>();
            var tsvFile = new TsvFileParser(_fileName);
            var precursorCharges = tsvFile.GetData(PrecursorChargeHeader);
            var scans = tsvFile.GetData(ScanHeader);

            var peptides = tsvFile.GetData(BottomUpPeptideHeader);
            if (scans == null)
                throw new FormatException();

            var pepQValues = tsvFile.GetData(PepQValueHeader);
            var formulas = tsvFile.GetData(FormulaHeader);

            var peptideSet = new HashSet<string>();

            for (int i = 0; i < peptides.Count; i++)
            {
                if (Convert.ToDouble(pepQValues[i]) > PepQValueThreshold || peptideSet.Contains(peptides[i])) continue;
                peptideSet.Add(peptides[i]);
                var scanNum = Convert.ToInt32(scans[i]);
//                    var spectrum = lcms.GetSpectrum(scanNum);
//                    var spec = spectrum as ProductSpectrum;
//                    if (spec == null || spec.ActivationMethod != Act) continue;
                int precursorCharge = Convert.ToInt32(precursorCharges[i]);
                specMatches.Add((formulas != null && formulas[i] != null)
                    ? new SpectrumMatch(peptides[i], DataFileFormat.IcBottomUp, _lcms, scanNum, precursorCharge, _decoy, formulas[i])
                    : new SpectrumMatch(peptides[i], DataFileFormat.IcBottomUp, _lcms, scanNum, precursorCharge, _decoy));
            }
            return specMatches;
        }
コード例 #5
0
ファイル: DiaTsvReader.cs プロジェクト: javamng/GitHUB
        public IList<SpectrumMatch> Read()
        {
            var specMatches = new List<SpectrumMatch>();
            var tsvFile = new TsvFileParser(_fileName);
            var precursorCharges = tsvFile.GetData(PrecursorChargeHeader);
            var scans = tsvFile.GetData(ScanHeader);

            var peptides = tsvFile.GetData(TopDownPeptideHeader);
            if (peptides != null)
            {
                var peptideSet = new HashSet<string>();
                const double filterThreshold = QValueThreshold;
                var filterValues = tsvFile.GetData(QValueHeader);

                var aset = new AminoAcidSet();

                for (int i = 0; i < peptides.Count; i++)
                {
                    if (Convert.ToDouble(filterValues[i]) > filterThreshold || peptideSet.Contains(peptides[i])) continue;
                    peptideSet.Add(peptides[i]);
                    var scanNum = Convert.ToInt32(scans[i]);
                    int precursorCharge = Convert.ToInt32(precursorCharges[i]);
                    specMatches.Add(new SpectrumMatch(new Sequence(peptides[i], aset), _lcms, scanNum, precursorCharge, _decoy));
                }
            }
            return specMatches;
        }
コード例 #6
0
ファイル: TestYufengData.cs プロジェクト: javamng/GitHUB
        public void AddMostAbundantIsotopePeakIntensity()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string rawFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143.raw";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + rawFilePath);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

            const string resultFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143_IcTda.tsv";

            var parser = new TsvFileParser(resultFilePath);
            var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
            var scanNums = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
            var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
            var precursorIntensities = new double[parser.NumData];
            var tolerance = new Tolerance(10);
            for (var i = 0; i < parser.NumData; i++)
            {
                var scanNum = scanNums[i];
                var composition = compositions[i];
                var charge = charges[i];
                var precursorIon = new Ion(composition, charge);

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec = run.GetSpectrum(precursorScanNum);
                var isotopePeaks = precursorSpec.GetAllIsotopePeaks(precursorIon, tolerance, 0.1);
                if (isotopePeaks != null)
                {
                    var maxIntensity = 0.0;
                    for (var j = 0; j < isotopePeaks.Length; j++)
                    {
                        if (isotopePeaks[j] != null && isotopePeaks[j].Intensity > maxIntensity)
                            maxIntensity = isotopePeaks[j].Intensity;
                    }
                    precursorIntensities[i] = maxIntensity;
                }
            }

            // Writing
            const string newResultFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143_IcTdaWithIntensities.tsv";
            using (var writer = new StreamWriter(newResultFilePath))
            {
                writer.WriteLine(string.Join("\t", parser.GetHeaders())+"\t"+"PrecursorIntensity");
                for (var i = 0; i < parser.NumData; i++)
                {
                    writer.WriteLine(parser.GetRows()[i]+"\t"+precursorIntensities[i]);
                }
            }
            Console.WriteLine("Done");
        }
コード例 #7
0
ファイル: TestResults.cs プロジェクト: javamng/GitHUB
        public void SummarizeAnilResults()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string resultFolder = @"H:\Research\Anil\Oct28";
            if (!Directory.Exists(resultFolder))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, resultFolder);
            }

            var actMethods = new[] { ActivationMethod.CID, ActivationMethod.ETD, ActivationMethod.HCD };

            Console.WriteLine("Data\tCID\t\tETD\t\tHCD\t");
            Console.WriteLine("\tNumId\tMaxMass\tNumId\tMaxMass\tNumId\tMaxMass");
            foreach (var rawFile in Directory.GetFiles(resultFolder, "*.raw"))
            {
                var datasetName = Path.GetFileNameWithoutExtension(rawFile);

                var resultFile = Path.Combine(Path.GetDirectoryName(rawFile), datasetName + "_IcTda.tsv");
                var numId = new Dictionary<ActivationMethod, int>();
                var maxMass = new Dictionary<ActivationMethod, double>();

                foreach (var actMethod in actMethods)
                {
                    numId[actMethod] = 0;
                    maxMass[actMethod] = 0.0;
                }

                var run = PbfLcMsRun.GetLcMsRun(rawFile);
                var tsvParser = new TsvFileParser(resultFile);
                var qValues = tsvParser.GetData("QValue").Select(Convert.ToDouble).ToArray();
                var scanNums = tsvParser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
                var masses = tsvParser.GetData("Mass").Select(Convert.ToDouble).ToArray();

                for (var i = 0; i < qValues.Length; i++)
                {
                    if (qValues[i] > 0.01) break;
                    var scanNum = scanNums[i];
                    var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
                    Assert.True(spec != null);
                    ++numId[spec.ActivationMethod];

                    var mass = masses[i];
                    if (mass > maxMass[spec.ActivationMethod]) maxMass[spec.ActivationMethod] = mass;
                }
                Console.Write(datasetName);
                foreach (var actMethod in actMethods)
                {
                    Console.Write("\t" + numId[actMethod]);
                    Console.Write("\t" + maxMass[actMethod]);
                }
                Console.WriteLine();
            }
        }
コード例 #8
0
ファイル: Ms1FtFilter.cs プロジェクト: javamng/GitHUB
        private void Read(string ms1FtFileName)
        {
            var ftFileParser = new TsvFileParser(ms1FtFileName);
            var monoMassArr = ftFileParser.GetData("MonoMass").Select(Convert.ToDouble).ToArray();
            var minScanArray = ftFileParser.GetData("MinScan").Select(s => Convert.ToInt32(s)).ToArray();
            var maxScanArray = ftFileParser.GetData("MaxScan").Select(s => Convert.ToInt32(s)).ToArray();
            var repScanArray = ftFileParser.GetData("RepScan").Select(s => Convert.ToInt32(s)).ToArray();
            var minChargeArray = ftFileParser.GetData("MinCharge").Select(s => Convert.ToInt32(s)).ToArray();
            var maxChargeArray = ftFileParser.GetData("MaxCharge").Select(s => Convert.ToInt32(s)).ToArray();
            var scoreArray = ftFileParser.GetData("LikelihoodRatio").Select(Convert.ToDouble).ToArray();
            var featureCountFiltered = 0;

            for (var i = 0; i < monoMassArr.Length; i++)
            {
                //if (flagArray[i] == 0 && probArray[i] < _minProbability)  continue;
                if (scoreArray[i] < _minLikelihoodRatio) continue;
                featureCountFiltered++;
                var monoMass = monoMassArr[i];
                _lcMsChargeMap.SetMatches(monoMass, minScanArray[i], maxScanArray[i], repScanArray[i], minChargeArray[i], maxChargeArray[i]);
            }

            // NOTE: The DMS Analysis Manager looks for this statistic; do not change it
            Console.Write(@"{0}/{1} features loaded...", featureCountFiltered, monoMassArr.Length);
            _lcMsChargeMap.CreateMassToScanNumMap();
        }
コード例 #9
0
ファイル: TestLcMsCaching.cs プロジェクト: javamng/GitHUB
        public void TestClusterCentricSearch()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string pfResultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V4_JP_Len500\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
            if (!File.Exists(pfResultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pfResultFilePath);
            }

            var tsvReader = new TsvFileParser(pfResultFilePath);

            var ms2Scans = tsvReader.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
            var compositions = tsvReader.GetData("Composition").ToArray();
            var qValues = tsvReader.GetData("QValue").Select(Convert.ToDouble).ToArray();

            var compScanTable = new Dictionary<string, IList<int>>();
            for(var i=0; i<qValues.Length; i++)
            {
                var qValue = qValues[i];
                if (qValue > 0.01) break;
                IList<int> scanNums;
                if(compScanTable.TryGetValue(compositions[i], out scanNums))
                {
                    scanNums.Add(ms2Scans[i]);
                }
                else
                {
                    compScanTable.Add(compositions[i], new List<int> {ms2Scans[i]});
                }
            }

            Console.Write("NumCompositions: {0}", compScanTable.Keys.Count);

            //const string featureFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V4_JP_Len500\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

        }
コード例 #10
0
ファイル: SequenceTagParser.cs プロジェクト: javamng/GitHUB
        private void Parse(string tagFilePath)
        {
            var tagParser = new TsvFileParser(tagFilePath);
            var scan = tagParser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
            var sequence = tagParser.GetData("SequenceTag").ToArray();
            var isPrefix = tagParser.GetData("IsPrefix").Select(s => s.Equals("1")).ToArray();
            var flankingMass = tagParser.GetData("FlankingMass").Select(Convert.ToDouble).ToArray();
            for (var i = 0; i < tagParser.NumData; i++)
            {
                if (sequence[i].Length < _minTagLength) continue;
                var tag = new SequenceTag(scan[i], sequence[i], isPrefix[i], flankingMass[i]);

                IList<SequenceTag> tagList;
                if (_scanToTags.TryGetValue(scan[i], out tagList))
                {
                    if (tagList.Count < _numTagsPerScan) tagList.Add(tag);
                }
                else
                {
                    _scanToTags.Add(scan[i], new List<SequenceTag> { tag });
                }
            }
        }
コード例 #11
0
        public void ValidateIcResultsWithModifications()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1_Rescored.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var parser = new TsvFileParser(resultFilePath);
            var sequences = parser.GetData("Sequence");
            var modifications = parser.GetData("Modifications");
            var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
            var scanNums = parser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();
            var aaSet = new AminoAcidSet();
            for (var i = 0; i < parser.NumData; i++)
            {
                var sequenceComp = aaSet.GetComposition(sequences[i]) + Composition.H2O;

                var modComposition = Composition.Zero;
                var modsStr = modifications[i].Substring(1, modifications[i].Length - 2);
                var mods = modsStr.Split(',');
                foreach(var modStr in mods)
                {
                    if (modStr.Length == 0) continue;
                    var modName = modStr.Split()[0];
                    var mod = Modification.Get(modName);
                    modComposition += mod.Composition;
                }

                var compFromSeqAndMods = sequenceComp + modComposition;
                Assert.True(compFromSeqAndMods.Equals(compositions[i]));
            }
        }
コード例 #12
0
ファイル: MSAlignRescorer.cs プロジェクト: javamng/GitHUB
        private void Rescore(string msAlignFilePath, string outputFilePath)
        {
            var parser = new TsvFileParser(msAlignFilePath);
            var sequences = parser.GetData("Peptide");
            var scanNums = parser.GetData("Scan(s)").Select(s => Convert.ToInt32(s)).ToArray();
            var charges = parser.GetData("Charge").Select(c => Convert.ToInt32(c)).ToArray();

            var rows = parser.GetRows();
            var headers = parser.GetHeaders();

            using (var writer = new StreamWriter(outputFilePath))
            {
                writer.WriteLine("{0}\t{1}", string.Join("\t", headers), IcScores.GetScoreNames());
                for (var i = 0; i < parser.NumData; i++)
                {
                    var row = rows[i];
                    var seqStr = SimpleStringProcessing.GetStringBetweenDots(sequences[i]);
                    if (seqStr == null || seqStr.Contains("(")) continue; //TODO: currently ignore ids with modifications

                    var composition = AASet.GetComposition(seqStr);
                    //var sequence = new Sequence(seqStr, AASet);
                    //if (sequence == null)
                    //{
                    //    Console.WriteLine("Ignore illegal sequence: {0}", seqStr);
                    //    continue;
                    //}
                    var charge = charges[i];
                    var scanNum = scanNums[i];

                    var scores = _topDownScorer.GetScores(AminoAcid.ProteinNTerm, seqStr, AminoAcid.ProteinCTerm, composition, charge, scanNum);
                    if (scores == null) continue;

                    writer.WriteLine("{0}\t{1}", row, scores);
                }
            }
        }
コード例 #13
0
ファイル: TestLcMsCaching.cs プロジェクト: javamng/GitHUB
        public void FilteringEfficiencyQcShew()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();
            sw.Start();
            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";
            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
            sw.Stop();

            Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const int minPrecursorCharge = 3;
            const int maxPrecursorCharge = 30;
            const int tolerancePpm = 10;
            var tolerance = new Tolerance(tolerancePpm);
            sw.Reset();
            sw.Start();
            var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40);
            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);

            sw.Stop();

            Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

            ISequenceFilter ms1Filter = ms1BasedFilter;
            
            sw.Reset();
            sw.Start();
            const double minProteinMass = 3000.0;
            const double maxProteinMass = 30000.0;
            var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
            var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
            var numComparisons = 0L;
            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
                numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
            }
            sw.Stop();

            Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

            //const string prot =
            //    "ADVFHLGLTKAMLDGATLAIVPGDPERVKRIAELMDNATFLASHREYTSYLAYADGKPVVICSTGIGGPSTSIAVEELAQLGVNTFLRVGTTGAIQPHVNVGDVIVTQASVRLDGASLHFAPMEFPAVANFECTTAMVAACRDAGVEPHIGVTASSDTFYPGQERYDTVTGRVTRRFAGSMKEWQDMGVLNYEMESATLFTMCATQGWRAACVAGVIVNRTQQEIPDEATMKKTEVSAVSIVVAAAKKLLA";
            //var protMass = (new AminoAcidSet().GetComposition(prot) + Composition.H2O).Mass;
            //Console.WriteLine("************ScanNums: " + string.Join("\t", ms1Filter.GetMatchingMs2ScanNums(protMass)));

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv";
            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var tsvReader = new TsvFileParser(resultFilePath);
            var scanNums = tsvReader.GetData("Scan(s)");
            var charges = tsvReader.GetData("Charge");
            var scores = tsvReader.GetData("E-value");
            var sequences = tsvReader.GetData("Peptide");

            //const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_N30_C30.tsv";
            //var tsvReader = new TsvFileParser(resultFilePath);
            //var scanNums = tsvReader.GetData("ScanNum");
            //var charges = tsvReader.GetData("Charge");
            //var scores = tsvReader.GetData("Score");
            //var sequences = tsvReader.GetData("Sequence");

            var aaSet = new AminoAcidSet();

            var seqSet = new HashSet<string>();
            var allSeqSet = new HashSet<string>();
            var numUnfilteredSpecs = 0;
            var totalSpecs = 0;
            for (var i = 0; i < scores.Count; i++)
            {
                var score = Convert.ToDouble(scores[i]);
                if (score > 1E-4) continue;
                //if (score < 10) continue;

                var scanNum = Convert.ToInt32(scanNums[i]);
                var charge = Convert.ToInt32(charges[i]);

                var sequence = SimpleStringProcessing.GetStringBetweenDots(sequences[i]);
                if (sequence == null || sequence.Contains("(")) continue;
                //var sequence = sequences[i];
                var composition = aaSet.GetComposition(sequence) + Composition.H2O;

                var precursorIon = new Ion(composition, charge);
                var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
                var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid) continue;
                ++totalSpecs;

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec = run.GetSpectrum(precursorScanNum);
                var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec = run.GetSpectrum(nextScanNum);
                var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
                if (corr3 == 1)
                {
                    numUnfilteredSpecs++;
                    seqSet.Add(sequences[i]);
                }
                allSeqSet.Add(sequences[i]);

                var corrMax = new[] { corr1, corr2, corr3 }.Max();

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax);
            }

            Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
            Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
            Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
            Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
コード例 #14
0
ファイル: TestLcMsCaching.cs プロジェクト: javamng/GitHUB
        public void TestMs1Filtering()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string resultFilePath =
            //    @"C:\cygwin\home\kims336\Data\TopDown\raw\CorrMatches_N30\SBEP_STM_001_02272012_Aragon.tsv";
                @"C:\cygwin\home\kims336\Data\TopDown\raw\CorrMatches_N30\SBEP_STM_001_02272012_Aragon.decoy.icresult";
            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }
                
            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\DataFiles\SBEP_STM_001_02272012_Aragon.raw";
            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

            //const int minPrecursorCharge = 3;
            //const int maxPrecursorCharge = 30;
            //const int tolerancePpm = 15;
            var tolerance = new Tolerance(15);

            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 0.7, 40);
            ////var ms1BasedFilter = new Ms1IsotopeTopKFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 20);
            //ISequenceFilter ms1Filter = ms1BasedFilter;

            var tsvReader = new TsvFileParser(resultFilePath);
            var compositions = tsvReader.GetData("Composition");
            var scanNums = tsvReader.GetData("ScanNum");
            var charges = tsvReader.GetData("Charge");
            var qValues = tsvReader.GetData("QValue");
            var scores = tsvReader.GetData("Score");

            //var sequences = tsvReader.GetData("Annotation");

            //var hist = new int[11];

            Console.WriteLine("ScanNum\tScore\tPrecursor\tNext\tSum\tNextIsotope\tLessCharge\tMoreCharge\tMax\tNumXicPeaks");
            for (var i = 0; i < compositions.Count; i++)
            {

                if (qValues != null)
                {
                    var qValue = Convert.ToDouble(qValues[i]);
                    if (qValue > 0.01) continue;
                }

                var scanNum = Convert.ToInt32(scanNums[i]);
                var composition = Composition.Parse(compositions[i]);
                var charge = Convert.ToInt32(charges[i]);

                var precursorIon = new Ion(composition, charge);
                var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
                var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid) continue;

                var score = Convert.ToDouble(scores[i]);

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec = run.GetSpectrum(precursorScanNum);
                var preIsotopeCorr = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec = run.GetSpectrum(nextScanNum);
                var nextIsotopeCorr = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var xicMostAbundant = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance, scanNum);

                var apexScanNum = xicMostAbundant.GetApexScanNum();
                if (apexScanNum < run.MinLcScan) apexScanNum = scanNum;
                //var sumSpec = run.GetSummedMs1Spectrum(apexScanNum);
//                var apexIsotopeCorr = sumSpec.GetCorrScore(precursorIon, tolerance, 0.1);
//                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;

                var xicNextIsotope = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz() + Constants.C13MinusC12/charge, tolerance, scanNum);

                var plusOneIsotopeCorr = xicMostAbundant.GetCorrelation(xicNextIsotope);

                var precursorIonChargeMinusOne = new Ion(composition, charge - 1);
                var xicChargeMinusOne = run.GetPrecursorExtractedIonChromatogram(precursorIonChargeMinusOne.GetMostAbundantIsotopeMz(), tolerance, scanNum);
                var chargeMinusOneCorr = xicMostAbundant.GetCorrelation(xicChargeMinusOne);

                var precursorIonChargePlusOne = new Ion(composition, charge + 1);
                var xicChargePlusOne = run.GetPrecursorExtractedIonChromatogram(precursorIonChargePlusOne.GetMostAbundantIsotopeMz(), tolerance, scanNum);
                var chargePlusOneCorr = xicMostAbundant.GetCorrelation(xicChargePlusOne);

                //var max = new[] {preIsotopeCorr, nextIsotopeCorr, apexIsotopeCorr, plusOneIsotopeCorr, chargeMinusOneCorr, chargePlusOneCorr}.Max();
                //Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}", 
                //    scanNum, score, preIsotopeCorr, nextIsotopeCorr, apexIsotopeCorr, plusOneIsotopeCorr, chargeMinusOneCorr, chargePlusOneCorr, max, xicMostAbundant.Count);
            }

            //Console.WriteLine("Histogram");
            //for (var i = 0; i < hist.Length; i++)
            //{
            //    Console.WriteLine("{0:f1}\t{1}", i / 10.0, hist[i]);
            //}
        }
コード例 #15
0
ファイル: TestLcMsCaching.cs プロジェクト: javamng/GitHUB
        public void FilteringEfficiency()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();
            sw.Start();
            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw";
            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
            sw.Stop();

            Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const int minPrecursorCharge = 3;
            const int maxPrecursorCharge = 30;
            const int tolerancePpm = 10;
            var tolerance = new Tolerance(tolerancePpm);
            sw.Reset();
            sw.Start();
            //var ms1BasedFilter = new Ms1BasedFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm);
//            
            //var ms1BasedFilter = new Ms1IsotopeTopKFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 20);
            //var ms1BasedFilter = new ProductScorerBasedOnDeconvolutedSpectra(run,
            //    minPrecursorCharge, maxPrecursorCharge,
            //    0, 0,
            //    600.0, 1800.0, new Tolerance(tolerancePpm), null);
            //ms1BasedFilter.CachePrecursorMatchesBinCentric();
            var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.5, 0.5, 0.5, 40);
            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);

            sw.Stop();

            Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

            ISequenceFilter ms1Filter = ms1BasedFilter;

            sw.Reset();
            sw.Start();
            const double minProteinMass = 3000.0;
            const double maxProteinMass = 30000.0;
            var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
            var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
            var numComparisons = 0L;
            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
                numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
            }
            sw.Stop();

            Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon_4PTMs.icresult";
            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var tsvReader = new TsvFileParser(resultFilePath);
            var compositions = tsvReader.GetData("Composition");
            var scanNums = tsvReader.GetData("ScanNum");
            var charges = tsvReader.GetData("Charge");
            var scores = tsvReader.GetData("Score");
            var qvalues = tsvReader.GetData("QValue");
            var sequences = tsvReader.GetData("Sequence");

            var sequenceCount = new Dictionary<string, int>();
            for (var i = 0; i < compositions.Count; i++)
            {
                if (qvalues != null)
                {
                    var qValue = Convert.ToDouble(qvalues[i]);
                    if (qValue > 0.01) continue;
                }
                else
                {
                    var score = Convert.ToDouble(scores[i]);
                    if (score < 13) continue;
                }
                var scanNum = Convert.ToInt32(scanNums[i]);
                var charge = Convert.ToInt32(charges[i]);
                var composition = Composition.Parse(compositions[i]);
                var precursorIon = new Ion(composition, charge);
                var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
                var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid) continue;

                var sequence = sequences[i];
                int count;
                if (sequenceCount.TryGetValue(sequence, out count)) sequenceCount[sequence] = count + 1;
                else sequenceCount[sequence] = 1;
            }
            //var sequences = tsvReader.GetData("Annotation");

            var seqSet = new HashSet<string>();
            var allSeqSet = new HashSet<string>();
            var numUnfilteredSpecs = 0;
            var totalSpecs = 0;
            for (var i = 0; i < compositions.Count; i++)
            {
                if (qvalues != null)
                {
                    var qValue = Convert.ToDouble(qvalues[i]);
                    if (qValue > 0.01) continue;
                }
                else
                {
                    var score = Convert.ToDouble(scores[i]);
                    if (score < 13) continue;
                }
                var scanNum = Convert.ToInt32(scanNums[i]);
                var charge = Convert.ToInt32(charges[i]);
                var composition = Composition.Parse(compositions[i]);
                var precursorIon = new Ion(composition, charge);
                var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
                var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid) continue;

                ++totalSpecs;

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec = run.GetSpectrum(precursorScanNum);
                var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec = run.GetSpectrum(nextScanNum);
                var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
                if (corr3 == 1)
                {
                    numUnfilteredSpecs++;
                    seqSet.Add(sequences[i]);
                }
                allSeqSet.Add(sequences[i]);

                //var xic = run.GetFullPrecursorIonExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance);
                ////xic.Display();
                //var apexScanNum = xic.GetNearestApexScanNum(run.GetPrecursorScanNum(scanNum), false);
                //var apexSpec = run.GetSpectrum(apexScanNum);
                //var corr3 = apexSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var corrMax = new[] { corr1, corr2, corr3 }.Max();

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax, sequenceCount[sequences[i]]);
            }

            Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
            Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons/(double)(maxBinNum-minBinNum+1));
            Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs/(double)totalSpecs, numUnfilteredSpecs, totalSpecs);
            Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count/(double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
コード例 #16
0
        public List<ProteinSpectrumMatch> ReadMsPathFinderResult(string msPathFinderResultPath, int maxPrsm, double minScore = 3, double maxScore = int.MaxValue)
        {
            var parser = new TsvFileParser(msPathFinderResultPath);
            var prsmList = new List<ProteinSpectrumMatch>();

            var scoreColumn = parser.GetData("#MatchedFragments") ?? parser.GetData("Score");
            var qValColumn = parser.GetData("QValue");

            var evalueColumn = parser.GetData("SpecEValue");

            for (var i = 0; i < parser.NumData; i++)
            {
                var sequence = parser.GetData("Sequence")[i];
                var scanNum = int.Parse(parser.GetData("Scan")[i]);
                var mass = double.Parse(parser.GetData("Mass")[i]);
                var protName = parser.GetData("ProteinName")[i];
                var protDesc = parser.GetData("ProteinDesc")[i];
                var charge = int.Parse(parser.GetData("Charge")[i]);

                var firstResId = int.Parse(parser.GetData("Start")[i]);
                var lastResId = int.Parse(parser.GetData("End")[i]);
                var score = double.Parse(scoreColumn[i]);
                var mod = parser.GetData("Modifications")[i];
                var evalue = (evalueColumn != null) ? double.Parse(parser.GetData("SpecEValue")[i]) : 0;
                
                var pre = parser.GetData("Pre")[i];
                var post = parser.GetData("Post")[i];
                var proteinLen = int.Parse(parser.GetData("ProteinLength")[i]);

                if (score < minScore || score > maxScore) continue;

                if (qValColumn != null)
                {
                    var fdr = double.Parse(qValColumn[i]);
                    if (fdr > FdrCutoff) continue;
                }

                var sequenceText = GetSequenceText(sequence, mod);

                var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsPathFinder)
                {
                    SequenceText = sequenceText,
                    Modifications = mod,
                    Pre = pre,
                    Post = post,
                    ProteinLength = proteinLen,
                    SpectralEvalue = evalue,
                };

                prsmList.Add(prsm);

                if (prsmList.Count >= maxPrsm) break;
            }

            return prsmList;
        }
コード例 #17
0
        private double[][] LoadTable(string fname)
        {
            if (!File.Exists(fname)) throw new FileNotFoundException("Missing score datafile: " + fname);
            
            var parser = new TsvFileParser(fname);
            var table = new double[_massBins.Length][];
            
            for (var i = 0; i < _massBins.Length; i++)
            {
                table[i] = new double[NumberOfBins];

                for (var k = 0; k < NumberOfBins; k++)
                {
                    var colData = parser.GetData(string.Format("{0}", k));
                    table[i][k] = double.Parse(colData[i]);
                }
            }
            return table;
        }
コード例 #18
0
ファイル: TestRankScore.cs プロジェクト: javamng/GitHUB
        public void DiaRankScore()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string dataFile =
    @"\\protoapps\UserData\Wilkins\BottomUp\HCD_QCShew\raw\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";
            const string tsvFile =
                @"\\protoapps\UserData\Wilkins\BottomUp\HCD_QCShew\tsv\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.tsv";

            if (!File.Exists(dataFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dataFile);
            }

            if (!File.Exists(tsvFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tsvFile);
            }

            var parser = new TsvFileParser(tsvFile);
            var sequences = parser.GetData("Peptide");
            var charges = parser.GetData("Charge");
            var scans = parser.GetData("ScanNum");

            var lcms = InMemoryLcMsRun.GetLcMsRun(dataFile, 0, 0);
            var rankScorer =
    new DiaRankScore(
        @"C:\Users\wilk011\Documents\DataFiles\TestFolder\HCD_QExactive_Tryp.txt");

            using (
                var outFile = new StreamWriter(@"C:\Users\wilk011\Documents\DataFiles\TestFolder\HCD_QCShew_Score_2.txt"))
            {
                outFile.WriteLine("Target\tDecoy");
                for (int i = 0; i < sequences.Count; i++)
                {
                    string sequenceStr = sequences[i];
                    int charge = Convert.ToInt32(charges[i]);
                    int scan = Convert.ToInt32(scans[i]);

                    var sequence = Sequence.GetSequenceFromMsGfPlusPeptideStr(sequenceStr);
                    var decoySeq = Sequence.GetSequenceFromMsGfPlusPeptideStr(sequenceStr);
                    decoySeq.Reverse();
                    var decoyStr = decoySeq.Aggregate("", (current, aa) => current + aa);
                    decoyStr = SimpleStringProcessing.Mutate(decoyStr, sequence.Count/2);
                    decoySeq = Sequence.GetSequenceFromMsGfPlusPeptideStr(decoyStr);

                    var sequenceScore = rankScorer.GetScore(sequence, charge, scan, lcms);
                    var decoyScore = rankScorer.GetScore(decoySeq, charge, scan, lcms);
                    outFile.WriteLine("{0}\t{1}", sequenceScore, decoyScore);
                }
            }
        }
コード例 #19
0
        public void TestFeatureIdMatching()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var resultParser = new MsPathFinderParser(resultFilePath);
            const double qValueThreshold = 0.01;
            const double tolerancePpm = 13;

            const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
            var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");

            if (!File.Exists(rawFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFileName);

            var idList =
                resultParser.GetIdList().TakeWhile(id => id.QValue <= qValueThreshold).OrderBy(id => id.Mass).ToList();
            var idMassList = idList.Select(id => id.Mass).ToList();
            var idFlag = new bool[idList.Count];


            // Parse sequence tags
            var tagFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
            const int minTagLength = 6;
            const int numProtMatches = 4;
//            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";

            if (!File.Exists(tagFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
            }

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);

            var tagParser = new SequenceTagParser(tagFileName, minTagLength);

            var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
            var featureParser = new TsvFileParser(featureFileName);

            var minScan = featureParser.GetData("MinScan").Select(s => Convert.ToInt32(s)).ToArray();
            var maxScan = featureParser.GetData("MaxScan").Select(s => Convert.ToInt32(s)).ToArray();
            var minCharge = featureParser.GetData("MinCharge").Select(s => Convert.ToInt32(s)).ToArray();
            var maxCharge = featureParser.GetData("MaxCharge").Select(s => Convert.ToInt32(s)).ToArray();
            var monoMass = featureParser.GetData("MonoMass").Select(Convert.ToDouble).ToArray();

            var numFeaturesWithId = 0;
            var numFeaturesWithMs2 = 0;
            var numFeaturesWithTags = 0;
            var numFeaturesWithMatchingTags = 0;
            var numFeaturesWithTwoOrMoreMatchingTags = 0;
            var numFeaturesWithNoIdAndMatchingTags = 0;

            for (var i = 0; i < featureParser.NumData; i++)
            {
                var mass = monoMass[i];

                // Find Id
                var tolDa = new Tolerance(tolerancePpm).GetToleranceAsDa(mass, 1);
                var minMass = mass - tolDa;
                var maxMass = mass + tolDa;
                var index = idMassList.BinarySearch(mass);
                if (index < 0) index = ~index;

                var matchedId = new List<MsPathFinderId>();
                // go down
                var curIndex = index - 1;
                while (curIndex >= 0)
                {
                    var curId = idList[curIndex];
                    if (curId.Mass < minMass) break;
                    if (curId.Scan > minScan[i] && curId.Scan < maxScan[i]
                        && curId.Charge >= minCharge[i] && curId.Charge <= maxCharge[i])
                    {
                        matchedId.Add(curId);
                        idFlag[curIndex] = true;
                    }
                    --curIndex;
                }

                // go up
                curIndex = index;
                while (curIndex < idList.Count)
                {
                    var curId = idList[curIndex];
                    if (curId.Mass > maxMass) break;
                    if (curId.Scan >= minScan[i] && curId.Scan <= maxScan[i]
                        && curId.Charge >= minCharge[i] && curId.Charge <= maxCharge[i])
                    {
                        matchedId.Add(curId);
                        idFlag[curIndex] = true;
                    }
                    ++curIndex;
                }

                var hasId = false;
                if (matchedId.Any())
                {
                    ++numFeaturesWithId;
                    hasId = true;
                }

                // Find MS2 scans
//                var numMs2Scans = 0;
                var tags = new List<SequenceTag>();
                var hasMs2 = false;
                for (var scanNum = minScan[i]; scanNum <= maxScan[i]; scanNum++)
                {
                    var isolationWindow = run.GetIsolationWindow(scanNum);
                    if (isolationWindow == null) continue;
                    var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
                    var charge = (int)Math.Round(mass / isolationWindowTargetMz);
                    if (charge < minCharge[i] || charge > maxCharge[i]) continue;
                    var mz = Ion.GetIsotopeMz(mass, charge,
                        Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex);
                    if (isolationWindow.Contains(mz))
                    {
//                        ++numMs2Scans;
                        tags.AddRange(tagParser.GetSequenceTags(scanNum));
                        hasMs2 = true;
                    }
                }
                if (hasMs2) ++numFeaturesWithMs2;
                if (tags.Any()) ++numFeaturesWithTags;
                var protHist = new Dictionary<string, int>();
                var hasMatchedTag = false;
                foreach (var tag in tags)
                {
                    var matchedProteins = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).Select(idx => fastaDb.GetProteinName(idx)).ToArray();
                    if (matchedProteins.Any())
                    {
                        hasMatchedTag = true;
                        foreach (var protein in matchedProteins)
                        {
                            int num;
                            if (protHist.TryGetValue(protein, out num)) protHist[protein] = num + 1;
                            else protHist[protein] = 1;
                        }
                    }
                }
                if (hasMatchedTag)
                {
                    ++numFeaturesWithMatchingTags;
                    if (!hasId) ++numFeaturesWithNoIdAndMatchingTags;
                }
                if (protHist.Any())
                {
                    var maxOcc = protHist.Values.Max();
                    if (maxOcc >= numProtMatches) ++numFeaturesWithTwoOrMoreMatchingTags;
                }
            }

            Console.WriteLine("NumFeatures: {0}", featureParser.NumData);
            Console.WriteLine("NumId: {0}", idList.Count);
            Console.WriteLine("NumFeaturesWithId: {0} ({1})", numFeaturesWithId, numFeaturesWithId / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithMs2: {0} ({1})", numFeaturesWithMs2, numFeaturesWithMs2 / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithTag: {0} ({1})", numFeaturesWithTags, numFeaturesWithTags / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithMatchedTag: {0} ({1})", numFeaturesWithMatchingTags, numFeaturesWithMatchingTags / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithMoreThanOneMatchedTag: {0} ({1})", numFeaturesWithTwoOrMoreMatchingTags, numFeaturesWithTwoOrMoreMatchingTags / (float)featureParser.NumData);
            Console.WriteLine("NumFeaturesWithNoIdAndMatchedTag: {0} ({1})", numFeaturesWithNoIdAndMatchingTags, numFeaturesWithNoIdAndMatchingTags / (float)featureParser.NumData);

            for (var i = 0; i < idFlag.Length; i++)
            {
                if (!idFlag[i])
                {
                    Console.WriteLine(idList[i].Scan);
                }
            }


//            Console.WriteLine(string.Join(",", filter.GetMatchingMs2ScanNums(8115.973001)));
//
//            Console.WriteLine(featureFileName);
        }
コード例 #20
0
ファイル: TestBottomUpScoring.cs プロジェクト: javamng/GitHUB
        public void TestInitialScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string icResultPath = @"C:\cygwin\home\kims336\Data\QCShewQE\Ic_NTT2_03_NoMod_NoRescoring\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28_IcTarget.tsv";
            if (!File.Exists(icResultPath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, icResultPath);
            }

            var icParser = new TsvFileParser(icResultPath);
            var icScans = icParser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
            var icPeptides = icParser.GetData("Sequence");
            var icScore = icParser.GetData("Score").Select(s => Convert.ToInt32(s)).ToArray();
            var map = new Dictionary<string, int>();
            for (var i = 0; i < icParser.NumData; i++)
            {
                map.Add(icScans[i]+":"+icPeptides[i], icScore[i]);
            }

            const string msgfPlusResultPath = @"C:\cygwin\home\kims336\Data\QCShewQE\NoMod.tsv";
            if (!File.Exists(msgfPlusResultPath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, msgfPlusResultPath);
            }

            var msgfPlusResults = new MsGfResults(msgfPlusResultPath);
            var matches = msgfPlusResults.GetMatchesAtPsmFdr(0.01);
            //Console.WriteLine("NumMatches: {0}", matches.Count);
            Console.WriteLine("ScanNum\tPeptide\tSpecEValue\tIcScore");
            foreach (var match in matches)
            {
                var scanNum = match.ScanNum;
                var peptide = match.Peptide;
                var specEValue = match.SpecEValue;
                int score;
                if (!map.TryGetValue(scanNum + ":" + peptide, out score)) score = -1;
                Console.WriteLine("{0}\t{1}\t{2}\t{3}", scanNum, peptide, specEValue, score);
            }
        }
コード例 #21
0
        public List<ProteinSpectrumMatch> ReadMsGfPlusResult(string msgfResultPath, int maxPrsm)
        {
            var parser = new TsvFileParser(msgfResultPath);
            var prsmList = new List<ProteinSpectrumMatch>();
            var prevScanNum = -1;

            for (var i = 0; i < parser.NumData; i++)
            {
                var sequence = parser.GetData("Peptide")[i];
                var scanNum = int.Parse(parser.GetData("Scan")[i]);

                if (prevScanNum == scanNum) continue;
                prevScanNum = scanNum;

                var mz = double.Parse(parser.GetData("PrecursorMZ")[i]);
                var protName = parser.GetData("Protein")[i];
                var protDesc = "";
                var score = double.Parse(parser.GetData("MSGFScore")[i]);
                var charge = int.Parse(parser.GetData("Charge")[i]);

                var seq = Sequence.GetSequenceFromMsGfPlusPeptideStr(sequence);
                var sequenceText = GetSequenceText(seq);
                var mass = (mz - Constants.Proton)*charge;
                var firstResId = 0;
                var lastResId = 0;
                var fdr = Double.Parse(parser.GetData("QValue")[i]);
                if (fdr > FdrCutoff) continue;

                var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsGfPlus)
                {
                    SequenceText = sequenceText,
                };

                prsmList.Add(prsm);

                if (prsmList.Count >= maxPrsm) break;
            }

            return prsmList;
        }
コード例 #22
0
        public void CompareRt()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // Q-Exactive
            //const string qeDdaResult = @"D:\Research\Data\UW\QExactive\DDA_All_Summary.tsv";
            //const string qeDiaResult = @"D:\Research\Data\UW\QExactive\DIA_All_Summary.tsv";

            // Fusion
            const string qeDdaResult = @"D:\Research\Data\UW\Fusion\DDA_Summary.tsv";
            const string qeDiaResult = @"D:\Research\Data\UW\Fusion\DIA_Summary.tsv";

            const string specFileDda = @"D:\Research\Data\UW\QExactive\82593_lv_mcx_DDA.raw";
            var ddaReader = new XCaliburReader(specFileDda);

            var specFileToReader = new Dictionary<string, XCaliburReader>();
            var specFilesDia = Directory.GetFiles(@"D:\Research\Data\UW\QExactive\", "*_DIA_*.raw");
            foreach (var specFile in specFilesDia)
            {
                var specFileNoExt = Path.GetFileNameWithoutExtension(specFile);
                if (specFileNoExt == null) continue;
                var reader = new XCaliburReader(specFile);
                specFileToReader.Add(specFileNoExt, reader);
            }

            const string resultPath1 = qeDdaResult;
            const string resultPath2 = qeDiaResult;

            var result1 = new TsvFileParser(resultPath1);
            var result2 = new TsvFileParser(resultPath2);

            const double pepQValueThreshold = 0.01;
            var vennDiagram = new VennDiagram<string>(result1.GetPeptides(pepQValueThreshold),
                                                      result2.GetPeptides(pepQValueThreshold));

            var intersectionPeptides = vennDiagram.Intersection;

            var result1Peptides = result1.GetData("Peptide");
            var result1ScanNums = result1.GetData("ScanNum");

            var result2Peptides = result2.GetData("Peptide");
            var result2ScanNums = result2.GetData("ScanNum");
            var result2SpecFile = result2.GetData("#SpecFile");

            Console.WriteLine("Peptide\tScanNum1\tScanNum2\tRt1\tRt2");
            foreach (var peptide in intersectionPeptides)
            {
                var index1 = result1Peptides.IndexOf(peptide);
                var index2 = result2Peptides.IndexOf(peptide);

                var scanNum1 = Convert.ToInt32(result1ScanNums[index1]);
                var scanNum2 = Convert.ToInt32(result2ScanNums[index2]);

                var diaFile = Path.GetFileNameWithoutExtension(result2SpecFile[index2]);

                var reader1 = ddaReader;
                var reader2 = specFileToReader[diaFile];

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", peptide.Replace("C+57.021", "C"), scanNum1, scanNum2, reader1.RtFromScanNum(scanNum1), reader2.RtFromScanNum(scanNum2));
            }
        }
コード例 #23
0
        public void GenerateVennDiagramsPeMmr()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // No PE-MMR
            //const string noPeMmr = @"D:\Research\Data\PEMMR\iTRAQ_N33T34_10ug_100cm_300min_C2_061213.tsv";

            // PE-MMR Scan based FDR
            //const string scanBasedPeMmr = @"D:\Research\Data\PEMMR\NewSpectra\iTRAQ_N33T34_10ug_100cm_300min_C2_061213_MX_PEMMR_UMCID_ScanFDR.tsv";

            // UMC based FDR
            const string umcBasedPeMmr = @"D:\Research\Data\PEMMR\NewSpectra\iTRAQ_N33T34_10ug_100cm_300min_C2_061213_MX_PEMMR_UMCID_UMCFDR.tsv";

            // IPA
            const string ipa = @"D:\Research\Data\PEMMR\Ox\IPA_Summary_TargetOnly.tsv";

            const string resultPath1 = umcBasedPeMmr;
            const string resultPath2 = ipa;

            var result1 = new TsvFileParser(resultPath1);
            var result2 = new TsvFileParser(resultPath2);

            const double pepQValueThreshold = 0.01;
            var vennDiagram = new VennDiagram<string>(result1.GetPeptides(pepQValueThreshold),
                                                      result2.GetPeptides(pepQValueThreshold));
            Console.WriteLine("{0}\t{1}\t{2}",
                              vennDiagram.Set1Only.Count + vennDiagram.Intersection.Count,
                              vennDiagram.Intersection.Count,
                              vennDiagram.Set2Only.Count + vennDiagram.Intersection.Count);
            Console.WriteLine("{0}\t{1}\t{2}",
                              vennDiagram.Set1Only.Count,
                              vennDiagram.Intersection.Count,
                              vennDiagram.Set2Only.Count);

            foreach (var peptide in vennDiagram.Set2Only)
            {
                Console.WriteLine(peptide);
                var peptides = result2.GetData("Peptide");
            }
        }
コード例 #24
0
ファイル: MSPathFinderParser.cs プロジェクト: javamng/GitHUB
        private bool Parse(string fileName)
        {
            var parser = new TsvFileParser(fileName);
            var scan = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
            var pre = parser.GetData("Pre").Where(s => s.Length == 1).Select(p => p[0]).ToArray();
            if (pre.Length != parser.NumData) return false;
            var sequence = parser.GetData("Sequence").ToArray();
            var post = parser.GetData("Post").Where(s => s.Length == 1).Select(p => p[0]).ToArray();
            if (post.Length != parser.NumData) return false;
            var mod = parser.GetData("Modifications").ToArray();
            var composition = parser.GetData("Composition").Select(Composition.Parse).ToArray();
            var proteinName = parser.GetData("ProteinName").ToArray();
            var proteinDesc = parser.GetData("ProteinDesc").ToArray();
            var proteinLength = parser.GetData("ProteinLength").Select(s => Convert.ToInt32(s)).ToArray();
            var start = parser.GetData("Start").Select(s => Convert.ToInt32(s)).ToArray();
            var end = parser.GetData("End").Select(s => Convert.ToInt32(s)).ToArray();
            var charge = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
            var mostAbundantIsotopeMz = parser.GetData("MostAbundantIsotopeMz").Select(Convert.ToDouble).ToArray();
            var mass = parser.GetData("Mass").Select(Convert.ToDouble).ToArray();
            var numMatchedFragment = parser.GetData("#MatchedFragments").Select(s => Convert.ToInt32(s)).ToArray();
            var qValue = parser.GetData("QValue").Select(Convert.ToDouble).ToArray();
            var pepQValue = parser.GetData("PepQValue").Select(Convert.ToDouble).ToArray();

            for (var i = 0; i < parser.NumData; i++)
            {
                var id = new MsPathFinderId(scan[i], pre[i], sequence[i], post[i], mod[i], 
                    composition[i], proteinName[i], proteinDesc[i], proteinLength[i],
                    start[i], end[i], charge[i], mostAbundantIsotopeMz[i], mass[i], 
                    numMatchedFragment[i], qValue[i], pepQValue[i])
                ;
                _idList.Add(id);

                if(!_scanNumToPrSm.ContainsKey(scan[i])) _scanNumToPrSm.Add(scan[i], id);
            }
            return true;
        }
コード例 #25
0
ファイル: TestTopDownScoring.cs プロジェクト: javamng/GitHUB
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            //const string rawFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SpecFiles\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
            const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            // Configure amino acid set
            var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var searchModifications = new List<SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
            const double filteringWindowSize = 1.1;
            const int isotopeOffsetTolerance = 2;
            var tolerance = new Tolerance(10);
            const int minCharge = 1;
            const int maxCharge = 20;
            var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            var aminoAcidSet = new AminoAcidSet();
            //var scorer = new MatchedPeakPostScorer(tolerance, minCharge, maxCharge);
            var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            var fileExt = new string[] {"IcTarget", "IcDecoy"};
            foreach (var ext in fileExt)
            {
                var resultFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}.tsv", ext);
                var parser = new TsvFileParser(resultFileName);
                var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
                var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
                var protSequences = parser.GetData("Sequence").ToArray();
                var modStrs = parser.GetData("Modifications").ToArray();
                var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
                var protMass = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();
                var outputFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}_Rescored.tsv", ext);

                using (var writer = new StreamWriter(outputFileName))
                {
                    writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

                    var lines = new string[parser.NumData];
                    
                    //for (var i = 0; i < parser.NumData; i++)
                    Parallel.For(0, parser.NumData, i =>
                    {
                        var scan = scans[i];
                        var charge = charges[i];
                        var protSequence = protSequences[i];
                        var modStr = modStrs[i];
                        var sequence = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);
                        Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));
                        var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;
                        Assert.True(ms2Spec != null);
                        var scores = scorer.GetScores(sequence, charge, scan);

                        var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge,
                            isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

                        var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance,
                            comparer);
                        var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                        var gf = new GeneratingFunction(graph);
                        gf.ComputeGeneratingFunction();

                        var specEvalue = gf.GetSpectralEValue(scores.Score);

                        var rowStr = parser.GetRows()[i];
                        var items = rowStr.Split('\t').ToArray();
                        var newRowStr = string.Join("\t", items, 0, 15);

                        //writer.WriteLine("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
                        lock (lines)
                        {
                            lines[i] = string.Format("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);    
                        }
                        //Console.WriteLine("{0}\t{1}\t{2}", items[0], scores.Score, specEvalue);
                    });

                    foreach (var line in lines) writer.WriteLine(line);
                }
                Console.WriteLine("Done");
            }
        }
コード例 #26
0
ファイル: TestBottomUpScoring.cs プロジェクト: javamng/GitHUB
        public void CompareIpaIc()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string resultDir = @"D:\Research\Data\UW\QExactive\Ic_NTT2_03";
            if (!Directory.Exists(resultDir))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, resultDir);
            }

            var targetPeptides = new HashSet<string>();
            foreach (var icResultFilePath in Directory.GetFiles(resultDir, "*DIA*IcTarget.tsv"))
            {
                var icParser = new TsvFileParser(icResultFilePath);
                foreach (var peptide in icParser.GetData("Sequence")) targetPeptides.Add(peptide);
            }

            const string ipaResultPath = @"D:\Research\Data\UW\QExactive\DIA_All_Summary.tsv";
            if (!File.Exists(ipaResultPath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, methodName);
            }            

            var parser = new TsvFileParser(ipaResultPath);
            var ipaPeptides = parser.GetPeptides(0.005).Select(p => p.Replace("C+57.021", "C"));
            var ipaOnly = 0;
            var both = 0;
            foreach (var ipaPeptide in ipaPeptides)
            {
                if (targetPeptides.Contains(ipaPeptide)) ++both;
                else
                {
                    ++ipaOnly;
                    Console.WriteLine(ipaPeptide);
                }
            }

            Console.WriteLine("Both: {0}, IpaOnly: {1}, Sum: {2}", both, ipaOnly, both+ipaOnly);
        }
コード例 #27
0
        public void CountMatchedProteins()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const int minTagLength = 3;

            var scanToProtein = new Dictionary<int, string>();
            var idTag = new Dictionary<int, bool>();
            const string resultFilePath = @"H:\Research\ProMex\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var parser = new TsvFileParser(resultFilePath);
            var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
            var proteinNames = parser.GetData("ProteinName").ToArray();
            var qValues = parser.GetData("QValue").Select(Convert.ToDouble).ToArray();
            for (var i = 0; i < qValues.Length; i++)
            {
                if (qValues[i] > 0.01) break;
                scanToProtein.Add(scans[i], proteinNames[i]);
                idTag.Add(scans[i], false);
            }

            const string rawFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

//            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";
//            const string fastaFilePath =
//                @"D:\Research\Data\CommonContaminants\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta";
            var fastaDb = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            Console.WriteLine("Sequence length: {0}", fastaDb.GetSequence().Length);

            const string tagFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.seqtag";
            if (!File.Exists(tagFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFilePath);
            }

            var hist = new Dictionary<int, int>();
            
            var scanSet = new HashSet<int>();
            HashSet<string> proteinSetForThisScan = null;
            var prevScan = -1;
            var totalNumMatches = 0L;
            var isHeader = true;
            foreach (var line in File.ReadAllLines(tagFilePath))
            {
                if (isHeader)
                {
                    isHeader = false;
                    continue;
                }

                var token = line.Split('\t');
                if (token.Length < 3) continue;
                var scan = Convert.ToInt32(token[0]);
                var proteinId = scanToProtein.ContainsKey(scan) ? scanToProtein[scan] : null;

                if (scan != prevScan)
                {
                    if (proteinSetForThisScan != null)
                    {
                        var numMatches = proteinSetForThisScan.Count;
                        int numOcc;
                        if (hist.TryGetValue(numMatches, out numOcc)) hist[numMatches] = numOcc + 1;
                        else hist.Add(numMatches, 1);
                    }

                    prevScan = scan;
                    proteinSetForThisScan = new HashSet<string>();
                }

                scanSet.Add(scan);
                var tag = token[1];
                if (tag.Length < minTagLength) continue;

                if (proteinSetForThisScan == null) continue;

                var numMatchesForThisTag = 0;
                foreach (var matchedProtein in searchableDb.FindAllMatchedSequenceIndices(tag)
                    .Select(index => fastaDb.GetProteinName(index)))
                {
                    proteinSetForThisScan.Add(matchedProtein);
                    ++numMatchesForThisTag;

                    if (proteinId != null && matchedProtein.Equals(proteinId))
                    {
                        idTag[scan] = true;
                    }
                }
                totalNumMatches += numMatchesForThisTag;
//                if (numMatchesForThisTag > 10)
//                {
//                    Console.WriteLine("{0}\t{1}", tag, numMatchesForThisTag);
//                }
            }

            if (proteinSetForThisScan != null)
            {
                var numMatches = proteinSetForThisScan.Count;
                int numOcc;
                if (hist.TryGetValue(numMatches, out numOcc)) hist[numMatches] = numOcc + 1;
                else hist.Add(numMatches, 1);
            }
            
            Console.WriteLine("AvgNumMatches: {0}", totalNumMatches/(float)scanSet.Count);
            Console.WriteLine("Histogram:");
            foreach (var entry in hist.OrderBy(e => e.Key))
            {
                Console.WriteLine("{0}\t{1}", entry.Key, entry.Value);
            }

            Console.WriteLine("NumId: {0}", idTag.Count);
            Console.WriteLine("NumIdByTag: {0}", idTag.Select(e => e.Value).Count(v => v));
        }
コード例 #28
0
        public List<ProteinSpectrumMatch> ReadMsAlignResult(string msAlignResultTablePath, int maxPrsm)
        {
            var parser = new TsvFileParser(msAlignResultTablePath);
            var prsmList = new List<ProteinSpectrumMatch>();

            for (var i = 0; i < parser.NumData; i++)
            {
                var sequence = parser.GetData("Peptide")[i];
                var scanNum = int.Parse(parser.GetData("Scan(s)")[i]);
                var mass = double.Parse(parser.GetData("Precursor_mass")[i]);
                var protNameDesc = parser.GetData("Protein_name")[i];

                var k = protNameDesc.IndexOf(' ');
                var protName = (k < 0) ? protNameDesc : protNameDesc.Substring(0, k);
                var protDesc = (k < 0) ? protNameDesc : protNameDesc.Substring(k+1);
                
                var firstResId = int.Parse(parser.GetData("First_residue")[i]);
                var lastResId = int.Parse(parser.GetData("Last_residue")[i]);
                var score = double.Parse(parser.GetData("#matched_fragment_ions")[i]);
                var sequenceText = parser.GetData("Peptide")[i];
                var charge = int.Parse(parser.GetData("Charge")[i]);
                var evalue = double.Parse(parser.GetData("E-value")[i]);

                var fdr = Double.Parse(parser.GetData("FDR")[i]);
                if (fdr > FdrCutoff) continue;

                var prsm = new ProteinSpectrumMatch(sequence, scanNum, mass, charge, protName, protDesc, firstResId, lastResId, score, ProteinSpectrumMatch.SearchTool.MsAlign)
                {
                    SequenceText = sequenceText,
                    SpectralEvalue = evalue,
                };

                prsmList.Add(prsm);

                if (prsmList.Count >= maxPrsm) break;
            }

            return prsmList;
        }
コード例 #29
0
        private void OutputMergedResult(StreamWriter writer, TsvFileParser parser, FastaDatabase fastaDb)
        {
            var scoreColumn = parser.GetData("#MatchedFragments") ?? parser.GetData("Score");
            var qValColumn = parser.GetData("QValue");

            for (var i = 0; i < parser.NumData; i++)
            {
                var sequence = parser.GetData("Sequence")[i];
                var scanNum = int.Parse(parser.GetData("Scan")[i]);
                var mass = double.Parse(parser.GetData("Mass")[i]);
                var protName = parser.GetData("ProteinName")[i];
                var protDesc = fastaDb.GetProteinDescription(protName);
                
                var firstResId = int.Parse(parser.GetData("Start")[i]);
                var lastResId = int.Parse(parser.GetData("End")[i]);
                var score = double.Parse(scoreColumn[i]);
                var mod = parser.GetData("Modifications")[i];
                var qvalue = (qValColumn != null) ? qValColumn[i] : "0";

                writer.Write(scanNum);
                writer.Write("\t");
                writer.Write(sequence);
                writer.Write("\t");
                writer.Write(mod);
                writer.Write("\t");
                writer.Write(mass);
                writer.Write("\t");
                writer.Write(protName);
                writer.Write("\t");
                writer.Write(protDesc);
                writer.Write("\t");
                writer.Write(firstResId);
                writer.Write("\t");
                writer.Write(lastResId);
                writer.Write("\t");
                writer.Write(score);
                writer.Write("\t");
                writer.Write(qvalue);
                writer.Write("\n");
            }

        }
コード例 #30
0
        public void CompareRtFusion()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // Fusion
            const string qeDdaResult = @"D:\Research\Data\UW\Fusion\DDA_Summary.tsv";
            const string qeDiaResult = @"D:\Research\Data\UW\Fusion\DIA_Summary.tsv";

            const string specFileDda = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";
            var ddaReader = new XCaliburReader(specFileDda);

            const string specFileDia = @"D:\Research\Data\UW\Fusion\WT_D_DIA_130412091220.raw";
            var diaReader = new XCaliburReader(specFileDia);

            const string resultPath1 = qeDdaResult;
            const string resultPath2 = qeDiaResult;

            var result1 = new TsvFileParser(resultPath1);
            var result2 = new TsvFileParser(resultPath2);

            const double pepQValueThreshold = 0.01;
            var vennDiagram = new VennDiagram<string>(result1.GetPeptides(pepQValueThreshold),
                                                      result2.GetPeptides(pepQValueThreshold));

            var intersectionPeptides = vennDiagram.Intersection;

            var result1Peptides = result1.GetData("Peptide");
            var result1ScanNums = result1.GetData("ScanNum");

            var result2Peptides = result2.GetData("Peptide");
            var result2ScanNums = result2.GetData("ScanNum");

            Console.WriteLine("Peptide\tScanNum1\tScanNum2\tRt1\tRt2");
            foreach (var peptide in intersectionPeptides)
            {
                var index1 = result1Peptides.IndexOf(peptide);
                var index2 = result2Peptides.IndexOf(peptide);

                var scanNum1 = Convert.ToInt32(result1ScanNums[index1]);
                var scanNum2 = Convert.ToInt32(result2ScanNums[index2]);

                var reader1 = ddaReader;
                var reader2 = diaReader;

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", peptide.Replace("C+57.021", "C"), scanNum1, scanNum2, reader1.RtFromScanNum(scanNum1), reader2.RtFromScanNum(scanNum2));
            }
        }