/// <summary>
/// Loads ProMex features and two sets of MSPathFinder results for each of the NdataSet datasets,
/// attaches to every feature the PrSMs lying strictly inside its scan range and within the mass
/// tolerance, aligns the features across datasets and writes a cross-tab file.
/// </summary>
public void TestFeatureAlignment()
{
    const string outFilePath = @"\\protoapps\UserData\Jungkap\Lewy\aligned\promex_crosstab_temp.tsv";

    // Example dataset name: CPTAC_Intact_CR32A_24Aug15_Bane_15-02-06-RZ
    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(10);
    var alignment = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(tolerance));

    for (var datasetIndex = 0; datasetIndex < NdataSet; datasetIndex++)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", PbfPath, GetDataSetNames(datasetIndex));
        var mspFile = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, GetDataSetNames(datasetIndex));
        var mspFile2 = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder2, GetDataSetNames(datasetIndex));
        var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, GetDataSetNames(datasetIndex));
        Console.WriteLine(rawFile);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        // Combine the identifications from both result folders before merging them.
        var primaryPrsms = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);
        var secondaryPrsms = prsmReader.LoadIdentificationResult(mspFile2, ProteinSpectrumMatch.SearchTool.MsPathFinder);
        primaryPrsms.AddRange(secondaryPrsms);
        var prsmList = MergePrsm(primaryPrsms);

        var features = LcMsFeatureAlignment.LoadProMexResult(datasetIndex, ms1FtFile, run);

        // The protein name doubles as the protein id.
        foreach (var prsm in prsmList)
        {
            prsm.ProteinId = prsm.ProteinName;
        }

        // Tag features by PrSMs
        foreach (var feature in features)
        {
            var massTol = tolerance.GetToleranceAsTh(feature.Mass);
            foreach (var prsm in prsmList)
            {
                var insideScanRange = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                if (insideScanRange && Math.Abs(feature.Mass - prsm.Mass) < massTol)
                {
                    feature.ProteinSpectrumMatches.Add(prsm);
                }
            }
        }

        alignment.AddDataSet(datasetIndex, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var datasetIndex = 0; datasetIndex < NdataSet; datasetIndex++)
    {
        alignment.FillMissingFeatures(datasetIndex);
        Console.WriteLine("{0} has been processed", GetDataSetNames(datasetIndex));
    }

    OutputCrossTabWithId(outFilePath, alignment);
}
/// <summary>
/// Runs low-resolution LC-MS feature detection over every MS1 scan of a fixed dataset and writes
/// the resulting features, ordered by mass, to a tab-separated .ms1ft file.
/// </summary>
public void TestMaxEntDeconvoluter()
{
    const string rawFileFolder = @"\\proto-11\MSXML_Cache\PBF_Gen_1_214\2015_4";
    const string fname = "WHIM2_LoHi_T2DD_HCD_GF07_02";
    var rawFile = string.Format(@"{0}\{1}.pbf", rawFileFolder, fname);
    var ms1ft = string.Format(@"\\protoapps\UserData\Jungkap\CompRef\lowRes\{0}.ms1ft", fname);

    var run = PbfLcMsRun.GetLcMsRun(rawFile, 1.4826, 0);
    var ms1ScanNums = run.GetMs1ScanVector();
    var featureFinder = new LcMsPeakMatrixLowResolution(run);

    foreach (var scan in ms1ScanNums)
    {
        // The per-scan return value is not used here; the features are collected
        // afterwards through GetLcMsFeatures().
        featureFinder.DetectMs1Features(scan);
    }

    var features = featureFinder.GetLcMsFeatures();

    // The using block closes (and flushes) the output file even when a write throws;
    // the previous explicit Close() was skipped on any exception.
    using (var writer = new StreamWriter(ms1ft))
    {
        // NOTE(review): the header labels columns 9 and 10 "MaxElutionTime" and "ElutionLength",
        // but the values written below are MinElutionTime and MaxElutionTime - confirm which is intended.
        writer.WriteLine("FeatureID\tMinScan\tMaxScan\tMinCharge\tMaxCharge\tMonoMass\tAbundance\tRepScan\tMaxElutionTime\tElutionLength\tLikelihoodRatio");

        var id = 1;
        foreach (var feature in features.OrderBy(f => f.Mass))
        {
            // The last column (likelihood ratio) is always written as 0.
            writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}", id, feature.MinScanNum, feature.MaxScanNum, feature.MinCharge, feature.MaxCharge, feature.Mass, feature.Abundance, feature.RepresentativeScanNum, feature.MinElutionTime, feature.MaxElutionTime, 0);
            id++;
        }
    }
}
/// <summary>
/// Scores a fixed protein sequence against one scan of a QC raw file using
/// <c>InformedTopDownScorer</c>. The test is ignored when the raw file is missing.
/// </summary>
public void TestSumMs2Spectra()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var specFilePath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"TestYufengData\NewQC_LongSep_29Sep14_141001104925.raw");
    var rawFileAvailable = File.Exists(specFilePath);
    if (!rawFileAvailable)
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + specFilePath);
    }

    const int minScanNum = 1289;    // const int maxScanNum = 1389;
    const int minCharge = 6;        // const int maxCharge = 6;
    const string sequence = "EIRGYRPPEPYKGKGVRYDDEEVRRKEAKKK";

    var aaSet = new AminoAcidSet();
    var run = PbfLcMsRun.GetLcMsRun(specFilePath);
    var productIonTolerance = new Tolerance(10);
    var scorer = new InformedTopDownScorer(run, aaSet, 1, minCharge - 1, productIonTolerance);

    scorer.GetScores(
        AminoAcid.ProteinNTerm,
        sequence,
        AminoAcid.ProteinCTerm,
        Composition.Parse("C(166) H(270) N(52) O(49) S(0)"),
        minCharge,
        minScanNum);
}
/// <summary>
/// Builds an <c>IsosFilter</c> from an Isos CSV file and its raw file, then prints the MS2 scan
/// numbers matching a fixed mass. The test is ignored when either input file is missing.
/// </summary>
public void TestIsosFilter()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string isosFilePath = @"H:\Research\QCShew_TopDown\Production\ICRTools\QC_Shew_Intact_26Sep14_Bane_C2Column3_Isos.csv";
    const string rawFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";

    // Check the inputs in the same order as before: Isos file first, then the raw file.
    foreach (var requiredFile in new[] { isosFilePath, rawFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
    var filter = new IsosFilter(run, new Tolerance(10), isosFilePath);

    var matchingScans = filter.GetMatchingMs2ScanNums(30261.68374);
    Console.WriteLine(string.Join("\t", matchingScans));
}
/// <summary>
/// Sums the MS1 spectra of a fixed scan range, keeps the peaks between m/z 1180 and 1192,
/// removes noise from them and displays the result. Ignored when the raw file is missing.
/// </summary>
public void TestAbpSumMs1Spectra()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var specFilePath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"TestYufengData\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw");
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + specFilePath);
    }

    const int firstScan = 5657;
    const int lastScan = 5699;
    const int maxPointsToShow = 50;

    var run = PbfLcMsRun.GetLcMsRun(specFilePath);
    if (run == null)
    {
        // Nothing to do when the run could not be loaded.
        return;
    }

    var peaksInWindow = run.GetSummedMs1Spectrum(firstScan, lastScan).GetPeakListWithin(1180.0, 1192.0);

    var denoisedPeaks = new List<Peak>();
    PeakListUtils.FilterNoise(peaksInWindow, ref denoisedPeaks);

    var denoisedSpectrum = new Spectrum(denoisedPeaks, 0);
    denoisedSpectrum.Display(maxPointsToShow);
}
/// <summary>
/// Sums the MS1 spectra between two fixed scan numbers of the test raw file and displays
/// the summed spectrum. Ignored when the raw file is missing.
/// </summary>
public void TestSumMs1Spectra()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var rawFileAvailable = File.Exists(TestRawFilePath);
    if (!rawFileAvailable)
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
    }

    const int firstScan = 46454;
    const int lastScan = 46661;
    const int maxPointsToShow = 50;

    var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath) as PbfLcMsRun;
    if (run != null)
    {
        run.GetSummedMs1Spectrum(firstScan, lastScan).Display(maxPointsToShow);
    }
}
/// <summary>
/// Measures how long it takes to compute, for every MS1 scan of the test raw file, the summed
/// MS1 spectrum over a window of +/- 5 scans (clamped to the run's scan range), and prints the
/// elapsed time. Ignored when the raw file is missing.
/// </summary>
public void TestRunningTimeSummingSpectra()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath, 1.4826, 1.4826) as PbfLcMsRun;

    // The 'as' cast yields null when the run is not a PbfLcMsRun; fail with a clear message
    // instead of a NullReferenceException on the first use below.
    Assert.NotNull(run, "GetLcMsRun did not return a PbfLcMsRun for " + TestRawFilePath);

    const int windowSize = 5;

    var sw = new Stopwatch();
    sw.Start();

    foreach (var scanNum in run.GetScanNumbers(1))
    {
        // Only the cost of the call is of interest; the summed spectrum itself is discarded.
        run.GetSummedMs1Spectrum(
            Math.Max(scanNum - windowSize, run.MinLcScan),
            Math.Min(scanNum + windowSize, run.MaxLcScan));
    }

    sw.Stop();
    Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
}
/// <summary>
/// Builds an <c>Ms1FtFilter</c> from a ProMex .ms1ft file and asserts that MS2 scan 5255 is
/// among the scans matching mass 8480.327609. Ignored when an input file is missing.
/// </summary>
public void TestProMexFilter()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string specFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TopDown\ProductionQCShew\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
    const string ms1FtPath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TopDown\ProductionQCShew\QC_Shew_Intact_26Sep14_Bane_C2Column3.ms1ft";

    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    // The run is loaded before the feature file is checked, as in the original flow.
    var run = PbfLcMsRun.GetLcMsRun(specFilePath, 0, 0);

    if (!File.Exists(ms1FtPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, ms1FtPath);
    }

    var massTolerance = new Tolerance(10);
    var filter = new Ms1FtFilter(run, massTolerance, ms1FtPath);

    // Console.WriteLine("ScanNums: {0}", string.Join("\t", filter.GetMatchingMs2ScanNums(8480.327609)));
    var scanIsMatched = filter.GetMatchingMs2ScanNums(8480.327609).Contains(5255);
    Assert.IsTrue(scanIsMatched);
}
/// <summary>
/// Loads the product spectrum for <paramref name="scan"/> from a fixed .pbf file, registers the
/// "oh->nh2" modification, builds an amino acid set with three search modifications and loads
/// <paramref name="sequenceStr"/> with it. The scoring step itself is currently commented out.
/// </summary>
/// <param name="scan">Scan number of the spectrum to load.</param>
/// <param name="sequenceStr">Sequence text passed to <c>LoadSequence</c>.</param>
public void ScorePSM(int scan, string sequenceStr)
{
    // Input file path
    const string specFilePath = @"\\protoapps\userdata\Wilkins\UIMF Files\9 pep mix 365 mTorr with ims 45V CID\9 pep mix 365 mTorr with ims 45V CID.pbf";

    var lcmsRun = PbfLcMsRun.GetLcMsRun(specFilePath);
    var productSpectrum = lcmsRun.GetSpectrum(scan) as ProductSpectrum;
    Assert.NotNull(productSpectrum);

    var ohToNh2Composition = new Composition(0, 1, 1, -1, 0);
    var modification = Modification.RegisterAndGetModification("oh->nh2", ohToNh2Composition);
    Assert.NotNull(modification);

    var searchModifications = new List<SearchModification>();
    searchModifications.Add(new SearchModification(Modification.Get("oh->nh2"), 'M', SequenceLocation.ProteinCTerm, true));
    searchModifications.Add(new SearchModification(Modification.Get("oh->nh2"), 'Q', SequenceLocation.ProteinCTerm, true));
    searchModifications.Add(new SearchModification(Modification.PyroGluE, 'R', SequenceLocation.ProteinNTerm, true));

    var aminoAcidSet = new AminoAcidSet(searchModifications, 1);
    var sequence = this.LoadSequence(sequenceStr, aminoAcidSet);

    // Scoring is disabled for now:
    //var scorerFactory = new ScorerFactory(new Tolerance(30, ToleranceUnit.Ppm), 1, 5);
    //var scorer = scorerFactory.GetScorer(productSpectrum);
    //var score = IonUtils.ScoreSequence(scorer, sequence);
    //Console.WriteLine(score);
}
/// <summary>
/// Reads identifications from <paramref name="idFile"/>, links them to the run loaded from
/// <paramref name="rawFile"/>, picks the first identification after sorting with
/// <c>PrSm.PrSmScoreComparer</c>, and builds a spectrum view model plus labeled B/Y fragment
/// ion view models for it. No assertions are made.
/// </summary>
/// <param name="rawFile">Path of the spectrum file to load.</param>
/// <param name="idFile">Path of the identification file to read.</param>
public void TestDisplaySpectra(string rawFile, string idFile)
{
    // Load identifications and the run they belong to
    var reader = IdFileReaderFactory.CreateReader(idFile);
    var ids = reader.Read();
    var lcms = PbfLcMsRun.GetLcMsRun(rawFile);
    var idList = ids.ToList();
    foreach (var identification in idList)
    {
        identification.LcMs = lcms;
        identification.RawFileName = Path.GetFileNameWithoutExtension(rawFile);
    }

    idList.Sort(new PrSm.PrSmScoreComparer());
    var prsm = idList[0];

    // View model under test
    var dialogService = new TestableMainDialogService();
    var spectrumViewModel = new SpectrumViewModel(dialogService, lcms);

    // Ion types to label: B and Y ions without neutral loss
    const int charge = 1;
    const int minCharge = 1;
    const int maxCharge = 2;
    var baseIonTypes = new List<BaseIonType> { BaseIonType.B, BaseIonType.Y };
    var neutralLosses = new List<NeutralLoss> { NeutralLoss.NoLoss };

    var ionTypeFactory = new IonTypeFactory(maxCharge);
    var ionTypes = IonUtils.GetIonTypes(ionTypeFactory, baseIonTypes, neutralLosses, minCharge, maxCharge);
    var ions = IonUtils.GetFragmentIonLabels(prsm.Sequence, charge, ionTypes);

    var ionVms = ions
        .Select(label => new LabeledIonViewModel(
            label.Composition,
            label.IonType,
            label.IsFragmentIon,
            lcms,
            label.PrecursorIon,
            label.IsChargeState,
            label.Index))
        .ToList();
}
/// <summary>
/// Reads identifications from <paramref name="tsvFile"/>, assigns the MS2 spectrum of the first
/// identification (after sorting with <c>PrSm.PrSmScoreComparer</c>) to a
/// <c>SpectrumPlotViewModel</c>, and asserts that the plot model exists and holds one series.
/// </summary>
/// <param name="rawFile">Path of the spectrum file to load.</param>
/// <param name="tsvFile">Path of the identification file to read.</param>
public void TestDisplaySpectrum(string rawFile, string tsvFile)
{
    // Load identifications and the run they belong to
    var reader = IdFileReaderFactory.CreateReader(tsvFile);
    var ids = reader.Read();
    var lcms = PbfLcMsRun.GetLcMsRun(rawFile);
    var idList = ids.ToList();
    foreach (var identification in idList)
    {
        identification.LcMs = lcms;
        identification.RawFileName = Path.GetFileNameWithoutExtension(rawFile);
    }

    // View model under test
    var dialogService = new TestableMainDialogService();
    var fragmentationSequence = new FragmentationSequenceViewModel();
    var spectrumPlotViewModel = new SpectrumPlotViewModel(dialogService, fragmentationSequence, 1.05, false);

    // Test data: first identification in comparer order
    idList.Sort(new PrSm.PrSmScoreComparer());
    var prsm = idList[0];

    // Test ions (not assigned to the view model at the moment)
    var ions = new ReactiveList<LabeledIonViewModel>();
    spectrumPlotViewModel.Spectrum = prsm.Ms2Spectrum;
    ////spectrumPlotViewModel.Ions = ions;

    // Plot should not be null
    var plotModel = spectrumPlotViewModel.PlotModel;
    Assert.NotNull(plotModel);

    // Plot should contain 1 stem series (the spectrum stem series)
    var hasSingleSeries = spectrumPlotViewModel.PlotModel.Series.Count == 1;
    Assert.True(hasSingleSeries);
}
/// <summary>
/// Loads an InformedProteomics <c>LcMsRun</c> for the given spectrum file, forwarding progress
/// updates to <c>Progress_ProgressChanged</c>.
/// </summary>
/// <param name="rawFilePath">Path of the mass spec data file handed to <c>PbfLcMsRun.GetLcMsRun</c>.</param>
/// <returns>The run returned by <c>PbfLcMsRun.GetLcMsRun</c>.</returns>
public LcMsRun GetLcMsData(string rawFilePath)
{
    var progressReporter = new Progress<ProgressData>();
    progressReporter.ProgressChanged += Progress_ProgressChanged;

    // An earlier, now disabled, version chose a MassSpecDataType from the file extension
    // (.raw -> XCaliburRun; .mzml and .mzml.gz -> MzMLFile) before calling GetLcMsRun.
    return PbfLcMsRun.GetLcMsRun(rawFilePath, progressReporter);
}
/// <summary>
/// Aligns ProMex features across the fourteen CPTAC CompRef runs (32A-33G). When an MSPathFinder
/// result file exists for a run, each feature is tagged with the PrSMs lying strictly inside its
/// scan range and within the mass tolerance. The aligned features are written as a cross-tab file.
/// </summary>
public void TestFeatureAlignment()
{
    const string outFilePath = @"\\protoapps\UserData\Jungkap\CompRef\aligned\promex_crosstab_temp.tsv";

    var runLabels = new[] { "32A", "32B", "32C", "32D", "32E", "32F", "32G", "33A", "33B", "33C", "33D", "33E", "33F", "33G" };
    var nDataset = runLabels.Length;

    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(10);
    var alignment = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

    for (var datasetIndex = 0; datasetIndex < nDataset; datasetIndex++)
    {
        var label = runLabels[datasetIndex];
        var rawFile = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ.pbf", RawFolder, label);
        var mspFile = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ_IcTda.tsv", MsPfFolder, label);
        var ms1FtFile = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ.ms1ft", Ms1FtFolder, label);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        var features = LcMsFeatureAlignment.LoadProMexResult(datasetIndex, ms1FtFile, run);

        if (File.Exists(mspFile))
        {
            var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

            // The protein name doubles as the protein id.
            foreach (var prsm in prsmList)
            {
                prsm.ProteinId = prsm.ProteinName;
            }

            // Tag features by PrSMs
            foreach (var feature in features)
            {
                var massTol = tolerance.GetToleranceAsMz(feature.Mass);
                foreach (var prsm in prsmList)
                {
                    var insideScanRange = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                    if (insideScanRange && Math.Abs(feature.Mass - prsm.Mass) < massTol)
                    {
                        feature.ProteinSpectrumMatches.Add(prsm);
                    }
                }
            }
        }

        alignment.AddDataSet(datasetIndex, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var datasetIndex = 0; datasetIndex < nDataset; datasetIndex++)
    {
        alignment.FillMissingFeatures(datasetIndex);
        Console.WriteLine("{0} has been processed", runLabels[datasetIndex]);
    }

    OutputCrossTabWithId(outFilePath, alignment, runLabels);
}
/// <summary>
/// For every raw file and every filtered feature, records three counts in the result matrices
/// (indexed [feature][file]): the matched spectra, the tags generated from those spectra, and
/// the value returned by <c>TagsInDatabase</c> for those tags.
/// </summary>
public void DoAnalysis()
{
    InitilizeMatrix(_spectrumMatchesMatrix);
    InitilizeMatrix(_tagsGeneratedMatrix);
    InitilizeMatrix(_dataBaseHitMatrix);
    GetFilteredFeatures(_filteredFile);

    for (var fileIndex = 0; fileIndex < _rawFiles.Length; fileIndex++)
    {
        Console.WriteLine("Processing File {0}...............", fileIndex);

        var run = PbfLcMsRun.GetLcMsRun(_rawFiles[fileIndex]);
        var ms2Scans = run.GetScanNumbers(2);
        Console.WriteLine("# of scans {0}", ms2Scans.Count);

        for (var featureIndex = 0; featureIndex < _filteredFeatures.Count; featureIndex++)
        {
            var feature = _filteredFeatures[featureIndex];

            var matchedSpectra = GetMatchedSpectrums(run, ms2Scans, feature, fileIndex);
            _spectrumMatchesMatrix[featureIndex][fileIndex] = matchedSpectra.Count;

            var tags = GetTags(matchedSpectra);
            _tagsGeneratedMatrix[featureIndex][fileIndex] = tags.Count;

            _dataBaseHitMatrix[featureIndex][fileIndex] = TagsInDatabase(tags);
        }
    }
}
/// <summary>
/// Aligns ProMex features across the given datasets. For each dataset the features of the
/// regular and the ".seqtag" .ms1ft files are combined; when an MSPathFinder result file exists,
/// each feature is tagged with the PrSMs lying strictly inside its scan range and within the
/// mass tolerance. The aligned features are written to <paramref name="outFilePath"/>.
/// </summary>
/// <param name="datasets">Dataset names used to build the input file names.</param>
/// <param name="mspfFolder">Folder holding the <c>_IcTda.tsv</c> result files.</param>
/// <param name="ms1ftFolder">Folder holding the <c>.ms1ft</c> and <c>.seqtag.ms1ft</c> files.</param>
/// <param name="outFilePath">Path of the cross-tab output file.</param>
private void AlignFeatures(List<string> datasets, string mspfFolder, string ms1ftFolder, string outFilePath)
{
    var nDataset = datasets.Count;
    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(12);
    var alignment = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(tolerance));

    for (var datasetIndex = 0; datasetIndex < nDataset; datasetIndex++)
    {
        var datasetName = datasets[datasetIndex];
        var rawFile = string.Format(@"{0}\{1}.pbf", PbfPath, datasetName);
        var mspFile = string.Format(@"{0}\{1}_IcTda.tsv", mspfFolder, datasetName);
        var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", ms1ftFolder, datasetName);
        var ms1FtFile2 = string.Format(@"{0}\{1}.seqtag.ms1ft", ms1ftFolder, datasetName);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        // Combine the features of both ProMex result files.
        var features = LcMsFeatureAlignment.LoadProMexResult(datasetIndex, ms1FtFile, run);
        var seqTagFeatures = LcMsFeatureAlignment.LoadProMexResult(datasetIndex, ms1FtFile2, run);
        features.AddRange(seqTagFeatures);

        if (File.Exists(mspFile))
        {
            var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

            // The protein name doubles as the protein id.
            for (var p = 0; p < prsmList.Count; p++)
            {
                prsmList[p].ProteinId = prsmList[p].ProteinName;
            }

            // Tag features by PrSMs
            for (var f = 0; f < features.Count; f++)
            {
                var feature = features[f];
                var massTol = tolerance.GetToleranceAsMz(feature.Mass);

                for (var p = 0; p < prsmList.Count; p++)
                {
                    var prsm = prsmList[p];
                    var insideScanRange = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                    if (insideScanRange && Math.Abs(feature.Mass - prsm.Mass) < massTol)
                    {
                        feature.ProteinSpectrumMatches.Add(prsm);
                    }
                }
            }
        }

        alignment.AddDataSet(datasetIndex, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var datasetIndex = 0; datasetIndex < nDataset; datasetIndex++)
    {
        alignment.FillMissingFeatures(datasetIndex);
        Console.WriteLine("{0} has been processed", datasets[datasetIndex]);
    }

    AnalysisCompRef.OutputCrossTabWithId(outFilePath, alignment, datasets);
}
/// <summary>
/// Scores a fixed sequence against MS2 scan 4658 of a QC raw file with
/// <c>MatchedPeakPostScorer</c>, printing the score and the elapsed time.
/// Ignored when the raw file is missing.
/// </summary>
public void TestMatchedPeakPostScorer()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    // Parameters
    const int ms2ScanNum = 4658;
    var productIonTolerance = new Tolerance(10);
    var scorer = new MatchedPeakPostScorer(productIonTolerance, 1, 10);
    var stopwatch = new System.Diagnostics.Stopwatch();

    var sequence = new Sequence("GYSIKDIIYQGEKSGVHNWQTLSGQNFYWHPDWLHIAEDLTGHKATASIQAEGTKATQNEAEQTIVKHLNKS", new AminoAcidSet());

    var specFilePath = Path.Combine(Utils.DEFAULT_SPEC_FILES_FOLDER, "QC_Shew_Intact_26Sep14_Bane_C2Column3.raw");
    var rawFileAvailable = File.Exists(specFilePath);
    if (!rawFileAvailable)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(specFilePath, 0, 0);
    var spec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
    Assert.True(spec != null);

    // The timed region covers the scoring call and the result line.
    stopwatch.Start();
    var score = scorer.ComputeScore(spec, sequence);
    Console.WriteLine("{0}\t{1}\t{2}", sequence, ms2ScanNum, score);
    stopwatch.Stop();

    Console.WriteLine(@"Elapsed Time: {0:f4} sec", stopwatch.Elapsed.TotalSeconds);
}
/// <summary>
/// Sums the MS1 spectra of a fixed scan range, filters noise from the summed spectrum,
/// deconvolutes it and displays the deconvoluted spectrum. Ignored when the raw file is missing.
/// </summary>
public void TestDeconvolution()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var rawFileAvailable = File.Exists(TestRawFilePath);
    if (!rawFileAvailable)
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
    }

    const int firstScan = 46454;   // 635.43
    const int lastScan = 46661;    // 638.90
    const int maxPointsToShow = 50;

    var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath) as PbfLcMsRun;
    if (run == null)
    {
        return;
    }

    var summedSpec = run.GetSummedMs1Spectrum(firstScan, lastScan);
    summedSpec.FilterNoise(50.0);
    // summedSpec.Display(maxPointsToShow);

    var tolerance = new Tolerance(10);
    var deconvoluted = ProductScorerBasedOnDeconvolutedSpectra.GetDeconvolutedSpectrum(summedSpec, 2, 45, tolerance, 0.9, 2);
    deconvoluted.Display(maxPointsToShow);
}
/// <summary>
/// Aligns ProMex features across the spike-in runs, tagging each feature with the PrSMs lying
/// strictly inside its scan range and within the mass tolerance, and writes a cross-tab file.
/// Filling of missing features is currently disabled.
/// </summary>
public void TestFeatureAlignment()
{
    const string outFilePath = @"\\protoapps\UserData\Jungkap\Quant\aligned\promex_crosstab.tsv";
    //const string outFolder = @"\\protoapps\UserData\Jungkap\CompRef\aligned";

    var runLabels = new string[]
    {
        "1x1", "1x2", "1x3", "1x4", "1x5",
        "5x1", "5x2", "5x3", "5x4", "5x5",
        "10x1", "10x2", "10x3", "10x4", "10x5",
    };
    var nDataset = runLabels.Length;

    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(10);
    var alignment = new LcMsFeatureAlignment(new SpikeInFeatureComparer(tolerance));

    // NOTE(review): the loop bound comes from runLabels, but the file names are built from
    // 'datasets', which is declared outside this method - confirm both have the same length.
    for (var datasetIndex = 0; datasetIndex < nDataset; datasetIndex++)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", RawFolder, datasets[datasetIndex]);
        var mspFile = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, datasets[datasetIndex]);
        var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, datasets[datasetIndex]);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);
        var features = LcMsFeatureAlignment.LoadProMexResult(datasetIndex, ms1FtFile, run);

        // The protein name doubles as the protein id.
        foreach (var prsm in prsmList)
        {
            prsm.ProteinId = prsm.ProteinName;
        }

        // Tag features by PrSMs
        foreach (var feature in features)
        {
            var massTol = tolerance.GetToleranceAsTh(feature.Mass);
            foreach (var prsm in prsmList)
            {
                var insideScanRange = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                if (insideScanRange && Math.Abs(feature.Mass - prsm.Mass) < massTol)
                {
                    feature.ProteinSpectrumMatches.Add(prsm);
                }
            }
        }

        alignment.AddDataSet(datasetIndex, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    /*
     * Disabled:
     * for (var i = 0; i < nDataset; i++)
     * {
     *     alignment.FillMissingFeatures(i);
     *     Console.WriteLine("{0} has been processed", runLabels[i]);
     * }
     */

    OutputCrossTabWithId(outFilePath, alignment, runLabels);
}
/// <summary>
/// Builds a feature map from the configured .pbf and results files and saves it as a PNG in the
/// temp folder, named after the .pbf file with a "_FeatureMap.png" suffix.
/// </summary>
private void FeatureMapGeneration()
{
    var imageName = Path.GetFileNameWithoutExtension(mFeatureMapPbfFile) + "_FeatureMap.png";
    var resultsFilePath = Path.Combine(Path.GetTempPath(), imageName);

    var run = PbfLcMsRun.GetLcMsRun(mFeatureMapPbfFile);
    var map = new LcMsFeatureMap(run, mFeatureMapResultsFile, 2000, 50000);
    map.SaveImage(resultsFilePath);

    Console.WriteLine("Image saved to " + resultsFilePath);
}
/// <summary>
/// Prints the theoretical isotopomer envelope and the observed isotope peak intensities for a
/// lipid whose composition has one hydrogen removed (the "minus one" fit).
/// </summary>
/// <param name="precursor">Scan number of the spectrum to read.</param>
/// <param name="adduct">Adduct assigned to the lipid (<c>AdductFull</c>).</param>
/// <param name="commonName">Common name assigned to the lipid.</param>
/// <param name="id">Identifier used only in the diagnostic message.</param>
/// <param name="rawFilePath">Path of the spectrum file to load.</param>
public void TestFitMinusOneScore(int precursor, string adduct, string commonName, string id, string rawFilePath)
{
    var lipid = new Lipid() { AdductFull = adduct, CommonName = commonName };
    var lipidTarget = lipid.CreateLipidTarget();
    var composition = lipidTarget.Composition;

    // Subtract one hydrogen to make this a minus1 fit score
    var compMinus1 = new Composition(composition.C, composition.H - 1, composition.N, composition.O, composition.S, composition.P);

    var lcmsRun = PbfLcMsRun.GetLcMsRun(rawFilePath);
    var spectrum = lcmsRun.GetSpectrum(precursor);

    const double relativeIntensityThreshold = 0.1;
    var tolerance = new Tolerance(30, ToleranceUnit.Ppm);

    // Get the values to use to calculate pearson correlation
    var observedPeaks = LipidUtil.GetAllIsotopePeaks(spectrum, compMinus1, tolerance, relativeIntensityThreshold);
    if (observedPeaks == null)
    {
        Console.WriteLine("Observed peaks is null for scan " + id);

        // Without peaks there is nothing to print; continuing would dereference null below.
        return;
    }

    var isotopomerEnvelope = IsoProfilePredictor.GetIsotopomerEnvelop(
        compMinus1.C,
        compMinus1.H,
        compMinus1.N,
        compMinus1.O,
        compMinus1.S);

    var observedIntensities = new double[observedPeaks.Length];
    for (var i = 0; i < observedPeaks.Length; i++)
    {
        var observedPeak = observedPeaks[i];

        // Missing peaks count as zero intensity; the float cast is kept from the original code.
        observedIntensities[i] = observedPeak != null ? (float)observedPeak.Intensity : 0.0;
    }

    Console.WriteLine("The theoretical y values are: ");
    foreach (var value in isotopomerEnvelope.Envolope)
    {
        Console.WriteLine(value + ", ");
    }

    Console.WriteLine("The observed peak intensity x values are: ");
    foreach (var value in observedIntensities)
    {
        Console.WriteLine(value + ", ");
    }
}
/// <summary>
/// Sums the observed isotope peak intensities of a fixed protein over charges 22 to 45 in a
/// summed MS1 spectrum, prints them next to the theoretical isotopomer profile (observed values
/// scaled by their maximum) and prints the Pearson correlation of the two profiles.
/// Ignored when the raw file is missing.
/// </summary>
public void TestSumIsoProfilesAcrossDifferentCharges()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath) as PbfLcMsRun;

    // The 'as' cast yields null when the run is not a PbfLcMsRun; fail with a clear message
    // instead of a NullReferenceException on the first use below.
    Assert.NotNull(run, "GetLcMsRun did not return a PbfLcMsRun for " + TestRawFilePath);

    //var spec = run.GetSpectrum(46452); // 635.37
    var spec = run.GetSummedMs1Spectrum(46437, 46466);
    var tolerance = new Tolerance(10);

    const string protSequence = "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";
    //const string annotation = "_." + protSequence + "._";

    var seqGraph = SequenceGraph.CreateGraph(new AminoAcidSet(), AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
    if (seqGraph == null)
    {
        return;
    }

    seqGraph.SetSink(0);
    var neutral = seqGraph.GetSinkSequenceCompositionWithH2O();

    var theoProfile = neutral.GetIsotopomerEnvelopeRelativeIntensities();
    var expProfile = new double[theoProfile.Length];

    for (var charge = 22; charge <= 45; charge++)
    {
        var ion = new Ion(neutral, charge);
        var isotopePeaks = spec.GetAllIsotopePeaks(ion, tolerance, 0.1);
        if (isotopePeaks == null)
        {
            // No peaks for this charge state.
            continue;
        }

        Assert.True(isotopePeaks.Length == theoProfile.Length);

        for (var i = 0; i < isotopePeaks.Length; i++)
        {
            if (isotopePeaks[i] != null)
            {
                expProfile[i] += isotopePeaks[i].Intensity;
            }
        }
    }

    // The maximum does not change while printing, so compute it once rather than per row.
    // Max() throws on an empty sequence; with an empty profile the loop below never runs anyway.
    var maxObserved = expProfile.Length > 0 ? expProfile.Max() : 0.0;

    for (var i = 0; i < theoProfile.Length; i++)
    {
        Console.WriteLine("{0}\t{1}\t{2}", neutral.GetIsotopeMass(i), theoProfile[i], expProfile[i] / maxObserved);
    }

    Console.WriteLine("Corr: " + FitScoreCalculator.GetPearsonCorrelation(theoProfile, expProfile));
}
/// <summary>
/// Prints the number of MS2 scans of each Lewy_intact_01 ... Lewy_intact_51 .pbf file.
/// </summary>
public void CountTagMatches()
{
    const int firstDataset = 1;
    const int lastDataset = 51;

    for (var datasetNumber = firstDataset; datasetNumber <= lastDataset; datasetNumber++)
    {
        // Dataset numbers are zero-padded to two digits.
        var dataName = "Lewy_intact_" + datasetNumber.ToString("D2");
        var filePath = string.Format(@"{0}\{1}.pbf", PbfPath, dataName);

        var ms2Scans = PbfLcMsRun.GetLcMsRun(filePath).GetScanNumbers(2);
        Console.WriteLine(ms2Scans.Count);
    }
}
/// <summary>
/// Implementation for <see cref="RunCommand"/>.
/// Loads the spectrum file, optionally restricts the MS/MS scans to the configured scan range,
/// creates the truncated FASTA file, runs the top-down search on a background task while
/// reporting progress, and finally raises <c>ReadyToClose</c>.
/// </summary>
/// <returns>The <see cref="Task"/>.</returns>
private async Task RunImplementation()
{
    this.SearchRunning = true;
    this.runSearchCancellationToken = new CancellationTokenSource();

    // Read spectrum file on a background task
    var lcms = await Task.Run(
        () => PbfLcMsRun.GetLcMsRun(this.SpectrumFilePath, 0, 0),
        this.runSearchCancellationToken.Token);

    // Get MS/MS scan numbers; stays null when no usable scan range is configured
    IEnumerable<int> ms2Scans = null;
    var hasScanRange = this.MaxScanNumber > 0 && (this.MaxScanNumber - this.MinScanNumber) >= 0;
    if (hasScanRange)
    {
        ms2Scans = lcms.GetScanNumbers(2)
                       .Where(scan => scan >= this.MinScanNumber && scan <= this.MaxScanNumber);
    }

    // Create truncated FASTA
    this.truncatedFastaDbFilePath = this.CreateTruncatedFastaFile();

    // Progress updater
    this.SearchProgressPercent = 0.0;
    this.SearchProgressStatus = "Searching...";
    var progress = new Progress<ProgressData>(update =>
    {
        this.SearchProgressPercent = update.Percent;
        this.SearchProgressStatus = update.Status;
    });

    // Run search
    var topDownLauncher = this.GetTopDownLauncher(ms2Scans);
    this.runSearchTask = Task.Run(
        () => topDownLauncher.RunSearch(
            IcParameters.Instance.IonCorrelationThreshold,
            this.runSearchCancellationToken.Token,
            progress),
        this.runSearchCancellationToken.Token);
    await this.runSearchTask;
    ////topDownLauncher.RunSearch(IcParameters.Instance.IonCorrelationThreshold);

    this.SearchRunning = false;
    this.runSearchCancellationToken = null;

    // Results delivered on close
    this.Status = true;
    var closeHandler = this.ReadyToClose;
    if (closeHandler != null)
    {
        closeHandler(this, EventArgs.Empty);
    }
}
/// <summary>
/// Runs the tag-based search on every .pbf file found in the CompRef data folder, using one
/// FASTA database and one modifications file. Ignored when the folder or either file is missing.
/// </summary>
public void TestTagBasedSearchCompRef()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string dataSetPath = @"D:\MassSpecFiles\CompRef";
    const string fastaFilePath = @"D:\MassSpecFiles\CompRef\ID_003278_4B4B3CB1.fasta";
    const string modsFilePath = @"D:\MassSpecFiles\CompRef\Mods.txt";

    if (!Directory.Exists(dataSetPath))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, dataSetPath);
    }

    // Same check order as before: modifications file first, then the FASTA file.
    foreach (var requiredFile in new[] { modsFilePath, fastaFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    // Dataset names are the .pbf file names without extension, in sorted order.
    var dataset = Directory.GetFiles(dataSetPath)
        .Where(fileName => fileName.EndsWith("pbf"))
        .Select(fileName => Path.GetFileNameWithoutExtension(fileName))
        .ToList();
    dataset.Sort();

    var fastaDb = new FastaDatabase(fastaFilePath);
    var tolerance = new Tolerance(10);
    var aaSet = new AminoAcidSet(modsFilePath);

    foreach (var datasetName in dataset)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", dataSetPath, datasetName);

        // ms1File, tagFilePath and minTagLength are only needed by the disabled tag parser below.
        var ms1File = string.Format(@"{0}\{1}.ms1ft", dataSetPath, datasetName);
        var tagFilePath = MassSpecDataReaderFactory.ChangeExtension(rawFile, ".seqtag");
        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        const int minTagLength = 5;
        //var tagParser = new SequenceTagParser(tagFilePath, minTagLength, 100);

        Console.WriteLine("-----------------{0}--------------------", rawFile);
        TestTagBasedSearch(run, fastaDb, tolerance, aaSet);
        Console.WriteLine("-----------------------------------------------------------------------");
    }
}
/// <summary>
/// Runs the tag-based search on a single .pbf file with a fixed FASTA database and
/// modifications file. Ignored when any of the three input files is missing.
/// </summary>
public void TestTagBasedSearch()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    // Alternative inputs used previously:
    //   @"H:\Research\Lewy\raw\Lewy_intact_01.raw"
    //   @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw"
    //   @"H:\Research\Yufeng\TopDownYufeng\raw\yufeng_column_test2.raw"
    //   @"H:\Research\Weijun_TopDown\raw\UC4_Intact_plasmaTest_90_6May15_Bane_14-09-01RZ.raw"
    //   @"H:\Research\Charles\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw"
    //   @"D:\MassSpecFiles\60k\NCR_50K_Test_24Jun15_Bane_15-02-02RZ.pbf"
    const string rawFilePath = @"D:\MassSpecFiles\60k\Yufeng_SampleTest1_150614113438.pbf";

    // Alternative database: @"D:\MassSpecFiles\60k\ID_004973_9BA6912F.fasta"
    const string fastaFilePath = @"D:\MassSpecFiles\60k\ID_003836_DA9CC1E4.fasta";

    // Alternatives: @"H:\Research\QCShew_TopDown\Production\Mods_Methyl.txt" or ""
    var modsFilePath = @"D:\MassSpecFiles\60k\Mods.txt";

    var rawFileAvailable = File.Exists(rawFilePath);
    if (!rawFileAvailable)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

    // The tag file path is only needed by the disabled tag parser.
    //const int minTagLength = 5;
    var tagFilePath = MassSpecDataReaderFactory.ChangeExtension(rawFilePath, ".seqtag");
    //var tagParser = new SequenceTagParser(tagFilePath, minTagLength, 100);

    var fastaFileAvailable = File.Exists(fastaFilePath);
    if (!fastaFileAvailable)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var tolerance = new Tolerance(10);

    var modsFileAvailable = File.Exists(modsFilePath);
    if (!modsFileAvailable)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, modsFilePath);
    }

    var aaSet = new AminoAcidSet(modsFilePath);

    TestTagBasedSearch(run, fastaDb, tolerance, aaSet);
}
/// <summary>
/// Loads a UIMF dataset and its MS2 scan numbers in preparation for an MSPathFinder search.
/// The search launcher itself is currently commented out.
/// </summary>
public void TestIanCidData()
{
    const string specFilePath = @"\\protoapps\userdata\Wilkins\UIMF Files\9 pep mix 365 mTorr with ims 45V CID\9 pep mix 365 mTorr with ims 45V CID.UIMF";
    const string fastaFilePath = @"\\protoapps\userdata\Wilkins\UIMF Files\melittin.fasta";
    const string outputDirectory = @"\\protoapps\userdata\Wilkins\UIMF Files\9 pep mix 365 mTorr with ims 45V CID";
    const double correlationThreshold = 0.7;

    // Search modifications: none are configured at the moment
    var searchModifications = new List<SearchModification>();
    var aminoAcidSet = new AminoAcidSet(searchModifications, 1);

    // Spectrum file and its MS2 scans
    var lcmsRun = PbfLcMsRun.GetLcMsRun(specFilePath);
    var scanNumbers = lcmsRun.GetScanNumbers(2);

    // MSPathFinder launcher (disabled)
    //var launcher = new IcTopDownLauncher(
    //    specFilePath,
    //    fastaFilePath,
    //    outputDirectory,
    //    aminoAcidSet)
    //{
    //    MinSequenceLength = 1,
    //    MaxSequenceLength = 100,
    //    MaxNumNTermCleavages = 1,
    //    MaxNumCTermCleavages = 0,
    //    MinPrecursorIonCharge = 1,
    //    MaxPrecursorIonCharge = 20,
    //    MinProductIonCharge = 1,
    //    MaxProductIonCharge = 20,
    //    MinSequenceMass = 1,
    //    MaxSequenceMass = 30000,
    //    PrecursorIonTolerancePpm = 100,
    //    ProductIonTolerancePpm = 100,
    //    RunTargetDecoyAnalysis = DatabaseSearchMode.Both,
    //    SearchMode = InternalCleavageType.NoInternalCleavage,
    //    MaxNumThreads = 4,
    //    ScanNumbers = scanNumbers,
    //    NumMatchesPerSpectrum = 1,
    //    TagBasedSearch = false,
    //};
    //launcher.RunSearch(correlationThreshold);
}
/// <summary>
/// Builds a feature map from a fixed .pbf file and .ms1ft file and saves it as "test.png"
/// in a fixed output folder.
/// </summary>
public void TestFeatureMapGeneration()
{
    Console.WriteLine("Testing Working");

    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFile = @"\\protoapps\UserData\Jungkap\Joshua\testData\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    const string testFile = @"\\protoapps\UserData\Jungkap\Joshua\FeatureMap\QC_Shew_Intact_26Sep14_Bane_C2Column3.ms1ft";

    // Output folder; it already ends with a separator, so the file name is simply appended.
    const string outputFolder = @"D:\MassSpecFiles\training\raw\";

    var run = PbfLcMsRun.GetLcMsRun(rawFile);
    var map = new LcMsFeatureMap(run, testFile, 2000, 50000);

    var imagePath = outputFolder + "test.png";
    map.SaveImage(imagePath);
}
/// <summary>
/// Loads a ProMex .ms1ft excerpt through <c>Ms1FtFilter</c> and verifies that the
/// MS2 scan numbers matching <paramref name="massToFind"/> are exactly the expected ones.
/// </summary>
/// <param name="massToFind">Mass passed to <c>GetMatchingMs2ScanNums</c>.</param>
/// <param name="expectedScanNumbers">Comma-separated list of scan numbers that must be found.</param>
public void TestReadingProMexFile(double massToFind, string expectedScanNumbers)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    // Resolve the test input files.
    var pbfFile = Utils.GetTestFile(methodName, Utils.GetPbfTestFilePath(false));
    var promexFile = Utils.GetTestFile(
        methodName,
        Path.Combine(Utils.DEFAULT_SPEC_FILES_FOLDER, "QC_Shew_Intact_26Sep14_Bane_C2Column3_Excerpt.ms1ft"));

    var run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);

    Console.Write("Reading ProMex results...");
    var ms1Filter = new Ms1FtFilter(run, new Tolerance(10), promexFile.FullName);
    Console.WriteLine();

    // Collect the matching scans in a sorted set: duplicates collapse and the
    // printed list comes out in ascending order.
    var foundScans = new SortedSet<int>();
    foreach (var foundScan in ms1Filter.GetMatchingMs2ScanNums(massToFind))
    {
        foundScans.Add(foundScan);
    }

    Console.WriteLine("Scans with mass {0}:", massToFind);
    Console.WriteLine(string.Join(",", foundScans));

    // Every expected scan must be present ...
    var confirmedCount = 0;
    foreach (var expectedText in expectedScanNumbers.Split(','))
    {
        var expectedScan = int.Parse(expectedText);
        if (!foundScans.Contains(expectedScan))
        {
            Assert.Fail("Did not find scan {0} for mass {1}", expectedScan, massToFind);
        }

        confirmedCount++;
    }

    // ... and there must be no matching scans beyond the expected ones.
    Assert.AreEqual(confirmedCount, foundScans.Count, "Found extra matching scan nums vs. what was expected");
}
/// <summary>
/// For each CPTAC spike-in dataset, sums the peak intensities of all spectra returned by
/// <c>GetMs1ScanVector()</c> and <c>GetScanNumbers(2)</c> and prints the dataset index and total.
/// Datasets whose .pbf or .ms1ft file is missing are skipped with a warning.
/// The feature alignment steps are currently disabled.
/// </summary>
public void TestCptacSpikeIn()
{
    const string featureFolder = @"D:\MassSpecFiles\CPTAC_spike_in\promex";
    const string rawFolder = @"D:\MassSpecFiles\CPTAC_spike_in\raw";

    // Only referenced by the disabled alignment code at the end of this method.
    var outFilePath = string.Format(@"{0}\aligned_features.tsv", featureFolder);
    var align = new LcMsFeatureAlignment(new LcMsFeatureAlignComparer(new Tolerance(10)));

    for (var i = 0; i < spikeDatasets.Length; i++)
    {
        var datasetName = spikeDatasets[i];
        var featureFilePath = string.Format(@"{0}\{1}.ms1ft", featureFolder, datasetName);
        var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, datasetName);

        // The raw file is checked first; the feature file only when the raw file exists.
        var missingPath = !File.Exists(rawFile)
            ? rawFile
            : (!File.Exists(featureFilePath) ? featureFilePath : null);

        if (missingPath != null)
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", missingPath);
            continue;
        }

        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        var totalIntensity = 0d;

        // Spectra from the MS1 scan vector.
        foreach (var scanNum in run.GetMs1ScanVector())
        {
            totalIntensity += run.GetSpectrum(scanNum).Peaks.Sum(p => p.Intensity);
        }

        // Spectra from GetScanNumbers(2).
        foreach (var scanNum in run.GetScanNumbers(2))
        {
            totalIntensity += run.GetSpectrum(scanNum).Peaks.Sum(p => p.Intensity);
        }

        Console.WriteLine("{0}\t{1}", i, totalIntensity);

        //var features = LcMsFeatureAlignment.LoadProMexResult(i, featureFilePath, run);
        //align.AddDataSet(i, features, run);
    }

    //align.AlignFeatures();
    //Console.WriteLine("# of aligned features = {0}", align.CountAlignedFeatures);
    //align.RefineAbundance();
    //OutputAlignmentResult(align, outFilePath, spikeDatasets);
}
/// <summary>
/// Extracts one LC-MS peak cluster from a CPTAC intact PBF file, prints the isotope
/// ratios of its theoretical envelope, and writes every observed envelope peak to a
/// tab-separated "_peaks.txt" file in the temp folder.
/// The test is ignored when the PBF file is not available.
/// </summary>
public void TestFeatureExampleForFigure()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string rawFile = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_1\CPTAC_Intact_rep6_15Jan15_Bane_C2-14-08-02RZ.pbf";
    //const string rawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

    if (!File.Exists(rawFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFile);
    var scorer = new LcMsFeatureLikelihood();
    var featureFinder = new LcMsPeakMatrix(run, scorer);

    // NOTE(review): arguments look like (mass, min charge, max charge, min scan, max scan) - confirm.
    var feature = featureFinder.GetLcMsPeakCluster(28061.6177, 20, 34, 7624, 7736);

    var resultsFilePath = Path.Combine(Path.GetTempPath(), Path.GetFileNameWithoutExtension(rawFile) + "_peaks.txt");

    // The using block guarantees the writer is flushed and closed even when an
    // exception is thrown while the envelopes are being enumerated.
    using (var writer = new StreamWriter(resultsFilePath))
    {
        writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n", "Scan", "Elution_Time", "Charge", "ID", "MZ", "Intensity", "Pearson_Correlation");

        var envelope = feature.TheoreticalEnvelope;
        foreach (var e in envelope.Isotopes)
        {
            Console.WriteLine(e.Ratio);
        }

        foreach (var env in feature.EnumerateEnvelopes())
        {
            var corr = env.PearsonCorrelation;
            for (var i = 0; i < envelope.Size; i++)
            {
                var peak = env.Peaks[i];
                if (peak == null)
                {
                    // No observed peak for this isotope index in this envelope.
                    continue;
                }

                writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n", env.ScanNum, run.GetElutionTime(env.ScanNum), env.Charge, i, peak.Mz, peak.Intensity, corr);
            }
        }
    }

    Console.WriteLine("Results are in file " + resultsFilePath);
}