/// <summary>
/// Loads the PBF test file, runs LC-MS feature finding over the bin range
/// returned by <c>Comparer.GetBinNumber(11180.33677)</c> (a single bin, since both
/// bounds use the same value), and prints each filtered feature to the console
/// as one tab-separated row. The method makes no assertions.
/// </summary>
public void TestLcMsFeatureFinder()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var pbfFilePath = Utils.GetPbfTestFilePath(false);
    var pbfFile = Utils.GetTestFile(methodName, pbfFilePath);

    var likelihoodScorer = new LcMsFeatureLikelihood();
    var timer = Stopwatch.StartNew();

    Console.WriteLine(@"Start loading MS1 data from {0}", pbfFile.FullName);
    var lcmsRun = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);
    var peakMatrix = new LcMsPeakMatrix(lcmsRun, likelihoodScorer);
    Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec", timer.ElapsedMilliseconds / 1000.0d);

    var featureContainer = new LcMsFeatureContainer(
        peakMatrix.Ms1Spectra,
        likelihoodScorer,
        new LcMsFeatureMergeComparer(new Tolerance(10)));

    // Both bounds come from the same value, so the loop below visits exactly one bin.
    var firstBin = peakMatrix.Comparer.GetBinNumber(11180.33677);
    var lastBin = peakMatrix.Comparer.GetBinNumber(11180.33677);
    double binCount = lastBin - firstBin + 1;

    Console.WriteLine(@"Start MS1 feature extraction.");
    timer.Restart();

    for (var bin = firstBin; bin <= lastBin; bin++)
    {
        featureContainer.Add(peakMatrix.FindFeatures(bin));

        // Report progress only on every 1000th bin after the first one.
        if (bin <= firstBin || (bin - firstBin) % 1000 != 0)
        {
            continue;
        }

        var secondsSoFar = timer.ElapsedMilliseconds / 1000.0d;
        var binsDone = bin - firstBin;
        var percentDone = binsDone / binCount * 100;
        Console.WriteLine(
            @"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
            percentDone,
            peakMatrix.Comparer.GetMzEnd(bin),
            secondsSoFar,
            featureContainer.NumberOfFeatures);
    }

    Console.WriteLine(@"Complete MS1 feature extraction.");
    Console.WriteLine(@" - Elapsed time = {0:0.000} sec", timer.ElapsedMilliseconds / 1000.0d);
    Console.WriteLine(@" - Number of extracted features = {0}", featureContainer.NumberOfFeatures);

    Console.WriteLine(@"Start selecting mutually independent features from feature network graph");

    // Restart() resets and starts the timer, which is the same end state as Stop() followed by Restart().
    timer.Restart();

    var rowId = 0;

    // The scan vector is not used below; the call is retained so the sequence of calls on the run is unchanged.
    lcmsRun.GetMs1ScanVector();

    foreach (var feature in featureContainer.GetFilteredFeatures(peakMatrix))
    {
        // Each value is followed by a tab; the final column is followed by a newline.
        Console.Write("{0}\t", rowId);
        Console.Write("{0}\t", feature.Mass);
        Console.Write("{0}\t", feature.MinScanNum);
        Console.Write("{0}\t", feature.MaxScanNum);
        Console.Write("{0}\t", feature.MinCharge);
        Console.Write("{0}\t", feature.MaxCharge);
        Console.Write("{0}\t", feature.RepresentativeScanNum);
        Console.Write("{0}\t", feature.RepresentativeMz);
        Console.Write("{0}\t", feature.RepresentativeCharge);
        Console.Write("{0}\t", feature.BestDistanceScoreAcrossCharge[0]);
        Console.Write("{0}\t", feature.BestDistanceScoreAcrossCharge[1]);
        Console.Write("{0}\t", feature.BestCorrelationScoreAcrossCharge[0]);
        Console.Write("{0}\t", feature.BestCorrelationScoreAcrossCharge[1]);
        Console.Write("{0}\t", feature.BestIntensityScoreAcrossCharge[0]);
        Console.Write("{0}\t", feature.BestIntensityScoreAcrossCharge[1]);
        Console.Write("{0}\t", feature.AbundanceDistributionAcrossCharge[0]);
        Console.Write("{0}\t", feature.AbundanceDistributionAcrossCharge[1]);
        Console.Write("{0}\t", feature.XicCorrelationBetweenBestCharges[0]);
        Console.Write("{0}\t", feature.XicCorrelationBetweenBestCharges[1]);
        Console.Write("{0}\n", feature.Score);

        rowId++;
    }
}
/// <summary>
/// Find features in the data file
/// </summary>
/// <remarks>
/// Writes one row per filtered feature to a tab-separated file named after the input file
/// with the <c>FileExtension</c> extension. When <c>Parameters.CsvOutput</c> is set, a CSV file
/// with one row per active envelope is written as well; when <c>Parameters.FeatureMapImage</c>
/// is set, <c>CreateFeatureMapImage</c> is called on the result.
/// Both writers are disposed even when feature filtering or writing throws.
/// </remarks>
/// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
/// <returns>
/// 0 if success; negative number on error:
/// -1 if no output directory was returned, -2 if the feature file already exists,
/// -3 if the input file does not exist, -4 if the run has no MS1 scans
/// </returns>
private int ProcessFile(string rawFile)
{
    var outDirectory = GetOutputDirectory(rawFile);
    if (string.IsNullOrEmpty(outDirectory))
    {
        return -1;
    }

    var baseName = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
    var ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
    var outCsvFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".csv");
    var pngFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

    if (File.Exists(ms1FeaturesFilePath))
    {
        Console.WriteLine(@"ProMex output already exists: {0}", ms1FeaturesFilePath);
        return -2;
    }

    if (!File.Exists(rawFile))
    {
        ShowErrorMessage(@"Cannot find input file: " + rawFile);
        return -3;
    }

    var stopwatch = Stopwatch.StartNew();
    Console.WriteLine(@"Start loading MS1 data from {0}", rawFile);
    var run = PbfLcMsRun.GetLcMsRun(rawFile);
    var featureFinder = new LcMsPeakMatrix(run, _likelihoodScorer, 1, 60, Parameters.MaxThreads);
    Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    if (run.GetMs1ScanVector().Length == 0)
    {
        ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
        return -4;
    }

    var comparer = featureFinder.Comparer;
    var container = new LcMsFeatureContainer(featureFinder.Ms1Spectra, _likelihoodScorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
    var minSearchMassBin = comparer.GetBinNumber(Parameters.MinSearchMass);
    var maxSearchMassBin = comparer.GetBinNumber(Parameters.MaxSearchMass);
    double totalMassBin = maxSearchMassBin - minSearchMassBin + 1;

    Console.WriteLine(@"Start MS1 feature extraction.");
    stopwatch.Restart();
    for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
    {
        var clusters = featureFinder.FindFeatures(binNum);
        container.Add(clusters);

        // Progress is reported on every 1000th bin after the first one.
        if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
        {
            var elapsed = (stopwatch.ElapsedMilliseconds) / 1000.0d;
            var processedBins = binNum - minSearchMassBin;
            var processedPercentage = ((double)processedBins / totalMassBin) * 100;
            Console.WriteLine(@"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}", processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed, container.NumberOfFeatures);
        }
    }

    Console.WriteLine(@"Complete MS1 feature extraction.");
    Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
    Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);

    Console.WriteLine(@"Start selecting mutually independent features from feature network graph");
    stopwatch.Restart();

    // write result files
    var writeCsv = Parameters.CsvOutput;
    var featureId = 0;

    // The using statements close both files on every exit path, including exceptions
    // thrown while filtering or writing. A null resource is allowed and simply skipped on dispose.
    using (var tsvWriter = new StreamWriter(ms1FeaturesFilePath))
    using (var csvWriter = writeCsv ? new StreamWriter(outCsvFilePath) : null)
    {
        tsvWriter.WriteLine(GetHeaderString(Parameters.ScoreReport));
        if (csvWriter != null)
        {
            csvWriter.WriteLine("scan_num,charge,abundance,mz,fit,monoisotopic_mw,FeatureID");
        }

        var filteredFeatures = container.GetFilteredFeatures(featureFinder);
        foreach (var feature in filteredFeatures)
        {
            featureId++;
            tsvWriter.WriteLine("{0}\t{1}", featureId, GetString(feature, Parameters.ScoreReport));

            if (csvWriter == null)
            {
                continue;
            }

            // Both values are the same for every envelope of this feature, so compute them once.
            var mostAbuIdx = feature.TheoreticalEnvelope.IndexOrderByRanking[0];
            var fitscore = 1.0 - feature.BestCorrelationScore;

            foreach (var envelope in feature.EnumerateEnvelopes())
            {
                var mostAbuPeak = envelope.Peaks[mostAbuIdx];
                if (mostAbuPeak == null || !mostAbuPeak.Active)
                {
                    continue;
                }

                // Invariant culture keeps '.' as the decimal separator; a decimal-comma
                // culture would otherwise inject extra commas into the CSV row.
                csvWriter.WriteLine(string.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "{0},{1},{2},{3},{4},{5},{6}",
                    envelope.ScanNum, envelope.Charge, envelope.Abundance, mostAbuPeak.Mz, fitscore, envelope.MonoMass, featureId));
            }
        }
    }

    Console.WriteLine(@"Complete feature filtration");
    Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
    Console.WriteLine(@" - Number of filtered features = {0}", featureId);
    Console.WriteLine(@" - ProMex output: {0}", ms1FeaturesFilePath);

    if (writeCsv)
    {
        Console.WriteLine(@" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
    }

    if (Parameters.FeatureMapImage)
    {
        CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);
    }

    return 0;
}
/// <summary>
/// Find features in the data file
/// </summary>
/// <remarks>
/// Output is a tab-separated feature file (input base name plus the <c>FileExtension</c> extension),
/// an optional per-envelope CSV file when <c>Parameters.CsvOutput</c> is set, and an optional
/// feature map image when <c>Parameters.FeatureMapImage</c> is set.
/// The output writers are wrapped in <c>using</c> so they are closed on every exit path.
/// </remarks>
/// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
/// <returns>
/// 0 if success; negative number on error:
/// -1 if no output directory was returned, -2 if the feature file already exists,
/// -3 if the input file does not exist, -4 if the run has no MS1 scans
/// </returns>
private int ProcessFile(string rawFile)
{
    var outDirectory = GetOutputDirectory(rawFile);
    if (string.IsNullOrEmpty(outDirectory))
    {
        return -1;
    }

    var baseName = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
    var ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
    var outCsvFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".csv");
    var pngFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

    if (File.Exists(ms1FeaturesFilePath))
    {
        Console.WriteLine(@"ProMex output already exists: {0}", ms1FeaturesFilePath);
        return -2;
    }

    if (!File.Exists(rawFile))
    {
        ShowErrorMessage(@"Cannot find input file: " + rawFile);
        return -3;
    }

    var stopwatch = Stopwatch.StartNew();
    Console.WriteLine(@"Start loading MS1 data from {0}", rawFile);
    var run = PbfLcMsRun.GetLcMsRun(rawFile);
    var featureFinder = new LcMsPeakMatrix(run, _likelihoodScorer, 1, 60, Parameters.MaxThreads);
    Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    if (run.GetMs1ScanVector().Length == 0)
    {
        ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
        return -4;
    }

    var comparer = featureFinder.Comparer;
    var container = new LcMsFeatureContainer(featureFinder.Ms1Spectra, _likelihoodScorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
    var minSearchMassBin = comparer.GetBinNumber(Parameters.MinSearchMass);
    var maxSearchMassBin = comparer.GetBinNumber(Parameters.MaxSearchMass);
    double totalMassBin = maxSearchMassBin - minSearchMassBin + 1;

    Console.WriteLine(@"Start MS1 feature extraction.");
    stopwatch.Restart();
    for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
    {
        var clusters = featureFinder.FindFeatures(binNum);
        container.Add(clusters);

        // Progress is reported on every 1000th bin after the first one.
        if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
        {
            var elapsed = (stopwatch.ElapsedMilliseconds) / 1000.0d;
            var processedBins = binNum - minSearchMassBin;
            var processedPercentage = ((double)processedBins / totalMassBin) * 100;
            Console.WriteLine(@"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}", processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed, container.NumberOfFeatures);
        }
    }

    Console.WriteLine(@"Complete MS1 feature extraction.");
    Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
    Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);

    Console.WriteLine(@"Start selecting mutually independent features from feature network graph");
    stopwatch.Restart();

    // write result files
    var csvRequested = Parameters.CsvOutput;
    var featureId = 0;

    // Previously the writers were closed only after a fully successful pass, so an exception
    // in filtering or writing left the files open. A null using-resource is legal and ignored.
    using (var tsvWriter = new StreamWriter(ms1FeaturesFilePath))
    using (var csvWriter = csvRequested ? new StreamWriter(outCsvFilePath) : null)
    {
        tsvWriter.WriteLine(GetHeaderString(Parameters.ScoreReport));
        if (csvWriter != null)
        {
            csvWriter.WriteLine("scan_num,charge,abundance,mz,fit,monoisotopic_mw,FeatureID");
        }

        foreach (var feature in container.GetFilteredFeatures(featureFinder))
        {
            featureId++;
            tsvWriter.WriteLine("{0}\t{1}", featureId, GetString(feature, Parameters.ScoreReport));

            if (csvWriter == null)
            {
                continue;
            }

            // Constant across the feature's envelopes, so evaluated once per feature.
            var mostAbuIdx = feature.TheoreticalEnvelope.IndexOrderByRanking[0];
            var fitscore = 1.0 - feature.BestCorrelationScore;

            foreach (var envelope in feature.EnumerateEnvelopes())
            {
                var mostAbuPeak = envelope.Peaks[mostAbuIdx];
                if (mostAbuPeak == null || !mostAbuPeak.Active)
                {
                    continue;
                }

                // Format with the invariant culture so numeric values never contain ','
                // (the column separator) regardless of the machine's regional settings.
                csvWriter.WriteLine(string.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "{0},{1},{2},{3},{4},{5},{6}",
                    envelope.ScanNum, envelope.Charge, envelope.Abundance, mostAbuPeak.Mz, fitscore, envelope.MonoMass, featureId));
            }
        }
    }

    Console.WriteLine(@"Complete feature filtration");
    Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
    Console.WriteLine(@" - Number of filtered features = {0}", featureId);
    Console.WriteLine(@" - ProMex output: {0}", ms1FeaturesFilePath);

    if (csvRequested)
    {
        Console.WriteLine(@" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
    }

    if (Parameters.FeatureMapImage)
    {
        CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);
    }

    return 0;
}
/// <summary>
/// Runs LC-MS feature finding on a locally stored PBF file for the bin range returned by
/// <c>Comparer.GetBinNumber(11180.33677)</c> (one bin, since both bounds use the same value)
/// and prints each filtered feature as a tab-separated console row.
/// The test is ignored when the data file is not present, and it makes no assertions.
/// </summary>
public void TestLcMsFeatureFinder()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Other datasets that have been used with this test:
    //   D:\MassSpecFiles\CompRef\CPTAC_Intact_CR_Pool_2_25Jun15_Bane_15-02-02RZ.pbf
    //   D:\MassSpecFiles\IMER\Dey_IMERblast_01_08May14_Alder_14-01-33.pbf
    //   \\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_3\MZ20150729FG_WT1.pbf
    const string pbfPath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

    if (!File.Exists(pbfPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pbfPath);
    }

    var likelihood = new LcMsFeatureLikelihood();
    var watch = Stopwatch.StartNew();

    Console.WriteLine(@"Start loading MS1 data from {0}", pbfPath);
    var lcmsRun = PbfLcMsRun.GetLcMsRun(pbfPath);
    var peakMatrix = new LcMsPeakMatrix(lcmsRun, likelihood);
    Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec", watch.ElapsedMilliseconds / 1000.0d);

    var mergeComparer = new LcMsFeatureMergeComparer(new Tolerance(10));
    var features = new LcMsFeatureContainer(peakMatrix.Ms1Spectra, likelihood, mergeComparer);

    // Lower and upper bound are derived from the same value.
    var lowBin = peakMatrix.Comparer.GetBinNumber(11180.33677);
    var highBin = peakMatrix.Comparer.GetBinNumber(11180.33677);
    double binSpan = highBin - lowBin + 1;

    Console.WriteLine(@"Start MS1 feature extraction.");
    watch.Restart();

    for (var bin = lowBin; bin <= highBin; bin++)
    {
        features.Add(peakMatrix.FindFeatures(bin));

        // Skip the progress line unless this is a later bin at a multiple of 1000 from the start.
        if (bin <= lowBin || (bin - lowBin) % 1000 != 0)
        {
            continue;
        }

        var seconds = watch.ElapsedMilliseconds / 1000.0d;
        var done = bin - lowBin;
        var percent = done / binSpan * 100;
        Console.WriteLine(
            @"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
            percent,
            peakMatrix.Comparer.GetMzEnd(bin),
            seconds,
            features.NumberOfFeatures);
    }

    Console.WriteLine(@"Complete MS1 feature extraction.");
    Console.WriteLine(@" - Elapsed time = {0:0.000} sec", watch.ElapsedMilliseconds / 1000.0d);
    Console.WriteLine(@" - Number of extracted features = {0}", features.NumberOfFeatures);

    Console.WriteLine(@"Start selecting mutually independent features from feature network graph");

    // Restart() leaves the stopwatch in the same state as Stop() followed by Restart().
    watch.Restart();

    var featureIndex = 0;

    // Result is not consumed; the call is kept so the run sees the same sequence of calls.
    lcmsRun.GetMs1ScanVector();

    var stdout = Console.Out;
    foreach (var feature in features.GetFilteredFeatures(peakMatrix))
    {
        stdout.Write(featureIndex);
        stdout.Write('\t');
        stdout.Write(feature.Mass);
        stdout.Write('\t');
        stdout.Write(feature.MinScanNum);
        stdout.Write('\t');
        stdout.Write(feature.MaxScanNum);
        stdout.Write('\t');
        stdout.Write(feature.MinCharge);
        stdout.Write('\t');
        stdout.Write(feature.MaxCharge);
        stdout.Write('\t');
        stdout.Write(feature.RepresentativeScanNum);
        stdout.Write('\t');
        stdout.Write(feature.RepresentativeMz);
        stdout.Write('\t');
        stdout.Write(feature.RepresentativeCharge);
        stdout.Write('\t');

        // Per-charge score pairs (index 0 and 1 of each array).
        stdout.Write(feature.BestDistanceScoreAcrossCharge[0]);
        stdout.Write('\t');
        stdout.Write(feature.BestDistanceScoreAcrossCharge[1]);
        stdout.Write('\t');
        stdout.Write(feature.BestCorrelationScoreAcrossCharge[0]);
        stdout.Write('\t');
        stdout.Write(feature.BestCorrelationScoreAcrossCharge[1]);
        stdout.Write('\t');
        stdout.Write(feature.BestIntensityScoreAcrossCharge[0]);
        stdout.Write('\t');
        stdout.Write(feature.BestIntensityScoreAcrossCharge[1]);
        stdout.Write('\t');
        stdout.Write(feature.AbundanceDistributionAcrossCharge[0]);
        stdout.Write('\t');
        stdout.Write(feature.AbundanceDistributionAcrossCharge[1]);
        stdout.Write('\t');
        stdout.Write(feature.XicCorrelationBetweenBestCharges[0]);
        stdout.Write('\t');
        stdout.Write(feature.XicCorrelationBetweenBestCharges[1]);
        stdout.Write('\t');

        stdout.Write(feature.Score);
        stdout.Write('\n');

        featureIndex++;
    }
}
/// <summary>
/// Exercises the LC-MS feature finder against a PBF file on the local disk.
/// Features are extracted for the bin range given by <c>Comparer.GetBinNumber(11180.33677)</c>
/// (the same value is used for both ends of the range), filtered, and dumped to the
/// console as tab-separated rows. The test is ignored if the file is missing and
/// performs no assertions.
/// </summary>
public void TestLcMsFeatureFinder()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Alternative input files used previously:
    //   D:\MassSpecFiles\CompRef\CPTAC_Intact_CR_Pool_2_25Jun15_Bane_15-02-02RZ.pbf
    //   D:\MassSpecFiles\IMER\Dey_IMERblast_01_08May14_Alder_14-01-33.pbf
    //   \\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_3\MZ20150729FG_WT1.pbf
    const string inputPbf = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

    if (!File.Exists(inputPbf))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, inputPbf);
    }

    var featureScorer = new LcMsFeatureLikelihood();
    var clock = Stopwatch.StartNew();

    Console.WriteLine(@"Start loading MS1 data from {0}", inputPbf);
    var msRun = PbfLcMsRun.GetLcMsRun(inputPbf);
    var matrix = new LcMsPeakMatrix(msRun, featureScorer);
    Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec", clock.ElapsedMilliseconds / 1000.0d);

    var collected = new LcMsFeatureContainer(
        matrix.Ms1Spectra,
        featureScorer,
        new LcMsFeatureMergeComparer(new Tolerance(10)));

    var startBin = matrix.Comparer.GetBinNumber(11180.33677);
    var endBin = matrix.Comparer.GetBinNumber(11180.33677);
    double numBins = endBin - startBin + 1;

    Console.WriteLine(@"Start MS1 feature extraction.");
    clock.Restart();

    for (var currentBin = startBin; currentBin <= endBin; currentBin++)
    {
        collected.Add(matrix.FindFeatures(currentBin));

        var atReportingStep = currentBin > startBin && (currentBin - startBin) % 1000 == 0;
        if (!atReportingStep)
        {
            continue;
        }

        var elapsedSeconds = clock.ElapsedMilliseconds / 1000.0d;
        var completedBins = currentBin - startBin;
        var completedPercent = completedBins / numBins * 100;
        Console.WriteLine(
            @"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
            completedPercent,
            matrix.Comparer.GetMzEnd(currentBin),
            elapsedSeconds,
            collected.NumberOfFeatures);
    }

    Console.WriteLine(@"Complete MS1 feature extraction.");
    Console.WriteLine(@" - Elapsed time = {0:0.000} sec", clock.ElapsedMilliseconds / 1000.0d);
    Console.WriteLine(@" - Number of extracted features = {0}", collected.NumberOfFeatures);

    Console.WriteLine(@"Start selecting mutually independent features from feature network graph");

    // A single Restart() has the same effect as stopping and then restarting.
    clock.Restart();

    var rowNumber = 0;

    // Return value is unused; the call itself is kept to mirror the original call sequence.
    msRun.GetMs1ScanVector();

    foreach (var feature in collected.GetFilteredFeatures(matrix))
    {
        // Gather the row's columns in output order, then emit them tab-separated.
        var columns = new object[]
        {
            rowNumber,
            feature.Mass,
            feature.MinScanNum,
            feature.MaxScanNum,
            feature.MinCharge,
            feature.MaxCharge,
            feature.RepresentativeScanNum,
            feature.RepresentativeMz,
            feature.RepresentativeCharge,
            feature.BestDistanceScoreAcrossCharge[0],
            feature.BestDistanceScoreAcrossCharge[1],
            feature.BestCorrelationScoreAcrossCharge[0],
            feature.BestCorrelationScoreAcrossCharge[1],
            feature.BestIntensityScoreAcrossCharge[0],
            feature.BestIntensityScoreAcrossCharge[1],
            feature.AbundanceDistributionAcrossCharge[0],
            feature.AbundanceDistributionAcrossCharge[1],
            feature.XicCorrelationBetweenBestCharges[0],
            feature.XicCorrelationBetweenBestCharges[1],
            feature.Score
        };

        for (var col = 0; col < columns.Length; col++)
        {
            Console.Write(columns[col]);

            // Tab after every column except the last, which ends the row.
            Console.Write(col < columns.Length - 1 ? "\t" : "\n");
        }

        rowNumber++;
    }
}
/// <summary>
/// Find features in the data file
/// </summary>
/// <remarks>
/// Loads the run, extracts features for every bin between the bins of
/// <c>Parameters.MinSearchMass</c> and <c>Parameters.MaxSearchMass</c>, then hands the
/// collected features to <c>FilterAndOutputFeatures</c> and, if requested, to
/// <c>CreateFeatureMapImage</c>.
/// </remarks>
/// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
/// <returns>
/// 0 if success; negative number on error:
/// -1 if no output directory was returned, -2 if the feature file already exists,
/// -3 if the input file does not exist, -4 if the run has no MS1 scans,
/// -5 if the peak matrix reports zero MS1 peaks
/// </returns>
private int ProcessFile(string rawFile)
{
    var outputFolder = GetOutputDirectory(rawFile);
    if (string.IsNullOrEmpty(outputFolder))
    {
        return -1;
    }

    // All output files share the input's base name.
    var datasetName = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
    var ms1FeaturesFilePath = Path.Combine(outputFolder, datasetName + "." + FileExtension);
    var outCsvFilePath = Path.Combine(outputFolder, datasetName + "_" + FileExtension + ".csv");
    var pngFilePath = Path.Combine(outputFolder, datasetName + "_" + FileExtension + ".png");

    if (File.Exists(ms1FeaturesFilePath))
    {
        ShowErrorMessage("ProMex output already exists: " + ms1FeaturesFilePath);
        return -2;
    }

    if (!File.Exists(rawFile))
    {
        ShowErrorMessage("Cannot find input file: " + rawFile);
        return -3;
    }

    var timer = Stopwatch.StartNew();
    Console.WriteLine("Start loading MS1 data from {0}", rawFile);
    var lcmsRun = PbfLcMsRun.GetLcMsRun(rawFile);
    var peakMatrix = new LcMsPeakMatrix(lcmsRun, _likelihoodScorer, 1, 60, Parameters.MaxThreads);
    Console.WriteLine("Complete loading MS1 data. Elapsed Time = {0:0.000} sec", timer.ElapsedMilliseconds / 1000.0d);

    if (lcmsRun.GetMs1ScanVector().Length == 0)
    {
        ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
        return -4;
    }

    if (peakMatrix.Ms1PeakCount == 0)
    {
        ShowErrorMessage(@"Data file has no MS1 peaks: " + Path.GetFileName(rawFile));
        return -5;
    }

    var binComparer = peakMatrix.Comparer;
    var mergeComparer = new LcMsFeatureMergeComparer(new Tolerance(10));
    var featureContainer = new LcMsFeatureContainer(peakMatrix.Ms1Spectra, _likelihoodScorer, mergeComparer);

    var firstBin = binComparer.GetBinNumber(Parameters.MinSearchMass);
    var lastBin = binComparer.GetBinNumber(Parameters.MaxSearchMass);
    double binCount = lastBin - firstBin + 1;

    Console.WriteLine("Start MS1 feature extraction.");
    timer.Restart();

    for (var bin = firstBin; bin <= lastBin; bin++)
    {
        featureContainer.Add(peakMatrix.FindFeatures(bin));

        // Only every 1000th bin after the first produces a progress line.
        if (bin <= firstBin || (bin - firstBin) % 1000 != 0)
        {
            continue;
        }

        var secondsSoFar = timer.ElapsedMilliseconds / 1000.0d;
        var binsDone = bin - firstBin;
        var percentDone = binsDone / binCount * 100;
        Console.WriteLine(
            "Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
            percentDone,
            peakMatrix.Comparer.GetMzEnd(bin),
            secondsSoFar,
            featureContainer.NumberOfFeatures);
    }

    Console.WriteLine("Complete MS1 feature extraction.");
    Console.WriteLine(" - Elapsed time = {0:0.000} sec", timer.ElapsedMilliseconds / 1000.0d);
    Console.WriteLine(" - Number of extracted features = {0}", featureContainer.NumberOfFeatures);

    Console.WriteLine("Start selecting mutually independent features from feature network graph");
    timer.Restart();

    // The helper's return value is reported below as the number of filtered features.
    var filteredCount = FilterAndOutputFeatures(featureContainer, peakMatrix, outCsvFilePath, ms1FeaturesFilePath);

    Console.WriteLine("Complete feature filtration");
    Console.WriteLine(" - Elapsed time = {0:0.000} sec", timer.ElapsedMilliseconds / 1000.0d);
    Console.WriteLine(" - Number of filtered features = {0}", filteredCount);
    Console.WriteLine(" - ProMex output: {0}", ms1FeaturesFilePath);

    if (Parameters.CsvOutput)
    {
        Console.WriteLine(" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
    }

    if (Parameters.FeatureMapImage)
    {
        CreateFeatureMapImage(lcmsRun, ms1FeaturesFilePath, pngFilePath);
    }

    return 0;
}