Example #1
0
        /// <summary>
        /// Pass the entries produced by <c>FilterFeaturesWithOutput</c> to
        /// <c>Ms1FtEntry.WriteToFile</c> and report how many features were counted.
        /// </summary>
        /// <param name="container">Feature container forwarded to <c>FilterFeaturesWithOutput</c></param>
        /// <param name="featureFinder">Peak matrix forwarded to <c>FilterFeaturesWithOutput</c></param>
        /// <param name="outCsvFilePath">Csv output path forwarded to <c>FilterFeaturesWithOutput</c></param>
        /// <param name="ms1FeaturesFilePath">Path handed to <c>Ms1FtEntry.WriteToFile</c></param>
        /// <returns>The value held in the counter cell after <c>WriteToFile</c> returns</returns>
        private int FilterAndOutputFeatures(LcMsFeatureContainer container, LcMsPeakMatrix featureFinder, string outCsvFilePath, string ms1FeaturesFilePath)
        {
            // One-element array used as a mutable cell that the callee updates
            var counterCell = new[] { 0 };

            var entries = FilterFeaturesWithOutput(container, featureFinder, outCsvFilePath, counterCell);

            // NOTE(review): the counter is presumably only final once WriteToFile has enumerated 'entries' - confirm
            Ms1FtEntry.WriteToFile(ms1FeaturesFilePath, entries, Parameters.ScoreReport);

            return counterCell[0];
        }
Example #2
0
        /// <summary>
        /// Write the filtered features to the tab-delimited features file and, when
        /// <c>Parameters.CsvOutput</c> is set, write one csv row per active envelope.
        /// </summary>
        /// <param name="container">Container from which the filtered features are obtained</param>
        /// <param name="featureFinder">Peak matrix passed to <c>GetFilteredFeatures</c></param>
        /// <param name="outCsvFilePath">Path of the csv file; only opened when csv output is enabled</param>
        /// <param name="ms1FeaturesFilePath">Path of the tab-delimited features file</param>
        /// <returns>Number of features written (the last feature ID assigned; IDs start at 1)</returns>
        private int FilterAndOutputFeaturesOld(LcMsFeatureContainer container, LcMsPeakMatrix featureFinder, string outCsvFilePath, string ms1FeaturesFilePath)
        {
            var featureId = 0;

            // Read the option once so that writer creation and every later check agree
            var writeCsv = Parameters.CsvOutput;

            // Both writers are created by the using statements themselves, so neither file handle
            // is left open if opening the other file (or any later write) throws.
            // A null resource is permitted in a using statement; it is simply not disposed.
            using (var tsvWriter = new StreamWriter(ms1FeaturesFilePath))
            using (var csvWriter = writeCsv
                                       ? new StreamWriter(new FileStream(outCsvFilePath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite))
                                       : null)
            {
                tsvWriter.WriteLine(GetHeaderString(Parameters.ScoreReport));

                if (csvWriter != null)
                {
                    csvWriter.WriteLine("scan_num,charge,abundance,mz,fit,monoisotopic_mw,FeatureID");
                }

                var filteredFeatures = container.GetFilteredFeatures(featureFinder);
                foreach (var feature in filteredFeatures)
                {
                    featureId++;
                    tsvWriter.WriteLine("{0}\t{1}", featureId, GetString(feature, Parameters.ScoreReport));

                    // Index of the top-ranked entry in the theoretical envelope
                    var mostAbuIdx = feature.TheoreticalEnvelope.IndexOrderByRanking[0];

                    if (csvWriter == null)
                    {
                        continue;
                    }

                    foreach (var envelope in feature.EnumerateEnvelopes())
                    {
                        // Skip envelopes whose peak at that index is missing or inactive
                        var mostAbuPeak = envelope.Peaks[mostAbuIdx];
                        if (mostAbuPeak == null || !mostAbuPeak.Active)
                        {
                            continue;
                        }

                        var fitscore = 1.0 - feature.BestCorrelationScore;
                        csvWriter.WriteLine("{0},{1},{2},{3},{4},{5},{6}", envelope.ScanNum, envelope.Charge, envelope.Abundance,
                                            mostAbuPeak.Mz, fitscore, envelope.MonoMass, featureId);
                    }
                }
            }

            return(featureId);
        }
Example #3
0
        /// <summary>
        /// Create <see cref="Ms1FtEntry"/> objects for the features, and output to csv (if desired).
        /// </summary>
        /// <remarks>
        /// This is an iterator: nothing runs (including resetting the counter and creating the csv file)
        /// until the returned sequence is enumerated, and the csv writer is only disposed when
        /// enumeration finishes or the enumerator is disposed.
        /// </remarks>
        /// <param name="container">Container from which the filtered features are obtained</param>
        /// <param name="featureFinder">Peak matrix passed to <c>GetFilteredFeatures</c></param>
        /// <param name="outCsvFilePath">Path of the csv file; only opened when <c>Parameters.CsvOutput</c> is set</param>
        /// <param name="featureCounter">
        /// Array whose element 0 receives the running feature count (and therefore the ID of the
        /// feature most recently yielded). Must have at least one element.
        /// </param>
        /// <returns>One <see cref="Ms1FtEntry"/> per filtered feature, numbered from 1</returns>
        private IEnumerable <Ms1FtEntry> FilterFeaturesWithOutput(LcMsFeatureContainer container, LcMsPeakMatrix featureFinder, string outCsvFilePath, int[] featureCounter)
        {
            // Using an array, since iterator methods can't have ref or out parameters
            featureCounter[0] = 0;

            // When csv output is disabled the writer is backed by an in-memory stream that is never written to
            Stream csvStream = new MemoryStream();

            if (Parameters.CsvOutput)
            {
                csvStream = new FileStream(outCsvFilePath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite);
            }
            using (var csvWriter = new StreamWriter(csvStream))
            {
                if (Parameters.CsvOutput)
                {
                    csvWriter.WriteLine("scan_num,charge,abundance,mz,fit,monoisotopic_mw,FeatureID");
                }

                var filteredFeatures = container.GetFilteredFeatures(featureFinder);
                foreach (var feature in filteredFeatures)
                {
                    featureCounter[0]++;

                    if (Parameters.CsvOutput)
                    {
                        // Index of the top-ranked entry in the theoretical envelope
                        var mostAbuIdx = feature.TheoreticalEnvelope.IndexOrderByRanking[0];

                        foreach (var envelope in feature.EnumerateEnvelopes())
                        {
                            // Skip envelopes whose peak at that index is missing or inactive
                            var mostAbuPeak = envelope.Peaks[mostAbuIdx];
                            if (mostAbuPeak == null || !mostAbuPeak.Active)
                            {
                                continue;
                            }

                            var fitscore = 1.0 - feature.BestCorrelationScore;

                            // Pass the counter VALUE; passing the array itself would write "System.Int32[]"
                            // into the FeatureID column.
                            csvWriter.WriteLine("{0},{1},{2},{3},{4},{5},{6}", envelope.ScanNum, envelope.Charge, envelope.Abundance,
                                                mostAbuPeak.Mz, fitscore, envelope.MonoMass, featureCounter[0]);
                        }
                    }

                    yield return(feature.ToMs1FtEntry(featureCounter[0]));
                }
            }
        }
Example #4
0
        /// <summary>
        /// Loads the pbf test file, runs feature finding over the bin range whose lower and upper
        /// bounds are both taken from <c>GetBinNumber(11180.33677)</c>, and prints one tab-separated
        /// line per filtered feature to the console. No assertions are made.
        /// </summary>
        public void TestLcMsFeatureFinder()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            Utils.ShowStarting(testName);

            var pbfFile = Utils.GetTestFile(testName, Utils.GetPbfTestFilePath(false));

            var scorer = new LcMsFeatureLikelihood();
            var timer  = Stopwatch.StartNew();

            Console.WriteLine(@"Start loading MS1 data from {0}", pbfFile.FullName);

            var run           = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);
            var featureFinder = new LcMsPeakMatrix(run, scorer);

            Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec",
                              timer.ElapsedMilliseconds / 1000.0d);

            var container = new LcMsFeatureContainer(featureFinder.Ms1Spectra, scorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
            var firstBin  = featureFinder.Comparer.GetBinNumber(11180.33677);
            var lastBin   = featureFinder.Comparer.GetBinNumber(11180.33677);

            // Bin count is computed in integer arithmetic, then widened for the percentage below
            double binCount = lastBin - firstBin + 1;

            Console.WriteLine(@"Start MS1 feature extraction.");

            timer.Restart();
            for (var bin = firstBin; bin <= lastBin; bin++)
            {
                container.Add(featureFinder.FindFeatures(bin));

                // Report progress every 1000 bins (never for the first bin)
                var binsDone = bin - firstBin;
                if (binsDone <= 0 || binsDone % 1000 != 0)
                {
                    continue;
                }

                var seconds     = timer.ElapsedMilliseconds / 1000.0d;
                var percentDone = ((double)binsDone / binCount) * 100;
                Console.WriteLine(
                    @"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                    percentDone, featureFinder.Comparer.GetMzEnd(bin), seconds,
                    container.NumberOfFeatures);
            }

            Console.WriteLine(@"Complete MS1 feature extraction.");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", timer.ElapsedMilliseconds / 1000.0d);
            Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);

            Console.WriteLine(@"Start selecting mutually independent features from feature network graph");

            timer.Stop();
            timer.Restart();

            // The scan vector is fetched but not used below; the call is retained as-is
            var ms1Scans = run.GetMs1ScanVector();

            var rowId = 0;
            foreach (var feature in container.GetFilteredFeatures(featureFinder))
            {
                // Columns in output order; each is followed by a tab except the last, which ends the line
                var columns = new object[]
                {
                    rowId,
                    feature.Mass,
                    feature.MinScanNum,
                    feature.MaxScanNum,
                    feature.MinCharge,
                    feature.MaxCharge,
                    feature.RepresentativeScanNum,
                    feature.RepresentativeMz,
                    feature.RepresentativeCharge,
                    feature.BestDistanceScoreAcrossCharge[0],
                    feature.BestDistanceScoreAcrossCharge[1],
                    feature.BestCorrelationScoreAcrossCharge[0],
                    feature.BestCorrelationScoreAcrossCharge[1],
                    feature.BestIntensityScoreAcrossCharge[0],
                    feature.BestIntensityScoreAcrossCharge[1],
                    feature.AbundanceDistributionAcrossCharge[0],
                    feature.AbundanceDistributionAcrossCharge[1],
                    feature.XicCorrelationBetweenBestCharges[0],
                    feature.XicCorrelationBetweenBestCharges[1],
                    feature.Score
                };

                for (var col = 0; col < columns.Length; col++)
                {
                    Console.Write(columns[col]);
                    Console.Write(col == columns.Length - 1 ? "\n" : "\t");
                }

                rowId++;
            }
        }
        /// <summary>
        /// Find features in the data file
        /// </summary>
        /// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
        /// <returns>
        /// 0 if success; negative number on error:
        /// -1 if GetOutputDirectory returned null or an empty string,
        /// -2 if the features output file already exists,
        /// -3 if the input file does not exist,
        /// -4 if the data file has no MS1 spectra
        /// </returns>
        private int ProcessFile(string rawFile)
        {
            var outDirectory = GetOutputDirectory(rawFile);

            if (string.IsNullOrEmpty(outDirectory))
            {
                return(-1);
            }

            var baseName            = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
            var ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
            var outCsvFilePath      = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".csv");
            var pngFilePath         = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

            // Existing results are never overwritten
            if (File.Exists(ms1FeaturesFilePath))
            {
                Console.WriteLine(@"ProMex output already exists: {0}", ms1FeaturesFilePath);
                return(-2);
            }

            if (!File.Exists(rawFile))
            {
                ShowErrorMessage(@"Cannot find input file: " + rawFile);
                return(-3);
            }

            var stopwatch = Stopwatch.StartNew();

            Console.WriteLine(@"Start loading MS1 data from {0}", rawFile);
            var run = PbfLcMsRun.GetLcMsRun(rawFile);

            var featureFinder = new LcMsPeakMatrix(run, _likelihoodScorer, 1, 60, Parameters.MaxThreads);

            Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            if (run.GetMs1ScanVector().Length == 0)
            {
                ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
                return(-4);
            }

            var    comparer         = featureFinder.Comparer;
            var    container        = new LcMsFeatureContainer(featureFinder.Ms1Spectra, _likelihoodScorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
            var    minSearchMassBin = comparer.GetBinNumber(Parameters.MinSearchMass);
            var    maxSearchMassBin = comparer.GetBinNumber(Parameters.MaxSearchMass);
            double totalMassBin     = maxSearchMassBin - minSearchMassBin + 1;

            Console.WriteLine(@"Start MS1 feature extraction.");
            stopwatch.Restart();
            for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
            {
                var clusters = featureFinder.FindFeatures(binNum);
                container.Add(clusters);

                // Report progress every 1000 bins
                if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
                {
                    var elapsed             = (stopwatch.ElapsedMilliseconds) / 1000.0d;
                    var processedBins       = binNum - minSearchMassBin;
                    var processedPercentage = ((double)processedBins / totalMassBin) * 100;
                    Console.WriteLine(@"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                                      processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed,
                                      container.NumberOfFeatures);
                }
            }

            Console.WriteLine(@"Complete MS1 feature extraction.");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);
            Console.WriteLine(@"Start selecting mutually independent features from feature network graph");
            stopwatch.Restart();

            // write result files
            // Read the option once so that writer creation and the final message agree
            var writeCsv  = Parameters.CsvOutput;
            var featureId = 0;

            // The using statements guarantee both files are flushed and closed even if filtering or
            // writing throws; a null resource (csv output disabled) is permitted and simply skipped.
            using (var tsvWriter = new StreamWriter(ms1FeaturesFilePath))
            using (var csvWriter = writeCsv ? new StreamWriter(outCsvFilePath) : null)
            {
                tsvWriter.WriteLine(GetHeaderString(Parameters.ScoreReport));

                if (csvWriter != null)
                {
                    csvWriter.WriteLine("scan_num,charge,abundance,mz,fit,monoisotopic_mw,FeatureID");
                }

                var filteredFeatures = container.GetFilteredFeatures(featureFinder);

                foreach (var feature in filteredFeatures)
                {
                    featureId++;
                    tsvWriter.WriteLine("{0}\t{1}", featureId, GetString(feature, Parameters.ScoreReport));

                    // Index of the top-ranked entry in the theoretical envelope
                    var mostAbuIdx = feature.TheoreticalEnvelope.IndexOrderByRanking[0];

                    if (csvWriter == null)
                    {
                        continue;
                    }

                    foreach (var envelope in feature.EnumerateEnvelopes())
                    {
                        // Skip envelopes whose peak at that index is missing or inactive
                        var mostAbuPeak = envelope.Peaks[mostAbuIdx];
                        if (mostAbuPeak == null || !mostAbuPeak.Active)
                        {
                            continue;
                        }

                        var fitscore = 1.0 - feature.BestCorrelationScore;
                        csvWriter.WriteLine("{0},{1},{2},{3},{4},{5},{6}", envelope.ScanNum, envelope.Charge, envelope.Abundance, mostAbuPeak.Mz, fitscore, envelope.MonoMass, featureId);
                    }
                }
            }

            Console.WriteLine(@"Complete feature filtration");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(@" - Number of filtered features = {0}", featureId);
            Console.WriteLine(@" - ProMex output: {0}", ms1FeaturesFilePath);

            if (writeCsv)
            {
                Console.WriteLine(@" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
            }

            // The features file is closed by this point, so it can be passed on by path
            if (Parameters.FeatureMapImage)
            {
                CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);
            }

            return(0);
        }
        /// <summary>
        /// Find features in the data file
        /// </summary>
        /// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
        /// <returns>
        /// 0 if success; negative number on error:
        /// -1 if GetOutputDirectory returned null or an empty string,
        /// -2 if the features output file already exists,
        /// -3 if the input file does not exist,
        /// -4 if the data file has no MS1 spectra
        /// </returns>
        private int ProcessFile(string rawFile)
        {
            var outDirectory = GetOutputDirectory(rawFile);
            if (string.IsNullOrEmpty(outDirectory))
                return -1;

            var baseName = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
            var ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
            var outCsvFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".csv");
            var pngFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

            // Existing results are never overwritten
            if (File.Exists(ms1FeaturesFilePath))
            {
                Console.WriteLine(@"ProMex output already exists: {0}", ms1FeaturesFilePath);
                return -2;
            }

            if (!File.Exists(rawFile))
            {
                ShowErrorMessage(@"Cannot find input file: " + rawFile);
                return -3;
            }

            var stopwatch = Stopwatch.StartNew();
            Console.WriteLine(@"Start loading MS1 data from {0}", rawFile);
            var run = PbfLcMsRun.GetLcMsRun(rawFile);

            var featureFinder = new LcMsPeakMatrix(run, _likelihoodScorer, 1, 60, Parameters.MaxThreads);
            Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            if (run.GetMs1ScanVector().Length == 0)
            {
                ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
                return -4;
            }

            var comparer = featureFinder.Comparer;
            var container = new LcMsFeatureContainer(featureFinder.Ms1Spectra, _likelihoodScorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
            var minSearchMassBin = comparer.GetBinNumber(Parameters.MinSearchMass);
            var maxSearchMassBin = comparer.GetBinNumber(Parameters.MaxSearchMass);
            double totalMassBin = maxSearchMassBin - minSearchMassBin + 1;

            Console.WriteLine(@"Start MS1 feature extraction.");
            stopwatch.Restart();
            for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
            {
                var clusters = featureFinder.FindFeatures(binNum);
                container.Add(clusters);

                // Report progress every 1000 bins
                if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
                {
                    var elapsed = (stopwatch.ElapsedMilliseconds) / 1000.0d;
                    var processedBins = binNum - minSearchMassBin;
                    var processedPercentage = ((double)processedBins / totalMassBin) * 100;
                    Console.WriteLine(@"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                        processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed,
                        container.NumberOfFeatures);
                }
            }

            Console.WriteLine(@"Complete MS1 feature extraction.");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);
            Console.WriteLine(@"Start selecting mutually independent features from feature network graph");
            stopwatch.Restart();

            // write result files
            // Read the option once so that writer creation and the final message agree
            var writeCsv = Parameters.CsvOutput;
            var featureId = 0;

            // The using statements guarantee both files are flushed and closed even if filtering or
            // writing throws; a null resource (csv output disabled) is permitted and simply skipped.
            using (var tsvWriter = new StreamWriter(ms1FeaturesFilePath))
            using (var csvWriter = writeCsv ? new StreamWriter(outCsvFilePath) : null)
            {
                tsvWriter.WriteLine(GetHeaderString(Parameters.ScoreReport));

                if (csvWriter != null)
                {
                    csvWriter.WriteLine("scan_num,charge,abundance,mz,fit,monoisotopic_mw,FeatureID");
                }

                var filteredFeatures = container.GetFilteredFeatures(featureFinder);
                foreach (var feature in filteredFeatures)
                {
                    featureId++;
                    tsvWriter.WriteLine("{0}\t{1}", featureId, GetString(feature, Parameters.ScoreReport));

                    // Index of the top-ranked entry in the theoretical envelope
                    var mostAbuIdx = feature.TheoreticalEnvelope.IndexOrderByRanking[0];

                    if (csvWriter == null) continue;

                    foreach (var envelope in feature.EnumerateEnvelopes())
                    {
                        // Skip envelopes whose peak at that index is missing or inactive
                        var mostAbuPeak = envelope.Peaks[mostAbuIdx];
                        if (mostAbuPeak == null || !mostAbuPeak.Active) continue;

                        var fitscore = 1.0 - feature.BestCorrelationScore;
                        csvWriter.WriteLine("{0},{1},{2},{3},{4},{5},{6}", envelope.ScanNum, envelope.Charge, envelope.Abundance, mostAbuPeak.Mz, fitscore, envelope.MonoMass, featureId);
                    }
                }
            }

            Console.WriteLine(@"Complete feature filtration");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(@" - Number of filtered features = {0}", featureId);
            Console.WriteLine(@" - ProMex output: {0}", ms1FeaturesFilePath);

            if (writeCsv)
            {
                Console.WriteLine(@" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
            }

            // The features file is closed by this point, so it can be passed on by path
            if (Parameters.FeatureMapImage)
            {
                CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);
            }

            return 0;
        }
Example #7
0
        /// <summary>
        /// Loads a fixed pbf file (the test is ignored when the file is absent), runs feature finding
        /// over the bin range whose lower and upper bounds are both taken from
        /// <c>GetBinNumber(11180.33677)</c>, and prints one tab-separated line per filtered feature
        /// to the console. No assertions are made on the results.
        /// </summary>
        public void TestLcMsFeatureFinder()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            const string rawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
            // Alternative inputs that have been used with this test:
            //   @"D:\MassSpecFiles\CompRef\CPTAC_Intact_CR_Pool_2_25Jun15_Bane_15-02-02RZ.pbf"
            //   @"D:\MassSpecFiles\IMER\Dey_IMERblast_01_08May14_Alder_14-01-33.pbf"
            //   @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_3\MZ20150729FG_WT1.pbf"

            if (!File.Exists(rawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, rawFile);
            }

            var scorer = new LcMsFeatureLikelihood();
            var timer = Stopwatch.StartNew();
            Console.WriteLine(@"Start loading MS1 data from {0}", rawFile);
            var run = PbfLcMsRun.GetLcMsRun(rawFile);
            var featureFinder = new LcMsPeakMatrix(run, scorer);
            Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec",
                timer.ElapsedMilliseconds / 1000.0d);

            var container = new LcMsFeatureContainer(featureFinder.Ms1Spectra, scorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
            var firstBin = featureFinder.Comparer.GetBinNumber(11180.33677);
            var lastBin = featureFinder.Comparer.GetBinNumber(11180.33677);

            // Bin count is computed in integer arithmetic, then widened for the percentage below
            double binCount = lastBin - firstBin + 1;

            Console.WriteLine(@"Start MS1 feature extraction.");

            timer.Restart();
            for (var bin = firstBin; bin <= lastBin; bin++)
            {
                container.Add(featureFinder.FindFeatures(bin));

                // Report progress every 1000 bins (never for the first bin)
                var binsDone = bin - firstBin;
                if (binsDone <= 0 || binsDone % 1000 != 0)
                {
                    continue;
                }

                var seconds = timer.ElapsedMilliseconds / 1000.0d;
                var percentDone = ((double)binsDone / binCount) * 100;
                Console.WriteLine(
                    @"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                    percentDone, featureFinder.Comparer.GetMzEnd(bin), seconds,
                    container.NumberOfFeatures);
            }

            Console.WriteLine(@"Complete MS1 feature extraction.");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", timer.ElapsedMilliseconds / 1000.0d);
            Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);

            Console.WriteLine(@"Start selecting mutually independent features from feature network graph");

            timer.Stop();
            timer.Restart();

            // The scan vector is fetched but not used below; the call is retained as-is
            var ms1Scans = run.GetMs1ScanVector();

            var rowId = 0;
            foreach (var feature in container.GetFilteredFeatures(featureFinder))
            {
                // Columns in output order; each is followed by a tab except the last, which ends the line
                var columns = new object[]
                {
                    rowId,
                    feature.Mass,
                    feature.MinScanNum,
                    feature.MaxScanNum,
                    feature.MinCharge,
                    feature.MaxCharge,
                    feature.RepresentativeScanNum,
                    feature.RepresentativeMz,
                    feature.RepresentativeCharge,
                    feature.BestDistanceScoreAcrossCharge[0],
                    feature.BestDistanceScoreAcrossCharge[1],
                    feature.BestCorrelationScoreAcrossCharge[0],
                    feature.BestCorrelationScoreAcrossCharge[1],
                    feature.BestIntensityScoreAcrossCharge[0],
                    feature.BestIntensityScoreAcrossCharge[1],
                    feature.AbundanceDistributionAcrossCharge[0],
                    feature.AbundanceDistributionAcrossCharge[1],
                    feature.XicCorrelationBetweenBestCharges[0],
                    feature.XicCorrelationBetweenBestCharges[1],
                    feature.Score
                };

                for (var col = 0; col < columns.Length; col++)
                {
                    Console.Write(columns[col]);
                    Console.Write(col == columns.Length - 1 ? "\n" : "\t");
                }

                rowId++;
            }
        }
Example #8
0
        public void TestLcMsFeatureFinder()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string rawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            //const string rawFile = @"D:\MassSpecFiles\CompRef\CPTAC_Intact_CR_Pool_2_25Jun15_Bane_15-02-02RZ.pbf";
            //const string rawFile = @"D:\MassSpecFiles\IMER\Dey_IMERblast_01_08May14_Alder_14-01-33.pbf";
            //const string rawFile = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_3\MZ20150729FG_WT1.pbf";

            if (!File.Exists(rawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
            }

            // var outTsvFilePath = MassSpecDataReaderFactory.ChangeExtension(rawFile, "ms1ft");
            //var scoreDataPath = @"D:\MassSpecFiles\training";
            var scorer    = new LcMsFeatureLikelihood();
            var stopwatch = Stopwatch.StartNew();

            Console.WriteLine(@"Start loading MS1 data from {0}", rawFile);
            var run           = PbfLcMsRun.GetLcMsRun(rawFile);
            var featureFinder = new LcMsPeakMatrix(run, scorer);

            Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec",
                              (stopwatch.ElapsedMilliseconds) / 1000.0d);

            var    container        = new LcMsFeatureContainer(featureFinder.Ms1Spectra, scorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
            var    minSearchMassBin = featureFinder.Comparer.GetBinNumber(11180.33677);
            var    maxSearchMassBin = featureFinder.Comparer.GetBinNumber(11180.33677);
            double totalMassBin     = maxSearchMassBin - minSearchMassBin + 1;

            Console.WriteLine(@"Start MS1 feature extraction.");

            stopwatch.Restart();
            for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
            {
                var clusters = featureFinder.FindFeatures(binNum);
                container.Add(clusters);

                if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
                {
                    var elapsed             = (stopwatch.ElapsedMilliseconds) / 1000.0d;
                    var processedBins       = binNum - minSearchMassBin;
                    var processedPercentage = ((double)processedBins / totalMassBin) * 100;
                    Console.WriteLine(
                        @"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                        processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed,
                        container.NumberOfFeatures);
                }
            }

            Console.WriteLine(@"Complete MS1 feature extraction.");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);

            // write result files
            Console.WriteLine(@"Start selecting mutually independent features from feature network graph");


            stopwatch.Stop();

            // Start to quantify accurate abundance
            stopwatch.Restart();
            //var quantAnalyzer = new TargetMs1FeatureMatrix(run);
            //var oriResult = new List<Ms1FeatureCluster>();
            //var quantResult = new List<Ms1Feature>();

            var featureId   = 0;
            var ms1ScanNums = run.GetMs1ScanVector();
            //tsvWriter.WriteLine(GetHeaderString() + "\tQMinScanNum\tQMaxScanNum\tQMinCharge\tQMaxCharge\tQAbundance");

            var filteredFeatures = container.GetFilteredFeatures(featureFinder);

            foreach (var feature in filteredFeatures)
            {
                Console.Write(featureId);
                Console.Write("\t");
                Console.Write(feature.Mass);
                Console.Write("\t");
                Console.Write(feature.MinScanNum);
                Console.Write("\t");
                Console.Write(feature.MaxScanNum);
                Console.Write("\t");
                Console.Write(feature.MinCharge);
                Console.Write("\t");
                Console.Write(feature.MaxCharge);
                Console.Write("\t");

                Console.Write(feature.RepresentativeScanNum);
                Console.Write("\t");
                Console.Write(feature.RepresentativeMz);
                Console.Write("\t");
                Console.Write(feature.RepresentativeCharge);
                Console.Write("\t");

                //Console.Write(feature.BestSummedEnvelopeDistance); Console.Write("\t");
                //Console.Write(feature.BestEnvelopeDistance); Console.Write("\t");
                Console.Write(feature.BestDistanceScoreAcrossCharge[0]);
                Console.Write("\t");
                Console.Write(feature.BestDistanceScoreAcrossCharge[1]);
                Console.Write("\t");

                Console.Write(feature.BestCorrelationScoreAcrossCharge[0]);
                Console.Write("\t");
                Console.Write(feature.BestCorrelationScoreAcrossCharge[1]);
                Console.Write("\t");

                Console.Write(feature.BestIntensityScoreAcrossCharge[0]);
                Console.Write("\t");
                Console.Write(feature.BestIntensityScoreAcrossCharge[1]);
                Console.Write("\t");

                Console.Write(feature.AbundanceDistributionAcrossCharge[0]);
                Console.Write("\t");
                Console.Write(feature.AbundanceDistributionAcrossCharge[1]);
                Console.Write("\t");

                Console.Write(feature.XicCorrelationBetweenBestCharges[0]);
                Console.Write("\t");
                Console.Write(feature.XicCorrelationBetweenBestCharges[1]);
                Console.Write("\t");

                Console.Write(feature.Score);
                Console.Write("\n");
                featureId++;
            }
        }
Example #9
0
        /// <summary>
        /// Run MS1 feature finding on a single data file and write the filtered features to disk
        /// </summary>
        /// <remarks>
        /// Steps: resolve the output paths, validate the input, load the run, extract features
        /// for every mass bin between Parameters.MinSearchMass and Parameters.MaxSearchMass,
        /// filter and write them, then optionally create the feature map image.
        /// </remarks>
        /// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
        /// <returns>
        /// 0 on success; otherwise a negative error code:
        /// -1 if GetOutputDirectory returns a null or empty path,
        /// -2 if the ProMex output file already exists,
        /// -3 if the input file does not exist,
        /// -4 if the data file has no MS1 spectra,
        /// -5 if the data file has no MS1 peaks
        /// </returns>
        private int ProcessFile(string rawFile)
        {
            var outDirectory = GetOutputDirectory(rawFile);
            if (string.IsNullOrEmpty(outDirectory))
            {
                return -1;
            }

            // All output files share the input file's base name (extension stripped)
            var baseName = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
            var ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
            var outCsvFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".csv");
            var pngFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

            // Refuse to overwrite an existing result file
            if (File.Exists(ms1FeaturesFilePath))
            {
                ShowErrorMessage("ProMex output already exists: " + ms1FeaturesFilePath);
                return -2;
            }

            if (!File.Exists(rawFile))
            {
                ShowErrorMessage("Cannot find input file: " + rawFile);
                return -3;
            }

            var timer = Stopwatch.StartNew();

            Console.WriteLine("Start loading MS1 data from {0}", rawFile);
            var run = PbfLcMsRun.GetLcMsRun(rawFile);

            // NOTE(review): the literals 1 and 60 are presumably the charge range -- confirm against the LcMsPeakMatrix constructor
            var featureFinder = new LcMsPeakMatrix(run, _likelihoodScorer, 1, 60, Parameters.MaxThreads);

            Console.WriteLine("Complete loading MS1 data. Elapsed Time = {0:0.000} sec", timer.ElapsedMilliseconds / 1000.0d);

            if (run.GetMs1ScanVector().Length == 0)
            {
                ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
                return -4;
            }

            if (featureFinder.Ms1PeakCount == 0)
            {
                ShowErrorMessage(@"Data file has no MS1 peaks: " + Path.GetFileName(rawFile));
                return -5;
            }

            var massComparer = featureFinder.Comparer;
            var container = new LcMsFeatureContainer(featureFinder.Ms1Spectra, _likelihoodScorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
            var firstBin = massComparer.GetBinNumber(Parameters.MinSearchMass);
            var lastBin = massComparer.GetBinNumber(Parameters.MaxSearchMass);

            // Stored as a double so the percentage below uses floating-point division
            double binCount = lastBin - firstBin + 1;

            Console.WriteLine("Start MS1 feature extraction.");
            timer.Restart();

            for (var bin = firstBin; bin <= lastBin; bin++)
            {
                container.Add(featureFinder.FindFeatures(bin));

                // Report progress once every 1000 bins, but never on the very first bin
                var binsDone = bin - firstBin;
                if (binsDone <= 0 || binsDone % 1000 != 0)
                {
                    continue;
                }

                var seconds = timer.ElapsedMilliseconds / 1000.0d;
                var percentDone = binsDone / binCount * 100;
                Console.WriteLine("Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                                  percentDone, featureFinder.Comparer.GetMzEnd(bin), seconds,
                                  container.NumberOfFeatures);
            }

            Console.WriteLine("Complete MS1 feature extraction.");
            Console.WriteLine(" - Elapsed time = {0:0.000} sec", timer.ElapsedMilliseconds / 1000.0d);
            Console.WriteLine(" - Number of extracted features = {0}", container.NumberOfFeatures);
            Console.WriteLine("Start selecting mutually independent features from feature network graph");
            timer.Restart();

            // Filters the extracted features, writes the result file(s) and returns the number written
            var filteredCount = FilterAndOutputFeatures(container, featureFinder, outCsvFilePath, ms1FeaturesFilePath);

            Console.WriteLine("Complete feature filtration");
            Console.WriteLine(" - Elapsed time = {0:0.000} sec", timer.ElapsedMilliseconds / 1000.0d);
            Console.WriteLine(" - Number of filtered features = {0}", filteredCount);
            Console.WriteLine(" - ProMex output: {0}", ms1FeaturesFilePath);

            if (Parameters.CsvOutput)
            {
                Console.WriteLine(" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
            }

            if (Parameters.FeatureMapImage)
            {
                CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);
            }

            return 0;
        }