Exemple #1
0
        public IEnumerable <UMCLight> TestUmcFeatures(string path)
        {
            var reader = new MsFeatureLightFileReader {
                Delimeter = ","
            };
            var newMsFeatures = reader.ReadFile(path);

            var finder = new UmcTreeFeatureFinder
            {
                MaximumNet  = .005,
                MaximumScan = 50
            };
            var tolerances = new FeatureTolerances
            {
                Mass = 8,
                Net  = .005
            };
            var options  = new LcmsFeatureFindingOptions(tolerances);
            var features = finder.FindFeatures(newMsFeatures.ToList(), options, null);

            // Work on total feature count here.
            Assert.Greater(features.Count, 0);

            return(features);
        }
Exemple #2
0
        /// <summary>
        ///     Creates LCMS Features
        /// </summary>
        public List <UMCLight> CreateLcmsFeatures(
            DatasetInformation information,
            List <MSFeatureLight> msFeatures,
            LcmsFeatureFindingOptions options,
            LcmsFeatureFilteringOptions filterOptions,
            IScanSummaryProvider provider,
            IProgress <ProgressData> progress = null)
        {
            // Make features
            if (msFeatures.Count < 1)
            {
                throw new Exception("No features were found in the feature files provided.");
            }

            UpdateStatus("Finding features.");

            ValidateFeatureFinderMaxScanLength(information, options, filterOptions);

            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            finder.Progress += (sender, args) => UpdateStatus(args.Message);
            var features = finder.FindFeatures(msFeatures, options, provider, progress);

            UpdateStatus("Filtering features.");
            List <UMCLight> filteredFeatures = LcmsFeatureFilters.FilterFeatures(features, filterOptions, provider);

            UpdateStatus(string.Format("Filtered features from: {0} to {1}.", features.Count, filteredFeatures.Count));
            return(filteredFeatures);
        }
        /// <summary>
        ///     Retrieves a list of features.
        /// </summary>
        /// <param name="rawFile"></param>
        /// <param name="featureFile"></param>
        /// <returns></returns>
        public List<UMCLight> FindFeatures(string rawFile, string featureFile)
        {
            List<UMCLight> features;
            using (ISpectraProvider raw = new ThermoRawDataFileReader())
            {
                // Read the raw file summary data...
                raw.AddDataFile(rawFile, 0);

                var info = new DatasetInformation();
                info.Features = new InputFile();
                info.Features.Path = featureFile;

                var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

                var tolerances = new FeatureTolerances
                {
                    Mass = 8,
                    Net = .005
                };
                var options = new LcmsFeatureFindingOptions(tolerances);

                // Load and create features
                var msFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);
                var provider = RawLoaderFactory.CreateFileReader(rawFile);
                features = finder.FindFeatures(msFeatures, options, provider);
            }
            return features;
        }
Exemple #4
0
        /// <summary>
        ///  Finds features given a dataset
        /// </summary>
        private IList <UMCLight> FindFeatures(DatasetInformation information,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder)

        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(information.Features.Path);

            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX = RawLoaderFactory.CreateFileReader(information.RawFile.Path))
            {
                rawProviderX.AddDataFile(information.RawFile.Path, 0);
                UpdateStatus("Creating LCMS Features.");
                var features = featureFinder.FindFeatures(msFeatures,
                                                          featureFindingOptions,
                                                          rawProviderX);
                features = LcmsFeatureFilters.FilterFeatures(features, lcmsFilterOptions, information.ScanTimes);

                var datasetId = information.DatasetId;
                foreach (var feature in features)
                {
                    var lightEntry = new List <MSFeatureLight>();
                    feature.GroupId = datasetId;
                    foreach (var msFeature in feature.MsFeatures)
                    {
                        msFeature.GroupId = datasetId;
                        foreach (var msmsFeature in msFeature.MSnSpectra)
                        {
                            msmsFeature.GroupId = datasetId;
                            foreach (var peptide in msmsFeature.Peptides)
                            {
                                peptide.GroupId = datasetId;
                            }
                        }

                        if (msFeature.MSnSpectra.Count > 0)
                        {
                            lightEntry.Add(msFeature);
                        }
                    }

                    // We are doing this so that we dont have a ton of MS features in the database
                    feature.MsFeatures.Clear();
                    feature.MsFeatures.AddRange(lightEntry);
                }

                LinkPeptidesToFeatures(information.SequenceFile.Path,
                                       features,
                                       peptideOptions.Fdr,
                                       peptideOptions.IdScore);

                DeRegisterProgressNotifier(featureFinder);
                return(features);
            }
        }
Exemple #5
0
        /// <summary>
        ///     Retrieves a list of features.
        /// </summary>
        /// <param name="rawFile"></param>
        /// <param name="featureFile"></param>
        /// <returns></returns>
        public List <UMCLight> FindFeatures(string rawFile, string featureFile)
        {
            List <UMCLight> features;

            using (ISpectraProvider raw = new InformedProteomicsReader())
            {
                // Read the raw file summary data...
                raw.AddDataFile(rawFile, 0);

                var info = new DatasetInformation();

                info.InputFiles.Add(new InputFile {
                    Path = featureFile, FileType = InputFileType.Features
                });

                var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

                var tolerances = new FeatureTolerances
                {
                    Mass = 8,
                    Net  = .005
                };
                var options = new LcmsFeatureFindingOptions(tolerances);


                // Load and create features
                var msFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);
                var provider   = RawLoaderFactory.CreateFileReader(rawFile);
                provider.AddDataFile(rawFile, 0);
                features = finder.FindFeatures(msFeatures, options, provider);
            }
            return(features);
        }
 /// <summary>
 ///     Constructor
 /// </summary>
 public MultiAlignAnalysisOptions()
 {
     InstrumentTolerances = new FeatureTolerances();
     MassTagDatabaseOptions = new MassTagDatabaseOptions();
     MsFilteringOptions = new MsFeatureFilteringOptions();
     LcmsFindingOptions = new LcmsFeatureFindingOptions(InstrumentTolerances);
     LcmsFilteringOptions = new LcmsFeatureFilteringOptions();
     LcmsFilteringOptions.TreatAsTimeNotScan = true;
     LcmsFilteringOptions.FeatureLengthRange = new FilterRange(0, 20);
     AlignmentOptions = new AlignmentOptions();
     LcmsClusteringOptions = new LcmsClusteringOptions(InstrumentTolerances);
     StacOptions = new StacOptions();
     HasMsMs = false;
     UsedIonMobility = false;
 }
Exemple #7
0
        public IEnumerable <UMCLight> TestUmcFeatures(string relativePath, int expectedFeatureCount)
        {
            // Get the absolute path
            var path = GetPath(relativePath);

            var reader = new MsFeatureLightFileReader {
                Delimiter = ','
            };
            var newMsFeatures = reader.ReadFile(path);

            var finder = new UmcTreeFeatureFinder
            {
                MaximumNet  = .005,
                MaximumScan = 50
            };
            var tolerances = new FeatureTolerances
            {
                Mass = 8,
                Net  = .005
            };

            var options = new LcmsFeatureFindingOptions(tolerances);

            IScanSummaryProvider provider = null;
            var rawFilePath = path.Replace("_isos.csv", ".raw");

            UpdateStatus("Using raw data to create better features.");

            var providerCache = new ScanSummaryProviderCache();

            provider = providerCache.GetScanSummaryProvider(rawFilePath, 1);

            var features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);

            // Work on total feature count here.
            Assert.Greater(features.Count, 0);

            Assert.AreEqual(expectedFeatureCount, features.Count);

            return(features);
        }
Exemple #8
0
 /// <summary>
 /// Make sure the value for options.MaximumScanRange, which is used by the Feature Finder,
 /// is at least as large as the filterOptions.FeatureLengthRange.Maximum value,
 /// which is used for filtering the features by length
 /// </summary>
 /// <param name="information"></param>
 /// <param name="options"></param>
 /// <param name="filterOptions"></param>
 private static void ValidateFeatureFinderMaxScanLength(
     DatasetInformation information,
     LcmsFeatureFindingOptions options,
     LcmsFeatureFilteringOptions filterOptions)
 {
     if (!filterOptions.FilterOnMinutes)
     {
         if (options.MaximumScanRange < filterOptions.FeatureLengthRangeMinutes.Maximum)
         {
             // Bump up the scan range used by the LCMS Feature Finder to allow for longer features
             options.MaximumScanRange = (int)filterOptions.FeatureLengthRangeMinutes.Maximum;
         }
     }
     else
     {
         if (options.MaximumScanRange < filterOptions.FeatureLengthRangeScans.Maximum)
         {
             // Bump up the scan range used by the LCMS Feature Finder to allow for longer features
             options.MaximumScanRange = (int)filterOptions.FeatureLengthRangeScans.Maximum;
         }
     }
 }
Exemple #9
0
        /// <summary>
        ///     Constructor
        /// </summary>
        public MultiAlignAnalysisOptions()
        {
            DataLoadOptions        = new DataLoadingOptions();
            InstrumentTolerances   = new FeatureTolerances();
            MassTagDatabaseOptions = new MassTagDatabaseOptions();
            MsFilteringOptions     = new MsFeatureFilteringOptions();
            LcmsFindingOptions     = new LcmsFeatureFindingOptions(InstrumentTolerances);
            LcmsFilteringOptions   = new LcmsFeatureFilteringOptions
            {
                FilterOnMinutes           = true,
                FeatureLengthRangeMinutes = new FilterRange(0, 20),
                MinimumDataPoints         = 3,
                FeatureLengthRangeScans   = new FilterRange(0, 2000)
            };

            AlignmentOptions      = new AlignmentOptions();
            LcmsClusteringOptions = new LcmsClusteringOptions(InstrumentTolerances);
            StacOptions           = new StacOptions();
            HasMsMs         = false;
            UsedIonMobility = false;
            this.ClusterPostProcessingoptions = new ClusterPostProcessingOptions();
        }
Exemple #10
0
        public void TestUmcFeaturesMultipleCharges(string path, string rawPath, int maxScanDiff)
        {
            var reader = new MsFeatureLightFileReader {
                Delimeter = ","
            };
            var newMsFeatures     = reader.ReadFile(path);
            var finder            = new UmcTreeFeatureFinder();
            var featureTolerances = new FeatureTolerances
            {
                Mass = 12,
                Net  = .05
            };
            var options = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange  = .002,
                MaximumScanRange = 50
            };

            var provider = RawLoaderFactory.CreateFileReader(rawPath);

            provider.AddDataFile(rawPath, 0);

            var start = DateTime.Now;
            IEnumerable <UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
            var end = DateTime.Now;

            Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);


            if (features == null)
            {
                throw new NullReferenceException("The feature list came back empty.  This is a problem.");
            }


            var dirPath = Path.GetDirectoryName(path);

            if (dirPath != null)
            {
                using (
                    var writer =
                        File.CreateText(Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv")))
                    )
                {
                    foreach (var feature in features)
                    {
                        writer.WriteLine();
                        writer.WriteLine("Feature {0}", feature.Id);
                        var chargeMap = feature.CreateChargeMap();

                        if (chargeMap.Keys.Count < 2)
                        {
                            continue;
                        }

                        foreach (var charge in chargeMap.Keys)
                        {
                            writer.WriteLine();
                            foreach (var msFeature in chargeMap[charge])
                            {
                                var count = msFeature.MSnSpectra.Count;
                                writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                                                 msFeature.Abundance, count);
                            }
                        }


                        var charges = chargeMap.Keys.ToList();

                        for (var i = 0; i < charges.Count; i++)
                        {
                            for (var j = i; j < charges.Count; j++)
                            {
                                var x = chargeMap[charges[i]];
                                var y = chargeMap[charges[j]];

                                var diff = x.MinScan() - y.MinScan();
                                if (diff > maxScanDiff)
                                {
                                    throw new Exception(
                                              "There is a problem with the feature finder across charge states");
                                }
                            }
                        }
                    }
                }
            }


            // Work on total feature count here.
            Assert.Greater(features.Count(), 0);
        }
Exemple #11
0
        /// <summary>
        ///     Runs the MultiAlign analysis
        /// </summary>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
                                              IEnumerable <DatasetInformation> aligneeDatasets,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
                                              IFeatureAligner <IEnumerable <UMCLight>,
                                                               IEnumerable <UMCLight>,
                                                               AlignmentData> aligner,
                                              IClusterer <UMCLight, UMCClusterLight> clusterer,
                                              string matchPath,
                                              string errorPath)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);

            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX = new InformedProteomicsReader())
            {
                rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                                                                  featureFindingOptions,
                                                                  rawProviderX);
                LinkPeptidesToFeatures(baselineDataset.Sequence.Path, baselineFeatures, peptideOptions.Fdr,
                                       peptideOptions.IdScore);

                var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                // Then load the alignee dataset
                foreach (var dataset in aligneeDatasets)
                {
                    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                    aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                    using (var rawProviderY = new InformedProteomicsReader())
                    {
                        rawProviderY.AddDataFile(dataset.RawFile.Path, 0);

                        UpdateStatus("Finding alignee features");
                        var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                                                                         featureFindingOptions,
                                                                         rawProviderY);
                        LinkPeptidesToFeatures(dataset.Sequence.Path, aligneeFeatures, peptideOptions.Fdr,
                                               peptideOptions.IdScore);

                        var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                        // cluster before we do anything else....
                        var allFeatures = new List <UMCLight>();
                        allFeatures.AddRange(baselineFeatures);
                        allFeatures.AddRange(aligneeFeatures);
                        foreach (var feature in allFeatures)
                        {
                            feature.Net = feature.Net;
                            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        }

                        // This tells us the differences before we align.
                        var clusters     = clusterer.Cluster(allFeatures);
                        var preAlignment = AnalyzeClusters(clusters);

                        aligner.AligneeSpectraProvider  = providerY;
                        aligner.BaselineSpectraProvider = providerX;


                        UpdateStatus("Aligning data");
                        // Aligner data
                        var data    = aligner.Align(baselineFeatures, aligneeFeatures);
                        var matches = data.Matches;


                        WriteErrors(errorPath, matches);

                        // create anchor points for LCMSWarp alignment
                        var massPoints = new List <RegressionPoint>();
                        var netPoints  = new List <RegressionPoint>();
                        foreach (var match in matches)
                        {
                            var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz,
                                                                                  match.AnchorPointY.Mz);
                            var netError  = match.AnchorPointX.Net - match.AnchorPointY.Net;
                            var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                            massPoints.Add(massPoint);

                            var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                            netPoints.Add(netPoint);
                        }


                        foreach (var feature in allFeatures)
                        {
                            feature.UmcCluster = null;
                            feature.ClusterId  = -1;
                        }
                        // Then cluster after alignment!
                        UpdateStatus("clustering data");
                        clusters = clusterer.Cluster(allFeatures);
                        var postAlignment = AnalyzeClusters(clusters);

                        UpdateStatus("Note\tSame\tDifferent");
                        UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                                                   preAlignment.DifferentCluster));
                        UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                                                   postAlignment.DifferentCluster));

                        SaveMatches(matchPath, matches);
                    }
                }
            }

            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(featureFinder);
            DeRegisterProgressNotifier(clusterer);
        }
Exemple #12
0
        public void TestClustering(
            string directory,
            string outputPath,
            FeatureAlignmentType alignmentType,
            LcmsFeatureClusteringAlgorithmType clusterType)
        {
            var matchPath = string.Format("{0}.txt", outputPath);
            var errorPath = string.Format("{0}-errors.txt", outputPath);

            // Loads the supported MultiAlign types
            var supportedTypes = DatasetLoader.SupportedFileTypes;
            var extensions     = new List <string>();

            supportedTypes.ForEach(x => extensions.Add("*" + x.Extension));

            // Find our datasets
            var datasetLoader = new DatasetLoader();
            var datasets      = datasetLoader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

            // Setup our alignment options
            var alignmentOptions = new AlignmentOptions();
            var spectralOptions  = new SpectralOptions
            {
                ComparerType      = SpectralComparison.CosineDotProduct,
                Fdr               = .01,
                IdScore           = 1e-09,
                MzBinSize         = .5,
                MzTolerance       = .5,
                NetTolerance      = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff  = .75,
                TopIonPercent     = .8
            };


            // Options setup
            var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass      = instrumentOptions.Mass + 6,
                Net       = instrumentOptions.NetTolerance,
                DriftTime = instrumentOptions.DriftTimeTolerance
            };
            var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange  = .002,
                MaximumScanRange = 50
            };

            // Create our algorithms
            var finder  = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType,
                                                                     alignmentOptions.LCMSWarpOptions,
                                                                     spectralOptions);
            var clusterer = ClusterFactory.Create(clusterType);

            clusterer.Parameters = new FeatureClusterParameters <UMCLight>
            {
                Tolerances = featureTolerances
            };

            RegisterProgressNotifier(aligner);
            RegisterProgressNotifier(finder);
            RegisterProgressNotifier(clusterer);

            var lcmsFilters = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRangeScans = new FilterRange(50, 300)
            };
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity           = 5000,
                ChargeRange                = new FilterRange(1, 6),
                ShouldUseChargeFilter      = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter   = true
            };

            for (var i = 0; i < 1; i++)
            {
                var aligneeDatasets = datasets.Where((t, j) => j != i).ToList();
                PerformMultiAlignAnalysis(datasets[0],
                                          aligneeDatasets,
                                          featureFindingOptions,
                                          msFilterOptions,
                                          lcmsFilters,
                                          spectralOptions,
                                          finder,
                                          aligner,
                                          clusterer,
                                          matchPath,
                                          errorPath);
            }
        }
        public void TestUmcFeatures(string relativePath, string relativeRawPath)
        {
            // Get absolute paths
            var path = GetPath(relativePath);
            var rawPath = GetPath(relativeRawPath);

            var reader = new MsFeatureLightFileReader { Delimiter = ',' };
            var newMsFeatures = reader.ReadFile(path);
            var finder = new UmcTreeFeatureFinder();
            var featureTolerances = new FeatureTolerances
            {
                Mass = 12,
                Net = .04
            };
            var options = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange = .003,
                MaximumScanRange = 50
            };

            var provider = RawLoaderFactory.CreateFileReader(rawPath);
            provider.AddDataFile(rawPath, 0);

            var start = DateTime.Now;
            IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
            var end = DateTime.Now;
            Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);

            if (features == null)
                throw new NullReferenceException("The feature list came back empty.  This is a problem.");

            var dirPath = Path.GetDirectoryName(path);
            if (dirPath != null)
                using (
                    var writer =
                        File.CreateText(Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv")))
                    )
                {
                    foreach (var feature in features)
                    {
                        writer.WriteLine();
                        writer.WriteLine("Feature {0}", feature.Id);
                        var chargeMap = feature.CreateChargeMap();
                        foreach (var charge in chargeMap.Keys)
                        {
                            writer.WriteLine();
                            foreach (var msFeature in chargeMap[charge])
                            {
                                var count = msFeature.MSnSpectra.Count;
                                writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                                    msFeature.Abundance, count);
                            }
                        }
                    }
                }

            // Work on total feature count here.
            Assert.Greater(features.Count(), 0);
        }
Exemple #14
0
        public void TestLcmsWarpAlignment(string path1, string path2, string svgPath)
        {
            // Convert relative paths to absolute paths
            path1   = GetPath(path1);
            path2   = GetPath(path2);
            svgPath = GetPath(HEATMAP_RESULTS_FOLDER_BASE + svgPath);

            var aligner           = new LcmsWarpFeatureAligner(new LcmsWarpAlignmentOptions());
            var isosFilterOptions = new DeconToolsIsosFilterOptions();

            var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1, isosFilterOptions);
            var aligneeMs  = UmcLoaderFactory.LoadMsFeatureData(path2, isosFilterOptions);
            var finder     = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            var tolerances = new FeatureTolerances
            {
                FragmentationWindowSize = .5,
                Mass      = 13,
                DriftTime = .3,
                Net       = .01
            };
            var options = new LcmsFeatureFindingOptions(tolerances)
            {
                MaximumNetRange = .002
            };

            var baseline = finder.FindFeatures(baselineMs, options, null);
            var alignee  = finder.FindFeatures(aligneeMs, options, null);
            var data     = aligner.Align(baseline, alignee);

            var plotModel1 = new PlotModel
            {
                Subtitle = "Interpolated, cartesian axes",
                Title    = "HeatMapSeries"
            };

            var palette          = OxyPalettes.Hot(200);
            var linearColorAxis1 = new LinearColorAxis
            {
                InvalidNumberColor = OxyColors.Gray,
                Position           = AxisPosition.Right,
                Palette            = palette
            };

            plotModel1.Axes.Add(linearColorAxis1);


            // linearColorAxis1.

            var linearAxis1 = new LinearAxis {
                Position = AxisPosition.Bottom
            };

            plotModel1.Axes.Add(linearAxis1);

            var linearAxis2 = new LinearAxis();

            plotModel1.Axes.Add(linearAxis2);

            var heatMapSeries1 = new HeatMapSeries
            {
                X0       = 0,
                X1       = 1,
                Y0       = 0,
                Y1       = 1,
                FontSize = .2
            };

            var scores = data.HeatScores;
            var width  = scores.GetLength(0);
            var height = scores.GetLength(1);

            heatMapSeries1.Data = new double[width, height];


            for (var i = 0; i < width; i++)
            {
                for (var j = 0; j < height; j++)
                {
                    heatMapSeries1.Data[i, j] = Convert.ToDouble(scores[i, j]);
                }
            }

            plotModel1.Series.Add(heatMapSeries1);


            var svg       = new SvgExporter();
            var svgString = svg.ExportToString(plotModel1);

            using (var writer = File.CreateText(svgPath + ".svg"))
            {
                writer.Write(svgString);
            }
        }
Exemple #15
0
        public void ClusterMsMs(string name,
                                string resultPath,
                                string sequencePath,
                                SequenceFileType type,
                                string baseline,
                                string features,
                                double percent)
        {
            var baselineRaw = baseline.Replace("_isos.csv", ".raw");
            var featuresRaw = features.Replace("_isos.csv", ".raw");


            Console.WriteLine("Create Baseline Information");

            var baselineInfo = new DatasetInformation
            {
                DatasetId = 0,
            };

            baselineInfo.InputFiles.Add(new InputFile {
                Path = baseline, FileType = InputFileType.Features
            });
            baselineInfo.InputFiles.Add(new InputFile {
                Path = baselineRaw, FileType = InputFileType.Raw
            });
            baselineInfo.InputFiles.Add(new InputFile {
                Path = sequencePath, FileType = InputFileType.Sequence
            });

            Console.WriteLine("Create Alignee Information");
            var aligneeInfo = new DatasetInformation
            {
                DatasetId = 1,
            };

            aligneeInfo.InputFiles.Add(new InputFile {
                Path = features, FileType = InputFileType.Features
            });
            aligneeInfo.InputFiles.Add(new InputFile {
                Path = featuresRaw, FileType = InputFileType.Raw
            });
            aligneeInfo.InputFiles.Add(new InputFile {
                Path = sequencePath, FileType = InputFileType.Sequence
            });

            var reader = new MsFeatureLightFileReader();

            Console.WriteLine("Reading Baseline Features");
            var baselineMsFeatures = reader.ReadFile(baseline).ToList();

            baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

            Console.WriteLine("Reading Alignee Features");
            var aligneeMsFeatures = reader.ReadFile(features).ToList();

            aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);


            var finder     = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var tolerances = new FeatureTolerances
            {
                Mass = 8,
                Net  = .005
            };
            var options = new LcmsFeatureFindingOptions(tolerances);

            Console.WriteLine("Detecting Baseline Features");
            var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

            Console.WriteLine("Detecting Alignee Features");
            var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

            Console.WriteLine("Managing baseline and alignee features");
            baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
            aligneeFeatures.ForEach(x => x.GroupId  = aligneeInfo.DatasetId);

            Console.WriteLine("Clustering MS/MS Spectra");
            var clusterer = new MSMSClusterer();

            clusterer.MzTolerance      = .5;
            clusterer.MassTolerance    = 6;
            clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
            {
                TopPercent = percent
            };
            clusterer.SimilarityTolerance = .5;
            clusterer.ScanRange           = 905;
            clusterer.Progress           += clusterer_Progress;

            var allFeatures = new List <UMCLight>();

            allFeatures.AddRange(baselineFeatures);
            allFeatures.AddRange(aligneeFeatures);

            List <MsmsCluster> clusters = null;
            var spectraProviderCache    = new SpectraProviderCache();

            spectraProviderCache.GetSpectraProvider(baselineInfo.RawFile.Path, baselineInfo.DatasetId);
            spectraProviderCache.GetSpectraProvider(aligneeInfo.RawFile.Path, aligneeInfo.DatasetId);


            clusters = clusterer.Cluster(allFeatures, spectraProviderCache);
            Console.WriteLine("Found {0} Total Clusters", clusters.Count);

            if (clusters != null)
            {
                var now            = DateTime.Now;
                var testResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}_scans.txt",
                                                   name,
                                                   now.Year,
                                                   now.Month,
                                                   now.Day,
                                                   now.Hour,
                                                   now.Minute,
                                                   now.Second,
                                                   resultPath
                                                   );
                using (TextWriter writer = File.CreateText(testResultPath))
                {
                    writer.WriteLine("[Data]");
                    writer.WriteLine("{0}", baseline);
                    writer.WriteLine("{0}", features);
                    writer.WriteLine("[Scans]");
                    writer.WriteLine();
                    foreach (var cluster in clusters)
                    {
                        var scanData = "";
                        if (cluster.Features.Count == 2)
                        {
                            foreach (var feature in cluster.Features)
                            {
                                scanData += string.Format("{0},", feature.Scan);
                            }
                            scanData += string.Format("{0}", cluster.MeanScore);

                            writer.WriteLine(scanData);
                        }
                    }
                }
                testResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}.txt",
                                               name,
                                               now.Year,
                                               now.Month,
                                               now.Day,
                                               now.Hour,
                                               now.Minute,
                                               now.Second,
                                               resultPath
                                               );
                using (TextWriter writer = File.CreateText(testResultPath))
                {
                    writer.WriteLine("[Data]");
                    writer.WriteLine("{0}", baseline);
                    writer.WriteLine("{0}", features);
                    writer.WriteLine("[Scans]");
                    foreach (var cluster in clusters)
                    {
                        var scanData = "";
                        var data     = "";
                        foreach (var feature in cluster.Features)
                        {
                            scanData += string.Format("{0},", feature.Scan);
                            data     += string.Format("{0},{1},{2},{3},{4},{5}",
                                                      feature.GroupId,
                                                      feature.Id,
                                                      feature.MassMonoisotopic,
                                                      feature.Mz,
                                                      feature.ChargeState,
                                                      feature.Scan);
                            foreach (var spectrum in feature.MSnSpectra)
                            {
                                foreach (var peptide in spectrum.Peptides)
                                {
                                    data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                                }
                            }
                        }
                        writer.WriteLine(scanData + "," + data);
                    }
                    writer.WriteLine("");
                    writer.WriteLine("");
                    writer.WriteLine("[Clusters]");

                    foreach (var cluster in clusters)
                    {
                        writer.WriteLine("cluster id, cluster score");
                        writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
                        writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

                        foreach (var feature in cluster.Features)
                        {
                            var data = string.Format("{0},{1},{2},{3},{4},{5}",
                                                     feature.GroupId,
                                                     feature.Id,
                                                     feature.MassMonoisotopic,
                                                     feature.Mz,
                                                     feature.ChargeState,
                                                     feature.Scan);
                            foreach (var spectrum in feature.MSnSpectra)
                            {
                                foreach (var peptide in spectrum.Peptides)
                                {
                                    data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                                }
                            }
                            writer.WriteLine(data);
                        }
                    }
                }
            }
        }
        /// <summary>
        ///     Finds features
        /// </summary>
        /// <returns></returns>
        public List<UMCLight> FindFeatures(List<MSFeatureLight> msFeatures,
            LcmsFeatureFindingOptions options,
            ISpectraProvider provider)
        {
            var clusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>
            {
                Tolerances =
                    new FeatureTolerances
                    {
                        Mass = options.InstrumentTolerances.Mass,
                        Net = options.MaximumNetRange
                    },
                ScanTolerance = options.MaximumScanRange,
                SpectraProvider = provider
                //TODO: Make sure we have a mass range for XIC's too....
            };

            clusterer.SpectraProvider = provider;

            OnStatus("Starting cluster definition");
            clusterer.Progress += (sender, args) => OnStatus(args.Message);

            var features = clusterer.Cluster(msFeatures);

            var minScan = int.MaxValue;
            var maxScan = int.MinValue;
            foreach (var feature in msFeatures)
            {
                minScan = Math.Min(feature.Scan, minScan);
                maxScan = Math.Max(feature.Scan, maxScan);
            }

            var id = 0;
            var newFeatures = new List<UMCLight>();
            foreach (var feature in features)
            {
                if (feature.MsFeatures.Count < 1)
                    continue;

                feature.Net = Convert.ToDouble(feature.Scan - minScan)/Convert.ToDouble(maxScan - minScan);
                feature.CalculateStatistics(ClusterCentroidRepresentation.Median);
                feature.Net = feature.Net;
                feature.Id = id++;
                newFeatures.Add(feature);
                //Sets the width of the feature to be the width of the peak, not the width of the tails
                var maxAbundance = double.MinValue;
                var maxAbundanceIndex = 0;
                for (var msFeatureIndex = 0; msFeatureIndex < feature.MsFeatures.Count - 1; msFeatureIndex++)
                {
                    var msFeature = feature.MsFeatures[msFeatureIndex];
                    if (msFeature.Abundance > maxAbundance)
                    {
                        maxAbundance = msFeature.Abundance;
                        maxAbundanceIndex = msFeatureIndex;
                    }
                }
                for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex > 0; msFeatureIndex--)
                {
                    if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
                    {
                        feature.ScanStart = feature.MsFeatures[msFeatureIndex].Scan;
                        break;
                    }
                }
                for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex < feature.MsFeatures.Count - 1; msFeatureIndex++)
                {
                    if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
                    {
                        feature.ScanEnd = feature.MsFeatures[msFeatureIndex].Scan;
                        break;
                    }
                }
            }
            return features;
        }
Exemple #17
0
        /// <summary>
        ///     Finds features
        /// </summary>
        /// <returns></returns>
        public List <UMCLight> FindFeatures(List <MSFeatureLight> msFeatures,
                                            LcmsFeatureFindingOptions options, IScanSummaryProvider provider,
                                            IProgress <ProgressData> progress = null)
        {
            if (provider == null)
            {
                throw new ArgumentNullException(nameof(provider));
            }

            var tolerances = new FeatureTolerances
            {
                Mass = options.InstrumentTolerances.Mass,
                Net  = options.MaximumNetRange
            };

            var clusterer = new MsToLcmsFeatures(provider, options);

            // MultiAlignCore.Algorithms.FeatureClustering.MsFeatureTreeClusterer
            //var clusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>
            //{
            //    Tolerances =
            //        new FeatureTolerances
            //        {
            //            Mass = options.InstrumentTolerances.Mass,
            //            Net = options.MaximumNetRange
            //        },
            //    ScanTolerance = options.MaximumScanRange,
            //    SpectraProvider = (InformedProteomicsReader) provider
            //    //TODO: Make sure we have a mass range for XIC's too....
            //};

            //clusterer.SpectraProvider = (InformedProteomicsReader) provider;

            //OnStatus("Starting cluster definition");
            //clusterer.Progress += (sender, args) => OnStatus(args.Message);

            var features = clusterer.Convert(msFeatures, progress);

            var minScan = int.MaxValue;
            var maxScan = int.MinValue;

            foreach (var feature in msFeatures)
            {
                minScan = Math.Min(feature.Scan, minScan);
                maxScan = Math.Max(feature.Scan, maxScan);
            }



            var minScanTime = provider.GetScanSummary(minScan).Time;
            var maxScanTime = provider.GetScanSummary(maxScan).Time;
            var id          = 0;
            var newFeatures = new List <UMCLight>();

            foreach (var feature in features)
            {
                if (feature.MsFeatures.Count < 1)
                {
                    continue;
                }
                feature.Net = (provider.GetScanSummary(feature.Scan).Time - minScanTime) /
                              (maxScanTime - minScanTime);
                feature.CalculateStatistics();
                feature.Id = id++;
                newFeatures.Add(feature);
                //Sets the width of the feature to be the width of the peak, not the width of the tails
                var maxAbundance      = double.MinValue;
                var maxAbundanceIndex = 0;
                for (var msFeatureIndex = 0; msFeatureIndex < feature.MsFeatures.Count - 1; msFeatureIndex++)
                {
                    var msFeature = feature.MsFeatures[msFeatureIndex];
                    if (msFeature.Abundance > maxAbundance)
                    {
                        maxAbundance      = msFeature.Abundance;
                        maxAbundanceIndex = msFeatureIndex;
                    }
                }
                for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex > 0; msFeatureIndex--)
                {
                    if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
                    {
                        feature.ScanStart = feature.MsFeatures[msFeatureIndex].Scan;
                        break;
                    }
                }
                for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex < feature.MsFeatures.Count - 1; msFeatureIndex++)
                {
                    if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
                    {
                        feature.ScanEnd = feature.MsFeatures[msFeatureIndex].Scan;
                        break;
                    }
                }
            }
            return(features);
        }
        public void TestClustering(
            string directory,
            string outputPath,
            FeatureAlignmentType alignmentType,
            LcmsFeatureClusteringAlgorithmType clusterType)
        {
            var matchPath = string.Format("{0}.txt", outputPath);
            var errorPath = string.Format("{0}-errors.txt", outputPath);

            // Loads the supported MultiAlign types
            var supportedTypes = DatasetInformation.SupportedFileTypes;
            var extensions = new List<string>();
            supportedTypes.ForEach(x => extensions.Add("*" + x.Extension));

            // Find our datasets
            var inputFiles = DatasetSearcher.FindDatasets(directory,
                extensions,
                SearchOption.TopDirectoryOnly);
            var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

            // Setup our alignment options
            var alignmentOptions = new AlignmentOptions();
            var spectralOptions = new SpectralOptions
            {
                ComparerType = SpectralComparison.CosineDotProduct,
                Fdr = .01,
                IdScore = 1e-09,
                MzBinSize = .5,
                MzTolerance = .5,
                NetTolerance = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff = .75,
                TopIonPercent = .8
            };

            // Options setup
            var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass = instrumentOptions.Mass + 6,
                Net = instrumentOptions.NetTolerance,
                DriftTime = instrumentOptions.DriftTimeTolerance
            };
            var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange = .002,
                MaximumScanRange = 50
            };

            // Create our algorithms
            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType,
                alignmentOptions.LCMSWarpOptions,
                spectralOptions);
            var clusterer = ClusterFactory.Create(clusterType);
            clusterer.Parameters = new FeatureClusterParameters<UMCLight>
            {
                Tolerances = featureTolerances
            };

            RegisterProgressNotifier(aligner);
            RegisterProgressNotifier(finder);
            RegisterProgressNotifier(clusterer);

            var lcmsFilters = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRange = new FilterRange(50, 300)
            };
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity = 5000,
                ChargeRange = new FilterRange(1, 6),
                ShouldUseChargeFilter = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter = true
            };

            for (var i = 0; i < 1; i++)
            {
                var aligneeDatasets = datasets.Where((t, j) => j != i).ToList();
                PerformMultiAlignAnalysis(datasets[0],
                    aligneeDatasets,
                    featureFindingOptions,
                    msFilterOptions,
                    lcmsFilters,
                    spectralOptions,
                    finder,
                    aligner,
                    clusterer,
                    matchPath,
                    errorPath);
            }
        }
Exemple #19
0
        public void CreateFeatureDatabase(string directoryPath, string databasePath)
        {
            var directory = GetPath(directoryPath);

            databasePath = GetPath(databasePath);

            // Loads the supported MultiAlign types
            var supportedTypes = DatasetLoader.SupportedFileTypes;
            var extensions     = new List <string>();

            supportedTypes.ForEach(x => extensions.Add("*" + x.Extension));

            // Find our datasets
            var datasetLoader = new DatasetLoader();
            var datasets      = datasetLoader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

            // Options setup
            var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass      = instrumentOptions.Mass + 6,
                Net       = instrumentOptions.NetTolerance,
                DriftTime = instrumentOptions.DriftTimeTolerance
            };
            var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange  = .002,
                MaximumScanRange = 50
            };
            var lcmsFilters = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRangeScans = new FilterRange(50, 300)
            };
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity           = 5000,
                ChargeRange                = new FilterRange(1, 6),
                ShouldUseChargeFilter      = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter   = true
            };
            var spectralOptions = new SpectralOptions
            {
                ComparerType      = SpectralComparison.CosineDotProduct,
                Fdr               = .01,
                IdScore           = 1e-09,
                MzBinSize         = .5,
                MzTolerance       = .5,
                NetTolerance      = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff  = .75,
                TopIonPercent     = .8
            };
            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            NHibernateUtil.CreateDatabase(databasePath);
            // Synchronization and IO for serializing all data to the database.
            var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, true);
            var cache     = new FeatureLoader
            {
                Providers = providers
            };

            var datasetId = 0;

            foreach (var dataset in datasets)
            {
                dataset.DatasetId = datasetId++;
                var features = FindFeatures(dataset,
                                            featureFindingOptions,
                                            msFilterOptions,
                                            lcmsFilters,
                                            spectralOptions,
                                            finder);

                cache.CacheFeatures(features);
            }
            providers.DatasetCache.AddAll(datasets);
        }
        public void TestPeptideBands(string directory,
            string matchPath)
        {
            // Loads the supported MultiAlign types
            var supportedTypes = DatasetInformation.SupportedFileTypes;
            var extensions = new List<string>();
            supportedTypes.ForEach(x => extensions.Add("*" + x.Extension));

            // Find our datasets
            var inputFiles = DatasetSearcher.FindDatasets(directory,
                extensions,
                SearchOption.TopDirectoryOnly);
            var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

            // Options setup
            var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass = instrumentOptions.Mass,
                Net = instrumentOptions.NetTolerance,
                DriftTime = instrumentOptions.DriftTimeTolerance
            };

            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity = 5000,
                ChargeRange = new FilterRange(1, 6),
                ShouldUseChargeFilter = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter = true
            };

            var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange = .002,
                MaximumScanRange = 50
            };

            var baselineDataset = datasets[0];

            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);
            var finderFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            var peptideOptions = new SpectralOptions
            {
                ComparerType = SpectralComparison.CosineDotProduct,
                Fdr = .05,
                IdScore = 1e-09,
                MzBinSize = .5,
                MzTolerance = .5,
                NetTolerance = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff = .75,
                TopIonPercent = .8
            };

            var features = new List<MSFeatureLight>();

            // Load the baseline reference set
            using (var rawProviderX = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
            {
                rawProviderX.AddDataFile(baselineDataset.RawPath, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = finderFinder.FindFeatures(msFeatures,
                    featureFindingOptions,
                    rawProviderX);

                LinkPeptidesToFeatures(baselineDataset.SequencePath,
                    baselineFeatures,
                    peptideOptions.Fdr,
                    peptideOptions.IdScore);

                baselineFeatures.ForEach(x => features.AddRange(x.MsFeatures));
                features = features.Where(x => x.HasMsMs()).ToList();
                features = features.OrderBy(x => x.Mz).ToList();

                var peptideList = new List<MSFeatureLight>();
                foreach (var feature in features)
                {
                    foreach (var spectrum in feature.MSnSpectra)
                    {
                        var peptideFound = false;
                        foreach (var peptide in spectrum.Peptides)
                        {
                            peptideList.Add(feature);
                            peptideFound = true;
                            break;
                        }

                        if (peptideFound)
                            break;
                    }
                }

                using (var writer = File.CreateText(matchPath))
                {
                    writer.WriteLine("Charge\tpmz\tscan\tNET\t");
                    foreach (var feature in peptideList)
                    {
                        writer.WriteLine("{0}\t{1}\t{2}\t{3}\t", feature.ChargeState, feature.Mz, feature.Scan,
                            feature.Net);
                    }
                }
            }
        }
        /// <summary>
        ///     Loads baseline data for alignment.
        /// </summary>
        private IList<UMCLight> LoadBaselineData(DatasetInformation baselineInfo,
            MsFeatureFilteringOptions msFilterOptions,
            LcmsFeatureFindingOptions lcmsFindingOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            FeatureDataAccessProviders dataProviders,
            MassTagDatabase database,
            bool shouldUseMassTagDbAsBaseline)
        {
            IList<UMCLight> baselineFeatures = null;

            UpdateStatus("Loading baseline features.");
            if (!shouldUseMassTagDbAsBaseline)
            {
                if (baselineInfo == null)
                {
                    throw new Exception("The baseline dataset was never set.");
                }

                var cache = new FeatureLoader
                {
                    Providers = dataProviders
                };

                RegisterProgressNotifier(cache);

                UpdateStatus("Loading baseline features from " + baselineInfo.DatasetName + " for alignment.");

                baselineFeatures = cache.LoadDataset(baselineInfo,
                    msFilterOptions,
                    lcmsFindingOptions,
                    lcmsFilterOptions);

                cache.CacheFeatures(baselineFeatures);
                if (BaselineFeaturesLoaded != null)
                {
                    BaselineFeaturesLoaded(this,
                        new BaselineFeaturesLoadedEventArgs(baselineInfo, baselineFeatures.ToList()));
                }

                DeRegisterProgressNotifier(cache);
            }
            else
            {
                if (database == null)
                    throw new NullReferenceException(
                        "The mass tag database has to have data in it if it's being used for drift time alignment.");

                UpdateStatus("Setting baseline features for post drift time alignment from mass tag database.");
                var tags = FeatureDataConverters.ConvertToUMC(database.MassTags);

                if (BaselineFeaturesLoaded == null)
                    return tags;

                if (tags != null)
                    BaselineFeaturesLoaded(this, new BaselineFeaturesLoadedEventArgs(null, tags.ToList(), database));
            }
            return baselineFeatures;
        }
Exemple #22
0
        /// <summary>
        ///     Loads baseline data for alignment.
        /// </summary>
        private IList <UMCLight> LoadBaselineData(DatasetInformation baselineInfo,
                                                  MsFeatureFilteringOptions msFilterOptions,
                                                  LcmsFeatureFindingOptions lcmsFindingOptions,
                                                  LcmsFeatureFilteringOptions lcmsFilterOptions,
                                                  DataLoadingOptions dataLoadOptions,
                                                  FeatureDataAccessProviders dataProviders,
                                                  MassTagDatabase database,
                                                  bool shouldUseMassTagDbAsBaseline)
        {
            IList <UMCLight> baselineFeatures = null;

            UpdateStatus("Loading baseline features.");
            if (!shouldUseMassTagDbAsBaseline)
            {
                if (baselineInfo == null)
                {
                    throw new Exception("The baseline dataset was never set.");
                }

                var cache = new FeatureLoader
                {
                    Providers = dataProviders
                };

                RegisterProgressNotifier(cache);

                UpdateStatus("Loading baseline features from " + baselineInfo.DatasetName + " for alignment.");

                baselineFeatures = cache.LoadDataset(baselineInfo,
                                                     msFilterOptions,
                                                     lcmsFindingOptions,
                                                     lcmsFilterOptions,
                                                     dataLoadOptions,
                                                     m_scanSummaryProviderCache,
                                                     this.m_identificationsProvider);

                cache.CacheFeatures(baselineFeatures);
                if (BaselineFeaturesLoaded != null)
                {
                    BaselineFeaturesLoaded(this,
                                           new BaselineFeaturesLoadedEventArgs(baselineInfo, baselineFeatures.ToList()));
                }

                DeRegisterProgressNotifier(cache);
            }
            else
            {
                if (database == null)
                {
                    throw new NullReferenceException(
                              "The mass tag database has to have data in it if it's being used for drift time alignment.");
                }

                UpdateStatus("Setting baseline features for post drift time alignment from mass tag database.");
                var tags = FeatureDataConverters.ConvertToUMC(database.MassTags);

                if (BaselineFeaturesLoaded == null)
                {
                    return(tags);
                }

                if (tags != null)
                {
                    BaselineFeaturesLoaded(this, new BaselineFeaturesLoadedEventArgs(null, tags.ToList(), database));
                }
            }
            return(baselineFeatures);
        }
Exemple #23
0
        public void TestMsFeatureScatterPlot(string path1, string path2, string pngPath)
        {
            // Convert relative paths to absolute paths
            path1 = GetPath(path1);
            path2 = GetPath(path2);
            pngPath = GetPath(pngPath);

            var fiOutput = new FileInfo(pngPath);
            var didirectory = fiOutput.Directory;
            if (didirectory == null)
                throw new DirectoryNotFoundException(pngPath);

            if (!didirectory.Exists)
                didirectory.Create();

            var aligner = new LcmsWarpFeatureAligner();

            var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1);
            var aligneeMs = UmcLoaderFactory.LoadMsFeatureData(path2);
            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            var tolerances = new FeatureTolerances
            {
                FragmentationWindowSize = .5,
                Mass = 13,
                DriftTime = .3,
                Net = .01
            };
            var options = new LcmsFeatureFindingOptions(tolerances);
            options.MaximumNetRange = .002;

            var baseline = finder.FindFeatures(baselineMs, options, null);
            var alignee = finder.FindFeatures(aligneeMs, options, null);
            var alignmentResults = aligner.Align(baseline, alignee);

            var plotModel1 = new PlotModel
            {
                Subtitle = "Interpolated, cartesian axes",
                Title = "HeatMapSeries"
            };

            var palette = OxyPalettes.Hot(200);
            var linearColorAxis1 = new LinearColorAxis
            {
                InvalidNumberColor = OxyColors.Gray,
                Position = AxisPosition.Right,
                Palette = palette
            };
            plotModel1.Axes.Add(linearColorAxis1);

            // linearColorAxis1.

            var linearAxis1 = new LinearAxis {Position = AxisPosition.Bottom};
            plotModel1.Axes.Add(linearAxis1);

            var linearAxis2 = new LinearAxis();
            plotModel1.Axes.Add(linearAxis2);

            var heatMapSeries1 = new HeatMapSeries
            {
                X0 = 0,
                X1 = 1,
                Y0 = 0,
                Y1 = 1,
                FontSize = .2
            };

            var scores = alignmentResults.heatScores;
            var width = scores.GetLength(0);
            var height = scores.GetLength(1);

            heatMapSeries1.Data = new double[width, height];

            var seriesData = heatMapSeries1.Data;
            for (var i = 0; i < width; i++)
            {
                for (var j = 0; j < height; j++)
                {
                    seriesData[i, j] = Convert.ToDouble(scores[i, j]);
                }
            }

            plotModel1.Series.Add(heatMapSeries1);

            var svg = new SvgExporter();
            var svgString = svg.ExportToString(plotModel1);

            var xml = new XmlDocument();
            xml.LoadXml(svgString);
            var x = SvgDocument.Open(xml); // Svg.SvgDocument();
            var bmp = x.Draw();

            bmp.Save(pngPath);

            var heatmap = HeatmapFactory.CreateAlignedHeatmap(alignmentResults.heatScores);
            var netHistogram = HistogramFactory.CreateHistogram(alignmentResults.netErrorHistogram, "NET Error", "NET Error");
            var massHistogram = HistogramFactory.CreateHistogram(alignmentResults.massErrorHistogram, "Mass Error", "Mass Error (ppm)");

            var baseName = Path.Combine(didirectory.FullName, Path.GetFileNameWithoutExtension(fiOutput.Name));

            var encoder = new SvgEncoder();
            PlotImageUtility.SaveImage(heatmap, baseName + "_heatmap.svg", encoder);
            PlotImageUtility.SaveImage(netHistogram, baseName + "_netHistogram.svg", encoder);
            PlotImageUtility.SaveImage(massHistogram, baseName + "_massHistogram.svg", encoder);
        }
Exemple #24
0
        public void TestLcmsWarpAlignment(string path1, string path2, string svgPath)
        {
            // Convert relative paths to absolute paths
            path1 = GetPath(path1);
            path2 = GetPath(path2);
            svgPath = GetPath(HEATMAP_RESULTS_FOLDER_BASE + svgPath);

            var aligner = new LcmsWarpFeatureAligner();

            var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1);
            var aligneeMs = UmcLoaderFactory.LoadMsFeatureData(path2);
            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            var tolerances = new FeatureTolerances
            {
                FragmentationWindowSize = .5,
                Mass = 13,
                DriftTime = .3,
                Net = .01
            };
            var options = new LcmsFeatureFindingOptions(tolerances)
            {
                MaximumNetRange = .002
            };

            var baseline = finder.FindFeatures(baselineMs, options, null);
            var alignee = finder.FindFeatures(aligneeMs, options, null);
            var data = aligner.Align(baseline, alignee);

            var plotModel1 = new PlotModel
            {
                Subtitle = "Interpolated, cartesian axes",
                Title = "HeatMapSeries"
            };

            var palette = OxyPalettes.Hot(200);
            var linearColorAxis1 = new LinearColorAxis
            {
                InvalidNumberColor = OxyColors.Gray,
                Position = AxisPosition.Right,
                Palette = palette
            };
            plotModel1.Axes.Add(linearColorAxis1);

            // linearColorAxis1.

            var linearAxis1 = new LinearAxis {Position = AxisPosition.Bottom};
            plotModel1.Axes.Add(linearAxis1);

            var linearAxis2 = new LinearAxis();
            plotModel1.Axes.Add(linearAxis2);

            var heatMapSeries1 = new HeatMapSeries
            {
                X0 = 0,
                X1 = 1,
                Y0 = 0,
                Y1 = 1,
                FontSize = .2
            };

            var scores = data.heatScores;
            var width = scores.GetLength(0);
            var height = scores.GetLength(1);

            heatMapSeries1.Data = new double[width, height];

            for (var i = 0; i < width; i++)
            {
                for (var j = 0; j < height; j++)
                {
                    heatMapSeries1.Data[i, j] = Convert.ToDouble(scores[i, j]);
                }
            }

            plotModel1.Series.Add(heatMapSeries1);

            var svg = new SvgExporter();
            var svgString = svg.ExportToString(plotModel1);
            using (var writer = File.CreateText(svgPath + ".svg"))
            {
                writer.Write(svgString);
            }
        }
        public void TestUmcFeaturesMultipleCharges(string path, string rawPath, int maxScanDiff)
        {
            var reader = new MsFeatureLightFileReader {Delimeter = ","};
            var newMsFeatures = reader.ReadFile(path);
            var finder = new UmcTreeFeatureFinder();
            var featureTolerances = new FeatureTolerances
            {
                Mass = 12,
                Net = .05
            };
            var options = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange = .002,
                MaximumScanRange = 50
            };

            var provider = RawLoaderFactory.CreateFileReader(rawPath);
            provider.AddDataFile(rawPath, 0);

            var start = DateTime.Now;
            IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
            var end = DateTime.Now;
            Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);

            if (features == null)
                throw new NullReferenceException("The feature list came back empty.  This is a problem.");

            var dirPath = Path.GetDirectoryName(path);
            if (dirPath != null)
                using (
                    var writer =
                        File.CreateText(Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv")))
                    )
                {
                    foreach (var feature in features)
                    {
                        writer.WriteLine();
                        writer.WriteLine("Feature {0}", feature.Id);
                        var chargeMap = feature.CreateChargeMap();

                        if (chargeMap.Keys.Count < 2)
                            continue;

                        foreach (var charge in chargeMap.Keys)
                        {
                            writer.WriteLine();
                            foreach (var msFeature in chargeMap[charge])
                            {
                                var count = msFeature.MSnSpectra.Count;
                                writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                                    msFeature.Abundance, count);
                            }
                        }

                        var charges = chargeMap.Keys.ToList();

                        for (var i = 0; i < charges.Count; i++)
                        {
                            for (var j = i; j < charges.Count; j++)
                            {
                                var x = chargeMap[charges[i]];
                                var y = chargeMap[charges[j]];

                                var diff = x.MinScan() - y.MinScan();
                                if (diff > maxScanDiff)
                                {
                                    throw new Exception(
                                        "There is a problem with the feature finder across charge states");
                                }
                            }
                        }
                    }
                }

            // Work on total feature count here.
            Assert.Greater(features.Count(), 0);
        }
Exemple #26
0
        public void TestUmcFeatures(string relativePath, string relativeRawPath)
        {
            // Get absolute paths
            var path    = GetPath(relativePath);
            var rawPath = GetPath(relativeRawPath);

            var reader = new MsFeatureLightFileReader {
                Delimiter = ','
            };
            var newMsFeatures     = reader.ReadFile(path);
            var finder            = new UmcTreeFeatureFinder();
            var featureTolerances = new FeatureTolerances
            {
                Mass = 12,
                Net  = .04
            };
            var options = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange  = .003,
                MaximumScanRange = 50
            };


            var provider = RawLoaderFactory.CreateFileReader(rawPath, 0);

            var start = DateTime.Now;
            IEnumerable <UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
            var end = DateTime.Now;

            Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);


            if (features == null)
            {
                throw new NullReferenceException("The feature list came back empty.  This is a problem.");
            }


            var dirPath = Path.GetDirectoryName(path);

            if (dirPath != null)
            {
                using (
                    var writer =
                        File.CreateText(Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv")))
                    )
                {
                    foreach (var feature in features)
                    {
                        writer.WriteLine();
                        writer.WriteLine("Feature {0}", feature.Id);
                        var chargeMap = feature.CreateChargeMap();
                        foreach (var charge in chargeMap.Keys)
                        {
                            writer.WriteLine();
                            foreach (var msFeature in chargeMap[charge])
                            {
                                var count = msFeature.MSnSpectra.Count;
                                writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                                                 msFeature.Abundance, count);
                            }
                        }
                    }
                }
            }

            // Work on total feature count here.
            Assert.Greater(features.Count(), 0);
        }
Exemple #27
0
        /// <summary>
        ///     Load a single dataset from the provider.
        /// </summary>
        /// <returns></returns>
        public IList<UMCLight> LoadDataset(DatasetInformation dataset,
            MsFeatureFilteringOptions msFilteringOptions,
            LcmsFeatureFindingOptions lcmsFindingOptions,
            LcmsFeatureFilteringOptions lcmsFilteringOptions)
        {
            UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
            var datasetId = dataset.DatasetId;
            var features = UmcLoaderFactory.LoadUmcFeatureData(dataset.Features.Path, dataset.DatasetId,
                Providers.FeatureCache);

            UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId,
                dataset.DatasetName));
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
            var scansInfo = UmcLoaderFactory.LoadScanSummaries(dataset.Scans.Path);
            dataset.BuildScanTimes(scansInfo);

            var msnSpectra = new List<MSSpectra>();

            // If we don't have any features, then we have to create some from the MS features
            // provided to us.
            if (features.Count < 1)
            {
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
                msFeatures = Filter(msFeatures, ref dataset);

                features = CreateLcmsFeatures(dataset,
                    msFeatures,
                    lcmsFindingOptions,
                    lcmsFilteringOptions);

                //var maxScan = Convert.ToDouble(features.Max(feature => feature.Scan));
                //var minScan = Convert.ToDouble(features.Min(feature => feature.Scan));
                var maxScan = features.Max(feature => feature.Scan);
                var minScan = features.Min(feature => feature.Scan);
                var id = 0;
                var scanTimes = dataset.ScanTimes;

                foreach (var feature in features)
                {
                    feature.Id = id++;
                    //feature.Net = (Convert.ToDouble(feature.Scan) - minScan) / (maxScan - minScan);
                    feature.Net = (Convert.ToDouble(scanTimes[feature.Scan]) - scanTimes[minScan]) / (scanTimes[maxScan] - scanTimes[minScan]);
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                    feature.NetAligned = feature.Net;
                    feature.GroupId = datasetId;
                    feature.SpectralCount = feature.MsFeatures.Count;

                    foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                    {
                        msFeature.UmcId = feature.Id;
                        msFeature.GroupId = datasetId;
                        msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                        msnSpectra.AddRange(msFeature.MSnSpectra);
                    }
                }
            }
            else
            {
                if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
                {
                    var i = 0;
                    foreach (var feature in features)
                    {
                        feature.GroupId = datasetId;
                        feature.Id = i++;
                    }
                }

                // Otherwise, we need to map the MS features to the LCMS Features provided.
                // This would mean that we extracted data from an existing database.
                if (msFeatures.Count > 0)
                {
                    var map = FeatureDataConverters.MapFeature(features);
                    foreach (var feature in
                        from feature in msFeatures
                        let doesFeatureExists = map.ContainsKey(feature.UmcId)
                        where doesFeatureExists
                        select feature)
                    {
                        map[feature.UmcId].AddChildFeature(feature);
                    }
                }
            }

            // Process the MS/MS data with peptides
            UpdateStatus("Reading List of Peptides");
            var sequenceProvider = PeptideReaderFactory.CreateReader(dataset.SequencePath);
            if (sequenceProvider != null)
            {
                UpdateStatus("Reading List of Peptides");
                var peptides = sequenceProvider.Read(dataset.SequencePath);
                var count = 0;
                var peptideList = peptides.ToList();
                peptideList.ForEach(x => x.Id = count++);

                UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");

                var linker = new PeptideMsMsLinker();
                linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
            }
            return features;
        }
Exemple #28
0
        /// <summary>
        /// Make sure the value for options.MaximumScanRange, which is used by the Feature Finder, 
        /// is at least as large as the filterOptions.FeatureLengthRange.Maximum value, 
        /// which is used for filtering the features by length
        /// </summary>
        /// <param name="information"></param>
        /// <param name="options"></param>
        /// <param name="filterOptions"></param>
        private static void ValidateFeatureFinderMaxScanLength(
            DatasetInformation information,
            LcmsFeatureFindingOptions options,
            LcmsFeatureFilteringOptions filterOptions)
        {
            if (!filterOptions.TreatAsTimeNotScan)
            {
                if (options.MaximumScanRange < filterOptions.FeatureLengthRange.Maximum)
                {
                    // Bump up the scan range used by the LCMS Feature Finder to allow for longer featuers
                    options.MaximumScanRange = (int)filterOptions.FeatureLengthRange.Maximum;
                }
                return;
            }

            int maxScanLength;

            if (information.ScanTimes.Count == 0)
            {
                // FeatureLengthRange.Maximum is in minutes
                // Assume 3 scans/second (ballpark estimate)
                maxScanLength = (int)filterOptions.FeatureLengthRange.Maximum * 60 * 3;
            }
            else
            {
                // Find the average number of scans that spans FeatureLengthRange.Maximum minutes

                // Step through the dictionary to find the average number of scans per minute
                var minuteThreshold = 1;
                var scanCountCurrent = 0;
                var scanCountsPerMinute = new List<int>();

                foreach (var entry in information.ScanTimes)
                {
                    if (entry.Value < minuteThreshold)
                    {
                        scanCountCurrent++;
                    }
                    else
                    {
                        if (scanCountCurrent > 0)
                        {
                            scanCountsPerMinute.Add(scanCountCurrent);
                        }
                        scanCountCurrent = 0;
                        minuteThreshold++;
                    }
                }

                int averageScansPerMinute;
                if (scanCountsPerMinute.Count > 0)
                {
                    averageScansPerMinute = (int)scanCountsPerMinute.Average();
                }
                else
                {
                    averageScansPerMinute = 180;
                }

                maxScanLength = (int)(filterOptions.FeatureLengthRange.Maximum * averageScansPerMinute * 1.25);
            }

            if (options.MaximumScanRange < maxScanLength)
            {
                // Bump up the scan range used by the LCMS Feature Finder to allow for longer featuers
                options.MaximumScanRange = maxScanLength;
            }
        }
Exemple #29
0
        public void TestPeptideBands(string directory,
                                     string matchPath)
        {
            // Loads the supported MultiAlign types
            var supportedTypes = DatasetLoader.SupportedFileTypes;
            var extensions     = new List <string>();

            supportedTypes.ForEach(x => extensions.Add("*" + x.Extension));

            // Find our datasets
            var datasetLoader = new DatasetLoader();
            var datasets      = datasetLoader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

            // Options setup
            var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass      = instrumentOptions.Mass,
                Net       = instrumentOptions.NetTolerance,
                DriftTime = instrumentOptions.DriftTimeTolerance
            };

            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity           = 5000,
                ChargeRange                = new FilterRange(1, 6),
                ShouldUseChargeFilter      = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter   = true
            };

            var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange  = .002,
                MaximumScanRange = 50
            };

            var baselineDataset = datasets[0];

            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);

            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);
            var finderFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            var peptideOptions = new SpectralOptions
            {
                ComparerType      = SpectralComparison.CosineDotProduct,
                Fdr               = .05,
                IdScore           = 1e-09,
                MzBinSize         = .5,
                MzTolerance       = .5,
                NetTolerance      = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff  = .75,
                TopIonPercent     = .8
            };

            var features = new List <MSFeatureLight>();

            // Load the baseline reference set
            using (var rawProviderX = RawLoaderFactory.CreateFileReader(baselineDataset.RawFile.Path))
            {
                rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = finderFinder.FindFeatures(msFeatures,
                                                                 featureFindingOptions,
                                                                 rawProviderX);

                LinkPeptidesToFeatures(baselineDataset.Sequence.Path,
                                       baselineFeatures,
                                       peptideOptions.Fdr,
                                       peptideOptions.IdScore);

                baselineFeatures.ForEach(x => features.AddRange(x.MsFeatures));
                features = features.Where(x => x.HasMsMs()).ToList();
                features = features.OrderBy(x => x.Mz).ToList();

                var peptideList = new List <MSFeatureLight>();
                foreach (var feature in features)
                {
                    foreach (var spectrum in feature.MSnSpectra)
                    {
                        var peptideFound = false;
                        foreach (var peptide in spectrum.Peptides)
                        {
                            peptideList.Add(feature);
                            peptideFound = true;
                            break;
                        }

                        if (peptideFound)
                        {
                            break;
                        }
                    }
                }

                using (var writer = File.CreateText(matchPath))
                {
                    writer.WriteLine("Charge\tpmz\tscan\tNET\t");
                    foreach (var feature in peptideList)
                    {
                        writer.WriteLine("{0}\t{1}\t{2}\t{3}\t", feature.ChargeState, feature.Mz, feature.Scan,
                                         feature.Net);
                    }
                }
            }
        }
Exemple #30
0
        public void ClusterMsMs(string name,
            string resultPath,
            string sequencePath,
            SequenceFileType type,
            string baseline,
            string features,
            double percent)
        {
            var baselineRaw = baseline.Replace("_isos.csv", ".raw");
            var featuresRaw = features.Replace("_isos.csv", ".raw");

            Console.WriteLine("Create Baseline Information");

            var baselineInfo = new DatasetInformation
            {
                DatasetId = 0,
                Features = new InputFile {Path = baseline},
                Raw = new InputFile {Path = baselineRaw},
                Sequence = new InputFile {Path = sequencePath}
            };

            Console.WriteLine("Create Alignee Information");
            var aligneeInfo = new DatasetInformation
            {
                DatasetId = 1,
                Features = new InputFile {Path = features},
                Raw = new InputFile {Path = featuresRaw},
                Sequence = new InputFile {Path = sequencePath}
            };

            var reader = new MsFeatureLightFileReader();

            Console.WriteLine("Reading Baseline Features");
            var baselineMsFeatures = reader.ReadFile(baseline).ToList();
            baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

            Console.WriteLine("Reading Alignee Features");
            var aligneeMsFeatures = reader.ReadFile(features).ToList();
            aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var tolerances = new FeatureTolerances
            {
                Mass = 8,
                Net = .005
            };
            var options = new LcmsFeatureFindingOptions(tolerances);

            Console.WriteLine("Detecting Baseline Features");
            var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

            Console.WriteLine("Detecting Alignee Features");
            var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

            Console.WriteLine("Managing baseline and alignee features");
            baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
            aligneeFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

            Console.WriteLine("Clustering MS/MS Spectra");
            var clusterer = new MSMSClusterer();
            clusterer.MzTolerance = .5;
            clusterer.MassTolerance = 6;
            clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
            {
                TopPercent = percent
            };
            clusterer.SimilarityTolerance = .5;
            clusterer.ScanRange = 905;
            clusterer.Progress += clusterer_Progress;

            var allFeatures = new List<UMCLight>();
            allFeatures.AddRange(baselineFeatures);
            allFeatures.AddRange(aligneeFeatures);

            List<MsmsCluster> clusters = null;
            using (var rawReader = new ThermoRawDataFileReader())
            {
                rawReader.AddDataFile(baselineInfo.Raw.Path, baselineInfo.DatasetId);
                rawReader.AddDataFile(aligneeInfo.Raw.Path, aligneeInfo.DatasetId);

                clusters = clusterer.Cluster(allFeatures, rawReader);
                Console.WriteLine("Found {0} Total Clusters", clusters.Count);
            }

            if (clusters != null)
            {
                var now = DateTime.Now;
                var testResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}_scans.txt",
                    name,
                    now.Year,
                    now.Month,
                    now.Day,
                    now.Hour,
                    now.Minute,
                    now.Second,
                    resultPath
                    );
                using (TextWriter writer = File.CreateText(testResultPath))
                {
                    writer.WriteLine("[Data]");
                    writer.WriteLine("{0}", baseline);
                    writer.WriteLine("{0}", features);
                    writer.WriteLine("[Scans]");
                    writer.WriteLine();
                    foreach (var cluster in clusters)
                    {
                        var scanData = "";
                        if (cluster.Features.Count == 2)
                        {
                            foreach (var feature in cluster.Features)
                            {
                                scanData += string.Format("{0},", feature.Scan);
                            }
                            scanData += string.Format("{0}", cluster.MeanScore);

                            writer.WriteLine(scanData);
                        }
                    }
                }
                testResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}.txt",
                    name,
                    now.Year,
                    now.Month,
                    now.Day,
                    now.Hour,
                    now.Minute,
                    now.Second,
                    resultPath
                    );
                using (TextWriter writer = File.CreateText(testResultPath))
                {
                    writer.WriteLine("[Data]");
                    writer.WriteLine("{0}", baseline);
                    writer.WriteLine("{0}", features);
                    writer.WriteLine("[Scans]");
                    foreach (var cluster in clusters)
                    {
                        var scanData = "";
                        var data = "";
                        foreach (var feature in cluster.Features)
                        {
                            scanData += string.Format("{0},", feature.Scan);
                            data += string.Format("{0},{1},{2},{3},{4},{5}",
                                feature.GroupId,
                                feature.Id,
                                feature.MassMonoisotopic,
                                feature.Mz,
                                feature.ChargeState,
                                feature.Scan);
                            foreach (var spectrum in feature.MSnSpectra)
                            {
                                foreach (var peptide in spectrum.Peptides)
                                {
                                    data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                                }
                            }
                        }
                        writer.WriteLine(scanData + "," + data);
                    }
                    writer.WriteLine("");
                    writer.WriteLine("");
                    writer.WriteLine("[Clusters]");

                    foreach (var cluster in clusters)
                    {
                        writer.WriteLine("cluster id, cluster score");
                        writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
                        writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

                        foreach (var feature in cluster.Features)
                        {
                            var data = string.Format("{0},{1},{2},{3},{4},{5}",
                                feature.GroupId,
                                feature.Id,
                                feature.MassMonoisotopic,
                                feature.Mz,
                                feature.ChargeState,
                                feature.Scan);
                            foreach (var spectrum in feature.MSnSpectra)
                            {
                                foreach (var peptide in spectrum.Peptides)
                                {
                                    data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                                }
                            }
                            writer.WriteLine(data);
                        }
                    }
                }
            }
        }
        /// <summary>
        ///     Runs the MultiAlign analysis
        /// </summary>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
            IEnumerable<DatasetInformation> aligneeDatasets,
            LcmsFeatureFindingOptions featureFindingOptions,
            MsFeatureFilteringOptions msFilterOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            SpectralOptions peptideOptions,
            IFeatureFinder featureFinder,
            IFeatureAligner<IEnumerable<UMCLight>,
            IEnumerable<UMCLight>,
            classAlignmentData> aligner,
            IClusterer<UMCLight, UMCClusterLight> clusterer,
            string matchPath,
            string errorPath)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
            {
                rawProviderX.AddDataFile(baselineDataset.RawPath, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                    featureFindingOptions,
                    rawProviderX);
                LinkPeptidesToFeatures(baselineDataset.SequencePath, baselineFeatures, peptideOptions.Fdr,
                    peptideOptions.IdScore);

                var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                // Then load the alignee dataset
                foreach (var dataset in aligneeDatasets)
                {
                    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                    aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                    using (var rawProviderY = RawLoaderFactory.CreateFileReader(dataset.RawPath))
                    {
                        rawProviderY.AddDataFile(dataset.RawPath, 0);

                        UpdateStatus("Finding alignee features");
                        var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                            featureFindingOptions,
                            rawProviderY);
                        LinkPeptidesToFeatures(dataset.SequencePath, aligneeFeatures, peptideOptions.Fdr,
                            peptideOptions.IdScore);

                        var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                        // cluster before we do anything else....
                        var allFeatures = new List<UMCLight>();
                        allFeatures.AddRange(baselineFeatures);
                        allFeatures.AddRange(aligneeFeatures);
                        foreach (var feature in allFeatures)
                        {
                            feature.Net = feature.Net;
                            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        }

                        // This tells us the differences before we align.
                        var clusters = clusterer.Cluster(allFeatures);
                        var preAlignment = AnalyzeClusters(clusters);

                        aligner.AligneeSpectraProvider = providerY;
                        aligner.BaselineSpectraProvider = providerX;

                        UpdateStatus("Aligning data");
                        // Aligner data
                        var data = aligner.Align(baselineFeatures, aligneeFeatures);
                        var matches = data.Matches;

                        WriteErrors(errorPath, matches);

                        // create anchor points for LCMSWarp alignment
                        var massPoints = new List<RegressionPoint>();
                        var netPoints = new List<RegressionPoint>();
                        foreach (var match in matches)
                        {
                            var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz,
                                match.AnchorPointY.Mz);
                            var netError = match.AnchorPointX.Net - match.AnchorPointY.Net;
                            var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                            massPoints.Add(massPoint);

                            var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                            netPoints.Add(netPoint);
                        }

                        foreach (var feature in allFeatures)
                        {
                            feature.UmcCluster = null;
                            feature.ClusterId = -1;
                        }
                        // Then cluster after alignment!
                        UpdateStatus("clustering data");
                        clusters = clusterer.Cluster(allFeatures);
                        var postAlignment = AnalyzeClusters(clusters);

                        UpdateStatus("Note\tSame\tDifferent");
                        UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                            preAlignment.DifferentCluster));
                        UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                            postAlignment.DifferentCluster));

                        SaveMatches(matchPath, matches);
                    }
                }
            }

            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(featureFinder);
            DeRegisterProgressNotifier(clusterer);
        }
Exemple #32
0
        /// <summary>
        ///     Load a single dataset from the provider.
        /// </summary>
        /// <returns></returns>
        public IList <UMCLight> LoadDataset(DatasetInformation dataset,
                                            MsFeatureFilteringOptions msFilteringOptions,
                                            LcmsFeatureFindingOptions lcmsFindingOptions,
                                            LcmsFeatureFilteringOptions lcmsFilteringOptions,
                                            DataLoadingOptions dataLoadOptions,
                                            ScanSummaryProviderCache providerCache,
                                            IdentificationProviderCache identificationProviders,
                                            IProgress <ProgressData> progress = null)
        {
            var progData = new ProgressData(progress);
            IScanSummaryProvider provider = null;

            if (!string.IsNullOrWhiteSpace(dataset.RawFile.Path))
            {
                UpdateStatus("Using raw data to create better features.");
                provider = providerCache.GetScanSummaryProvider(dataset.RawFile.Path, dataset.DatasetId);
            }

            progData.StepRange(1);
            progData.Status = "Looking for existing features in the database.";
            UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
            var datasetId     = dataset.DatasetId;
            var features      = UmcLoaderFactory.LoadUmcFeatureData(dataset, Providers.FeatureCache, provider);
            var hasMsFeatures = features.Any(f => f.MsFeatures.Any());

            var msFeatures = new List <MSFeatureLight>();

            if (!hasMsFeatures)
            {
                progData.StepRange(2);
                progData.Status = "Loading MS Feature Data.";
                UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId,
                                           dataset.DatasetName));

                var isosFilterOptions = dataLoadOptions.GetIsosFilterOptions();
                msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path, isosFilterOptions);
            }

            progData.StepRange(3);
            progData.Status = "Loading scan summaries.";
            ////var scansInfo = UmcLoaderFactory.LoadScanSummaries(dataset.Scans.Path);
            ////dataset.BuildScanTimes(scansInfo);

            progData.StepRange(100);

            var msnSpectra = new List <MSSpectra>();

            // If we don't have any features, then we have to create some from the MS features
            // provided to us.
            if (features.Count < 1)
            {
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
                msFeatures = Filter(msFeatures, provider, ref dataset);

                progData.Status = "Creating LCMS features.";
                features        = CreateLcmsFeatures(dataset,
                                                     msFeatures,
                                                     lcmsFindingOptions,
                                                     lcmsFilteringOptions,
                                                     provider,
                                                     new Progress <ProgressData>(pd => progData.Report(pd.Percent)));

                //var maxScan = Convert.ToDouble(features.Max(feature => feature.Scan));
                //var minScan = Convert.ToDouble(features.Min(feature => feature.Scan));
                var maxScan   = features.Max(feature => feature.Scan);
                var minScan   = features.Min(feature => feature.Scan);
                var id        = 0;
                var scanTimes = dataset.ScanTimes;

                foreach (var feature in features)
                {
                    feature.Id = id++;
                    //feature.Net = (Convert.ToDouble(feature.Scan) - minScan) / (maxScan - minScan);
                    feature.Net = (Convert.ToDouble(scanTimes[feature.Scan]) - scanTimes[minScan]) / (scanTimes[maxScan] - scanTimes[minScan]);
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                    feature.NetAligned    = feature.Net;
                    feature.GroupId       = datasetId;
                    feature.SpectralCount = feature.MsFeatures.Count;

                    foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                    {
                        msFeature.UmcId   = feature.Id;
                        msFeature.GroupId = datasetId;
                        msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                        msnSpectra.AddRange(msFeature.MSnSpectra);
                    }
                }
            }
            else
            {
                if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
                {
                    var i = 0;
                    foreach (var feature in features)
                    {
                        feature.GroupId = datasetId;
                        feature.Id      = i++;
                    }
                }

                // Otherwise, we need to map the MS features to the LCMS Features provided.
                // This would mean that we extracted data from an existing database.
                if (msFeatures.Count > 0)
                {
                    var map = FeatureDataConverters.MapFeature(features);
                    foreach (var feature in
                             from feature in msFeatures
                             let doesFeatureExists = map.ContainsKey(feature.UmcId)
                                                     where doesFeatureExists
                                                     select feature)
                    {
                        map[feature.UmcId].AddChildFeature(feature);
                    }
                }
            }

            //if (provider is ISpectraProvider)
            //{
            //    var spectraProvider = provider as ISpectraProvider;
            //    UmcLoaderFactory.LoadMsMs(features.ToList(), spectraProvider);
            //}

            // Process the MS/MS data with peptides
            UpdateStatus("Reading List of Peptides");
            if (dataset.SequenceFile != null && !string.IsNullOrEmpty(dataset.SequenceFile.Path))
            {
                UpdateStatus("Reading List of Peptides");
                var idProvider  = identificationProviders.GetProvider(dataset.SequenceFile.Path, dataset.DatasetId);
                var peptideList = idProvider.GetAllIdentifications();

                UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");

                var linker = new PeptideMsMsLinker();
                linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
            }

            progData.Report(100);

            return(features);
        }
        /// <summary>
        ///  Finds features given a dataset
        /// </summary>
        private IList<UMCLight> FindFeatures(  DatasetInformation               information,
            LcmsFeatureFindingOptions   featureFindingOptions,
            MsFeatureFilteringOptions   msFilterOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            SpectralOptions             peptideOptions,
            IFeatureFinder              featureFinder)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures  = UmcLoaderFactory.LoadMsFeatureData(information.Features.Path);
            msFeatures      = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX  = RawLoaderFactory.CreateFileReader(information.RawPath))
            {
                rawProviderX.AddDataFile(information.RawPath, 0);
                UpdateStatus("Creating LCMS Features.");
                var features    = featureFinder.FindFeatures(msFeatures,
                                                             featureFindingOptions,
                                                             rawProviderX);
                features        = LcmsFeatureFilters.FilterFeatures(features, lcmsFilterOptions);

                var datasetId = information.DatasetId;
                foreach (var feature in features)
                {
                    var lightEntry = new List<MSFeatureLight>();
                    feature.GroupId = datasetId;
                    foreach (var msFeature in feature.MsFeatures)
                    {
                        msFeature.GroupId = datasetId;
                        foreach (var msmsFeature in msFeature.MSnSpectra)
                        {
                            msmsFeature.GroupId = datasetId;
                            foreach (var peptide in msmsFeature.Peptides)
                            {
                                peptide.GroupId = datasetId;
                            }

                        }

                        if (msFeature.MSnSpectra.Count > 0)
                            lightEntry.Add(msFeature);
                    }

                    // We are doing this so that we dont have a ton of MS features in the database
                    feature.MsFeatures.Clear();
                    feature.MsFeatures.AddRange(lightEntry);
                }

                LinkPeptidesToFeatures(information.SequencePath,
                                        features,
                                        peptideOptions.Fdr,
                                        peptideOptions.IdScore);

                DeRegisterProgressNotifier(featureFinder);
                return features;
            }
        }
        public IEnumerable<UMCLight> TestUmcFeatures(string path)
        {
            var reader = new MsFeatureLightFileReader {Delimeter = ","};
            var newMsFeatures = reader.ReadFile(path);

            var finder = new UmcTreeFeatureFinder
            {
                MaximumNet = .005,
                MaximumScan = 50
            };
            var tolerances = new FeatureTolerances
            {
                Mass = 8,
                Net = .005
            };
            var options = new LcmsFeatureFindingOptions(tolerances);
            var features = finder.FindFeatures(newMsFeatures.ToList(), options, null);

            // Work on total feature count here.
            Assert.Greater(features.Count, 0);

            return features;
        }
        public void CreateFeatureDatabase(string directoryPath, string databasePath)
        {
            var directory  = GetPath(directoryPath);
            databasePath   = GetPath(databasePath);

            // Loads the supported MultiAlign types
            var supportedTypes = DatasetInformation.SupportedFileTypes;
            var extensions = new List<string>();
            supportedTypes.ForEach(x => extensions.Add("*" + x.Extension));

            // Find our datasets
            var inputFiles = DatasetSearcher.FindDatasets(directory,
                extensions,
                SearchOption.TopDirectoryOnly);
            var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

            // Options setup
            var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass = instrumentOptions.Mass + 6,
                Net = instrumentOptions.NetTolerance,
                DriftTime = instrumentOptions.DriftTimeTolerance
            };
            var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange = .002,
                MaximumScanRange = 50
            };
            var lcmsFilters = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRange = new FilterRange(50, 300)
            };
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity = 5000,
                ChargeRange = new FilterRange(1, 6),
                ShouldUseChargeFilter = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter = true
            };
            var spectralOptions = new SpectralOptions
            {
                ComparerType = SpectralComparison.CosineDotProduct,
                Fdr = .01,
                IdScore = 1e-09,
                MzBinSize = .5,
                MzTolerance = .5,
                NetTolerance = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff = .75,
                TopIonPercent = .8
            };
            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            NHibernateUtil.CreateDatabase(databasePath);
            // Synchronization and IO for serializing all data to the database.
            var providers   = DataAccessFactory.CreateDataAccessProviders(databasePath, true);
            var cache       = new FeatureLoader
            {
               Providers = providers
            };

            var datasetId = 0;
            foreach(var dataset in datasets)
            {
                dataset.DatasetId = datasetId++;
                var features = FindFeatures(dataset,
                                            featureFindingOptions,
                                            msFilterOptions,
                                            lcmsFilters,
                                            spectralOptions,
                                            finder);

                cache.CacheFeatures(features);
            }
            providers.DatasetCache.AddAll(datasets);
        }
Exemple #36
0
        public void TestMsFeatureScatterPlot(string path1, string path2, string pngPath)
        {
            // Convert relative paths to absolute paths
            path1   = GetPath(path1);
            path2   = GetPath(path2);
            pngPath = GetPath(pngPath);

            var fiOutput    = new FileInfo(pngPath);
            var didirectory = fiOutput.Directory;

            if (didirectory == null)
            {
                throw new DirectoryNotFoundException(pngPath);
            }

            if (!didirectory.Exists)
            {
                didirectory.Create();
            }

            var aligner           = new LcmsWarpFeatureAligner(new LcmsWarpAlignmentOptions());
            var isosFilterOptions = new DeconToolsIsosFilterOptions();

            var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1, isosFilterOptions);
            var aligneeMs  = UmcLoaderFactory.LoadMsFeatureData(path2, isosFilterOptions);
            var finder     = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            var tolerances = new FeatureTolerances
            {
                FragmentationWindowSize = .5,
                Mass      = 13,
                DriftTime = .3,
                Net       = .01
            };
            var options = new LcmsFeatureFindingOptions(tolerances);

            options.MaximumNetRange = .002;

            var baseline         = finder.FindFeatures(baselineMs, options, null);
            var alignee          = finder.FindFeatures(aligneeMs, options, null);
            var alignmentResults = aligner.Align(baseline, alignee);

            var plotModel1 = new PlotModel
            {
                Subtitle = "Interpolated, cartesian axes",
                Title    = "HeatMapSeries"
            };

            var palette          = OxyPalettes.Hot(200);
            var linearColorAxis1 = new LinearColorAxis
            {
                InvalidNumberColor = OxyColors.Gray,
                Position           = AxisPosition.Right,
                Palette            = palette
            };

            plotModel1.Axes.Add(linearColorAxis1);


            // linearColorAxis1.

            var linearAxis1 = new LinearAxis {
                Position = AxisPosition.Bottom
            };

            plotModel1.Axes.Add(linearAxis1);

            var linearAxis2 = new LinearAxis();

            plotModel1.Axes.Add(linearAxis2);

            var heatMapSeries1 = new HeatMapSeries
            {
                X0       = 0,
                X1       = 1,
                Y0       = 0,
                Y1       = 1,
                FontSize = .2
            };

            var scores = alignmentResults.HeatScores;
            var width  = scores.GetLength(0);
            var height = scores.GetLength(1);

            heatMapSeries1.Data = new double[width, height];

            var seriesData = heatMapSeries1.Data;

            for (var i = 0; i < width; i++)
            {
                for (var j = 0; j < height; j++)
                {
                    seriesData[i, j] = Convert.ToDouble(scores[i, j]);
                }
            }

            plotModel1.Series.Add(heatMapSeries1);

            var svg       = new SvgExporter();
            var svgString = svg.ExportToString(plotModel1);

            var xml = new XmlDocument();

            xml.LoadXml(svgString);
            var x   = SvgDocument.Open(xml); // Svg.SvgDocument();
            var bmp = x.Draw();

            bmp.Save(pngPath);


            var heatmap       = HeatmapFactory.CreateAlignedHeatmap(alignmentResults.HeatScores, false);
            var netHistogram  = HistogramFactory.CreateHistogram(alignmentResults.NetErrorHistogram, "NET Error", "NET Error");
            var massHistogram = HistogramFactory.CreateHistogram(alignmentResults.MassErrorHistogram, "Mass Error", "Mass Error (ppm)");

            var baseName = Path.Combine(didirectory.FullName, Path.GetFileNameWithoutExtension(fiOutput.Name));

            var encoder = new SvgEncoder();

            PlotImageUtility.SaveImage(heatmap, baseName + "_heatmap.svg", encoder);
            PlotImageUtility.SaveImage(netHistogram, baseName + "_netHistogram.svg", encoder);
            PlotImageUtility.SaveImage(massHistogram, baseName + "_massHistogram.svg", encoder);
        }
Exemple #37
0
        /// <summary>
        ///     Creates LCMS Features
        /// </summary>
        public List<UMCLight> CreateLcmsFeatures(
            DatasetInformation information,
            List<MSFeatureLight> msFeatures,
            LcmsFeatureFindingOptions options,
            LcmsFeatureFilteringOptions filterOptions)
        {
            // Make features
            if (msFeatures.Count < 1)
                throw new Exception("No features were found in the feature files provided.");

            UpdateStatus("Finding features.");
            ISpectraProvider provider = null;
            if (information.RawPath != null && !string.IsNullOrWhiteSpace(information.RawPath))
            {
                UpdateStatus("Using raw data to create better features.");
                provider = RawLoaderFactory.CreateFileReader(information.RawPath);
                provider.AddDataFile(information.RawPath, 0);
            }

            ValidateFeatureFinderMaxScanLength(information, options, filterOptions);

            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            finder.Progress += (sender, args) => UpdateStatus(args.Message);
            var features = finder.FindFeatures(msFeatures, options, provider);

            UpdateStatus("Filtering features.");
            List<UMCLight> filteredFeatures;
            if (filterOptions.TreatAsTimeNotScan) //Feature length determined based on time (mins)
            {
                filteredFeatures = LcmsFeatureFilters.FilterFeatures(features,
                    filterOptions, information.ScanTimes);
            }
            else //Feature length determined based on scans
            {
                filteredFeatures = LcmsFeatureFilters.FilterFeatures(features, filterOptions);
            }

            UpdateStatus(string.Format("Filtered features from: {0} to {1}.", features.Count, filteredFeatures.Count));
            return filteredFeatures;
        }