/// <summary>
/// Reads MS features from a comma-delimited file, runs the UMC tree feature finder over
/// them (no raw data provider is supplied) and asserts that at least one feature results.
/// </summary>
/// <param name="path">Path of the MS feature file to read.</param>
/// <returns>The features returned by the feature finder.</returns>
public IEnumerable<UMCLight> TestUmcFeatures(string path)
{
    // Load the MS features
    var msFeatureReader = new MsFeatureLightFileReader();
    msFeatureReader.Delimeter = ",";
    var loadedMsFeatures = msFeatureReader.ReadFile(path);

    // Configure the finder and its options
    var treeFinder = new UmcTreeFeatureFinder();
    treeFinder.MaximumNet = .005;
    treeFinder.MaximumScan = 50;

    var findingTolerances = new FeatureTolerances();
    findingTolerances.Mass = 8;
    findingTolerances.Net = .005;
    var findingOptions = new LcmsFeatureFindingOptions(findingTolerances);

    var lcmsFeatures = treeFinder.FindFeatures(loadedMsFeatures.ToList(), findingOptions, null);

    // Work on total feature count here.
    Assert.Greater(lcmsFeatures.Count, 0);

    return lcmsFeatures;
}
/// <summary>
/// Builds LCMS features from the supplied MS features and then filters them.
/// </summary>
/// <param name="information">Dataset the features belong to; forwarded to the max-scan-length validation.</param>
/// <param name="msFeatures">MS features to build LCMS features from; must contain at least one item.</param>
/// <param name="options">Feature finding options (may be adjusted by the max-scan-length validation).</param>
/// <param name="filterOptions">Options used to filter the LCMS features that were found.</param>
/// <param name="provider">Scan summary provider handed to both the feature finder and the filter.</param>
/// <param name="progress">Optional progress reporter handed to the feature finder.</param>
/// <returns>The filtered LCMS features.</returns>
/// <exception cref="Exception">Thrown when <paramref name="msFeatures"/> is empty.</exception>
public List<UMCLight> CreateLcmsFeatures(
    DatasetInformation information,
    List<MSFeatureLight> msFeatures,
    LcmsFeatureFindingOptions options,
    LcmsFeatureFilteringOptions filterOptions,
    IScanSummaryProvider provider,
    IProgress<ProgressData> progress = null)
{
    // Nothing to build features from
    if (msFeatures.Count <= 0)
    {
        throw new Exception("No features were found in the feature files provided.");
    }

    UpdateStatus("Finding features.");
    ValidateFeatureFinderMaxScanLength(information, options, filterOptions);

    // Relay the finder's progress messages through our own status updates
    var treeFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    treeFinder.Progress += (sender, args) => UpdateStatus(args.Message);
    var foundFeatures = treeFinder.FindFeatures(msFeatures, options, provider, progress);

    UpdateStatus("Filtering features.");
    List<UMCLight> keptFeatures = LcmsFeatureFilters.FilterFeatures(foundFeatures, filterOptions, provider);

    var summary = string.Format("Filtered features from: {0} to {1}.", foundFeatures.Count, keptFeatures.Count);
    UpdateStatus(summary);

    return keptFeatures;
}
/// <summary>
/// Retrieves a list of features.
/// </summary>
/// <remarks>
/// The spectra provider obtained from <c>RawLoaderFactory</c> is registered with the raw file
/// (group 0) before it is handed to the feature finder, and it is disposed once the features
/// have been found.
/// </remarks>
/// <param name="rawFile">Path of the raw data file.</param>
/// <param name="featureFile">Path of the MS feature file to load.</param>
/// <returns>The LCMS features found by the tree-based feature finder.</returns>
public List<UMCLight> FindFeatures(string rawFile, string featureFile)
{
    using (ISpectraProvider raw = new ThermoRawDataFileReader())
    {
        // Read the raw file summary data...
        raw.AddDataFile(rawFile, 0);

        var info = new DatasetInformation();
        info.Features = new InputFile();
        info.Features.Path = featureFile;

        var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
        var tolerances = new FeatureTolerances
        {
            Mass = 8,
            Net = .005
        };
        var options = new LcmsFeatureFindingOptions(tolerances);

        // Load and create features
        var msFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);

        // The provider given to the finder must know about the raw file and must be
        // released when we are done with it.
        using (var provider = RawLoaderFactory.CreateFileReader(rawFile))
        {
            provider.AddDataFile(rawFile, 0);
            return finder.FindFeatures(msFeatures, options, provider);
        }
    }
}
/// <summary>
/// Loads and filters the MS features of a dataset, builds and filters LCMS features from them,
/// stamps the dataset id on everything below each feature, trims each feature down to the
/// MS features that carry MSn spectra, and links peptides to the features.
/// </summary>
/// <param name="information">Dataset whose feature, raw and sequence files are used.</param>
/// <param name="featureFindingOptions">Options handed to the feature finder.</param>
/// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
/// <param name="lcmsFilterOptions">Options used to filter the LCMS features.</param>
/// <param name="peptideOptions">Supplies the FDR and id score used when linking peptides.</param>
/// <param name="featureFinder">Feature finder to run; its progress notifier is de-registered before returning.</param>
/// <returns>The filtered LCMS features.</returns>
private IList<UMCLight> FindFeatures(
    DatasetInformation information,
    LcmsFeatureFindingOptions featureFindingOptions,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    SpectralOptions peptideOptions,
    MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder)
{
    UpdateStatus("Loading baseline features.");
    var loadedMsFeatures = UmcLoaderFactory.LoadMsFeatureData(information.Features.Path);
    loadedMsFeatures = LcmsFeatureFilters.FilterMsFeatures(loadedMsFeatures, msFilterOptions);

    // Load the baseline reference set
    using (var rawReader = RawLoaderFactory.CreateFileReader(information.RawFile.Path))
    {
        rawReader.AddDataFile(information.RawFile.Path, 0);

        UpdateStatus("Creating LCMS Features.");
        var lcmsFeatures = featureFinder.FindFeatures(loadedMsFeatures, featureFindingOptions, rawReader);
        lcmsFeatures = LcmsFeatureFilters.FilterFeatures(lcmsFeatures, lcmsFilterOptions, information.ScanTimes);

        var groupId = information.DatasetId;
        foreach (var lcmsFeature in lcmsFeatures)
        {
            lcmsFeature.GroupId = groupId;

            // MS features of this LCMS feature that have at least one MSn spectrum
            var fragmented = new List<MSFeatureLight>();
            foreach (var ms in lcmsFeature.MsFeatures)
            {
                ms.GroupId = groupId;
                foreach (var spectrum in ms.MSnSpectra)
                {
                    spectrum.GroupId = groupId;
                    foreach (var peptide in spectrum.Peptides)
                    {
                        peptide.GroupId = groupId;
                    }
                }

                if (ms.MSnSpectra.Count <= 0)
                {
                    continue;
                }

                fragmented.Add(ms);
            }

            // We are doing this so that we dont have a ton of MS features in the database
            lcmsFeature.MsFeatures.Clear();
            lcmsFeature.MsFeatures.AddRange(fragmented);
        }

        LinkPeptidesToFeatures(information.SequenceFile.Path, lcmsFeatures, peptideOptions.Fdr, peptideOptions.IdScore);
        DeRegisterProgressNotifier(featureFinder);

        return lcmsFeatures;
    }
}
/// <summary>
/// Retrieves a list of features.
/// </summary>
/// <param name="rawFile">Path of the raw data file.</param>
/// <param name="featureFile">Path of the MS feature file to load.</param>
/// <returns>The LCMS features found by the tree-based feature finder.</returns>
public List<UMCLight> FindFeatures(string rawFile, string featureFile)
{
    using (ISpectraProvider raw = new InformedProteomicsReader())
    {
        // Read the raw file summary data...
        raw.AddDataFile(rawFile, 0);

        // Describe the dataset: a single feature input file
        var info = new DatasetInformation();
        var featureInput = new InputFile();
        featureInput.Path = featureFile;
        featureInput.FileType = InputFileType.Features;
        info.InputFiles.Add(featureInput);

        var treeFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
        var findingTolerances = new FeatureTolerances();
        findingTolerances.Mass = 8;
        findingTolerances.Net = .005;
        var findingOptions = new LcmsFeatureFindingOptions(findingTolerances);

        // Load and create features
        var loadedMsFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);

        // NOTE(review): this provider is never disposed here; confirm whether the factory's
        // return type is disposable and who owns it.
        var fileReader = RawLoaderFactory.CreateFileReader(rawFile);
        fileReader.AddDataFile(rawFile, 0);

        return treeFinder.FindFeatures(loadedMsFeatures, findingOptions, fileReader);
    }
}
/// <summary>
/// Creates the analysis options with default sub-option objects. The LCMS finding and
/// clustering options share the instrument tolerances; LCMS filtering treats feature length
/// as time with a range of 0 to 20.
/// </summary>
public MultiAlignAnalysisOptions()
{
    // Flags
    HasMsMs = false;
    UsedIonMobility = false;

    // Tolerances first: the finding and clustering options are built from them
    InstrumentTolerances = new FeatureTolerances();
    LcmsFindingOptions = new LcmsFeatureFindingOptions(InstrumentTolerances);
    LcmsClusteringOptions = new LcmsClusteringOptions(InstrumentTolerances);

    // Filtering
    MsFilteringOptions = new MsFeatureFilteringOptions();
    LcmsFilteringOptions = new LcmsFeatureFilteringOptions
    {
        TreatAsTimeNotScan = true,
        FeatureLengthRange = new FilterRange(0, 20)
    };

    // Remaining option groups
    MassTagDatabaseOptions = new MassTagDatabaseOptions();
    AlignmentOptions = new AlignmentOptions();
    StacOptions = new StacOptions();
}
/// <summary>
/// Reads MS features from a comma-delimited file, finds UMC features using scan summaries
/// from the matching raw file, and asserts on the number of features found.
/// </summary>
/// <param name="relativePath">Relative path of the MS feature file; resolved with <c>GetPath</c>.</param>
/// <param name="expectedFeatureCount">Exact number of features the finder is expected to return.</param>
/// <returns>The features returned by the feature finder.</returns>
public IEnumerable<UMCLight> TestUmcFeatures(string relativePath, int expectedFeatureCount)
{
    // Get the absolute path
    var isosPath = GetPath(relativePath);

    var msFeatureReader = new MsFeatureLightFileReader();
    msFeatureReader.Delimiter = ',';
    var loadedMsFeatures = msFeatureReader.ReadFile(isosPath);

    var treeFinder = new UmcTreeFeatureFinder();
    treeFinder.MaximumNet = .005;
    treeFinder.MaximumScan = 50;

    var findingTolerances = new FeatureTolerances();
    findingTolerances.Mass = 8;
    findingTolerances.Net = .005;
    var findingOptions = new LcmsFeatureFindingOptions(findingTolerances);

    // The raw file path is derived from the feature file path
    var rawFilePath = isosPath.Replace("_isos.csv", ".raw");

    UpdateStatus("Using raw data to create better features.");
    IScanSummaryProvider scanSummaries = new ScanSummaryProviderCache().GetScanSummaryProvider(rawFilePath, 1);

    var lcmsFeatures = treeFinder.FindFeatures(loadedMsFeatures.ToList(), findingOptions, scanSummaries);

    // Work on total feature count here.
    Assert.Greater(lcmsFeatures.Count, 0);
    Assert.AreEqual(expectedFeatureCount, lcmsFeatures.Count);

    return lcmsFeatures;
}
/// <summary>
/// Make sure the value for options.MaximumScanRange, which is used by the Feature Finder,
/// is at least as large as filterOptions.FeatureLengthRangeScans.Maximum,
/// the longest feature length (in scans) that the length filter will accept.
/// </summary>
/// <remarks>
/// options.MaximumScanRange is a scan count, so it is only ever compared against the
/// scan-based length range. Previously, when filtering by scans (FilterOnMinutes == false),
/// it was compared against FeatureLengthRangeMinutes.Maximum, i.e. against a value in minutes.
/// When filtering by minutes the scan-based range was already used as the bound, so that
/// case behaves as before.
/// </remarks>
/// <param name="information">Dataset information (not used by this check).</param>
/// <param name="options">Feature finding options; MaximumScanRange may be increased.</param>
/// <param name="filterOptions">Feature filtering options supplying the scan-based length range.</param>
private static void ValidateFeatureFinderMaxScanLength(
    DatasetInformation information,
    LcmsFeatureFindingOptions options,
    LcmsFeatureFilteringOptions filterOptions)
{
    var longestFeatureInScans = filterOptions.FeatureLengthRangeScans.Maximum;

    if (options.MaximumScanRange < longestFeatureInScans)
    {
        // Bump up the scan range used by the LCMS Feature Finder to allow for longer features
        options.MaximumScanRange = (int)longestFeatureInScans;
    }
}
/// <summary>
/// Creates the analysis options with default sub-option objects. The LCMS finding and
/// clustering options share the instrument tolerances; LCMS filtering is set to filter on
/// minutes (0 to 20), with a scan range of 0 to 2000 and a minimum of 3 data points.
/// </summary>
public MultiAlignAnalysisOptions()
{
    DataLoadOptions = new DataLoadingOptions();
    InstrumentTolerances = new FeatureTolerances();
    MassTagDatabaseOptions = new MassTagDatabaseOptions();
    MsFilteringOptions = new MsFeatureFilteringOptions();
    LcmsFindingOptions = new LcmsFeatureFindingOptions(InstrumentTolerances);

    // LCMS feature filtering defaults
    var lcmsFiltering = new LcmsFeatureFilteringOptions();
    lcmsFiltering.FilterOnMinutes = true;
    lcmsFiltering.FeatureLengthRangeMinutes = new FilterRange(0, 20);
    lcmsFiltering.MinimumDataPoints = 3;
    lcmsFiltering.FeatureLengthRangeScans = new FilterRange(0, 2000);
    LcmsFilteringOptions = lcmsFiltering;

    AlignmentOptions = new AlignmentOptions();
    LcmsClusteringOptions = new LcmsClusteringOptions(InstrumentTolerances);
    StacOptions = new StacOptions();

    HasMsMs = false;
    UsedIonMobility = false;

    ClusterPostProcessingoptions = new ClusterPostProcessingOptions();
}
/// <summary>
/// Finds UMC features for a dataset, writes the MS features of every multi-charge feature
/// (grouped by charge state) to an "_xics.csv" file next to the input file, and verifies that
/// the charge states of each such feature start within <paramref name="maxScanDiff"/> scans
/// of each other.
/// </summary>
/// <param name="path">Path of the comma-delimited MS feature file.</param>
/// <param name="rawPath">Path of the raw data file.</param>
/// <param name="maxScanDiff">Largest allowed difference between the minimum scans of two charge states of one feature.</param>
/// <exception cref="NullReferenceException">Thrown when the feature finder returns null.</exception>
/// <exception cref="Exception">Thrown when two charge states of a feature start more than <paramref name="maxScanDiff"/> scans apart.</exception>
public void TestUmcFeaturesMultipleCharges(string path, string rawPath, int maxScanDiff)
{
    var reader = new MsFeatureLightFileReader { Delimeter = "," };
    var newMsFeatures = reader.ReadFile(path);

    var finder = new UmcTreeFeatureFinder();
    var featureTolerances = new FeatureTolerances
    {
        Mass = 12,
        Net = .05
    };
    var options = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    var provider = RawLoaderFactory.CreateFileReader(rawPath);
    provider.AddDataFile(rawPath, 0);

    var start = DateTime.Now;
    IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
    var end = DateTime.Now;
    Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);

    if (features == null)
    {
        throw new NullReferenceException("The feature list came back empty.  This is a problem.");
    }

    var dirPath = Path.GetDirectoryName(path);
    if (dirPath != null)
    {
        using (
            var writer =
                File.CreateText(Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv")))
            )
        {
            foreach (var feature in features)
            {
                writer.WriteLine();
                writer.WriteLine("Feature {0}", feature.Id);

                // Only features seen in two or more charge states are written and checked
                var chargeMap = feature.CreateChargeMap();
                if (chargeMap.Keys.Count < 2)
                {
                    continue;
                }

                foreach (var charge in chargeMap.Keys)
                {
                    writer.WriteLine();
                    foreach (var msFeature in chargeMap[charge])
                    {
                        var count = msFeature.MSnSpectra.Count;
                        writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                            msFeature.Abundance, count);
                    }
                }

                // Compare every distinct pair of charge states. The difference is taken as an
                // absolute value so the check does not depend on which charge state starts first.
                var charges = chargeMap.Keys.ToList();
                for (var i = 0; i < charges.Count; i++)
                {
                    var x = chargeMap[charges[i]];
                    for (var j = i + 1; j < charges.Count; j++)
                    {
                        var y = chargeMap[charges[j]];
                        var diff = Math.Abs(x.MinScan() - y.MinScan());
                        if (diff > maxScanDiff)
                        {
                            throw new Exception(
                                "There is a problem with the feature finder across charge states");
                        }
                    }
                }
            }
        }
    }

    // Work on total feature count here.
    Assert.Greater(features.Count(), 0);
}
/// <summary>
/// Runs the MultiAlign analysis: finds features for the baseline dataset, then for each
/// alignee dataset finds its features, clusters baseline and alignee features together,
/// aligns the alignee to the baseline, clusters again, and reports the cluster analysis
/// from before and after alignment.
/// </summary>
/// <param name="baselineDataset">Dataset used as the alignment baseline.</param>
/// <param name="aligneeDatasets">Datasets that are each aligned against the baseline.</param>
/// <param name="featureFindingOptions">Options handed to the feature finder.</param>
/// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
/// <param name="lcmsFilterOptions">LCMS filtering options (not referenced in this method).</param>
/// <param name="peptideOptions">Supplies the FDR and id score used when linking peptides.</param>
/// <param name="featureFinder">Feature finder used for both baseline and alignee datasets.</param>
/// <param name="aligner">Aligner; its baseline/alignee spectra providers are set for every alignee.</param>
/// <param name="clusterer">Clusterer run before and after each alignment.</param>
/// <param name="matchPath">Path passed to <c>SaveMatches</c> for every alignee.</param>
/// <param name="errorPath">Path passed to <c>WriteErrors</c> for every alignee.</param>
public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
    IEnumerable <DatasetInformation> aligneeDatasets,
    LcmsFeatureFindingOptions featureFindingOptions,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    SpectralOptions peptideOptions,
    MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
    IFeatureAligner <IEnumerable <UMCLight>, IEnumerable <UMCLight>, AlignmentData> aligner,
    IClusterer <UMCLight, UMCClusterLight> clusterer,
    string matchPath,
    string errorPath)
{
    UpdateStatus("Loading baseline features.");
    var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
    msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

    // Load the baseline reference set
    using (var rawProviderX = new InformedProteomicsReader())
    {
        rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var baselineFeatures = featureFinder.FindFeatures(msFeatures, featureFindingOptions, rawProviderX);
        LinkPeptidesToFeatures(baselineDataset.Sequence.Path, baselineFeatures, peptideOptions.Fdr, peptideOptions.IdScore);

        // The baseline provider is created once and reused for every alignee
        var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

        // Then load the alignee dataset
        foreach (var dataset in aligneeDatasets)
        {
            var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
            aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);

            using (var rawProviderY = new InformedProteomicsReader())
            {
                rawProviderY.AddDataFile(dataset.RawFile.Path, 0);

                UpdateStatus("Finding alignee features");
                var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures, featureFindingOptions, rawProviderY);
                LinkPeptidesToFeatures(dataset.Sequence.Path, aligneeFeatures, peptideOptions.Fdr, peptideOptions.IdScore);

                var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                // cluster before we do anything else....
                var allFeatures = new List <UMCLight>();
                allFeatures.AddRange(baselineFeatures);
                allFeatures.AddRange(aligneeFeatures);

                foreach (var feature in allFeatures)
                {
                    // NOTE(review): this assigns Net to itself; possibly an aligned-NET
                    // property was intended - confirm.
                    feature.Net = feature.Net;
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                }

                // This tells us the differences before we align.
                var clusters = clusterer.Cluster(allFeatures);
                var preAlignment = AnalyzeClusters(clusters);

                aligner.AligneeSpectraProvider = providerY;
                aligner.BaselineSpectraProvider = providerX;

                UpdateStatus("Aligning data");
                // Aligner data
                var data = aligner.Align(baselineFeatures, aligneeFeatures);
                var matches = data.Matches;

                WriteErrors(errorPath, matches);

                // create anchor points for LCMSWarp alignment
                // NOTE(review): massPoints and netPoints are filled below but are not read
                // again anywhere in this method.
                var massPoints = new List <RegressionPoint>();
                var netPoints = new List <RegressionPoint>();
                foreach (var match in matches)
                {
                    var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz, match.AnchorPointY.Mz);
                    var netError = match.AnchorPointX.Net - match.AnchorPointY.Net;

                    var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                    massPoints.Add(massPoint);

                    var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                    netPoints.Add(netPoint);
                }

                // Reset the cluster assignment of every feature before clustering again
                foreach (var feature in allFeatures)
                {
                    feature.UmcCluster = null;
                    feature.ClusterId = -1;
                }

                // Then cluster after alignment!
                UpdateStatus("clustering data");
                clusters = clusterer.Cluster(allFeatures);
                var postAlignment = AnalyzeClusters(clusters);

                // Report the pre/post alignment cluster analysis as a tab-separated table
                UpdateStatus("Note\tSame\tDifferent");
                UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster, preAlignment.DifferentCluster));
                UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster, postAlignment.DifferentCluster));

                SaveMatches(matchPath, matches);
            }
        }
    }

    // Only reached when the analysis completed without throwing
    DeRegisterProgressNotifier(aligner);
    DeRegisterProgressNotifier(featureFinder);
    DeRegisterProgressNotifier(clusterer);
}
/// <summary>
/// Finds the datasets in a directory and runs the MultiAlign analysis with the first
/// dataset as the baseline and all remaining datasets as alignees.
/// </summary>
/// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
/// <param name="outputPath">Base path; matches go to "{outputPath}.txt" and errors to "{outputPath}-errors.txt".</param>
/// <param name="alignmentType">Type of dataset aligner to create.</param>
/// <param name="clusterType">Type of clustering algorithm to create.</param>
public void TestClustering(
    string directory,
    string outputPath,
    FeatureAlignmentType alignmentType,
    LcmsFeatureClusteringAlgorithmType clusterType)
{
    var matchPath = string.Format("{0}.txt", outputPath);
    var errorPath = string.Format("{0}-errors.txt", outputPath);

    // Loads the supported MultiAlign types
    var supportedTypes = DatasetLoader.SupportedFileTypes;
    var extensions = new List<string>();
    foreach (var fileType in supportedTypes)
    {
        extensions.Add("*" + fileType.Extension);
    }

    // Find our datasets
    var loader = new DatasetLoader();
    var datasets = loader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

    // Setup our alignment options
    var alignmentOptions = new AlignmentOptions();
    var spectralOptions = new SpectralOptions();
    spectralOptions.ComparerType = SpectralComparison.CosineDotProduct;
    spectralOptions.Fdr = .01;
    spectralOptions.IdScore = 1e-09;
    spectralOptions.MzBinSize = .5;
    spectralOptions.MzTolerance = .5;
    spectralOptions.NetTolerance = .1;
    spectralOptions.RequiredPeakCount = 32;
    spectralOptions.SimilarityCutoff = .75;
    spectralOptions.TopIonPercent = .8;

    // Options setup
    var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var featureTolerances = new FeatureTolerances();
    featureTolerances.Mass = instrumentOptions.Mass + 6;
    featureTolerances.Net = instrumentOptions.NetTolerance;
    featureTolerances.DriftTime = instrumentOptions.DriftTimeTolerance;

    var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances);
    featureFindingOptions.MaximumNetRange = .002;
    featureFindingOptions.MaximumScanRange = 50;

    // Create our algorithms
    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType,
        alignmentOptions.LCMSWarpOptions,
        spectralOptions);
    var clusterer = ClusterFactory.Create(clusterType);
    clusterer.Parameters = new FeatureClusterParameters<UMCLight>
    {
        Tolerances = featureTolerances
    };

    RegisterProgressNotifier(aligner);
    RegisterProgressNotifier(finder);
    RegisterProgressNotifier(clusterer);

    // Filters
    var lcmsFilters = new LcmsFeatureFilteringOptions();
    lcmsFilters.FeatureLengthRangeScans = new FilterRange(50, 300);

    var msFilterOptions = new MsFeatureFilteringOptions();
    msFilterOptions.MinimumIntensity = 5000;
    msFilterOptions.ChargeRange = new FilterRange(1, 6);
    msFilterOptions.ShouldUseChargeFilter = true;
    msFilterOptions.ShouldUseDeisotopingFilter = true;
    msFilterOptions.ShouldUseIntensityFilter = true;

    // Only the first dataset is used as the baseline; every other dataset is an alignee
    const int baselineIndex = 0;
    var aligneeDatasets = datasets.Where((dataset, index) => index != baselineIndex).ToList();
    PerformMultiAlignAnalysis(datasets[baselineIndex],
        aligneeDatasets,
        featureFindingOptions,
        msFilterOptions,
        lcmsFilters,
        spectralOptions,
        finder,
        aligner,
        clusterer,
        matchPath,
        errorPath);
}
/// <summary>
/// Finds UMC features for a dataset and writes the MS features of every feature, grouped by
/// charge state, to an "_xics.csv" file next to the input file. Asserts that at least one
/// feature was found.
/// </summary>
/// <param name="relativePath">Relative path of the comma-delimited MS feature file; resolved with <c>GetPath</c>.</param>
/// <param name="relativeRawPath">Relative path of the raw data file; resolved with <c>GetPath</c>.</param>
/// <exception cref="NullReferenceException">Thrown when the feature finder returns null.</exception>
public void TestUmcFeatures(string relativePath, string relativeRawPath)
{
    // Get absolute paths
    var isosPath = GetPath(relativePath);
    var rawFilePath = GetPath(relativeRawPath);

    var msFeatureReader = new MsFeatureLightFileReader();
    msFeatureReader.Delimiter = ',';
    var loadedMsFeatures = msFeatureReader.ReadFile(isosPath);

    var treeFinder = new UmcTreeFeatureFinder();

    var findingTolerances = new FeatureTolerances();
    findingTolerances.Mass = 12;
    findingTolerances.Net = .04;

    var findingOptions = new LcmsFeatureFindingOptions(findingTolerances);
    findingOptions.MaximumNetRange = .003;
    findingOptions.MaximumScanRange = 50;

    var rawReader = RawLoaderFactory.CreateFileReader(rawFilePath);
    rawReader.AddDataFile(rawFilePath, 0);

    // Time the feature finding (including materialising the MS features)
    var started = DateTime.Now;
    IEnumerable<UMCLight> lcmsFeatures = treeFinder.FindFeatures(loadedMsFeatures.ToList(), findingOptions, rawReader);
    var finished = DateTime.Now;
    Console.WriteLine(@"Test Took: " + finished.Subtract(started).TotalSeconds);

    if (lcmsFeatures == null)
    {
        throw new NullReferenceException("The feature list came back empty.  This is a problem.");
    }

    var outputDirectory = Path.GetDirectoryName(isosPath);
    if (outputDirectory != null)
    {
        var xicFileName = Path.GetFileName(isosPath).Replace("_isos.csv", "_xics.csv");
        using (var xicWriter = File.CreateText(Path.Combine(outputDirectory, xicFileName)))
        {
            foreach (var lcmsFeature in lcmsFeatures)
            {
                xicWriter.WriteLine();
                xicWriter.WriteLine("Feature {0}", lcmsFeature.Id);

                // One blank-line-separated section per charge state
                var byCharge = lcmsFeature.CreateChargeMap();
                foreach (var chargeState in byCharge.Keys)
                {
                    xicWriter.WriteLine();
                    foreach (var ms in byCharge[chargeState])
                    {
                        var spectrumCount = ms.MSnSpectra.Count;
                        xicWriter.WriteLine("{0},{1},{2},{3},{4}",
                            chargeState, ms.Mz, ms.Scan, ms.Abundance, spectrumCount);
                    }
                }
            }
        }
    }

    // Work on total feature count here.
    Assert.Greater(lcmsFeatures.Count(), 0);
}
/// <summary>
/// Aligns the features of two datasets with the LCMSWarp feature aligner and exports the
/// resulting heat scores as an SVG heat map.
/// </summary>
/// <param name="path1">Relative path of the baseline MS feature file.</param>
/// <param name="path2">Relative path of the alignee MS feature file.</param>
/// <param name="svgPath">Output name (without ".svg") relative to the heat map results folder.</param>
public void TestLcmsWarpAlignment(string path1, string path2, string svgPath)
{
    // Convert relative paths to absolute paths
    var baselinePath = GetPath(path1);
    var aligneePath = GetPath(path2);
    var outputPath = GetPath(HEATMAP_RESULTS_FOLDER_BASE + svgPath);

    var warpAligner = new LcmsWarpFeatureAligner(new LcmsWarpAlignmentOptions());

    // Load the MS features of both datasets with the same filter options
    var isosFilter = new DeconToolsIsosFilterOptions();
    var baselineMsFeatures = UmcLoaderFactory.LoadMsFeatureData(baselinePath, isosFilter);
    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(aligneePath, isosFilter);

    // Find LCMS features (no raw data provider)
    var treeFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var findingTolerances = new FeatureTolerances();
    findingTolerances.FragmentationWindowSize = .5;
    findingTolerances.Mass = 13;
    findingTolerances.DriftTime = .3;
    findingTolerances.Net = .01;
    var findingOptions = new LcmsFeatureFindingOptions(findingTolerances);
    findingOptions.MaximumNetRange = .002;

    var baselineFeatures = treeFinder.FindFeatures(baselineMsFeatures, findingOptions, null);
    var aligneeFeatures = treeFinder.FindFeatures(aligneeMsFeatures, findingOptions, null);
    var alignment = warpAligner.Align(baselineFeatures, aligneeFeatures);

    // Plot: a colour axis on the right, a bottom axis and a default axis
    var heatPlot = new PlotModel();
    heatPlot.Subtitle = "Interpolated, cartesian axes";
    heatPlot.Title = "HeatMapSeries";

    var colorAxis = new LinearColorAxis();
    colorAxis.InvalidNumberColor = OxyColors.Gray;
    colorAxis.Position = AxisPosition.Right;
    colorAxis.Palette = OxyPalettes.Hot(200);
    heatPlot.Axes.Add(colorAxis);

    var bottomAxis = new LinearAxis();
    bottomAxis.Position = AxisPosition.Bottom;
    heatPlot.Axes.Add(bottomAxis);

    heatPlot.Axes.Add(new LinearAxis());

    var heatSeries = new HeatMapSeries();
    heatSeries.X0 = 0;
    heatSeries.X1 = 1;
    heatSeries.Y0 = 0;
    heatSeries.Y1 = 1;
    heatSeries.FontSize = .2;

    // Copy the heat scores into a double matrix of the same dimensions
    var heatScores = alignment.HeatScores;
    var rowCount = heatScores.GetLength(0);
    var columnCount = heatScores.GetLength(1);
    var heatValues = new double[rowCount, columnCount];
    for (var row = 0; row < rowCount; row++)
    {
        for (var column = 0; column < columnCount; column++)
        {
            heatValues[row, column] = Convert.ToDouble(heatScores[row, column]);
        }
    }
    heatSeries.Data = heatValues;
    heatPlot.Series.Add(heatSeries);

    // Export to SVG
    var svgText = new SvgExporter().ExportToString(heatPlot);
    using (var svgWriter = File.CreateText(outputPath + ".svg"))
    {
        svgWriter.Write(svgText);
    }
}
/// <summary>
/// Finds features for a baseline and an alignee dataset, clusters them with the MS/MS
/// clusterer and writes two time-stamped result files (a "_scans" file with scan pairs and a
/// full result file) into <paramref name="resultPath"/>.
/// </summary>
/// <param name="name">Prefix used in the names of the result files.</param>
/// <param name="resultPath">Directory the result files are written to.</param>
/// <param name="sequencePath">Sequence file registered for both datasets.</param>
/// <param name="type">Sequence file type (not referenced in this method).</param>
/// <param name="baseline">Path of the baseline MS feature file; its raw file path is derived by replacing "_isos.csv" with ".raw".</param>
/// <param name="features">Path of the alignee MS feature file; its raw file path is derived the same way.</param>
/// <param name="percent">Value assigned to <c>TopPercent</c> of the spectral comparer.</param>
public void ClusterMsMs(string name, string resultPath, string sequencePath, SequenceFileType type, string baseline, string features, double percent)
{
    var baselineRaw = baseline.Replace("_isos.csv", ".raw");
    var featuresRaw = features.Replace("_isos.csv", ".raw");

    // Baseline dataset: id 0 with its feature, raw and sequence files
    Console.WriteLine("Create Baseline Information");
    var baselineInfo = new DatasetInformation { DatasetId = 0, };
    baselineInfo.InputFiles.Add(new InputFile { Path = baseline, FileType = InputFileType.Features });
    baselineInfo.InputFiles.Add(new InputFile { Path = baselineRaw, FileType = InputFileType.Raw });
    baselineInfo.InputFiles.Add(new InputFile { Path = sequencePath, FileType = InputFileType.Sequence });

    // Alignee dataset: id 1 with its feature, raw and sequence files
    Console.WriteLine("Create Alignee Information");
    var aligneeInfo = new DatasetInformation { DatasetId = 1, };
    aligneeInfo.InputFiles.Add(new InputFile { Path = features, FileType = InputFileType.Features });
    aligneeInfo.InputFiles.Add(new InputFile { Path = featuresRaw, FileType = InputFileType.Raw });
    aligneeInfo.InputFiles.Add(new InputFile { Path = sequencePath, FileType = InputFileType.Sequence });

    var reader = new MsFeatureLightFileReader();

    // Read the MS features and tag each one with the id of its dataset
    Console.WriteLine("Reading Baseline Features");
    var baselineMsFeatures = reader.ReadFile(baseline).ToList();
    baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

    Console.WriteLine("Reading Alignee Features");
    var aligneeMsFeatures = reader.ReadFile(features).ToList();
    aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var tolerances = new FeatureTolerances { Mass = 8, Net = .005 };
    var options = new LcmsFeatureFindingOptions(tolerances);

    // Features are found without a raw data provider (null)
    Console.WriteLine("Detecting Baseline Features");
    var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

    Console.WriteLine("Detecting Alignee Features");
    var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

    Console.WriteLine("Managing baseline and alignee features");
    baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
    aligneeFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    Console.WriteLine("Clustering MS/MS Spectra");
    var clusterer = new MSMSClusterer();
    clusterer.MzTolerance = .5;
    clusterer.MassTolerance = 6;
    clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer { TopPercent = percent };
    clusterer.SimilarityTolerance = .5;
    clusterer.ScanRange = 905;
    clusterer.Progress += clusterer_Progress;

    var allFeatures = new List <UMCLight>();
    allFeatures.AddRange(baselineFeatures);
    allFeatures.AddRange(aligneeFeatures);

    List <MsmsCluster> clusters = null;

    // Request a spectra provider for each raw file from the cache before clustering;
    // the returned providers are not used directly here.
    var spectraProviderCache = new SpectraProviderCache();
    spectraProviderCache.GetSpectraProvider(baselineInfo.RawFile.Path, baselineInfo.DatasetId);
    spectraProviderCache.GetSpectraProvider(aligneeInfo.RawFile.Path, aligneeInfo.DatasetId);
    clusters = clusterer.Cluster(allFeatures, spectraProviderCache);

    // NOTE(review): clusters.Count is dereferenced here, before the null check below, so a
    // null result throws on this line and the check can never be false when it is reached.
    Console.WriteLine("Found {0} Total Clusters", clusters.Count);

    if (clusters != null)
    {
        // Both result files share one timestamp
        var now = DateTime.Now;
        var testResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}_scans.txt",
            name,
            now.Year,
            now.Month,
            now.Day,
            now.Hour,
            now.Minute,
            now.Second,
            resultPath
            );

        // First file: for clusters with exactly two features, "scan,scan,meanScore"
        using (TextWriter writer = File.CreateText(testResultPath))
        {
            writer.WriteLine("[Data]");
            writer.WriteLine("{0}", baseline);
            writer.WriteLine("{0}", features);
            writer.WriteLine("[Scans]");
            writer.WriteLine();
            foreach (var cluster in clusters)
            {
                var scanData = "";
                if (cluster.Features.Count == 2)
                {
                    foreach (var feature in cluster.Features)
                    {
                        scanData += string.Format("{0},", feature.Scan);
                    }
                    scanData += string.Format("{0}", cluster.MeanScore);
                    writer.WriteLine(scanData);
                }
            }
        }

        testResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}.txt",
            name,
            now.Year,
            now.Month,
            now.Day,
            now.Hour,
            now.Minute,
            now.Second,
            resultPath
            );

        // Second file: a [Scans] section with one line per cluster, then a [Clusters]
        // section with one header block and one line per feature for every cluster
        using (TextWriter writer = File.CreateText(testResultPath))
        {
            writer.WriteLine("[Data]");
            writer.WriteLine("{0}", baseline);
            writer.WriteLine("{0}", features);
            writer.WriteLine("[Scans]");
            foreach (var cluster in clusters)
            {
                var scanData = "";
                var data = "";
                foreach (var feature in cluster.Features)
                {
                    scanData += string.Format("{0},", feature.Scan);
                    // The per-feature fields are appended without a separator between features
                    data += string.Format("{0},{1},{2},{3},{4},{5}",
                        feature.GroupId,
                        feature.Id,
                        feature.MassMonoisotopic,
                        feature.Mz,
                        feature.ChargeState,
                        feature.Scan);
                    foreach (var spectrum in feature.MSnSpectra)
                    {
                        foreach (var peptide in spectrum.Peptides)
                        {
                            data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                        }
                    }
                }
                // scanData already ends with a comma, so this yields two consecutive commas
                writer.WriteLine(scanData + "," + data);
            }
            writer.WriteLine("");
            writer.WriteLine("");
            writer.WriteLine("[Clusters]");
            foreach (var cluster in clusters)
            {
                writer.WriteLine("cluster id, cluster score");
                writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
                writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");
                foreach (var feature in cluster.Features)
                {
                    var data = string.Format("{0},{1},{2},{3},{4},{5}",
                        feature.GroupId,
                        feature.Id,
                        feature.MassMonoisotopic,
                        feature.Mz,
                        feature.ChargeState,
                        feature.Scan);
                    foreach (var spectrum in feature.MSnSpectra)
                    {
                        foreach (var peptide in spectrum.Peptides)
                        {
                            data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                        }
                    }
                    writer.WriteLine(data);
                }
            }
        }
    }
}
/// <summary>
/// Finds LCMS features by clustering MS features with the MS feature tree clusterer.
/// </summary>
/// <remarks>
/// For every feature with at least one MS feature: the NET is set from the feature's scan
/// relative to the scan range of all input MS features, statistics are calculated using the
/// median, a sequential id is assigned, and the scan start/end are narrowed to the first MS
/// feature on each side of the most abundant one whose abundance is at most 5% of the maximum.
/// Features without MS features are left untouched but remain in the returned list.
/// </remarks>
/// <param name="msFeatures">MS features to cluster into LCMS features.</param>
/// <param name="options">Supplies the mass tolerance, the maximum NET range and the maximum scan range.</param>
/// <param name="provider">Spectra provider handed to the clusterer.</param>
/// <returns>The LCMS features produced by the clusterer.</returns>
public List<UMCLight> FindFeatures(List<MSFeatureLight> msFeatures,
    LcmsFeatureFindingOptions options,
    ISpectraProvider provider)
{
    var clusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>
    {
        Tolerances = new FeatureTolerances
        {
            Mass = options.InstrumentTolerances.Mass,
            Net = options.MaximumNetRange
        },
        ScanTolerance = options.MaximumScanRange,
        SpectraProvider = provider
        //TODO: Make sure we have a mass range for XIC's too....
    };

    OnStatus("Starting cluster definition");
    clusterer.Progress += (sender, args) => OnStatus(args.Message);

    var features = clusterer.Cluster(msFeatures);

    // Scan range covered by the input MS features
    var minScan = int.MaxValue;
    var maxScan = int.MinValue;
    foreach (var feature in msFeatures)
    {
        minScan = Math.Min(feature.Scan, minScan);
        maxScan = Math.Max(feature.Scan, maxScan);
    }
    var scanSpan = Convert.ToDouble(maxScan - minScan);

    // NOTE(review): ids are only assigned to features that have MS features, yet the full
    // list is returned; confirm whether empty features should be dropped from the result.
    var id = 0;
    foreach (var feature in features)
    {
        if (feature.MsFeatures.Count < 1)
        {
            continue;
        }

        // When every MS feature shares one scan the span is zero; use 0 rather than
        // producing NaN/Infinity from the division.
        feature.Net = scanSpan > 0
            ? Convert.ToDouble(feature.Scan - minScan) / scanSpan
            : 0;
        feature.CalculateStatistics(ClusterCentroidRepresentation.Median);
        feature.Id = id++;

        //Sets the width of the feature to be the width of the peak, not the width of the tails
        // Every MS feature, including the last one, is a candidate for the apex.
        var maxAbundance = double.MinValue;
        var maxAbundanceIndex = 0;
        for (var msFeatureIndex = 0; msFeatureIndex < feature.MsFeatures.Count; msFeatureIndex++)
        {
            var msFeature = feature.MsFeatures[msFeatureIndex];
            if (msFeature.Abundance > maxAbundance)
            {
                maxAbundance = msFeature.Abundance;
                maxAbundanceIndex = msFeatureIndex;
            }
        }

        // Walk left from the apex; the first and last MS features are not tested, so the
        // existing scan start/end is kept when no interior point falls to 5% of the apex.
        for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex > 0; msFeatureIndex--)
        {
            if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
            {
                feature.ScanStart = feature.MsFeatures[msFeatureIndex].Scan;
                break;
            }
        }

        // Walk right from the apex
        for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex < feature.MsFeatures.Count - 1; msFeatureIndex++)
        {
            if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
            {
                feature.ScanEnd = feature.MsFeatures[msFeatureIndex].Scan;
                break;
            }
        }
    }

    return features;
}
/// <summary>
/// Finds LCMS features by converting MS features with <c>MsToLcmsFeatures</c>.
/// </summary>
/// <remarks>
/// For every feature with at least one MS feature: the NET is set from the feature's scan time
/// relative to the scan times of the lowest and highest scans among the input MS features,
/// statistics are calculated, a sequential id is assigned, and the scan start/end are narrowed
/// to the first MS feature on each side of the most abundant one whose abundance is at most 5%
/// of the maximum. Features without MS features are left untouched but remain in the returned list.
/// </remarks>
/// <param name="msFeatures">MS features to convert into LCMS features.</param>
/// <param name="options">Feature finding options handed to the converter.</param>
/// <param name="provider">Scan summary provider used to look up scan times; must not be null.</param>
/// <param name="progress">Optional progress reporter handed to the converter.</param>
/// <returns>The LCMS features produced by the converter.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="provider"/> is null.</exception>
public List<UMCLight> FindFeatures(List<MSFeatureLight> msFeatures,
    LcmsFeatureFindingOptions options,
    IScanSummaryProvider provider,
    IProgress<ProgressData> progress = null)
{
    if (provider == null)
    {
        throw new ArgumentNullException(nameof(provider));
    }

    var clusterer = new MsToLcmsFeatures(provider, options);
    var features = clusterer.Convert(msFeatures, progress);

    // Scan range covered by the input MS features
    var minScan = int.MaxValue;
    var maxScan = int.MinValue;
    foreach (var feature in msFeatures)
    {
        minScan = Math.Min(feature.Scan, minScan);
        maxScan = Math.Max(feature.Scan, maxScan);
    }

    var minScanTime = provider.GetScanSummary(minScan).Time;
    var maxScanTime = provider.GetScanSummary(maxScan).Time;
    var scanTimeSpan = maxScanTime - minScanTime;

    // NOTE(review): ids are only assigned to features that have MS features, yet the full
    // list is returned; confirm whether empty features should be dropped from the result.
    var id = 0;
    foreach (var feature in features)
    {
        if (feature.MsFeatures.Count < 1)
        {
            continue;
        }

        // When the first and last scans share one time the span is zero; use 0 rather than
        // dividing by zero.
        feature.Net = scanTimeSpan > 0
            ? (provider.GetScanSummary(feature.Scan).Time - minScanTime) / scanTimeSpan
            : 0;
        feature.CalculateStatistics();
        feature.Id = id++;

        //Sets the width of the feature to be the width of the peak, not the width of the tails
        // Every MS feature, including the last one, is a candidate for the apex.
        var maxAbundance = double.MinValue;
        var maxAbundanceIndex = 0;
        for (var msFeatureIndex = 0; msFeatureIndex < feature.MsFeatures.Count; msFeatureIndex++)
        {
            var msFeature = feature.MsFeatures[msFeatureIndex];
            if (msFeature.Abundance > maxAbundance)
            {
                maxAbundance = msFeature.Abundance;
                maxAbundanceIndex = msFeatureIndex;
            }
        }

        // Walk left from the apex; the first and last MS features are not tested, so the
        // existing scan start/end is kept when no interior point falls to 5% of the apex.
        for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex > 0; msFeatureIndex--)
        {
            if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
            {
                feature.ScanStart = feature.MsFeatures[msFeatureIndex].Scan;
                break;
            }
        }

        // Walk right from the apex
        for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex < feature.MsFeatures.Count - 1; msFeatureIndex++)
        {
            if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
            {
                feature.ScanEnd = feature.MsFeatures[msFeatureIndex].Scan;
                break;
            }
        }
    }

    return features;
}
/// <summary>
/// Discovers the datasets in a directory, builds the feature finder, aligner and clusterer,
/// and runs the MultiAlign analysis with the first dataset as the baseline and every other
/// dataset as an alignee.
/// </summary>
/// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
/// <param name="outputPath">Base path; ".txt" and "-errors.txt" are appended for the match and error files.</param>
/// <param name="alignmentType">Type of dataset aligner to create.</param>
/// <param name="clusterType">Type of clustering algorithm to create.</param>
public void TestClustering(
    string directory,
    string outputPath,
    FeatureAlignmentType alignmentType,
    LcmsFeatureClusteringAlgorithmType clusterType)
{
    // Output files derived from the base output path
    var matchPath = string.Format("{0}.txt", outputPath);
    var errorPath = string.Format("{0}-errors.txt", outputPath);

    // Build one search pattern per supported MultiAlign file type
    var extensions = new List<string>();
    foreach (var fileType in DatasetInformation.SupportedFileTypes)
    {
        extensions.Add("*" + fileType.Extension);
    }

    // Locate the datasets
    var inputFiles = DatasetSearcher.FindDatasets(directory, extensions, SearchOption.TopDirectoryOnly);
    var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

    // Alignment and spectral comparison settings
    var alignmentOptions = new AlignmentOptions();
    var spectralOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .01,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Tolerances based on the LTQ-Orbitrap preset
    var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var featureTolerances = new FeatureTolerances
    {
        Mass = instrumentOptions.Mass + 6,
        Net = instrumentOptions.NetTolerance,
        DriftTime = instrumentOptions.DriftTimeTolerance
    };
    var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    // Instantiate the algorithms and hook up progress reporting
    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType,
        alignmentOptions.LCMSWarpOptions,
        spectralOptions);
    var clusterer = ClusterFactory.Create(clusterType);
    clusterer.Parameters = new FeatureClusterParameters<UMCLight>
    {
        Tolerances = featureTolerances
    };

    RegisterProgressNotifier(aligner);
    RegisterProgressNotifier(finder);
    RegisterProgressNotifier(clusterer);

    // Feature filters
    var lcmsFilters = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRange = new FilterRange(50, 300)
    };
    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    // Only the first dataset serves as the baseline; all others are aligned to it.
    const int baselineIndex = 0;
    var aligneeDatasets = datasets.Where((candidate, index) => index != baselineIndex).ToList();

    PerformMultiAlignAnalysis(datasets[0],
        aligneeDatasets,
        featureFindingOptions,
        msFilterOptions,
        lcmsFilters,
        spectralOptions,
        finder,
        aligner,
        clusterer,
        matchPath,
        errorPath);
}
/// <summary>
/// Finds features for every valid dataset in a directory and caches them, together with
/// the dataset list, in a newly created database.
/// </summary>
/// <param name="directoryPath">Path of the directory to search (top level only); resolved through GetPath.</param>
/// <param name="databasePath">Path of the database to create; resolved through GetPath.</param>
public void CreateFeatureDatabase(string directoryPath, string databasePath)
{
    var directory = GetPath(directoryPath);
    databasePath = GetPath(databasePath);

    // Build one search pattern per supported MultiAlign file type
    var extensions = new List<string>();
    foreach (var fileType in DatasetLoader.SupportedFileTypes)
    {
        extensions.Add("*" + fileType.Extension);
    }

    // Locate the datasets
    var loader = new DatasetLoader();
    var datasets = loader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

    // Tolerances based on the LTQ-Orbitrap preset
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var tolerances = new FeatureTolerances
    {
        Mass = preset.Mass + 6,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };
    var findingOptions = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    // Feature filters
    var lcmsFilters = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRangeScans = new FilterRange(50, 300)
    };
    var msFilters = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    var spectralOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .01,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    // Create the database and the providers used to serialize all data into it.
    NHibernateUtil.CreateDatabase(databasePath);
    var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, true);
    var cache = new FeatureLoader
    {
        Providers = providers
    };

    // Number the datasets sequentially and cache the features found in each.
    var nextDatasetId = 0;
    foreach (var dataset in datasets)
    {
        dataset.DatasetId = nextDatasetId;
        nextDatasetId++;

        var datasetFeatures = FindFeatures(dataset,
            findingOptions,
            msFilters,
            lcmsFilters,
            spectralOptions,
            finder);
        cache.CacheFeatures(datasetFeatures);
    }

    providers.DatasetCache.AddAll(datasets);
}
/// <summary>
/// Finds LC-MS features for the first dataset in a directory, links peptides to them, and
/// writes the charge, m/z, scan and NET of every MS feature that has a fragmentation
/// spectrum with at least one peptide.
/// </summary>
/// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
/// <param name="matchPath">Path of the tab-delimited output file.</param>
public void TestPeptideBands(string directory, string matchPath)
{
    // Build one search pattern per supported MultiAlign file type
    var extensions = new List<string>();
    foreach (var fileType in DatasetInformation.SupportedFileTypes)
    {
        extensions.Add("*" + fileType.Extension);
    }

    // Locate the datasets
    var inputFiles = DatasetSearcher.FindDatasets(directory, extensions, SearchOption.TopDirectoryOnly);
    var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

    // Tolerances based on the LTQ-Orbitrap preset
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var tolerances = new FeatureTolerances
    {
        Mass = preset.Mass,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };

    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    var findingOptions = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    var baselineDataset = datasets[0];

    UpdateStatus("Loading baseline features.");
    var msFeatures = LcmsFeatureFilters.FilterMsFeatures(
        UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path),
        msFilterOptions);

    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    var peptideOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .05,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Load the baseline reference set
    using (var rawProvider = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
    {
        rawProvider.AddDataFile(baselineDataset.RawPath, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var baselineFeatures = featureFinder.FindFeatures(msFeatures, findingOptions, rawProvider);

        LinkPeptidesToFeatures(baselineDataset.SequencePath,
            baselineFeatures,
            peptideOptions.Fdr,
            peptideOptions.IdScore);

        // Flatten to MS features, keep those with MS/MS data, ordered by m/z.
        var allMsFeatures = new List<MSFeatureLight>();
        foreach (var lcmsFeature in baselineFeatures)
        {
            allMsFeatures.AddRange(lcmsFeature.MsFeatures);
        }

        var msMsFeatures = allMsFeatures
            .Where(x => x.HasMsMs())
            .OrderBy(x => x.Mz)
            .ToList();

        // Keep each MS feature once if any of its spectra carries a peptide.
        var peptideList = new List<MSFeatureLight>();
        foreach (var msFeature in msMsFeatures)
        {
            var hasPeptide = false;
            foreach (var spectrum in msFeature.MSnSpectra)
            {
                // Only the presence of a peptide matters, so stop at the first one.
                foreach (var peptide in spectrum.Peptides)
                {
                    hasPeptide = true;
                    break;
                }

                if (hasPeptide)
                {
                    break;
                }
            }

            if (hasPeptide)
            {
                peptideList.Add(msFeature);
            }
        }

        using (var writer = File.CreateText(matchPath))
        {
            writer.WriteLine("Charge\tpmz\tscan\tNET\t");
            foreach (var msFeature in peptideList)
            {
                writer.WriteLine("{0}\t{1}\t{2}\t{3}\t",
                    msFeature.ChargeState,
                    msFeature.Mz,
                    msFeature.Scan,
                    msFeature.Net);
            }
        }
    }
}
/// <summary>
/// Loads baseline data for alignment, either from the baseline dataset or from the
/// mass tag database, and raises <c>BaselineFeaturesLoaded</c> when features are available.
/// </summary>
/// <param name="baselineInfo">Baseline dataset; required unless the mass tag database is the baseline.</param>
/// <param name="msFilterOptions">MS feature filtering options passed to the feature loader.</param>
/// <param name="lcmsFindingOptions">LC-MS feature finding options passed to the feature loader.</param>
/// <param name="lcmsFilterOptions">LC-MS feature filtering options passed to the feature loader.</param>
/// <param name="dataProviders">Data access providers used by the feature loader.</param>
/// <param name="database">Mass tag database; required when it is used as the baseline.</param>
/// <param name="shouldUseMassTagDbAsBaseline">True to build the baseline from the mass tag database.</param>
/// <returns>The baseline features loaded from the dataset, or converted from the mass tags.</returns>
/// <exception cref="Exception">Thrown when a dataset baseline is requested but <paramref name="baselineInfo"/> is null.</exception>
/// <exception cref="NullReferenceException">Thrown when the mass tag baseline is requested but <paramref name="database"/> is null.</exception>
private IList<UMCLight> LoadBaselineData(DatasetInformation baselineInfo,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFindingOptions lcmsFindingOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    FeatureDataAccessProviders dataProviders,
    MassTagDatabase database,
    bool shouldUseMassTagDbAsBaseline)
{
    UpdateStatus("Loading baseline features.");

    // Copy the event delegate so a concurrent unsubscribe cannot null it between check and call.
    var baselineLoaded = BaselineFeaturesLoaded;

    if (shouldUseMassTagDbAsBaseline)
    {
        if (database == null)
        {
            throw new NullReferenceException(
                "The mass tag database has to have data in it if it's being used for drift time alignment.");
        }

        UpdateStatus("Setting baseline features for post drift time alignment from mass tag database.");
        var tags = FeatureDataConverters.ConvertToUMC(database.MassTags);

        if (baselineLoaded != null && tags != null)
        {
            baselineLoaded(this, new BaselineFeaturesLoadedEventArgs(null, tags.ToList(), database));
        }

        // Always hand the converted tags back; the result must not depend on whether
        // anyone happens to be subscribed to the event.
        return tags;
    }

    if (baselineInfo == null)
    {
        throw new Exception("The baseline dataset was never set.");
    }

    var cache = new FeatureLoader
    {
        Providers = dataProviders
    };

    RegisterProgressNotifier(cache);
    try
    {
        UpdateStatus("Loading baseline features from " + baselineInfo.DatasetName + " for alignment.");
        var baselineFeatures = cache.LoadDataset(baselineInfo, msFilterOptions, lcmsFindingOptions, lcmsFilterOptions);
        cache.CacheFeatures(baselineFeatures);

        if (baselineLoaded != null)
        {
            baselineLoaded(this, new BaselineFeaturesLoadedEventArgs(baselineInfo, baselineFeatures.ToList()));
        }

        return baselineFeatures;
    }
    finally
    {
        // Detach the progress handler even when loading fails.
        DeRegisterProgressNotifier(cache);
    }
}
/// <summary>
/// Loads baseline data for alignment, either from the baseline dataset or from the
/// mass tag database, and raises <c>BaselineFeaturesLoaded</c> when features are available.
/// </summary>
/// <param name="baselineInfo">Baseline dataset; required unless the mass tag database is the baseline.</param>
/// <param name="msFilterOptions">MS feature filtering options passed to the feature loader.</param>
/// <param name="lcmsFindingOptions">LC-MS feature finding options passed to the feature loader.</param>
/// <param name="lcmsFilterOptions">LC-MS feature filtering options passed to the feature loader.</param>
/// <param name="dataLoadOptions">Data loading options passed to the feature loader.</param>
/// <param name="dataProviders">Data access providers used by the feature loader.</param>
/// <param name="database">Mass tag database; required when it is used as the baseline.</param>
/// <param name="shouldUseMassTagDbAsBaseline">True to build the baseline from the mass tag database.</param>
/// <returns>The baseline features loaded from the dataset, or converted from the mass tags.</returns>
/// <exception cref="Exception">Thrown when a dataset baseline is requested but <paramref name="baselineInfo"/> is null.</exception>
/// <exception cref="NullReferenceException">Thrown when the mass tag baseline is requested but <paramref name="database"/> is null.</exception>
private IList<UMCLight> LoadBaselineData(DatasetInformation baselineInfo,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFindingOptions lcmsFindingOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    DataLoadingOptions dataLoadOptions,
    FeatureDataAccessProviders dataProviders,
    MassTagDatabase database,
    bool shouldUseMassTagDbAsBaseline)
{
    UpdateStatus("Loading baseline features.");

    // Copy the event delegate so a concurrent unsubscribe cannot null it between check and call.
    var baselineLoaded = BaselineFeaturesLoaded;

    if (shouldUseMassTagDbAsBaseline)
    {
        if (database == null)
        {
            throw new NullReferenceException(
                "The mass tag database has to have data in it if it's being used for drift time alignment.");
        }

        UpdateStatus("Setting baseline features for post drift time alignment from mass tag database.");
        var tags = FeatureDataConverters.ConvertToUMC(database.MassTags);

        if (baselineLoaded != null && tags != null)
        {
            baselineLoaded(this, new BaselineFeaturesLoadedEventArgs(null, tags.ToList(), database));
        }

        // Always hand the converted tags back; the result must not depend on whether
        // anyone happens to be subscribed to the event.
        return tags;
    }

    if (baselineInfo == null)
    {
        throw new Exception("The baseline dataset was never set.");
    }

    var cache = new FeatureLoader
    {
        Providers = dataProviders
    };

    RegisterProgressNotifier(cache);
    try
    {
        UpdateStatus("Loading baseline features from " + baselineInfo.DatasetName + " for alignment.");
        var baselineFeatures = cache.LoadDataset(baselineInfo,
            msFilterOptions,
            lcmsFindingOptions,
            lcmsFilterOptions,
            dataLoadOptions,
            m_scanSummaryProviderCache,
            this.m_identificationsProvider);
        cache.CacheFeatures(baselineFeatures);

        if (baselineLoaded != null)
        {
            baselineLoaded(this, new BaselineFeaturesLoadedEventArgs(baselineInfo, baselineFeatures.ToList()));
        }

        return baselineFeatures;
    }
    finally
    {
        // Detach the progress handler even when loading fails.
        DeRegisterProgressNotifier(cache);
    }
}
/// <summary>
/// Aligns the features found in two MS feature files with LCMSWarp, renders the alignment
/// heat scores as a PNG, and saves heatmap and error-histogram SVG plots next to it.
/// </summary>
/// <param name="path1">Baseline MS feature file; resolved through GetPath.</param>
/// <param name="path2">Alignee MS feature file; resolved through GetPath.</param>
/// <param name="pngPath">Output PNG path; resolved through GetPath. Its directory is created when missing.</param>
/// <exception cref="DirectoryNotFoundException">Thrown when no directory can be determined for <paramref name="pngPath"/>.</exception>
public void TestMsFeatureScatterPlot(string path1, string path2, string pngPath)
{
    // Convert relative paths to absolute paths
    path1 = GetPath(path1);
    path2 = GetPath(path2);
    pngPath = GetPath(pngPath);

    var fiOutput = new FileInfo(pngPath);
    var didirectory = fiOutput.Directory;
    if (didirectory == null)
    {
        throw new DirectoryNotFoundException(pngPath);
    }

    if (!didirectory.Exists)
    {
        didirectory.Create();
    }

    var aligner = new LcmsWarpFeatureAligner();

    var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1);
    var aligneeMs = UmcLoaderFactory.LoadMsFeatureData(path2);
    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    var tolerances = new FeatureTolerances
    {
        FragmentationWindowSize = .5,
        Mass = 13,
        DriftTime = .3,
        Net = .01
    };
    var options = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = .002
    };

    var baseline = finder.FindFeatures(baselineMs, options, null);
    var alignee = finder.FindFeatures(aligneeMs, options, null);
    var alignmentResults = aligner.Align(baseline, alignee);

    // Build the heat map plot
    var plotModel1 = new PlotModel
    {
        Subtitle = "Interpolated, cartesian axes",
        Title = "HeatMapSeries"
    };

    var palette = OxyPalettes.Hot(200);
    var linearColorAxis1 = new LinearColorAxis
    {
        InvalidNumberColor = OxyColors.Gray,
        Position = AxisPosition.Right,
        Palette = palette
    };
    plotModel1.Axes.Add(linearColorAxis1);

    var linearAxis1 = new LinearAxis { Position = AxisPosition.Bottom };
    plotModel1.Axes.Add(linearAxis1);

    var linearAxis2 = new LinearAxis();
    plotModel1.Axes.Add(linearAxis2);

    var heatMapSeries1 = new HeatMapSeries
    {
        X0 = 0,
        X1 = 1,
        Y0 = 0,
        Y1 = 1,
        FontSize = .2
    };

    // Copy the alignment scores into the series as doubles.
    var scores = alignmentResults.heatScores;
    var width = scores.GetLength(0);
    var height = scores.GetLength(1);

    heatMapSeries1.Data = new double[width, height];
    var seriesData = heatMapSeries1.Data;
    for (var i = 0; i < width; i++)
    {
        for (var j = 0; j < height; j++)
        {
            seriesData[i, j] = Convert.ToDouble(scores[i, j]);
        }
    }

    plotModel1.Series.Add(heatMapSeries1);

    // Export to SVG, then rasterize the SVG to the PNG path.
    var svg = new SvgExporter();
    var svgString = svg.ExportToString(plotModel1);

    var xml = new XmlDocument();
    xml.LoadXml(svgString);
    var x = SvgDocument.Open(xml);

    // Dispose the bitmap once saved so its image resources are released promptly.
    using (var bmp = x.Draw())
    {
        bmp.Save(pngPath);
    }

    var heatmap = HeatmapFactory.CreateAlignedHeatmap(alignmentResults.heatScores);
    var netHistogram = HistogramFactory.CreateHistogram(alignmentResults.netErrorHistogram, "NET Error", "NET Error");
    var massHistogram = HistogramFactory.CreateHistogram(alignmentResults.massErrorHistogram, "Mass Error",
        "Mass Error (ppm)");

    // The SVG plots share the PNG's directory and base file name.
    var baseName = Path.Combine(didirectory.FullName, Path.GetFileNameWithoutExtension(fiOutput.Name));

    var encoder = new SvgEncoder();
    PlotImageUtility.SaveImage(heatmap, baseName + "_heatmap.svg", encoder);
    PlotImageUtility.SaveImage(netHistogram, baseName + "_netHistogram.svg", encoder);
    PlotImageUtility.SaveImage(massHistogram, baseName + "_massHistogram.svg", encoder);
}
/// <summary>
/// Aligns the features found in two MS feature files with LCMSWarp and writes the
/// alignment heat scores as an SVG heat map.
/// </summary>
/// <param name="path1">Baseline MS feature file; resolved through GetPath.</param>
/// <param name="path2">Alignee MS feature file; resolved through GetPath.</param>
/// <param name="svgPath">Output name appended to the heat map results folder; ".svg" is added to it.</param>
public void TestLcmsWarpAlignment(string path1, string path2, string svgPath)
{
    // Convert relative paths to absolute paths
    path1 = GetPath(path1);
    path2 = GetPath(path2);
    svgPath = GetPath(HEATMAP_RESULTS_FOLDER_BASE + svgPath);

    var warpAligner = new LcmsWarpFeatureAligner();

    var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1);
    var aligneeMs = UmcLoaderFactory.LoadMsFeatureData(path2);
    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    var findingOptions = new LcmsFeatureFindingOptions(
        new FeatureTolerances
        {
            FragmentationWindowSize = .5,
            Mass = 13,
            DriftTime = .3,
            Net = .01
        })
    {
        MaximumNetRange = .002
    };

    var baselineFeatures = featureFinder.FindFeatures(baselineMs, findingOptions, null);
    var aligneeFeatures = featureFinder.FindFeatures(aligneeMs, findingOptions, null);
    var alignment = warpAligner.Align(baselineFeatures, aligneeFeatures);

    // Plot model with a color axis on the right and two linear axes
    var heatPlot = new PlotModel
    {
        Subtitle = "Interpolated, cartesian axes",
        Title = "HeatMapSeries"
    };

    heatPlot.Axes.Add(new LinearColorAxis
    {
        InvalidNumberColor = OxyColors.Gray,
        Position = AxisPosition.Right,
        Palette = OxyPalettes.Hot(200)
    });
    heatPlot.Axes.Add(new LinearAxis { Position = AxisPosition.Bottom });
    heatPlot.Axes.Add(new LinearAxis());

    var heatSeries = new HeatMapSeries
    {
        X0 = 0,
        X1 = 1,
        Y0 = 0,
        Y1 = 1,
        FontSize = .2
    };

    // Copy the alignment scores into the series as doubles.
    var heatScores = alignment.heatScores;
    var columnCount = heatScores.GetLength(0);
    var rowCount = heatScores.GetLength(1);

    var cells = new double[columnCount, rowCount];
    heatSeries.Data = cells;
    for (var column = 0; column < columnCount; column++)
    {
        for (var row = 0; row < rowCount; row++)
        {
            cells[column, row] = Convert.ToDouble(heatScores[column, row]);
        }
    }

    heatPlot.Series.Add(heatSeries);

    // Export the plot and write it out as text.
    var exporter = new SvgExporter();
    var svgText = exporter.ExportToString(heatPlot);
    File.WriteAllText(svgPath + ".svg", svgText);
}
/// <summary>
/// Finds LC-MS features in an MS feature file, writes the per-charge XIC data of each
/// feature to a "_xics.csv" file beside the input, and verifies that the charge states of
/// a feature start within <paramref name="maxScanDiff"/> scans of each other.
/// </summary>
/// <param name="path">Path of the "_isos.csv" MS feature file.</param>
/// <param name="rawPath">Path of the raw data file used as the scan provider.</param>
/// <param name="maxScanDiff">Largest allowed difference between the minimum scans of two charge states.</param>
/// <exception cref="NullReferenceException">Thrown when the feature finder returns null.</exception>
/// <exception cref="Exception">Thrown when two charge states of a feature start further apart than allowed.</exception>
public void TestUmcFeaturesMultipleCharges(string path, string rawPath, int maxScanDiff)
{
    var reader = new MsFeatureLightFileReader { Delimeter = "," };
    var newMsFeatures = reader.ReadFile(path);

    var finder = new UmcTreeFeatureFinder();
    var featureTolerances = new FeatureTolerances
    {
        Mass = 12,
        Net = .05
    };
    var options = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    // The raw file reader is disposable (it is used in using-blocks elsewhere in this
    // file), so release it once the test is done.
    using (var provider = RawLoaderFactory.CreateFileReader(rawPath))
    {
        provider.AddDataFile(rawPath, 0);

        var start = DateTime.Now;
        IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
        var end = DateTime.Now;
        Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);

        if (features == null)
        {
            throw new NullReferenceException("The feature list came back empty.  This is a problem.");
        }

        var dirPath = Path.GetDirectoryName(path);
        if (dirPath != null)
        {
            var xicPath = Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv"));
            using (var writer = File.CreateText(xicPath))
            {
                foreach (var feature in features)
                {
                    writer.WriteLine();
                    writer.WriteLine("Feature {0}", feature.Id);

                    // Only features observed in more than one charge state are detailed and checked.
                    var chargeMap = feature.CreateChargeMap();
                    if (chargeMap.Keys.Count < 2)
                    {
                        continue;
                    }

                    foreach (var charge in chargeMap.Keys)
                    {
                        writer.WriteLine();
                        foreach (var msFeature in chargeMap[charge])
                        {
                            var count = msFeature.MSnSpectra.Count;
                            writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                                msFeature.Abundance, count);
                        }
                    }

                    // Compare every distinct pair of charge states. The absolute difference is
                    // used so an offset in either direction is detected.
                    var charges = chargeMap.Keys.ToList();
                    for (var i = 0; i < charges.Count; i++)
                    {
                        for (var j = i + 1; j < charges.Count; j++)
                        {
                            var x = chargeMap[charges[i]];
                            var y = chargeMap[charges[j]];
                            var diff = Math.Abs(x.MinScan() - y.MinScan());
                            if (diff > maxScanDiff)
                            {
                                throw new Exception(
                                    "There is a problem with the feature finder across charge states");
                            }
                        }
                    }
                }
            }
        }

        // Work on total feature count here.
        Assert.Greater(features.Count(), 0);
    }
}
/// <summary>
/// Finds LC-MS features in an MS feature file, writes the per-charge XIC data of each
/// feature to a "_xics.csv" file beside the input, and asserts that features were found.
/// </summary>
/// <param name="relativePath">Path of the "_isos.csv" MS feature file; resolved through GetPath.</param>
/// <param name="relativeRawPath">Path of the raw data file; resolved through GetPath.</param>
/// <exception cref="NullReferenceException">Thrown when the feature finder returns null.</exception>
public void TestUmcFeatures(string relativePath, string relativeRawPath)
{
    // Get absolute paths
    var featurePath = GetPath(relativePath);
    var rawFilePath = GetPath(relativeRawPath);

    var msFeatureReader = new MsFeatureLightFileReader { Delimiter = ',' };
    var msFeatures = msFeatureReader.ReadFile(featurePath);

    var featureFinder = new UmcTreeFeatureFinder();
    var findingOptions = new LcmsFeatureFindingOptions(
        new FeatureTolerances
        {
            Mass = 12,
            Net = .04
        })
    {
        MaximumNetRange = .003,
        MaximumScanRange = 50
    };

    var scanProvider = RawLoaderFactory.CreateFileReader(rawFilePath, 0);

    // Time the feature finding step
    var started = DateTime.Now;
    IEnumerable<UMCLight> features = featureFinder.FindFeatures(msFeatures.ToList(), findingOptions, scanProvider);
    var finished = DateTime.Now;
    Console.WriteLine(@"Test Took: " + finished.Subtract(started).TotalSeconds);

    if (features == null)
    {
        throw new NullReferenceException("The feature list came back empty.  This is a problem.");
    }

    var outputDirectory = Path.GetDirectoryName(featurePath);
    if (outputDirectory != null)
    {
        var xicFileName = Path.GetFileName(featurePath).Replace("_isos.csv", "_xics.csv");
        using (var xicWriter = File.CreateText(Path.Combine(outputDirectory, xicFileName)))
        {
            foreach (var lcmsFeature in features)
            {
                xicWriter.WriteLine();
                xicWriter.WriteLine("Feature {0}", lcmsFeature.Id);

                // One blank-line-separated section per charge state
                var byCharge = lcmsFeature.CreateChargeMap();
                foreach (var chargeState in byCharge.Keys)
                {
                    xicWriter.WriteLine();
                    foreach (var member in byCharge[chargeState])
                    {
                        var spectraCount = member.MSnSpectra.Count;
                        xicWriter.WriteLine("{0},{1},{2},{3},{4}",
                            chargeState,
                            member.Mz,
                            member.Scan,
                            member.Abundance,
                            spectraCount);
                    }
                }
            }
        }
    }

    // Work on total feature count here.
    Assert.Greater(features.Count(), 0);
}
/// <summary>
/// Load a single dataset from the provider. When no LC-MS features exist yet they are
/// created from the filtered MS features; otherwise the MS features are attached to the
/// existing LC-MS features. Peptides from the sequence file, when a reader is available,
/// are linked to the MS/MS spectra collected from newly created features.
/// </summary>
/// <param name="dataset">Dataset to load; its feature, scan and sequence paths are read.</param>
/// <param name="msFilteringOptions">Options used to filter the MS features before feature creation.</param>
/// <param name="lcmsFindingOptions">Options used when creating LC-MS features.</param>
/// <param name="lcmsFilteringOptions">Options used to filter the created LC-MS features.</param>
/// <returns>The LC-MS features of the dataset.</returns>
public IList<UMCLight> LoadDataset(DatasetInformation dataset,
    MsFeatureFilteringOptions msFilteringOptions,
    LcmsFeatureFindingOptions lcmsFindingOptions,
    LcmsFeatureFilteringOptions lcmsFilteringOptions)
{
    UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
    var datasetId = dataset.DatasetId;
    var features = UmcLoaderFactory.LoadUmcFeatureData(dataset.Features.Path, dataset.DatasetId,
        Providers.FeatureCache);

    UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId,
        dataset.DatasetName));
    var msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
    var scansInfo = UmcLoaderFactory.LoadScanSummaries(dataset.Scans.Path);
    dataset.BuildScanTimes(scansInfo);

    var msnSpectra = new List<MSSpectra>();

    // If we don't have any features, then we have to create some from the MS features
    // provided to us.
    if (features.Count < 1)
    {
        msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
        msFeatures = Filter(msFeatures, ref dataset);

        features = CreateLcmsFeatures(dataset, msFeatures, lcmsFindingOptions, lcmsFilteringOptions);

        // Filtering can remove every created feature; Max/Min would throw on an empty list.
        if (features.Count > 0)
        {
            var maxScan = features.Max(feature => feature.Scan);
            var minScan = features.Min(feature => feature.Scan);

            var scanTimes = dataset.ScanTimes;
            var minScanTime = scanTimes[minScan];
            var scanTimeSpan = scanTimes[maxScan] - minScanTime;

            var id = 0;
            foreach (var feature in features)
            {
                feature.Id = id++;

                // NET is the feature's elution time normalized to the observed time range.
                // A zero span (all features at one scan time) would divide by zero.
                feature.Net = scanTimeSpan > 0
                    ? (Convert.ToDouble(scanTimes[feature.Scan]) - minScanTime) / scanTimeSpan
                    : 0;
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                feature.NetAligned = feature.Net;
                feature.GroupId = datasetId;
                feature.SpectralCount = feature.MsFeatures.Count;

                foreach (var msFeature in feature.MsFeatures.Where(candidate => candidate != null))
                {
                    msFeature.UmcId = feature.Id;
                    msFeature.GroupId = datasetId;
                    msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                    msnSpectra.AddRange(msFeature.MSnSpectra);
                }
            }
        }
    }
    else
    {
        if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
        {
            var i = 0;
            foreach (var feature in features)
            {
                feature.GroupId = datasetId;
                feature.Id = i++;
            }
        }

        // Otherwise, we need to map the MS features to the LCMS Features provided.
        // This would mean that we extracted data from an existing database.
        if (msFeatures.Count > 0)
        {
            var map = FeatureDataConverters.MapFeature(features);
            foreach (var msFeature in msFeatures)
            {
                if (map.ContainsKey(msFeature.UmcId))
                {
                    map[msFeature.UmcId].AddChildFeature(msFeature);
                }
            }
        }
    }

    // Process the MS/MS data with peptides
    UpdateStatus("Reading List of Peptides");
    var sequenceProvider = PeptideReaderFactory.CreateReader(dataset.SequencePath);
    if (sequenceProvider != null)
    {
        UpdateStatus("Reading List of Peptides");
        var peptides = sequenceProvider.Read(dataset.SequencePath);

        // Number the peptides sequentially in read order.
        var count = 0;
        var peptideList = peptides.ToList();
        peptideList.ForEach(x => x.Id = count++);

        UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");
        var linker = new PeptideMsMsLinker();
        linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
    }

    return features;
}
/// <summary>
/// Make sure the value for options.MaximumScanRange, which is used by the Feature Finder,
/// is at least as large as the filterOptions.FeatureLengthRange.Maximum value,
/// which is used for filtering the features by length.
/// When the filter length is a time, it is first converted to an estimated scan count.
/// </summary>
/// <param name="information">Dataset whose ScanTimes are used to estimate the number of scans per minute.</param>
/// <param name="options">Feature finding options; MaximumScanRange is raised when it is too small.</param>
/// <param name="filterOptions">Filter options supplying FeatureLengthRange and TreatAsTimeNotScan.</param>
private static void ValidateFeatureFinderMaxScanLength(
    DatasetInformation information,
    LcmsFeatureFindingOptions options,
    LcmsFeatureFilteringOptions filterOptions)
{
    if (!filterOptions.TreatAsTimeNotScan)
    {
        // The filter length is already in scans, so it can be compared directly.
        if (options.MaximumScanRange < filterOptions.FeatureLengthRange.Maximum)
        {
            // Bump up the scan range used by the LCMS Feature Finder to allow for longer features
            options.MaximumScanRange = (int)filterOptions.FeatureLengthRange.Maximum;
        }
        return;
    }

    int maxScanLength;

    if (information.ScanTimes.Count == 0)
    {
        // FeatureLengthRange.Maximum is in minutes
        // Assume 3 scans/second (ballpark estimate)
        // Multiply before casting so fractional minutes are not truncated away.
        maxScanLength = (int)(filterOptions.FeatureLengthRange.Maximum * 60 * 3);
    }
    else
    {
        // Find the average number of scans that spans FeatureLengthRange.Maximum minutes
        // Step through the dictionary to find the average number of scans per minute
        var minuteThreshold = 1;
        var scanCountCurrent = 0;
        var scanCountsPerMinute = new List<int>();

        foreach (var entry in information.ScanTimes)
        {
            if (entry.Value < minuteThreshold)
            {
                scanCountCurrent++;
            }
            else
            {
                // NOTE(review): the scan that crosses the threshold is not itself counted,
                // and the threshold advances by one minute per crossing - confirm this is intended.
                if (scanCountCurrent > 0)
                {
                    scanCountsPerMinute.Add(scanCountCurrent);
                }
                scanCountCurrent = 0;
                minuteThreshold++;
            }
        }

        int averageScansPerMinute;
        if (scanCountsPerMinute.Count > 0)
        {
            averageScansPerMinute = (int)scanCountsPerMinute.Average();
        }
        else
        {
            // No complete minute was observed; fall back to 180 scans per minute.
            averageScansPerMinute = 180;
        }

        // The estimate is padded by 25%.
        maxScanLength = (int)(filterOptions.FeatureLengthRange.Maximum * averageScansPerMinute * 1.25);
    }

    if (options.MaximumScanRange < maxScanLength)
    {
        // Bump up the scan range used by the LCMS Feature Finder to allow for longer features
        options.MaximumScanRange = maxScanLength;
    }
}
/// <summary>
/// Finds LC-MS features for the first valid dataset in a directory, links peptides to them,
/// and writes the charge, m/z, scan and NET of every MS feature that has a fragmentation
/// spectrum with at least one peptide.
/// </summary>
/// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
/// <param name="matchPath">Path of the tab-delimited output file.</param>
public void TestPeptideBands(string directory, string matchPath)
{
    // Build one search pattern per supported MultiAlign file type
    var extensions = new List<string>();
    foreach (var fileType in DatasetLoader.SupportedFileTypes)
    {
        extensions.Add("*" + fileType.Extension);
    }

    // Locate the datasets
    var loader = new DatasetLoader();
    var datasets = loader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

    // Tolerances based on the LTQ-Orbitrap preset
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var tolerances = new FeatureTolerances
    {
        Mass = preset.Mass,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };

    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    var findingOptions = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    var baselineDataset = datasets[0];

    UpdateStatus("Loading baseline features.");
    var msFeatures = LcmsFeatureFilters.FilterMsFeatures(
        UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path),
        msFilterOptions);

    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    var peptideOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .05,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Load the baseline reference set
    using (var rawProvider = RawLoaderFactory.CreateFileReader(baselineDataset.RawFile.Path))
    {
        rawProvider.AddDataFile(baselineDataset.RawFile.Path, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var baselineFeatures = featureFinder.FindFeatures(msFeatures, findingOptions, rawProvider);

        LinkPeptidesToFeatures(baselineDataset.Sequence.Path,
            baselineFeatures,
            peptideOptions.Fdr,
            peptideOptions.IdScore);

        // Flatten to MS features, keep those with MS/MS data, ordered by m/z.
        var allMsFeatures = new List<MSFeatureLight>();
        foreach (var lcmsFeature in baselineFeatures)
        {
            allMsFeatures.AddRange(lcmsFeature.MsFeatures);
        }

        var msMsFeatures = allMsFeatures
            .Where(x => x.HasMsMs())
            .OrderBy(x => x.Mz)
            .ToList();

        // Keep each MS feature once if any of its spectra carries a peptide.
        var peptideList = new List<MSFeatureLight>();
        foreach (var msFeature in msMsFeatures)
        {
            var hasPeptide = false;
            foreach (var spectrum in msFeature.MSnSpectra)
            {
                // Only the presence of a peptide matters, so stop at the first one.
                foreach (var peptide in spectrum.Peptides)
                {
                    hasPeptide = true;
                    break;
                }

                if (hasPeptide)
                {
                    break;
                }
            }

            if (hasPeptide)
            {
                peptideList.Add(msFeature);
            }
        }

        using (var writer = File.CreateText(matchPath))
        {
            writer.WriteLine("Charge\tpmz\tscan\tNET\t");
            foreach (var msFeature in peptideList)
            {
                writer.WriteLine("{0}\t{1}\t{2}\t{3}\t",
                    msFeature.ChargeState,
                    msFeature.Mz,
                    msFeature.Scan,
                    msFeature.Net);
            }
        }
    }
}
/// <summary>
/// Finds features in a baseline and an alignee MS feature file, clusters their MS/MS
/// spectra across both raw files, and writes two timestamped result files: a scan summary
/// of the two-member clusters and a detailed listing of every cluster.
/// </summary>
/// <param name="name">Name used as the prefix of the result file names.</param>
/// <param name="resultPath">Directory the result files are written to.</param>
/// <param name="sequencePath">Sequence file path recorded on both datasets.</param>
/// <param name="type">Sequence file type (not used by this method).</param>
/// <param name="baseline">Baseline "_isos.csv" file; the raw file name is derived from it.</param>
/// <param name="features">Alignee "_isos.csv" file; the raw file name is derived from it.</param>
/// <param name="percent">Top percent setting for the spectral comparer.</param>
public void ClusterMsMs(string name,
    string resultPath,
    string sequencePath,
    SequenceFileType type,
    string baseline,
    string features,
    double percent)
{
    // Raw files are expected beside the feature files, with a ".raw" suffix instead.
    var baselineRaw = baseline.Replace("_isos.csv", ".raw");
    var featuresRaw = features.Replace("_isos.csv", ".raw");

    Console.WriteLine("Create Baseline Information");
    var baselineInfo = new DatasetInformation
    {
        DatasetId = 0,
        Features = new InputFile { Path = baseline },
        Raw = new InputFile { Path = baselineRaw },
        Sequence = new InputFile { Path = sequencePath }
    };

    Console.WriteLine("Create Alignee Information");
    var aligneeInfo = new DatasetInformation
    {
        DatasetId = 1,
        Features = new InputFile { Path = features },
        Raw = new InputFile { Path = featuresRaw },
        Sequence = new InputFile { Path = sequencePath }
    };

    var msFeatureReader = new MsFeatureLightFileReader();

    Console.WriteLine("Reading Baseline Features");
    var baselineMsFeatures = msFeatureReader.ReadFile(baseline).ToList();
    foreach (var msFeature in baselineMsFeatures)
    {
        msFeature.GroupId = baselineInfo.DatasetId;
    }

    Console.WriteLine("Reading Alignee Features");
    var aligneeMsFeatures = msFeatureReader.ReadFile(features).ToList();
    foreach (var msFeature in aligneeMsFeatures)
    {
        msFeature.GroupId = aligneeInfo.DatasetId;
    }

    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var findingOptions = new LcmsFeatureFindingOptions(
        new FeatureTolerances
        {
            Mass = 8,
            Net = .005
        });

    Console.WriteLine("Detecting Baseline Features");
    var baselineFeatures = featureFinder.FindFeatures(baselineMsFeatures, findingOptions, null);

    Console.WriteLine("Detecting Alignee Features");
    var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures, findingOptions, null);

    Console.WriteLine("Managing baseline and alignee features");
    foreach (var lcmsFeature in baselineFeatures)
    {
        lcmsFeature.GroupId = baselineInfo.DatasetId;
    }
    foreach (var lcmsFeature in aligneeFeatures)
    {
        lcmsFeature.GroupId = aligneeInfo.DatasetId;
    }

    Console.WriteLine("Clustering MS/MS Spectra");
    var msmsClusterer = new MSMSClusterer
    {
        MzTolerance = .5,
        MassTolerance = 6,
        SpectralComparer = new SpectralNormalizedDotProductComparer
        {
            TopPercent = percent
        },
        SimilarityTolerance = .5,
        ScanRange = 905
    };
    msmsClusterer.Progress += clusterer_Progress;

    var combinedFeatures = new List<UMCLight>();
    combinedFeatures.AddRange(baselineFeatures);
    combinedFeatures.AddRange(aligneeFeatures);

    List<MsmsCluster> clusters = null;
    using (var rawReader = new ThermoRawDataFileReader())
    {
        rawReader.AddDataFile(baselineInfo.Raw.Path, baselineInfo.DatasetId);
        rawReader.AddDataFile(aligneeInfo.Raw.Path, aligneeInfo.DatasetId);

        clusters = msmsClusterer.Cluster(combinedFeatures, rawReader);
        Console.WriteLine("Found {0} Total Clusters", clusters.Count);
    }

    if (clusters == null)
    {
        return;
    }

    var now = DateTime.Now;

    // First file: one line per two-member cluster with the member scans and the mean score.
    var scansResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}_scans.txt",
        name,
        now.Year,
        now.Month,
        now.Day,
        now.Hour,
        now.Minute,
        now.Second,
        resultPath
        );
    using (TextWriter scansWriter = File.CreateText(scansResultPath))
    {
        scansWriter.WriteLine("[Data]");
        scansWriter.WriteLine("{0}", baseline);
        scansWriter.WriteLine("{0}", features);
        scansWriter.WriteLine("[Scans]");
        scansWriter.WriteLine();

        foreach (var cluster in clusters)
        {
            if (cluster.Features.Count != 2)
            {
                continue;
            }

            var scanLine = "";
            foreach (var member in cluster.Features)
            {
                scanLine += string.Format("{0},", member.Scan);
            }
            scanLine += string.Format("{0}", cluster.MeanScore);
            scansWriter.WriteLine(scanLine);
        }
    }

    // Second file: scans plus feature details per cluster, followed by a cluster listing.
    var detailResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}.txt",
        name,
        now.Year,
        now.Month,
        now.Day,
        now.Hour,
        now.Minute,
        now.Second,
        resultPath
        );
    using (TextWriter detailWriter = File.CreateText(detailResultPath))
    {
        detailWriter.WriteLine("[Data]");
        detailWriter.WriteLine("{0}", baseline);
        detailWriter.WriteLine("{0}", features);
        detailWriter.WriteLine("[Scans]");

        foreach (var cluster in clusters)
        {
            var scanLine = "";
            var detailLine = "";
            foreach (var member in cluster.Features)
            {
                scanLine += string.Format("{0},", member.Scan);
                detailLine += string.Format("{0},{1},{2},{3},{4},{5}",
                    member.GroupId,
                    member.Id,
                    member.MassMonoisotopic,
                    member.Mz,
                    member.ChargeState,
                    member.Scan);

                foreach (var spectrum in member.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        detailLine += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }
            }
            detailWriter.WriteLine(scanLine + "," + detailLine);
        }

        detailWriter.WriteLine("");
        detailWriter.WriteLine("");
        detailWriter.WriteLine("[Clusters]");

        foreach (var cluster in clusters)
        {
            detailWriter.WriteLine("cluster id, cluster score");
            detailWriter.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
            detailWriter.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

            foreach (var member in cluster.Features)
            {
                var memberLine = string.Format("{0},{1},{2},{3},{4},{5}",
                    member.GroupId,
                    member.Id,
                    member.MassMonoisotopic,
                    member.Mz,
                    member.ChargeState,
                    member.Scan);

                foreach (var spectrum in member.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        memberLine += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }
                detailWriter.WriteLine(memberLine);
            }
        }
    }
}
/// <summary>
/// Runs the MultiAlign analysis: finds features for the baseline dataset, then for every alignee
/// dataset finds its features, clusters baseline and alignee features together, aligns the alignee
/// against the baseline and clusters again so that pre- and post-alignment cluster statistics can
/// be reported through <c>UpdateStatus</c>.
/// </summary>
/// <param name="baselineDataset">Dataset whose features serve as the alignment baseline.</param>
/// <param name="aligneeDatasets">Datasets that are each aligned against the baseline.</param>
/// <param name="featureFindingOptions">Options passed to <paramref name="featureFinder"/>.</param>
/// <param name="msFilterOptions">Filter applied to the MS features right after they are loaded.</param>
/// <param name="lcmsFilterOptions">Not read by this method.</param>
/// <param name="peptideOptions">Supplies the FDR and identification score used when linking peptides.</param>
/// <param name="featureFinder">Finder used to build LCMS features from MS features.</param>
/// <param name="aligner">Aligner that receives the cached spectra providers and performs the alignment.</param>
/// <param name="clusterer">Clusterer run over the combined features before and after alignment.</param>
/// <param name="matchPath">Path handed to <c>SaveMatches</c> for every alignee dataset.</param>
/// <param name="errorPath">Path handed to <c>WriteErrors</c> for every alignee dataset.</param>
public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
    IEnumerable<DatasetInformation> aligneeDatasets,
    LcmsFeatureFindingOptions featureFindingOptions,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    SpectralOptions peptideOptions,
    IFeatureFinder featureFinder,
    IFeatureAligner<IEnumerable<UMCLight>, IEnumerable<UMCLight>, classAlignmentData> aligner,
    IClusterer<UMCLight, UMCClusterLight> clusterer,
    string matchPath,
    string errorPath)
{
    try
    {
        UpdateStatus("Loading baseline features.");
        var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
        msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

        // Load the baseline reference set
        using (var rawProviderX = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
        {
            rawProviderX.AddDataFile(baselineDataset.RawPath, 0);

            UpdateStatus("Creating Baseline LCMS Features.");
            var baselineFeatures = featureFinder.FindFeatures(msFeatures, featureFindingOptions, rawProviderX);
            LinkPeptidesToFeatures(baselineDataset.SequencePath,
                baselineFeatures,
                peptideOptions.Fdr,
                peptideOptions.IdScore);

            var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

            // Then load the alignee dataset
            foreach (var dataset in aligneeDatasets)
            {
                var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);

                using (var rawProviderY = RawLoaderFactory.CreateFileReader(dataset.RawPath))
                {
                    rawProviderY.AddDataFile(dataset.RawPath, 0);

                    UpdateStatus("Finding alignee features");
                    var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures, featureFindingOptions, rawProviderY);
                    LinkPeptidesToFeatures(dataset.SequencePath,
                        aligneeFeatures,
                        peptideOptions.Fdr,
                        peptideOptions.IdScore);

                    var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                    // Cluster before we do anything else.
                    var allFeatures = new List<UMCLight>();
                    allFeatures.AddRange(baselineFeatures);
                    allFeatures.AddRange(aligneeFeatures);

                    // Seed the aligned values with the raw ones. The previous code assigned
                    // feature.Net to itself, which had no effect.
                    // NOTE(review): presumably the clusterer reads the aligned values - confirm.
                    foreach (var feature in allFeatures)
                    {
                        feature.NetAligned = feature.Net;
                        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                    }

                    // This tells us the differences before we align.
                    var clusters = clusterer.Cluster(allFeatures);
                    var preAlignment = AnalyzeClusters(clusters);

                    aligner.AligneeSpectraProvider = providerY;
                    aligner.BaselineSpectraProvider = providerX;

                    UpdateStatus("Aligning data");
                    var data = aligner.Align(baselineFeatures, aligneeFeatures);
                    var matches = data.Matches;

                    WriteErrors(errorPath, matches);

                    // Create anchor points for LCMSWarp alignment.
                    // NOTE(review): these point lists are built but never read in this method.
                    var massPoints = new List<RegressionPoint>();
                    var netPoints = new List<RegressionPoint>();
                    foreach (var match in matches)
                    {
                        var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz, match.AnchorPointY.Mz);
                        var netError = match.AnchorPointX.Net - match.AnchorPointY.Net;

                        var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                        massPoints.Add(massPoint);

                        var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                        netPoints.Add(netPoint);
                    }

                    // Detach every feature from its pre-alignment cluster before re-clustering.
                    foreach (var feature in allFeatures)
                    {
                        feature.UmcCluster = null;
                        feature.ClusterId = -1;
                    }

                    // Then cluster after alignment!
                    UpdateStatus("clustering data");
                    clusters = clusterer.Cluster(allFeatures);
                    var postAlignment = AnalyzeClusters(clusters);

                    UpdateStatus("Note\tSame\tDifferent");
                    UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster, preAlignment.DifferentCluster));
                    UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster, postAlignment.DifferentCluster));

                    SaveMatches(matchPath, matches);
                }
            }
        }
    }
    finally
    {
        // Deregister even when a step above throws, so the notifiers are not left attached.
        DeRegisterProgressNotifier(aligner);
        DeRegisterProgressNotifier(featureFinder);
        DeRegisterProgressNotifier(clusterer);
    }
}
/// <summary>
/// Loads the LCMS features for a single dataset. When the feature loader returns no features,
/// they are created from the dataset's MS features; otherwise any MS features that were loaded
/// are attached to their parent LCMS features. If the dataset has a sequence file, the collected
/// MS/MS spectra are linked to its identifications.
/// </summary>
/// <param name="dataset">Dataset to load; passed by reference to <c>Filter</c>.</param>
/// <param name="msFilteringOptions">Filter applied to MS features before LCMS features are created.</param>
/// <param name="lcmsFindingOptions">Options used when LCMS features have to be created.</param>
/// <param name="lcmsFilteringOptions">Filter options used when LCMS features have to be created.</param>
/// <param name="dataLoadOptions">Supplies the isos filter options used when loading MS feature data.</param>
/// <param name="providerCache">Source of the scan summary provider when the dataset has a raw file path.</param>
/// <param name="identificationProviders">Source of the identification provider for the sequence file.</param>
/// <param name="progress">Optional progress sink.</param>
/// <returns>The features that were loaded or created.</returns>
public IList<UMCLight> LoadDataset(DatasetInformation dataset,
    MsFeatureFilteringOptions msFilteringOptions,
    LcmsFeatureFindingOptions lcmsFindingOptions,
    LcmsFeatureFilteringOptions lcmsFilteringOptions,
    DataLoadingOptions dataLoadOptions,
    ScanSummaryProviderCache providerCache,
    IdentificationProviderCache identificationProviders,
    IProgress<ProgressData> progress = null)
{
    var progData = new ProgressData(progress);

    // A raw file is optional; guard the reference the same way SequenceFile is guarded below.
    IScanSummaryProvider provider = null;
    if (dataset.RawFile != null && !string.IsNullOrWhiteSpace(dataset.RawFile.Path))
    {
        UpdateStatus("Using raw data to create better features.");
        provider = providerCache.GetScanSummaryProvider(dataset.RawFile.Path, dataset.DatasetId);
    }

    progData.StepRange(1);
    progData.Status = "Looking for existing features in the database.";
    UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));

    var datasetId = dataset.DatasetId;
    var features = UmcLoaderFactory.LoadUmcFeatureData(dataset, Providers.FeatureCache, provider);

    // MS feature data is only read from file when none of the loaded features carries any.
    var hasMsFeatures = features.Any(f => f.MsFeatures.Any());
    var msFeatures = new List<MSFeatureLight>();
    if (!hasMsFeatures)
    {
        progData.StepRange(2);
        progData.Status = "Loading MS Feature Data.";
        UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));

        var isosFilterOptions = dataLoadOptions.GetIsosFilterOptions();
        msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path, isosFilterOptions);
    }

    // Scan summaries are not loaded here; only the progress steps are reported.
    progData.StepRange(3);
    progData.Status = "Loading scan summaries.";
    progData.StepRange(100);

    var msnSpectra = new List<MSSpectra>();

    // If we don't have any features, then we have to create some from the MS features
    // provided to us.
    if (features.Count < 1)
    {
        msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
        msFeatures = Filter(msFeatures, provider, ref dataset);

        progData.Status = "Creating LCMS features.";
        features = CreateLcmsFeatures(dataset,
            msFeatures,
            lcmsFindingOptions,
            lcmsFilteringOptions,
            provider,
            new Progress<ProgressData>(pd => progData.Report(pd.Percent)));

        // Filtering can leave no features at all; Max/Min would throw on an empty sequence.
        if (features.Count > 0)
        {
            var maxScan = features.Max(feature => feature.Scan);
            var minScan = features.Min(feature => feature.Scan);

            // Hoist the loop-invariant scan-time lookups.
            var scanTimes = dataset.ScanTimes;
            var minTime = scanTimes[minScan];
            var timeRange = scanTimes[maxScan] - minTime;

            var id = 0;
            foreach (var feature in features)
            {
                feature.Id = id++;

                // NET is the feature's scan time scaled into the dataset's time range.
                // Exact comparison is intended: only a zero denominator is special-cased,
                // so a single-scan-time dataset yields 0 instead of NaN/Infinity.
                feature.Net = timeRange != 0
                    ? (Convert.ToDouble(scanTimes[feature.Scan]) - minTime) / timeRange
                    : 0.0;
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                feature.NetAligned = feature.Net;
                feature.GroupId = datasetId;
                feature.SpectralCount = feature.MsFeatures.Count;

                foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                {
                    msFeature.UmcId = feature.Id;
                    msFeature.GroupId = datasetId;
                    msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                    msnSpectra.AddRange(msFeature.MSnSpectra);
                }
            }
        }
    }
    else
    {
        if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
        {
            var i = 0;
            foreach (var feature in features)
            {
                feature.GroupId = datasetId;
                feature.Id = i++;
            }
        }

        // Otherwise, we need to map the MS features to the LCMS Features provided.
        // This would mean that we extracted data from an existing database.
        if (msFeatures.Count > 0)
        {
            var map = FeatureDataConverters.MapFeature(features);
            foreach (var msFeature in msFeatures)
            {
                // MS features whose parent id is not in the map are left unattached.
                if (map.ContainsKey(msFeature.UmcId))
                {
                    map[msFeature.UmcId].AddChildFeature(msFeature);
                }
            }
        }
    }

    // Process the MS/MS data with peptides. The status is reported only when there is a
    // sequence file to read (it used to be reported twice, once unconditionally).
    if (dataset.SequenceFile != null && !string.IsNullOrEmpty(dataset.SequenceFile.Path))
    {
        UpdateStatus("Reading List of Peptides");
        var idProvider = identificationProviders.GetProvider(dataset.SequenceFile.Path, dataset.DatasetId);
        var peptideList = idProvider.GetAllIdentifications();

        UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");
        var linker = new PeptideMsMsLinker();
        linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
    }

    progData.Report(100);
    return features;
}
/// <summary>
/// Builds the LCMS features of one dataset: loads and filters its MS features, runs the feature
/// finder against the raw file, filters the result, stamps the dataset id onto every feature and
/// its children, and links peptides to the features.
/// </summary>
/// <param name="information">Dataset to process; supplies the feature, raw and sequence paths and the dataset id.</param>
/// <param name="featureFindingOptions">Options passed to <paramref name="featureFinder"/>.</param>
/// <param name="msFilterOptions">Filter applied to the MS features after loading.</param>
/// <param name="lcmsFilterOptions">Filter applied to the LCMS features that were found.</param>
/// <param name="peptideOptions">Supplies the FDR and identification score used when linking peptides.</param>
/// <param name="featureFinder">Finder used to create the features; deregistered from progress notification before returning.</param>
/// <returns>The filtered features, each keeping only the MS features that have MSn spectra.</returns>
private IList<UMCLight> FindFeatures(
    DatasetInformation information,
    LcmsFeatureFindingOptions featureFindingOptions,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    SpectralOptions peptideOptions,
    IFeatureFinder featureFinder)
{
    UpdateStatus("Loading baseline features.");
    var msFeatures = LcmsFeatureFilters.FilterMsFeatures(
        UmcLoaderFactory.LoadMsFeatureData(information.Features.Path),
        msFilterOptions);

    // The raw reader only has to live while features are being found and linked.
    using (var rawReader = RawLoaderFactory.CreateFileReader(information.RawPath))
    {
        rawReader.AddDataFile(information.RawPath, 0);

        UpdateStatus("Creating LCMS Features.");
        var features = featureFinder.FindFeatures(msFeatures, featureFindingOptions, rawReader);
        features = LcmsFeatureFilters.FilterFeatures(features, lcmsFilterOptions);

        var groupId = information.DatasetId;
        foreach (var lcmsFeature in features)
        {
            lcmsFeature.GroupId = groupId;

            // Push the dataset id down to every MS feature, spectrum and peptide.
            foreach (var child in lcmsFeature.MsFeatures)
            {
                child.GroupId = groupId;
                foreach (var spectrum in child.MSnSpectra)
                {
                    spectrum.GroupId = groupId;
                    foreach (var peptide in spectrum.Peptides)
                    {
                        peptide.GroupId = groupId;
                    }
                }
            }

            // Keep only MS features that carry MSn spectra so that we don't have a ton of
            // MS features in the database.
            var withSpectra = lcmsFeature.MsFeatures
                .Where(child => child.MSnSpectra.Count > 0)
                .ToList();
            lcmsFeature.MsFeatures.Clear();
            lcmsFeature.MsFeatures.AddRange(withSpectra);
        }

        LinkPeptidesToFeatures(information.SequencePath,
            features,
            peptideOptions.Fdr,
            peptideOptions.IdScore);

        DeRegisterProgressNotifier(featureFinder);
        return features;
    }
}
/// <summary>
/// Reads MS features from a comma-delimited file, runs a <c>UmcTreeFeatureFinder</c> over them
/// and asserts that at least one feature was produced.
/// </summary>
/// <param name="path">Path of the MS feature file to read.</param>
/// <returns>The features found.</returns>
public IEnumerable<UMCLight> TestUmcFeatures(string path)
{
    // Load the MS features.
    var featureReader = new MsFeatureLightFileReader();
    featureReader.Delimeter = ",";
    var loadedMsFeatures = featureReader.ReadFile(path);

    // Configure the finder and its tolerances.
    var treeFinder = new UmcTreeFeatureFinder();
    treeFinder.MaximumNet = .005;
    treeFinder.MaximumScan = 50;

    var massNetTolerances = new FeatureTolerances();
    massNetTolerances.Mass = 8;
    massNetTolerances.Net = .005;
    var findingOptions = new LcmsFeatureFindingOptions(massNetTolerances);

    // No spectra provider is supplied.
    var foundFeatures = treeFinder.FindFeatures(loadedMsFeatures.ToList(), findingOptions, null);

    // Work on total feature count here.
    Assert.Greater(foundFeatures.Count, 0);

    return foundFeatures;
}
/// <summary>
/// Finds the datasets in the top level of a directory, creates a database at the given path,
/// finds the features of every dataset with fixed LTQ-Orbitrap based options, caches them, and
/// finally adds the datasets to the dataset cache.
/// </summary>
/// <param name="directoryPath">Directory to search for datasets; resolved through <c>GetPath</c>.</param>
/// <param name="databasePath">Path of the database to create; resolved through <c>GetPath</c>.</param>
public void CreateFeatureDatabase(string directoryPath, string databasePath)
{
    var searchDirectory = GetPath(directoryPath);
    databasePath = GetPath(databasePath);

    // Build one "*<extension>" search pattern per supported MultiAlign file type.
    var searchPatterns = new List<string>();
    foreach (var fileType in DatasetInformation.SupportedFileTypes)
    {
        searchPatterns.Add("*" + fileType.Extension);
    }

    // Find our datasets
    var foundFiles = DatasetSearcher.FindDatasets(searchDirectory, searchPatterns, SearchOption.TopDirectoryOnly);
    var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(foundFiles);

    // Options setup
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);

    var tolerances = new FeatureTolerances();
    tolerances.Mass = preset.Mass + 6;
    tolerances.Net = preset.NetTolerance;
    tolerances.DriftTime = preset.DriftTimeTolerance;

    var findingOptions = new LcmsFeatureFindingOptions(tolerances);
    findingOptions.MaximumNetRange = .002;
    findingOptions.MaximumScanRange = 50;

    var lcmsFilterOptions = new LcmsFeatureFilteringOptions();
    lcmsFilterOptions.FeatureLengthRange = new FilterRange(50, 300);

    var msFilters = new MsFeatureFilteringOptions();
    msFilters.MinimumIntensity = 5000;
    msFilters.ChargeRange = new FilterRange(1, 6);
    msFilters.ShouldUseChargeFilter = true;
    msFilters.ShouldUseDeisotopingFilter = true;
    msFilters.ShouldUseIntensityFilter = true;

    var spectral = new SpectralOptions();
    spectral.ComparerType = SpectralComparison.CosineDotProduct;
    spectral.Fdr = .01;
    spectral.IdScore = 1e-09;
    spectral.MzBinSize = .5;
    spectral.MzTolerance = .5;
    spectral.NetTolerance = .1;
    spectral.RequiredPeakCount = 32;
    spectral.SimilarityCutoff = .75;
    spectral.TopIonPercent = .8;

    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    NHibernateUtil.CreateDatabase(databasePath);

    // Synchronization and IO for serializing all data to the database.
    var accessProviders = DataAccessFactory.CreateDataAccessProviders(databasePath, true);
    var featureCache = new FeatureLoader();
    featureCache.Providers = accessProviders;

    // Number the datasets sequentially from zero while caching their features.
    var nextDatasetId = 0;
    foreach (var dataset in datasets)
    {
        dataset.DatasetId = nextDatasetId;
        nextDatasetId++;

        var datasetFeatures = FindFeatures(dataset,
            findingOptions,
            msFilters,
            lcmsFilterOptions,
            spectral,
            featureFinder);
        featureCache.CacheFeatures(datasetFeatures);
    }

    accessProviders.DatasetCache.AddAll(datasets);
}
/// <summary>
/// Finds features in two MS feature files, aligns the second set to the first with
/// <c>LcmsWarpFeatureAligner</c>, renders the alignment heat scores as a heat map saved to
/// <paramref name="pngPath"/>, and saves heat map and error histogram plots as SVG files next to it.
/// </summary>
/// <param name="path1">MS feature file used as the baseline; resolved through <c>GetPath</c>.</param>
/// <param name="path2">MS feature file used as the alignee; resolved through <c>GetPath</c>.</param>
/// <param name="pngPath">Output image path; its directory is created when missing.</param>
/// <exception cref="DirectoryNotFoundException">The output path has no parent directory.</exception>
public void TestMsFeatureScatterPlot(string path1, string path2, string pngPath)
{
    // Convert relative paths to absolute paths
    path1 = GetPath(path1);
    path2 = GetPath(path2);
    pngPath = GetPath(pngPath);

    var fiOutput = new FileInfo(pngPath);
    var outputDirectory = fiOutput.Directory;
    if (outputDirectory == null)
    {
        throw new DirectoryNotFoundException(pngPath);
    }

    if (!outputDirectory.Exists)
    {
        outputDirectory.Create();
    }

    // Load both feature sets and align them.
    var aligner = new LcmsWarpFeatureAligner(new LcmsWarpAlignmentOptions());
    var isosFilterOptions = new DeconToolsIsosFilterOptions();

    var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1, isosFilterOptions);
    var aligneeMs = UmcLoaderFactory.LoadMsFeatureData(path2, isosFilterOptions);
    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    var tolerances = new FeatureTolerances
    {
        FragmentationWindowSize = .5,
        Mass = 13,
        DriftTime = .3,
        Net = .01
    };
    var options = new LcmsFeatureFindingOptions(tolerances);
    options.MaximumNetRange = .002;

    var baseline = finder.FindFeatures(baselineMs, options, null);
    var alignee = finder.FindFeatures(aligneeMs, options, null);
    var alignmentResults = aligner.Align(baseline, alignee);

    // Build the heat map plot model.
    var plotModel1 = new PlotModel
    {
        Subtitle = "Interpolated, cartesian axes",
        Title = "HeatMapSeries"
    };

    var palette = OxyPalettes.Hot(200);
    var linearColorAxis1 = new LinearColorAxis
    {
        InvalidNumberColor = OxyColors.Gray,
        Position = AxisPosition.Right,
        Palette = palette
    };
    plotModel1.Axes.Add(linearColorAxis1);

    var linearAxis1 = new LinearAxis { Position = AxisPosition.Bottom };
    plotModel1.Axes.Add(linearAxis1);

    var linearAxis2 = new LinearAxis();
    plotModel1.Axes.Add(linearAxis2);

    var heatMapSeries1 = new HeatMapSeries
    {
        X0 = 0,
        X1 = 1,
        Y0 = 0,
        Y1 = 1,
        FontSize = .2
    };

    // Copy the heat scores into the series as doubles.
    var scores = alignmentResults.HeatScores;
    var width = scores.GetLength(0);
    var height = scores.GetLength(1);

    heatMapSeries1.Data = new double[width, height];
    var seriesData = heatMapSeries1.Data;
    for (var i = 0; i < width; i++)
    {
        for (var j = 0; j < height; j++)
        {
            seriesData[i, j] = Convert.ToDouble(scores[i, j]);
        }
    }

    plotModel1.Series.Add(heatMapSeries1);

    // Export the plot to SVG text, parse it and rasterize it.
    var svg = new SvgExporter();
    var svgString = svg.ExportToString(plotModel1);

    var xml = new XmlDocument();
    xml.LoadXml(svgString);
    var svgDocument = SvgDocument.Open(xml);

    // The rendered bitmap is disposable; release it as soon as it has been saved.
    using (var bmp = svgDocument.Draw())
    {
        bmp.Save(pngPath);
    }

    var heatmap = HeatmapFactory.CreateAlignedHeatmap(alignmentResults.HeatScores, false);
    var netHistogram = HistogramFactory.CreateHistogram(alignmentResults.NetErrorHistogram, "NET Error", "NET Error");
    var massHistogram = HistogramFactory.CreateHistogram(alignmentResults.MassErrorHistogram, "Mass Error", "Mass Error (ppm)");

    // The SVG plots share the output file's name without its extension.
    var baseName = Path.Combine(outputDirectory.FullName, Path.GetFileNameWithoutExtension(fiOutput.Name));

    var encoder = new SvgEncoder();
    PlotImageUtility.SaveImage(heatmap, baseName + "_heatmap.svg", encoder);
    PlotImageUtility.SaveImage(netHistogram, baseName + "_netHistogram.svg", encoder);
    PlotImageUtility.SaveImage(massHistogram, baseName + "_massHistogram.svg", encoder);
}
/// <summary>
/// Creates LCMS features from MS features with the tree-based feature finder and filters them
/// by feature length, using scan times or scan numbers depending on the filter options.
/// </summary>
/// <param name="information">Dataset the features belong to; supplies the raw path and scan times.</param>
/// <param name="msFeatures">MS features to build LCMS features from; must not be empty.</param>
/// <param name="options">Options passed to the feature finder.</param>
/// <param name="filterOptions">Options controlling how the found features are filtered.</param>
/// <returns>The features that pass the filter.</returns>
/// <exception cref="Exception"><paramref name="msFeatures"/> is empty.</exception>
public List<UMCLight> CreateLcmsFeatures(
    DatasetInformation information,
    List<MSFeatureLight> msFeatures,
    LcmsFeatureFindingOptions options,
    LcmsFeatureFilteringOptions filterOptions)
{
    // Make features
    if (msFeatures.Count < 1)
    {
        throw new Exception("No features were found in the feature files provided.");
    }

    UpdateStatus("Finding features.");

    ISpectraProvider provider = null;
    try
    {
        // IsNullOrWhiteSpace already covers null, so no separate null check is needed.
        if (!string.IsNullOrWhiteSpace(information.RawPath))
        {
            UpdateStatus("Using raw data to create better features.");
            provider = RawLoaderFactory.CreateFileReader(information.RawPath);
            provider.AddDataFile(information.RawPath, 0);
        }

        ValidateFeatureFinderMaxScanLength(information, options, filterOptions);
        var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
        finder.Progress += (sender, args) => UpdateStatus(args.Message);
        var features = finder.FindFeatures(msFeatures, options, provider);

        UpdateStatus("Filtering features.");
        List<UMCLight> filteredFeatures;
        if (filterOptions.TreatAsTimeNotScan)
        {
            // Feature length determined based on time (mins)
            filteredFeatures = LcmsFeatureFilters.FilterFeatures(features, filterOptions, information.ScanTimes);
        }
        else
        {
            // Feature length determined based on scans
            filteredFeatures = LcmsFeatureFilters.FilterFeatures(features, filterOptions);
        }

        UpdateStatus(string.Format("Filtered features from: {0} to {1}.", features.Count, filteredFeatures.Count));
        return filteredFeatures;
    }
    finally
    {
        // The raw file reader was created here, so it is released here; previously it was
        // never disposed.
        if (provider != null)
        {
            provider.Dispose();
        }
    }
}