/// <summary>
/// Finds LC-MS features for one dataset: loads the MS features from the feature file and
/// runs the tree-based feature finder over them using a reader opened on the raw file.
/// </summary>
/// <param name="rawFile">Path of the raw data file that the spectra readers are opened on.</param>
/// <param name="featureFile">Path of the MS feature file to load.</param>
/// <returns>The features returned by the feature finder.</returns>
public List<UMCLight> FindFeatures(string rawFile, string featureFile)
{
    List<UMCLight> features;

    using (ISpectraProvider raw = new InformedProteomicsReader())
    {
        // Read the raw file summary data...
        // NOTE(review): this reader is never handed to the finder below; it is kept so the
        // existing open/dispose behavior on rawFile does not change -- confirm it is needed.
        raw.AddDataFile(rawFile, 0);

        var info = new DatasetInformation();
        info.InputFiles.Add(new InputFile { Path = featureFile, FileType = InputFileType.Features });

        var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
        var tolerances = new FeatureTolerances
        {
            Mass = 8,
            Net = .005
        };
        var options = new LcmsFeatureFindingOptions(tolerances);

        // Load and create features
        var msFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);

        // The factory-created reader was previously left open; dispose it once the
        // finder has finished with it.
        using (var provider = RawLoaderFactory.CreateFileReader(rawFile))
        {
            provider.AddDataFile(rawFile, 0);
            features = finder.FindFeatures(msFeatures, options, provider);
        }
    }

    return features;
}
/// <summary>
/// Collects the features for each of the given datasets. Features already held in
/// <c>featuresByDataset</c> are reused; otherwise they are loaded from the feature cache on a
/// background task and stored in <c>featuresByDataset</c> for later calls.
/// </summary>
/// <param name="datasets">The dataset view models whose features are wanted.</param>
/// <returns>A dictionary from each dataset to its list of features.</returns>
private async Task<Dictionary<DatasetInformation, IList<UMCLight>>> GetFeatures(IEnumerable<DatasetInformationViewModel> datasets)
{
    var featureCache = new FeatureLoader { Providers = this.analysis.DataProviders };
    var datasetFeatures = new Dictionary<DatasetInformation, IList<UMCLight>>();

    foreach (var file in datasets)
    {
        IList<UMCLight> feat;

        // Single lookup instead of ContainsKey followed by the indexer.
        if (!this.featuresByDataset.TryGetValue(file.Dataset, out feat))
        {
            var fileInstance = file;
            feat = await Task.Run(() => UmcLoaderFactory.LoadUmcFeatureData(
                fileInstance.Dataset,
                featureCache.Providers.FeatureCache));

            // Indexer assignment rather than Add: another invocation may have filled the
            // entry while this one was awaiting, and Add would then throw on the duplicate key.
            this.featuresByDataset[file.Dataset] = feat;
        }

        // Indexer assignment also tolerates the same dataset appearing twice in the input.
        datasetFeatures[file.Dataset] = feat;
    }

    return datasetFeatures;
}
/// <summary>
/// Builds the LC-MS features for a single dataset. The MS features are loaded and filtered,
/// the supplied feature finder is run against a reader opened on the dataset's raw file, the
/// resulting features are filtered, every feature and its children are tagged with the
/// dataset id, and peptides are linked from the dataset's sequence file.
/// </summary>
/// <param name="information">Dataset whose feature, raw and sequence file paths are used.</param>
/// <param name="featureFindingOptions">Options passed to the feature finder.</param>
/// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
/// <param name="lcmsFilterOptions">Options used to filter the features that were found.</param>
/// <param name="peptideOptions">Supplies the FDR and id-score values used when linking peptides.</param>
/// <param name="featureFinder">Feature finder to run; its progress notifier is de-registered before returning.</param>
/// <returns>The filtered features, each retaining only the MS features that have MSn spectra.</returns>
private IList<UMCLight> FindFeatures(DatasetInformation information,
    LcmsFeatureFindingOptions featureFindingOptions,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    SpectralOptions peptideOptions,
    MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder)
{
    UpdateStatus("Loading baseline features.");
    var filteredMsFeatures = LcmsFeatureFilters.FilterMsFeatures(
        UmcLoaderFactory.LoadMsFeatureData(information.Features.Path),
        msFilterOptions);

    // Open the raw data for the duration of feature finding and post-processing.
    using (var rawReader = RawLoaderFactory.CreateFileReader(information.RawFile.Path))
    {
        rawReader.AddDataFile(information.RawFile.Path, 0);

        UpdateStatus("Creating LCMS Features.");
        var lcmsFeatures = featureFinder.FindFeatures(filteredMsFeatures, featureFindingOptions, rawReader);
        lcmsFeatures = LcmsFeatureFilters.FilterFeatures(lcmsFeatures, lcmsFilterOptions, information.ScanTimes);

        var groupId = information.DatasetId;
        foreach (var lcmsFeature in lcmsFeatures)
        {
            lcmsFeature.GroupId = groupId;

            var fragmented = new List<MSFeatureLight>();
            foreach (var child in lcmsFeature.MsFeatures)
            {
                child.GroupId = groupId;

                foreach (var spectrum in child.MSnSpectra)
                {
                    spectrum.GroupId = groupId;
                    foreach (var identification in spectrum.Peptides)
                    {
                        identification.GroupId = groupId;
                    }
                }

                if (child.MSnSpectra.Count <= 0)
                {
                    continue;
                }

                fragmented.Add(child);
            }

            // Keep only the MS features that carry MSn spectra so that we do not end up
            // with a huge number of MS features in the database.
            lcmsFeature.MsFeatures.Clear();
            lcmsFeature.MsFeatures.AddRange(fragmented);
        }

        LinkPeptidesToFeatures(information.SequenceFile.Path,
            lcmsFeatures,
            peptideOptions.Fdr,
            peptideOptions.IdScore);

        // Only reached on the success path; an exception above leaves the notifier registered.
        DeRegisterProgressNotifier(featureFinder);
        return lcmsFeatures;
    }
}
/// <summary>
/// Rebuilds the <c>features</c> dictionary from the selected datasets that report
/// <c>FeaturesFound</c>, loading each dataset's features from the feature cache on a
/// background task, and then opens a new MS feature window on the result.
/// </summary>
public async Task PlotMSFeatures()
{
    // There is no exception handling here: a failure while reading the feature cache
    // propagates to whoever awaits this method.
    features = new Dictionary<DatasetInformation, IList<UMCLight>>();

    // Only datasets that have features are loaded.
    var datasetsWithFeatures = selectedDatasets.Where(dataset => dataset.FeaturesFound);
    foreach (var dataset in datasetsWithFeatures)
    {
        var loaded = await Task.Run(() => UmcLoaderFactory.LoadUmcFeatureData(
            dataset.Features.Path,
            dataset.DatasetId,
            featureCache.Providers.FeatureCache));

        features.Add(dataset, loaded);
    }

    msFeatureWindowFactory.CreateNewWindow(features);
}
/// <summary>
/// Aligns the features found in two MS feature files and writes the results as images:
/// the heat-score plot rendered to a PNG at <paramref name="pngPath"/>, plus SVG files for
/// the aligned heatmap and the NET / mass error histograms next to it.
/// </summary>
/// <param name="path1">Path of the baseline MS feature file (resolved through GetPath).</param>
/// <param name="path2">Path of the alignee MS feature file (resolved through GetPath).</param>
/// <param name="pngPath">Path of the PNG to write (resolved through GetPath); its directory is created if missing.</param>
/// <exception cref="DirectoryNotFoundException">The output path has no parent directory.</exception>
public void TestMsFeatureScatterPlot(string path1, string path2, string pngPath)
{
    // Convert relative paths to absolute paths
    path1 = GetPath(path1);
    path2 = GetPath(path2);
    pngPath = GetPath(pngPath);

    var fiOutput = new FileInfo(pngPath);
    var outputDirectory = fiOutput.Directory;
    if (outputDirectory == null)
    {
        throw new DirectoryNotFoundException(pngPath);
    }

    if (!outputDirectory.Exists)
    {
        outputDirectory.Create();
    }

    var aligner = new LcmsWarpFeatureAligner(new LcmsWarpAlignmentOptions());
    var isosFilterOptions = new DeconToolsIsosFilterOptions();

    var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1, isosFilterOptions);
    var aligneeMs = UmcLoaderFactory.LoadMsFeatureData(path2, isosFilterOptions);
    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    var tolerances = new FeatureTolerances
    {
        FragmentationWindowSize = .5,
        Mass = 13,
        DriftTime = .3,
        Net = .01
    };
    var options = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = .002
    };

    // No raw data provider is supplied to the finder here.
    var baseline = finder.FindFeatures(baselineMs, options, null);
    var alignee = finder.FindFeatures(aligneeMs, options, null);
    var alignmentResults = aligner.Align(baseline, alignee);

    var plotModel1 = new PlotModel
    {
        Subtitle = "Interpolated, cartesian axes",
        Title = "HeatMapSeries"
    };

    var palette = OxyPalettes.Hot(200);
    var linearColorAxis1 = new LinearColorAxis
    {
        InvalidNumberColor = OxyColors.Gray,
        Position = AxisPosition.Right,
        Palette = palette
    };
    plotModel1.Axes.Add(linearColorAxis1);

    var linearAxis1 = new LinearAxis { Position = AxisPosition.Bottom };
    plotModel1.Axes.Add(linearAxis1);

    var linearAxis2 = new LinearAxis();
    plotModel1.Axes.Add(linearAxis2);

    var heatMapSeries1 = new HeatMapSeries
    {
        X0 = 0,
        X1 = 1,
        Y0 = 0,
        Y1 = 1,
        FontSize = .2
    };

    // Copy the heat scores into the series as doubles.
    var scores = alignmentResults.HeatScores;
    var width = scores.GetLength(0);
    var height = scores.GetLength(1);

    heatMapSeries1.Data = new double[width, height];
    var seriesData = heatMapSeries1.Data;
    for (var i = 0; i < width; i++)
    {
        for (var j = 0; j < height; j++)
        {
            seriesData[i, j] = Convert.ToDouble(scores[i, j]);
        }
    }

    plotModel1.Series.Add(heatMapSeries1);

    // Export the plot to SVG text, then rasterize that SVG to produce the PNG.
    var svg = new SvgExporter();
    var svgString = svg.ExportToString(plotModel1);

    var xml = new XmlDocument();
    xml.LoadXml(svgString);
    var svgDocument = SvgDocument.Open(xml);

    // The rendered bitmap was previously never disposed; release it once it is saved.
    using (var bmp = svgDocument.Draw())
    {
        bmp.Save(pngPath);
    }

    var heatmap = HeatmapFactory.CreateAlignedHeatmap(alignmentResults.HeatScores, false);
    var netHistogram = HistogramFactory.CreateHistogram(alignmentResults.NetErrorHistogram, "NET Error", "NET Error");
    var massHistogram = HistogramFactory.CreateHistogram(alignmentResults.MassErrorHistogram, "Mass Error", "Mass Error (ppm)");

    // The SVG outputs share the PNG's directory and base file name.
    var baseName = Path.Combine(outputDirectory.FullName, Path.GetFileNameWithoutExtension(fiOutput.Name));

    var encoder = new SvgEncoder();
    PlotImageUtility.SaveImage(heatmap, baseName + "_heatmap.svg", encoder);
    PlotImageUtility.SaveImage(netHistogram, baseName + "_netHistogram.svg", encoder);
    PlotImageUtility.SaveImage(massHistogram, baseName + "_massHistogram.svg", encoder);
}
/// <summary>
/// Aligns the features found in two MS feature files and writes the heat-score plot
/// as SVG text to the results folder.
/// </summary>
/// <param name="path1">Path of the baseline MS feature file (resolved through GetPath).</param>
/// <param name="path2">Path of the alignee MS feature file (resolved through GetPath).</param>
/// <param name="svgPath">Output name, appended to HEATMAP_RESULTS_FOLDER_BASE; ".svg" is added when writing.</param>
public void TestLcmsWarpAlignment(string path1, string path2, string svgPath)
{
    // Convert relative paths to absolute paths
    path1 = GetPath(path1);
    path2 = GetPath(path2);
    svgPath = GetPath(HEATMAP_RESULTS_FOLDER_BASE + svgPath);

    var warpAligner = new LcmsWarpFeatureAligner(new LcmsWarpAlignmentOptions());
    var isosFilter = new DeconToolsIsosFilterOptions();

    var baselineMsFeatures = UmcLoaderFactory.LoadMsFeatureData(path1, isosFilter);
    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(path2, isosFilter);
    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    var findingOptions = new LcmsFeatureFindingOptions(new FeatureTolerances
    {
        FragmentationWindowSize = .5,
        Mass = 13,
        DriftTime = .3,
        Net = .01
    });
    findingOptions.MaximumNetRange = .002;

    // No raw data provider is supplied to the finder here.
    var baselineFeatures = featureFinder.FindFeatures(baselineMsFeatures, findingOptions, null);
    var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures, findingOptions, null);
    var alignment = warpAligner.Align(baselineFeatures, aligneeFeatures);

    var model = new PlotModel
    {
        Subtitle = "Interpolated, cartesian axes",
        Title = "HeatMapSeries"
    };

    // Axes are added in the order: color axis, bottom axis, default axis.
    model.Axes.Add(new LinearColorAxis
    {
        InvalidNumberColor = OxyColors.Gray,
        Position = AxisPosition.Right,
        Palette = OxyPalettes.Hot(200)
    });
    model.Axes.Add(new LinearAxis { Position = AxisPosition.Bottom });
    model.Axes.Add(new LinearAxis());

    var heatSeries = new HeatMapSeries
    {
        X0 = 0,
        X1 = 1,
        Y0 = 0,
        Y1 = 1,
        FontSize = .2
    };

    // Copy the heat scores into the series as doubles.
    var heatScores = alignment.HeatScores;
    var columns = heatScores.GetLength(0);
    var rows = heatScores.GetLength(1);

    heatSeries.Data = new double[columns, rows];
    for (var column = 0; column < columns; column++)
    {
        for (var row = 0; row < rows; row++)
        {
            heatSeries.Data[column, row] = Convert.ToDouble(heatScores[column, row]);
        }
    }

    model.Series.Add(heatSeries);

    var exporter = new SvgExporter();
    var svgText = exporter.ExportToString(model);

    using (var output = File.CreateText(svgPath + ".svg"))
    {
        output.Write(svgText);
    }
}
/// <summary>
/// Runs the MultiAlign analysis: finds features in the baseline dataset, then for each
/// alignee dataset finds its features, clusters baseline + alignee features before alignment,
/// aligns the alignee to the baseline, clusters again, reports the before/after cluster
/// statistics through UpdateStatus, and writes the matches and errors to disk.
/// </summary>
/// <param name="baselineDataset">Dataset used as the alignment reference.</param>
/// <param name="aligneeDatasets">Datasets aligned, one at a time, against the baseline.</param>
/// <param name="featureFindingOptions">Options passed to the feature finder.</param>
/// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
/// <param name="lcmsFilterOptions">Not used by this method.</param>
/// <param name="peptideOptions">Supplies the FDR and id-score values used when linking peptides.</param>
/// <param name="featureFinder">Feature finder run on every dataset.</param>
/// <param name="aligner">Aligner; its spectra providers are set for every alignee.</param>
/// <param name="clusterer">Clusterer run before and after alignment.</param>
/// <param name="matchPath">Path handed to SaveMatches.</param>
/// <param name="errorPath">Path handed to WriteErrors.</param>
public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
    IEnumerable<DatasetInformation> aligneeDatasets,
    LcmsFeatureFindingOptions featureFindingOptions,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    SpectralOptions peptideOptions,
    MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
    IFeatureAligner<IEnumerable<UMCLight>, IEnumerable<UMCLight>, AlignmentData> aligner,
    IClusterer<UMCLight, UMCClusterLight> clusterer,
    string matchPath,
    string errorPath)
{
    UpdateStatus("Loading baseline features.");
    var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
    msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

    // Load the baseline reference set
    using (var rawProviderX = new InformedProteomicsReader())
    {
        rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var baselineFeatures = featureFinder.FindFeatures(msFeatures, featureFindingOptions, rawProviderX);

        LinkPeptidesToFeatures(baselineDataset.Sequence.Path,
            baselineFeatures,
            peptideOptions.Fdr,
            peptideOptions.IdScore);

        var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

        // Then load the alignee dataset
        foreach (var dataset in aligneeDatasets)
        {
            var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
            aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);

            using (var rawProviderY = new InformedProteomicsReader())
            {
                rawProviderY.AddDataFile(dataset.RawFile.Path, 0);

                UpdateStatus("Finding alignee features");
                var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures, featureFindingOptions, rawProviderY);

                LinkPeptidesToFeatures(dataset.Sequence.Path,
                    aligneeFeatures,
                    peptideOptions.Fdr,
                    peptideOptions.IdScore);

                var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                // cluster before we do anything else....
                var allFeatures = new List<UMCLight>();
                allFeatures.AddRange(baselineFeatures);
                allFeatures.AddRange(aligneeFeatures);

                foreach (var feature in allFeatures)
                {
                    // Reset the aligned values to the unaligned ones so the first clustering
                    // pass reflects the pre-alignment state. The NET line used to be the
                    // no-op self-assignment "feature.Net = feature.Net".
                    feature.NetAligned = feature.Net;
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                }

                // This tells us the differences before we align.
                var clusters = clusterer.Cluster(allFeatures);
                var preAlignment = AnalyzeClusters(clusters);

                aligner.AligneeSpectraProvider = providerY;
                aligner.BaselineSpectraProvider = providerX;

                UpdateStatus("Aligning data");

                // Aligner data
                var data = aligner.Align(baselineFeatures, aligneeFeatures);
                var matches = data.Matches;

                WriteErrors(errorPath, matches);

                // create anchor points for LCMSWarp alignment
                // NOTE(review): these two lists are filled but never read afterwards in
                // this method -- confirm whether they can be removed.
                var massPoints = new List<RegressionPoint>();
                var netPoints = new List<RegressionPoint>();
                foreach (var match in matches)
                {
                    var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz, match.AnchorPointY.Mz);
                    var netError = match.AnchorPointX.Net - match.AnchorPointY.Net;

                    var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                    massPoints.Add(massPoint);

                    var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                    netPoints.Add(netPoint);
                }

                // Detach every feature from its pre-alignment cluster before re-clustering.
                foreach (var feature in allFeatures)
                {
                    feature.UmcCluster = null;
                    feature.ClusterId = -1;
                }

                // Then cluster after alignment!
                UpdateStatus("clustering data");
                clusters = clusterer.Cluster(allFeatures);
                var postAlignment = AnalyzeClusters(clusters);

                UpdateStatus("Note\tSame\tDifferent");
                UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster, preAlignment.DifferentCluster));
                UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster, postAlignment.DifferentCluster));

                SaveMatches(matchPath, matches);
            }
        }
    }

    DeRegisterProgressNotifier(aligner);
    DeRegisterProgressNotifier(featureFinder);
    DeRegisterProgressNotifier(clusterer);
}
/// <summary>
/// Finds the features of the first valid dataset in a directory, links peptides to them, and
/// writes one tab-separated line (charge, m/z, scan, NET) for every MS feature that has MS/MS
/// data and at least one spectrum with a peptide, ordered by m/z.
/// </summary>
/// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
/// <param name="matchPath">Path of the text file to write.</param>
public void TestPeptideBands(string directory, string matchPath)
{
    // Build one "*<extension>" search pattern per supported MultiAlign file type
    var extensions = DatasetLoader.SupportedFileTypes
        .Select(x => "*" + x.Extension)
        .ToList();

    // Find our datasets
    var datasetLoader = new DatasetLoader();
    var datasets = datasetLoader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

    // Options setup
    var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var featureTolerances = new FeatureTolerances
    {
        Mass = instrumentOptions.Mass,
        Net = instrumentOptions.NetTolerance,
        DriftTime = instrumentOptions.DriftTimeTolerance
    };

    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    // Only the first dataset found is processed.
    var baselineDataset = datasets[0];

    UpdateStatus("Loading baseline features.");
    var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
    msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    // Only Fdr and IdScore are read from these options in this method.
    var peptideOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .05,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Load the baseline reference set
    using (var rawProviderX = RawLoaderFactory.CreateFileReader(baselineDataset.RawFile.Path))
    {
        rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var baselineFeatures = featureFinder.FindFeatures(msFeatures, featureFindingOptions, rawProviderX);

        LinkPeptidesToFeatures(baselineDataset.Sequence.Path,
            baselineFeatures,
            peptideOptions.Fdr,
            peptideOptions.IdScore);

        // Flatten to MS features, keep those with MS/MS data, and order by m/z in one pass
        // instead of materializing an intermediate list per step.
        var features = baselineFeatures
            .SelectMany(x => x.MsFeatures)
            .Where(x => x.HasMsMs())
            .OrderBy(x => x.Mz)
            .ToList();

        // A feature is reported once if any of its spectra has at least one peptide.
        var peptideList = features
            .Where(feature => feature.MSnSpectra.Any(spectrum => spectrum.Peptides.Any()))
            .ToList();

        using (var writer = File.CreateText(matchPath))
        {
            writer.WriteLine("Charge\tpmz\tscan\tNET\t");
            foreach (var feature in peptideList)
            {
                writer.WriteLine("{0}\t{1}\t{2}\t{3}\t", feature.ChargeState, feature.Mz, feature.Scan, feature.Net);
            }
        }
    }
}
/// <summary>
/// Loads the features of a single dataset. Existing features are read through the feature
/// cache; when there are none, LC-MS features are created from the dataset's MS feature file
/// and given ids, a NET value normalized over the scan-time range, and the dataset id. When
/// the dataset has a sequence file, its identifications are linked to the collected MSn spectra.
/// </summary>
/// <param name="dataset">Dataset to load; passed by reference to the MS feature filter.</param>
/// <param name="msFilteringOptions">Options used to filter MS features before LC-MS features are created.</param>
/// <param name="lcmsFindingOptions">Options passed to LC-MS feature creation.</param>
/// <param name="lcmsFilteringOptions">Options passed to LC-MS feature creation for filtering.</param>
/// <param name="dataLoadOptions">Supplies the isos filter options used when reading the MS feature file.</param>
/// <param name="providerCache">Source of the scan summary provider when the dataset has a raw file path.</param>
/// <param name="identificationProviders">Source of the identification provider for the sequence file.</param>
/// <param name="progress">Optional progress sink.</param>
/// <returns>The loaded or newly created features.</returns>
public IList<UMCLight> LoadDataset(DatasetInformation dataset,
    MsFeatureFilteringOptions msFilteringOptions,
    LcmsFeatureFindingOptions lcmsFindingOptions,
    LcmsFeatureFilteringOptions lcmsFilteringOptions,
    DataLoadingOptions dataLoadOptions,
    ScanSummaryProviderCache providerCache,
    IdentificationProviderCache identificationProviders,
    IProgress<ProgressData> progress = null)
{
    var progData = new ProgressData(progress);

    // A scan summary provider is only available when the dataset has a raw file path.
    IScanSummaryProvider provider = null;
    if (!string.IsNullOrWhiteSpace(dataset.RawFile.Path))
    {
        UpdateStatus("Using raw data to create better features.");
        provider = providerCache.GetScanSummaryProvider(dataset.RawFile.Path, dataset.DatasetId);
    }

    progData.StepRange(1);
    progData.Status = "Looking for existing features in the database.";
    UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));

    var datasetId = dataset.DatasetId;
    var features = UmcLoaderFactory.LoadUmcFeatureData(dataset, Providers.FeatureCache, provider);

    var hasMsFeatures = features.Any(f => f.MsFeatures.Any());
    var msFeatures = new List<MSFeatureLight>();
    if (!hasMsFeatures)
    {
        progData.StepRange(2);
        progData.Status = "Loading MS Feature Data.";
        UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));

        var isosFilterOptions = dataLoadOptions.GetIsosFilterOptions();
        msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path, isosFilterOptions);
    }

    // Scan summaries are not loaded at this point; only the progress status is reported.
    progData.StepRange(3);
    progData.Status = "Loading scan summaries.";

    progData.StepRange(100);

    var msnSpectra = new List<MSSpectra>();

    // If we don't have any features, then we have to create some from the MS features
    // provided to us.
    if (features.Count < 1)
    {
        msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
        msFeatures = Filter(msFeatures, provider, ref dataset);

        progData.Status = "Creating LCMS features.";
        features = CreateLcmsFeatures(dataset,
            msFeatures,
            lcmsFindingOptions,
            lcmsFilteringOptions,
            provider,
            new Progress<ProgressData>(pd => progData.Report(pd.Percent)));

        // Max/Min throw on an empty sequence, so skip the post-processing entirely when
        // feature creation produced nothing.
        if (features.Count > 0)
        {
            var maxScan = features.Max(feature => feature.Scan);
            var minScan = features.Min(feature => feature.Scan);

            var id = 0;
            var scanTimes = dataset.ScanTimes;

            // Loop-invariant parts of the NET normalization.
            var minTime = scanTimes[minScan];
            var timeRange = scanTimes[maxScan] - minTime;

            foreach (var feature in features)
            {
                feature.Id = id++;

                // NET is the feature's scan time normalized to the dataset's scan-time range.
                // When every feature shares the same scan time the range is exactly zero;
                // use 0 rather than dividing by zero.
                feature.Net = timeRange != 0
                    ? (Convert.ToDouble(scanTimes[feature.Scan]) - minTime) / timeRange
                    : 0;

                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                feature.NetAligned = feature.Net;
                feature.GroupId = datasetId;
                feature.SpectralCount = feature.MsFeatures.Count;

                foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                {
                    msFeature.UmcId = feature.Id;
                    msFeature.GroupId = datasetId;
                    msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                    msnSpectra.AddRange(msFeature.MSnSpectra);
                }
            }
        }
    }
    else
    {
        // Features that were not already existing features get fresh ids and the dataset id.
        if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
        {
            var i = 0;
            foreach (var feature in features)
            {
                feature.GroupId = datasetId;
                feature.Id = i++;
            }
        }

        // Otherwise, we need to map the MS features to the LCMS Features provided.
        // This would mean that we extracted data from an existing database.
        if (msFeatures.Count > 0)
        {
            var map = FeatureDataConverters.MapFeature(features);
            foreach (var msFeature in msFeatures)
            {
                if (map.ContainsKey(msFeature.UmcId))
                {
                    map[msFeature.UmcId].AddChildFeature(msFeature);
                }
            }
        }
    }

    // Process the MS/MS data with peptides. The status is reported only when there is a
    // sequence file to read (it used to be emitted twice, and even without one).
    if (dataset.SequenceFile != null && !string.IsNullOrEmpty(dataset.SequenceFile.Path))
    {
        UpdateStatus("Reading List of Peptides");
        var idProvider = identificationProviders.GetProvider(dataset.SequenceFile.Path, dataset.DatasetId);
        var peptideList = idProvider.GetAllIdentifications();

        UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");
        var linker = new PeptideMsMsLinker();
        linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
    }

    progData.Report(100);
    return features;
}