/// <summary>
/// Loads and filters the MS features of a dataset, builds and filters LC-MS features
/// from them, stamps the dataset id on every level of the result, and links peptides
/// to the resulting features.
/// </summary>
/// <param name="information">Dataset whose feature, raw, and sequence file paths, scan times, and id are read.</param>
/// <param name="featureFindingOptions">Options handed to the feature finder.</param>
/// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
/// <param name="lcmsFilterOptions">Options used to filter the LC-MS features that were found.</param>
/// <param name="peptideOptions">Supplies the FDR and id score passed to the peptide linking step.</param>
/// <param name="featureFinder">Finder that builds the LC-MS features.</param>
/// <returns>The filtered LC-MS features; each keeps only the MS features that have MSn spectra.</returns>
private IList<UMCLight> FindFeatures(
    DatasetInformation information,
    LcmsFeatureFindingOptions featureFindingOptions,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    SpectralOptions peptideOptions,
    MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder)
{
    UpdateStatus("Loading baseline features.");
    var loadedMsFeatures = UmcLoaderFactory.LoadMsFeatureData(information.Features.Path);
    loadedMsFeatures = LcmsFeatureFilters.FilterMsFeatures(loadedMsFeatures, msFilterOptions);

    // The raw file reader is only needed while the features are being built.
    using (var spectraSource = RawLoaderFactory.CreateFileReader(information.RawFile.Path))
    {
        spectraSource.AddDataFile(information.RawFile.Path, 0);

        UpdateStatus("Creating LCMS Features.");
        var lcmsFeatures = featureFinder.FindFeatures(loadedMsFeatures, featureFindingOptions, spectraSource);
        lcmsFeatures = LcmsFeatureFilters.FilterFeatures(lcmsFeatures, lcmsFilterOptions, information.ScanTimes);

        var groupId = information.DatasetId;
        foreach (var lcmsFeature in lcmsFeatures)
        {
            lcmsFeature.GroupId = groupId;

            var fragmentedChildren = new List<MSFeatureLight>();
            foreach (var child in lcmsFeature.MsFeatures)
            {
                child.GroupId = groupId;
                foreach (var spectrum in child.MSnSpectra)
                {
                    spectrum.GroupId = groupId;
                    foreach (var peptide in spectrum.Peptides)
                    {
                        peptide.GroupId = groupId;
                    }
                }

                if (child.MSnSpectra.Count <= 0)
                {
                    continue;
                }

                fragmentedChildren.Add(child);
            }

            // Keep only the MS features that carry MSn spectra so the database
            // is not flooded with MS features.
            lcmsFeature.MsFeatures.Clear();
            lcmsFeature.MsFeatures.AddRange(fragmentedChildren);
        }

        LinkPeptidesToFeatures(
            information.SequenceFile.Path,
            lcmsFeatures,
            peptideOptions.Fdr,
            peptideOptions.IdScore);

        DeRegisterProgressNotifier(featureFinder);
        return lcmsFeatures;
    }
}
/// <summary>
/// Retrieves a list of features.
/// </summary>
/// <param name="rawFile">Path of the raw data file the readers are opened on.</param>
/// <param name="featureFile">Path of the MS feature file that is loaded.</param>
/// <returns>The LC-MS features produced by the tree-based feature finder.</returns>
public List<UMCLight> FindFeatures(string rawFile, string featureFile)
{
    List<UMCLight> features;
    using (ISpectraProvider raw = new InformedProteomicsReader())
    {
        // Read the raw file summary data...
        raw.AddDataFile(rawFile, 0);

        var info = new DatasetInformation();
        info.InputFiles.Add(new InputFile { Path = featureFile, FileType = InputFileType.Features });

        var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
        var tolerances = new FeatureTolerances
        {
            Mass = 8,
            Net = .005
        };
        var options = new LcmsFeatureFindingOptions(tolerances);

        // Load and create features
        var msFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);

        // The factory-created reader was previously never disposed, leaking the
        // open raw file; scope it to the feature finding call.
        using (var provider = RawLoaderFactory.CreateFileReader(rawFile))
        {
            provider.AddDataFile(rawFile, 0);
            features = finder.FindFeatures(msFeatures, options, provider);
        }
    }

    return features;
}
/// <summary>
/// Opens a reader on the given file and asks it for the summary of scan 10000000.
/// The result is discarded; presumably the scan number is meant to lie past the
/// end of the file (going by the method name).
/// </summary>
/// <param name="path">Path handed to the raw file reader factory.</param>
private void GetOutOfRangeScan(string path)
{
    const int requestedScan = 10000000;

    using (var reader = RawLoaderFactory.CreateFileReader(path, 0))
    {
        reader.GetScanSummary(requestedScan);
    }
}
/// <summary>
/// Opens a reader on the given file, registers the file with it, and requests
/// the raw spectra of scan 10000000. Both the spectra and the scan summary are discarded.
/// </summary>
/// <param name="path">Path of the raw file to open.</param>
public void AskForBigScan(string path)
{
    const int requestedScan = 10000000;

    using (var reader = RawLoaderFactory.CreateFileReader(path))
    {
        reader.AddDataFile(path, 0);

        ScanSummary ignoredSummary;
        reader.GetRawSpectra(requestedScan, 0, 1, out ignoredSummary);
    }
}
/// <summary>
/// Creates a cache file from the scan data of a raw file.
/// </summary>
/// <param name="rawPath">Path of the raw file whose scan data is read.</param>
/// <param name="outPath">Path passed to the cache writer as the output location.</param>
public void CreateSpectraSummaryFile(string rawPath, string outPath)
{
    using (var rawReader = RawLoaderFactory.CreateFileReader(rawPath))
    {
        rawReader.AddDataFile(rawPath, 0);

        // Scan data is requested for group 0, the id the file was registered under above.
        ScanSummaryCache.WriteCache(outPath, rawReader.GetScanData(0), rawPath);
    }
}
/// <summary>
/// Constructor. Stores the analysis and datasets, fills the clustering algorithm
/// lists, determines whether XICs can be created, registers for dataset messages,
/// and creates the commands.
/// </summary>
/// <param name="analysis">Analysis stored by this view model.</param>
/// <param name="datasets">Datasets exposed through <c>Datasets</c> and used by the commands' can-execute checks.</param>
/// <param name="msFeatureWindowFactory">
/// Factory for feature windows. When null, a new <c>MSFeatureViewFactory</c> is used.
/// </param>
public FeatureFindingSettingsViewModel(
    MultiAlignAnalysis analysis,
    ObservableCollection<DatasetInformationViewModel> datasets,
    IFeatureWindowFactory msFeatureWindowFactory = null)
{
    this.analysis = analysis;
    this.Datasets = datasets;

    // Honor the injected factory. A second, unconditional assignment used to
    // overwrite it with a new MSFeatureViewFactory, which made the parameter useless.
    this.msFeatureWindowFactory = msFeatureWindowFactory ?? new MSFeatureViewFactory();

    this.featuresByDataset = new Dictionary<DatasetInformation, IList<UMCLight>>();

    this.MsFeatureClusterers = new ObservableCollection<MsFeatureClusteringAlgorithmType>(
        Enum.GetValues(typeof(MsFeatureClusteringAlgorithmType)).Cast<MsFeatureClusteringAlgorithmType>());
    this.LcmsFeatureClusterers = new ObservableCollection<GenericClusteringAlgorithmType>(
        Enum.GetValues(typeof(GenericClusteringAlgorithmType)).Cast<GenericClusteringAlgorithmType>());

    // XICs can be created when at least one dataset's reader is a spectra provider.
    // Each reader is only needed for the type check, so dispose it right away
    // instead of leaving it open.
    this.CanCreateXics = datasets.Any(dataset =>
    {
        using (var reader = RawLoaderFactory.CreateFileReader(dataset.Dataset.RawFile.Path, dataset.DatasetId))
        {
            return reader is ISpectraProvider;
        }
    });

    // When dataset is selected/unselected, update can executes.
    this.MessengerInstance.Register<PropertyChangedMessage<bool>>(this, this.UpdateDatasetSelection);

    // When dataset state changes, update can executes.
    this.MessengerInstance.Register<PropertyChangedMessage<DatasetInformationViewModel.DatasetStates>>(this, args =>
    {
        if (args.Sender is DatasetInformationViewModel && args.PropertyName == "DatasetState")
        {
            ThreadSafeDispatcher.Invoke(() =>
            {
                this.FindMsFeaturesCommand.RaiseCanExecuteChanged();
                this.PlotMsFeaturesCommand.RaiseCanExecuteChanged();
                this.PlotAlignedFeaturesCommand.RaiseCanExecuteChanged();
            });
        }
    });

    this.FindMsFeaturesCommand = new RelayCommand(
        async () => await this.LoadMsFeaturesAsync(),
        () => this.Datasets.Any(ds => ds.IsSelected && !ds.IsFindingFeatures));

    this.PlotMsFeaturesCommand = new RelayCommand(
        async () => await this.PlotMsFeatures(false),
        () => this.Datasets.Any(
            ds => ds.DatasetState > DatasetInformationViewModel.DatasetStates.FindingFeatures && ds.IsSelected));

    this.PlotAlignedFeaturesCommand = new RelayCommand(
        async () => await this.PlotMsFeatures(true),
        () => this.Datasets.Any(ds => ds.IsAligned));

    this.RestoreDefaultsCommand = new RelayCommand(this.RestoreDefaults);
}
/// <summary>
/// Measures how long it takes to read all scan summaries of a raw file and
/// writes the elapsed time in seconds to the console.
/// </summary>
/// <param name="relativePath">Path that is resolved through <c>GetPath</c> before the file is opened.</param>
public void ReadTime(string relativePath)
{
    // Get the absolute path
    var absolutePath = GetPath(relativePath);

    using (var reader = RawLoaderFactory.CreateFileReader(absolutePath, 0))
    {
        var began = DateTime.Now;
        reader.GetScanSummaries();
        var finished = DateTime.Now;

        var elapsed = finished - began;
        Console.WriteLine(elapsed.TotalSeconds);
    }
}
/// <summary>
/// Event handler for dataset IsSelected property changed.
/// When dataset is selected/unselected, update can executes and recompute
/// whether XICs can be created for the current selection.
/// </summary>
/// <param name="args">The message changed arguments, containing the new value.</param>
private void UpdateDatasetSelection(PropertyChangedMessage<bool> args)
{
    // Make sure that this message is for DatasetInformationViewModel.IsSelected
    if (!(args.Sender is DatasetInformationViewModel) || args.PropertyName != "IsSelected")
    {
        return;
    }

    this.FindMsFeaturesCommand.RaiseCanExecuteChanged();
    this.PlotMsFeaturesCommand.RaiseCanExecuteChanged();
    this.PlotAlignedFeaturesCommand.RaiseCanExecuteChanged();

    // XICs can be created when at least one selected dataset's reader is a spectra
    // provider. Each reader is only needed for the type check, so it is disposed
    // immediately instead of being left open. "is" is already false for null.
    this.CanCreateXics = this.Datasets
        .Where(dataset => dataset.IsSelected)
        .Any(dataset =>
        {
            using (var reader = RawLoaderFactory.CreateFileReader(dataset.Dataset.RawFile.Path, dataset.DatasetId))
            {
                return reader is ISpectraProvider;
            }
        });
}
/// <summary>
/// Measures how long it takes to fetch the raw spectra of every scan listed in the
/// file's scan data and writes the elapsed time in seconds to the console.
/// </summary>
/// <param name="path">Path of the raw file to open.</param>
public void ReadTime(string path)
{
    using (var reader = RawLoaderFactory.CreateFileReader(path))
    {
        reader.AddDataFile(path, 0);
        var scansByNumber = reader.GetScanData(0);

        // Only the spectra retrieval is timed; opening the file and listing scans is not.
        var began = DateTime.Now;
        foreach (var scanNumber in scansByNumber.Keys)
        {
            ScanSummary ignoredSummary;
            reader.GetRawSpectra(scanNumber, 0, 1, out ignoredSummary);
        }
        var finished = DateTime.Now;

        var elapsed = finished - began;
        Console.WriteLine(elapsed.TotalSeconds);
    }
}
/// <summary>
/// Loads the raw MS/MS spectra for every feature of a cluster and writes them out.
/// The first set of spectra is written with <c>Write</c>; all later sets are appended.
/// </summary>
/// <param name="cluster">Cluster whose features' MSn spectra are exported.</param>
/// <param name="path">Path handed to the writer.</param>
/// <param name="datasets">Datasets the features may come from, looked up by dataset id.</param>
/// <param name="writer">Writer that receives the spectra.</param>
public static void ExportMsMs(this UMCClusterLight cluster, string path, List<DatasetInformation> datasets, IMsMsSpectraWriter writer)
{
    // Let's map the datasets first.
    var readers = new Dictionary<int, ISpectraProvider>();
    var information = new Dictionary<int, DatasetInformation>();
    datasets.ForEach(x => information.Add(x.DatasetId, x));

    try
    {
        // We are only loading what datasets we have to here!
        // Each cluster or feature may have come from a different raw data source, and
        // since not all data is kept in memory, the scans for the MS/MS spectra have to
        // be fetched from the appropriate raw file.
        foreach (var feature in cluster.Features)
        {
            if (readers.ContainsKey(feature.GroupId))
            {
                continue;
            }

            DatasetInformation singleInfo;
            if (!information.TryGetValue(feature.GroupId, out singleInfo) || singleInfo.RawFile == null)
            {
                continue;
            }

            // Make sure that we have a file.
            if (!File.Exists(singleInfo.RawFile.Path))
            {
                continue;
            }

            // Here we create a data file reader for the file we want to access.
            var newReader = RawLoaderFactory.CreateFileReader(singleInfo.RawFile.Path);

            // Map it to the dataset first so it is disposed below even if keying it fails,
            // and so it can be found again without reopening the file.
            readers.Add(feature.GroupId, newReader);

            // Then we make sure we key it to the provider.
            newReader.AddDataFile(singleInfo.RawFile.Path, feature.GroupId);
        }

        // We flag the first write, so that if the file exists, we overwrite it.
        // The flag is cleared after the first write; it used to stay set, so every
        // MS feature overwrote the file and only the last one survived.
        var firstWrite = true;
        foreach (var feature in cluster.Features)
        {
            ISpectraProvider provider;
            if (!readers.TryGetValue(feature.GroupId, out provider))
            {
                continue;
            }

            foreach (var msFeature in feature.MsFeatures)
            {
                foreach (var spectrum in msFeature.MSnSpectra)
                {
                    ScanSummary summary;
                    spectrum.Peaks = provider.GetRawSpectra(spectrum.Scan, spectrum.GroupId, out summary);
                    spectrum.ScanMetaData = summary;
                }

                if (firstWrite)
                {
                    writer.Write(path, msFeature.MSnSpectra);
                    firstWrite = false;
                }
                else
                {
                    writer.Append(path, msFeature.MSnSpectra);
                }
            }
        }
    }
    finally
    {
        // The readers never leave this method, so release the raw files here.
        foreach (var openReader in readers.Values)
        {
            openReader.Dispose();
        }
    }
}
/// <summary>
/// Finds LC-MS features from an MS feature file, writes a per-charge listing of each
/// feature's MS features to a file next to the input (with "_isos.csv" replaced by
/// "_xics.csv" in the file name), and asserts that at least one feature was found.
/// </summary>
/// <param name="relativePath">Path of the MS feature file, resolved through <c>GetPath</c>.</param>
/// <param name="relativeRawPath">Path of the raw file, resolved through <c>GetPath</c>.</param>
public void TestUmcFeatures(string relativePath, string relativeRawPath)
{
    // Get absolute paths
    var path = GetPath(relativePath);
    var rawPath = GetPath(relativeRawPath);

    var reader = new MsFeatureLightFileReader { Delimiter = ',' };
    var newMsFeatures = reader.ReadFile(path);

    var finder = new UmcTreeFeatureFinder();
    var featureTolerances = new FeatureTolerances
    {
        Mass = 12,
        Net = .04
    };
    var options = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .003,
        MaximumScanRange = 50
    };

    // The raw reader used to be left open; keep it alive only for the duration of the test.
    using (var provider = RawLoaderFactory.CreateFileReader(rawPath, 0))
    {
        var start = DateTime.Now;
        IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
        var end = DateTime.Now;
        Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);

        // Report a missing result as a test failure rather than throwing NullReferenceException by hand.
        Assert.IsNotNull(features, "The feature list came back empty.  This is a problem.");

        var dirPath = Path.GetDirectoryName(path);
        if (dirPath != null)
        {
            var outputPath = Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv"));
            using (var writer = File.CreateText(outputPath))
            {
                foreach (var feature in features)
                {
                    writer.WriteLine();
                    writer.WriteLine("Feature {0}", feature.Id);

                    var chargeMap = feature.CreateChargeMap();
                    foreach (var charge in chargeMap.Keys)
                    {
                        writer.WriteLine();
                        foreach (var msFeature in chargeMap[charge])
                        {
                            var count = msFeature.MSnSpectra.Count;
                            writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                                msFeature.Abundance, count);
                        }
                    }
                }
            }
        }

        // Work on total feature count here.
        Assert.Greater(features.Count(), 0);
    }
}
/// <summary>
/// Finds LC-MS features from an MS feature file, writes a per-charge listing for every
/// feature that has at least two charge states, and fails when the minimum scans of any
/// two charge states of one feature differ by more than <paramref name="maxScanDiff"/>.
/// Finally asserts that at least one feature was found.
/// </summary>
/// <param name="path">Path of the MS feature file; the listing is written next to it.</param>
/// <param name="rawPath">Path of the raw file.</param>
/// <param name="maxScanDiff">Largest allowed difference between the minimum scans of two charge states.</param>
public void TestUmcFeaturesMultipleCharges(string path, string rawPath, int maxScanDiff)
{
    var reader = new MsFeatureLightFileReader { Delimeter = "," };
    var newMsFeatures = reader.ReadFile(path);

    var finder = new UmcTreeFeatureFinder();
    var featureTolerances = new FeatureTolerances
    {
        Mass = 12,
        Net = .05
    };
    var options = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    // The raw reader used to be left open; keep it alive only for the duration of the test.
    using (var provider = RawLoaderFactory.CreateFileReader(rawPath))
    {
        provider.AddDataFile(rawPath, 0);

        var start = DateTime.Now;
        IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
        var end = DateTime.Now;
        Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);

        // Report a missing result as a test failure rather than throwing NullReferenceException by hand.
        Assert.IsNotNull(features, "The feature list came back empty.  This is a problem.");

        var dirPath = Path.GetDirectoryName(path);
        if (dirPath != null)
        {
            var outputPath = Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv"));
            using (var writer = File.CreateText(outputPath))
            {
                foreach (var feature in features)
                {
                    writer.WriteLine();
                    writer.WriteLine("Feature {0}", feature.Id);

                    var chargeMap = feature.CreateChargeMap();
                    if (chargeMap.Keys.Count < 2)
                    {
                        continue;
                    }

                    foreach (var charge in chargeMap.Keys)
                    {
                        writer.WriteLine();
                        foreach (var msFeature in chargeMap[charge])
                        {
                            var count = msFeature.MSnSpectra.Count;
                            writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                                msFeature.Abundance, count);
                        }
                    }

                    // Compare every distinct pair of charge states once. The difference is
                    // taken as an absolute value: the key order of the map is arbitrary, so a
                    // signed difference would only catch violations in one direction.
                    var charges = chargeMap.Keys.ToList();
                    for (var i = 0; i < charges.Count; i++)
                    {
                        for (var j = i + 1; j < charges.Count; j++)
                        {
                            var x = chargeMap[charges[i]];
                            var y = chargeMap[charges[j]];
                            var diff = Math.Abs(x.MinScan() - y.MinScan());
                            if (diff > maxScanDiff)
                            {
                                Assert.Fail("There is a problem with the feature finder across charge states");
                            }
                        }
                    }
                }
            }
        }

        // Work on total feature count here.
        Assert.Greater(features.Count(), 0);
    }
}
/// <summary>
/// Builds LC-MS features for the first valid dataset found in a directory, links
/// peptides to them, and writes one tab-separated row (charge, m/z, scan, NET) to
/// <paramref name="matchPath"/> for every MS feature that has MS/MS data and at least
/// one spectrum with a peptide. Rows are ordered by m/z.
/// </summary>
/// <param name="directory">Directory searched (top level only) for datasets.</param>
/// <param name="matchPath">Path of the text file that is created.</param>
public void TestPeptideBands(string directory, string matchPath)
{
    // Search patterns for the supported MultiAlign file types
    var extensions = DatasetLoader.SupportedFileTypes
        .Select(fileType => "*" + fileType.Extension)
        .ToList();

    // Find our datasets
    var loader = new DatasetLoader();
    var datasets = loader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

    // Options setup
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var tolerances = new FeatureTolerances
    {
        Mass = preset.Mass,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };
    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };
    var findingOptions = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    // The first dataset found serves as the baseline.
    var baseline = datasets[0];

    UpdateStatus("Loading baseline features.");
    var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baseline.Features.Path);
    msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var peptideOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .05,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Load the baseline reference set
    using (var rawReader = RawLoaderFactory.CreateFileReader(baseline.RawFile.Path))
    {
        rawReader.AddDataFile(baseline.RawFile.Path, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var baselineFeatures = finder.FindFeatures(msFeatures, findingOptions, rawReader);

        LinkPeptidesToFeatures(
            baseline.Sequence.Path,
            baselineFeatures,
            peptideOptions.Fdr,
            peptideOptions.IdScore);

        // Flatten to MS features, keep those with MS/MS, and order them by m/z.
        var flattened = new List<MSFeatureLight>();
        foreach (var lcmsFeature in baselineFeatures)
        {
            flattened.AddRange(lcmsFeature.MsFeatures);
        }

        var ordered = flattened
            .Where(candidate => candidate.HasMsMs())
            .OrderBy(candidate => candidate.Mz)
            .ToList();

        // A feature is reported once if any of its spectra has at least one peptide.
        var identified = ordered
            .Where(candidate => candidate.MSnSpectra.Any(spectrum => spectrum.Peptides.Any()))
            .ToList();

        using (var output = File.CreateText(matchPath))
        {
            output.WriteLine("Charge\tpmz\tscan\tNET\t");
            foreach (var hit in identified)
            {
                output.WriteLine("{0}\t{1}\t{2}\t{3}\t", hit.ChargeState, hit.Mz, hit.Scan, hit.Net);
            }
        }
    }
}