/// <summary>
/// Aligns a set of LC-MS features against the baseline data already held by the
/// supplied alignment processor and collects the resulting alignment statistics.
/// </summary>
/// <param name="alignmentProcessor">Processor that performs the warp and exposes its results.</param>
/// <param name="features">LC-MS features to align to the baseline.</param>
/// <param name="alignmentOptions">Options supplying the heat-score and histogram bin settings.</param>
/// <param name="progress">Optional sink for progress updates; may be null.</param>
/// <returns>
/// The alignment function, scan and NET ranges, feature matches, heat scores,
/// error histograms, residuals and summary statistics read from the processor.
/// </returns>
private AlignmentData AlignFeatures(
    LcmsWarpAlignmentProcessor alignmentProcessor,
    IEnumerable<UMCLight> features,
    LcmsWarpAlignmentOptions alignmentOptions,
    IProgress<ProgressData> progress = null)
{
    const string startMessage = "Starting alignment of features.";
    const string setFeaturesMessage = "Setting alignee features.";
    const string warpMessage = "Performing alignment warping.";
    const string applyMessage = "Applying alignment function to all features.";
    const string retrieveMessage = "Retrieving alignment data.";

    ProgressData tracker = new ProgressData(progress);

    // Forwards progress raised by the warp itself into this method's tracker.
    Progress<ProgressData> warpProgress =
        new Progress<ProgressData>(update => tracker.Report(update.Percent, update.Status));

    // The NET range starts at zero; it is overwritten with the processor's reference range below.
    AlignmentData result = new AlignmentData
    {
        MinMTDBNET = 0,
        MaxMTDBNET = 0
    };

    OnStatus(startMessage);

    // Reuse the caller's list when one was passed in; otherwise materialize the sequence once.
    List<UMCLight> allFeatures = features as List<UMCLight> ?? features.ToList();

    tracker.StepRange(5, startMessage);
    var abundantFeatures = FilterFeaturesByAbundance(allFeatures, alignmentOptions);

    // The map is built over the full feature list; its result is not read again in this method.
    var map = FeatureDataConverters.MapFeature(allFeatures);

    // Hand the abundance-filtered features to the processor as the alignee set.
    tracker.StepRange(10, setFeaturesMessage);
    OnStatus(setFeaturesMessage);
    alignmentProcessor.SetAligneeDatasetFeatures(abundantFeatures);

    // Run the warp.
    tracker.StepRange(90, warpMessage);
    OnStatus(warpMessage);
    alignmentProcessor.PerformAlignmentToMsFeatures(warpProgress);

    // Pull out the alignment function.
    tracker.StepRange(95);
    result.AlignmentFunction = alignmentProcessor.GetAlignmentFunction();

    // Keep the NET value for every scan.
    tracker.StepRange(100);
    _scanToNETMap = alignmentProcessor.GetScanToNETMapping();

    // Correct every feature, not just the filtered ones (updates NetAligned and MassMonoisotopicAligned).
    OnStatus(applyMessage);
    tracker.Status = applyMessage;
    allFeatures = alignmentProcessor.ApplyNetMassFunctionToAligneeDatasetFeatures(allFeatures);
    tracker.Report(100);

    // Scan extent of the corrected features, recorded as meta-data.
    // With no features the sentinels are stored as-is.
    int lowestScan = int.MaxValue;
    int highestScan = int.MinValue;
    foreach (UMCLight current in allFeatures)
    {
        var scan = current.Scan;
        if (scan > highestScan)
        {
            highestScan = scan;
        }

        if (scan < lowestScan)
        {
            lowestScan = scan;
        }
    }

    // Scan and NET ranges.
    result.MinScanBaseline = lowestScan;
    result.MaxScanBaseline = highestScan;
    result.MinMTDBNET = (float)alignmentProcessor.MinReferenceNet;
    result.MaxMTDBNET = (float)alignmentProcessor.MaxReferenceNet;

    // Matched features found by the processor.
    result.FeatureMatches = alignmentProcessor.FeatureMatches;

    // Heat map of alignment scores.
    OnStatus(retrieveMessage);
    tracker.Status = retrieveMessage;
    result.HeatScores = alignmentProcessor.GetAlignmentHeatMap(alignmentOptions.StandardizeHeatScores);

    // Mass, NET and drift-time error histograms.
    result.MassErrorHistogram = alignmentProcessor.GetMassErrorHistogram(alignmentOptions.MassBinSize);
    result.NetErrorHistogram = alignmentProcessor.GetNetErrorHistogram(alignmentOptions.NetBinSize);
    result.DriftErrorHistogram = alignmentProcessor.GetDriftErrorHistogram(alignmentOptions.DriftTimeBinSize);

    // Residuals and summary statistics of the warp.
    result.ResidualData = alignmentProcessor.GetResidualData();
    result.NETIntercept = alignmentProcessor.NetIntercept;
    result.NETRsquared = alignmentProcessor.NetRsquared;
    result.NETSlope = alignmentProcessor.NetSlope;
    result.MassMean = alignmentProcessor.MassMu;
    result.MassStandardDeviation = alignmentProcessor.MassStd;
    result.NETMean = alignmentProcessor.NetMu;
    result.NETStandardDeviation = alignmentProcessor.NetStd;
    result.BaselineIsAmtDB = _options.AlignToMassTagDatabase;

    return result;
}
/// <summary>
/// Loads the LC-MS features of a single dataset. Existing features are read through
/// <c>UmcLoaderFactory</c>; when none exist they are created from the dataset's MS features.
/// Any MS/MS spectra gathered while creating features are then linked to the identifications
/// from the dataset's sequence file, if one is set.
/// </summary>
/// <param name="dataset">Dataset to load; its raw, feature and sequence file paths are read.</param>
/// <param name="msFilteringOptions">Filter applied to MS features before LC-MS feature finding.</param>
/// <param name="lcmsFindingOptions">Options passed to LC-MS feature creation.</param>
/// <param name="lcmsFilteringOptions">Options passed to LC-MS feature creation for filtering.</param>
/// <param name="dataLoadOptions">Supplies the isos filter options used when loading MS features.</param>
/// <param name="providerCache">Cache used to obtain a scan summary provider for the raw file.</param>
/// <param name="identificationProviders">Cache used to obtain the identification provider for the sequence file.</param>
/// <param name="progress">Optional sink for progress updates; may be null.</param>
/// <returns>
/// The loaded or newly created features. The list may be empty when feature creation
/// produces nothing.
/// </returns>
public IList<UMCLight> LoadDataset(
    DatasetInformation dataset,
    MsFeatureFilteringOptions msFilteringOptions,
    LcmsFeatureFindingOptions lcmsFindingOptions,
    LcmsFeatureFilteringOptions lcmsFilteringOptions,
    DataLoadingOptions dataLoadOptions,
    ScanSummaryProviderCache providerCache,
    IdentificationProviderCache identificationProviders,
    IProgress<ProgressData> progress = null)
{
    var progData = new ProgressData(progress);

    // A scan summary provider is only available when the dataset has a raw file.
    IScanSummaryProvider provider = null;
    if (!string.IsNullOrWhiteSpace(dataset.RawFile.Path))
    {
        UpdateStatus("Using raw data to create better features.");
        provider = providerCache.GetScanSummaryProvider(dataset.RawFile.Path, dataset.DatasetId);
    }

    progData.StepRange(1);
    progData.Status = "Looking for existing features in the database.";
    UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));

    var datasetId = dataset.DatasetId;
    var features = UmcLoaderFactory.LoadUmcFeatureData(dataset, Providers.FeatureCache, provider);

    // MS features are only loaded from file when the loaded LC-MS features carry none.
    var hasMsFeatures = features.Any(f => f.MsFeatures.Any());
    var msFeatures = new List<MSFeatureLight>();
    if (!hasMsFeatures)
    {
        progData.StepRange(2);
        progData.Status = "Loading MS Feature Data.";
        UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));

        var isosFilterOptions = dataLoadOptions.GetIsosFilterOptions();
        msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path, isosFilterOptions);
    }

    // NOTE: explicit scan summary loading is currently disabled; only the status is reported.
    progData.StepRange(3);
    progData.Status = "Loading scan summaries.";

    progData.StepRange(100);

    var msnSpectra = new List<MSSpectra>();

    if (features.Count < 1)
    {
        // No LC-MS features exist yet, so create them from the MS features provided to us.
        msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
        msFeatures = Filter(msFeatures, provider, ref dataset);

        progData.Status = "Creating LCMS features.";
        features = CreateLcmsFeatures(
            dataset,
            msFeatures,
            lcmsFindingOptions,
            lcmsFilteringOptions,
            provider,
            new Progress<ProgressData>(pd => progData.Report(pd.Percent)));

        // Max()/Min() throw on an empty sequence, so skip the NET assignment pass
        // entirely when feature finding produced nothing.
        if (features.Count > 0)
        {
            var maxScan = features.Max(feature => feature.Scan);
            var minScan = features.Min(feature => feature.Scan);

            var scanTimes = dataset.ScanTimes;
            var minScanTime = scanTimes[minScan];
            var scanTimeRange = scanTimes[maxScan] - minScanTime;

            var id = 0;
            foreach (var feature in features)
            {
                feature.Id = id++;

                // NET is the feature's scan time normalized by the scan-time span of all features.
                // When every feature shares one scan time the span is exactly zero; use 0
                // rather than dividing by zero and storing NaN/Infinity.
                if (scanTimeRange != 0)
                {
                    feature.Net = (Convert.ToDouble(scanTimes[feature.Scan]) - minScanTime) / scanTimeRange;
                }
                else
                {
                    feature.Net = 0;
                }

                // Until alignment runs, the aligned values mirror the observed ones.
                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                feature.NetAligned = feature.Net;
                feature.GroupId = datasetId;
                feature.SpectralCount = feature.MsFeatures.Count;

                foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                {
                    msFeature.UmcId = feature.Id;
                    msFeature.GroupId = datasetId;
                    msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                    msnSpectra.AddRange(msFeature.MSnSpectra);
                }
            }
        }
    }
    else
    {
        // Features that did not come from an existing feature store get fresh ids and the dataset's group id.
        if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
        {
            var i = 0;
            foreach (var feature in features)
            {
                feature.GroupId = datasetId;
                feature.Id = i++;
            }
        }

        // Otherwise, we need to map the MS features to the LCMS features provided.
        // This would mean that we extracted data from an existing database.
        if (msFeatures.Count > 0)
        {
            var map = FeatureDataConverters.MapFeature(features);
            foreach (var msFeature in msFeatures)
            {
                // MS features whose parent id is not in the map are left unattached.
                if (map.ContainsKey(msFeature.UmcId))
                {
                    map[msFeature.UmcId].AddChildFeature(msFeature);
                }
            }
        }
    }

    // NOTE: loading MS/MS spectra directly through a spectra provider is currently disabled.

    // Process the MS/MS data with peptides, but only when a sequence file was supplied.
    if (dataset.SequenceFile != null && !string.IsNullOrEmpty(dataset.SequenceFile.Path))
    {
        UpdateStatus("Reading List of Peptides");
        var idProvider = identificationProviders.GetProvider(dataset.SequenceFile.Path, dataset.DatasetId);
        var peptideList = idProvider.GetAllIdentifications();

        UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");
        var linker = new PeptideMsMsLinker();
        linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
    }

    progData.Report(100);
    return features;
}
/// <summary>
/// Loads the baseline features used for alignment, either from a baseline dataset or by
/// converting the mass tags of a mass tag database, and raises
/// <c>BaselineFeaturesLoaded</c> when there are subscribers.
/// </summary>
/// <param name="baselineInfo">Baseline dataset; required when the mass tag database is not the baseline.</param>
/// <param name="msFilterOptions">MS feature filtering options forwarded to the feature loader.</param>
/// <param name="lcmsFindingOptions">LC-MS feature finding options forwarded to the feature loader.</param>
/// <param name="lcmsFilterOptions">LC-MS feature filtering options forwarded to the feature loader.</param>
/// <param name="dataLoadOptions">Data loading options forwarded to the feature loader.</param>
/// <param name="dataProviders">Data access providers assigned to the feature loader.</param>
/// <param name="database">Mass tag database; required when it is used as the baseline.</param>
/// <param name="shouldUseMassTagDbAsBaseline">True to build the baseline from the mass tag database.</param>
/// <returns>
/// The baseline features: the loaded dataset features, or the converted mass tags
/// (whatever <c>FeatureDataConverters.ConvertToUMC</c> returned).
/// </returns>
/// <exception cref="Exception">The dataset baseline was requested but <paramref name="baselineInfo"/> is null.</exception>
/// <exception cref="NullReferenceException">The mass tag baseline was requested but <paramref name="database"/> is null.</exception>
private IList<UMCLight> LoadBaselineData(
    DatasetInformation baselineInfo,
    MsFeatureFilteringOptions msFilterOptions,
    LcmsFeatureFindingOptions lcmsFindingOptions,
    LcmsFeatureFilteringOptions lcmsFilterOptions,
    DataLoadingOptions dataLoadOptions,
    FeatureDataAccessProviders dataProviders,
    MassTagDatabase database,
    bool shouldUseMassTagDbAsBaseline)
{
    UpdateStatus("Loading baseline features.");

    if (shouldUseMassTagDbAsBaseline)
    {
        if (database == null)
        {
            // Exception type kept as-is so existing catch blocks continue to work.
            throw new NullReferenceException(
                "The mass tag database has to have data in it if it's being used for drift time alignment.");
        }

        UpdateStatus("Setting baseline features for post drift time alignment from mass tag database.");
        var tags = FeatureDataConverters.ConvertToUMC(database.MassTags);

        // Copy the delegate so a concurrent unsubscribe cannot null it between the check and the call.
        var tagsLoadedHandler = BaselineFeaturesLoaded;
        if (tagsLoadedHandler != null && tags != null)
        {
            tagsLoadedHandler(this, new BaselineFeaturesLoadedEventArgs(null, tags.ToList(), database));
        }

        // Return the tags whether or not anyone is subscribed to the event; previously the
        // result was null whenever a subscriber existed.
        return tags;
    }

    if (baselineInfo == null)
    {
        // Exception type kept as-is so existing catch blocks continue to work.
        throw new Exception("The baseline dataset was never set.");
    }

    var cache = new FeatureLoader
    {
        Providers = dataProviders
    };

    RegisterProgressNotifier(cache);
    try
    {
        UpdateStatus("Loading baseline features from " + baselineInfo.DatasetName + " for alignment.");

        IList<UMCLight> baselineFeatures = cache.LoadDataset(
            baselineInfo,
            msFilterOptions,
            lcmsFindingOptions,
            lcmsFilterOptions,
            dataLoadOptions,
            m_scanSummaryProviderCache,
            this.m_identificationsProvider);

        cache.CacheFeatures(baselineFeatures);

        var featuresLoadedHandler = BaselineFeaturesLoaded;
        if (featuresLoadedHandler != null)
        {
            featuresLoadedHandler(this, new BaselineFeaturesLoadedEventArgs(baselineInfo, baselineFeatures.ToList()));
        }

        return baselineFeatures;
    }
    finally
    {
        // Always detach from the loader, even when loading or a subscriber throws.
        DeRegisterProgressNotifier(cache);
    }
}