/// <summary>
/// Links the peptides read from the dataset's sequence file to the MS/MS spectra of the
/// provided features. Does nothing when no peptide reader can be created for the path.
/// </summary>
/// <param name="sequencePath">Path given to the peptide reader factory and to the reader itself.</param>
/// <param name="aligneeFeatures">Features whose MS features supply the MS/MS spectra to link.</param>
/// <param name="fdr">FDR cutoff forwarded to PeptideUtility.PassesCutoff.</param>
/// <param name="idScore">Identification score cutoff forwarded to PeptideUtility.PassesCutoff.</param>
private void LinkPeptidesToFeatures(string sequencePath, IEnumerable<UMCLight> aligneeFeatures, double fdr, double idScore)
{
    // Get the peptide reader associated with this feature set; without one there is nothing to link.
    var peptideReaderY = PeptideReaderFactory.CreateReader(sequencePath);
    if (peptideReaderY == null)
    {
        return;
    }

    UpdateStatus("Linking peptides to ms/ms");

    // Read and filter in one pass; only the peptides that pass the cutoffs are materialized.
    var filteredPeptides = peptideReaderY.Read(sequencePath)
        .Where(x => PeptideUtility.PassesCutoff(x, idScore, fdr))
        .ToList();

    // Gather every MS/MS spectrum attached to the features' MS features.
    var msnSpectra = new List<MSSpectra>();
    foreach (var feature in aligneeFeatures)
    {
        // Null MS features are skipped, matching how LoadDataset walks the same collection.
        foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
        {
            msnSpectra.AddRange(msFeature.MSnSpectra);
        }
    }

    var linker = new PeptideMsMsLinker();
    linker.LinkPeptidesToSpectra(msnSpectra, filteredPeptides);
}
/// <summary>
/// Load a single dataset from the provider.
/// </summary>
/// <param name="dataset">Dataset whose feature, scan and sequence paths are read; its scan times are rebuilt here.</param>
/// <param name="msFilteringOptions">Options used to filter MS features when LCMS features must be created.</param>
/// <param name="lcmsFindingOptions">Options forwarded to CreateLcmsFeatures.</param>
/// <param name="lcmsFilteringOptions">Options forwarded to CreateLcmsFeatures.</param>
/// <returns>
/// The LCMS features loaded for the dataset, or the features created from its MS features
/// when none could be loaded.
/// </returns>
public IList<UMCLight> LoadDataset(DatasetInformation dataset, MsFeatureFilteringOptions msFilteringOptions, LcmsFeatureFindingOptions lcmsFindingOptions, LcmsFeatureFilteringOptions lcmsFilteringOptions)
{
    UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
    var datasetId = dataset.DatasetId;
    var features = UmcLoaderFactory.LoadUmcFeatureData(dataset.Features.Path, dataset.DatasetId, Providers.FeatureCache);

    UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
    var msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
    var scansInfo = UmcLoaderFactory.LoadScanSummaries(dataset.Scans.Path);
    dataset.BuildScanTimes(scansInfo);

    // NOTE(review): this list is only filled in the branch that creates features below; when
    // features are loaded directly, the linker at the end receives an empty list. Confirm intended.
    var msnSpectra = new List<MSSpectra>();

    if (features.Count < 1)
    {
        // If we don't have any features, then we have to create some from the MS features
        // provided to us.
        msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
        msFeatures = Filter(msFeatures, ref dataset);
        features = CreateLcmsFeatures(dataset, msFeatures, lcmsFindingOptions, lcmsFilteringOptions);

        // Max/Min throw on an empty sequence, so only normalize when features were actually created.
        if (features.Count > 0)
        {
            var maxScan = features.Max(feature => feature.Scan);
            var minScan = features.Min(feature => feature.Scan);
            var scanTimes = dataset.ScanTimes;

            // Loop-invariant pieces of the NET calculation.
            var minScanTime = scanTimes[minScan];
            var scanTimeRange = scanTimes[maxScan] - minScanTime;

            var id = 0;
            foreach (var feature in features)
            {
                feature.Id = id++;

                // NET is the feature's scan time scaled by the range spanned by the first and
                // last feature scans. With a zero range the division would yield NaN/Infinity,
                // so fall back to 0.
                feature.Net = scanTimeRange != 0
                    ? (Convert.ToDouble(scanTimes[feature.Scan]) - minScanTime) / scanTimeRange
                    : 0.0;

                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                feature.NetAligned = feature.Net;
                feature.GroupId = datasetId;
                feature.SpectralCount = feature.MsFeatures.Count;

                foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                {
                    msFeature.UmcId = feature.Id;
                    msFeature.GroupId = datasetId;
                    msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                    msnSpectra.AddRange(msFeature.MSnSpectra);
                }
            }
        }
    }
    else
    {
        // Features that UmcLoaderFactory does not report as existing get fresh sequential IDs
        // and this dataset's group ID.
        if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
        {
            var i = 0;
            foreach (var feature in features)
            {
                feature.GroupId = datasetId;
                feature.Id = i++;
            }
        }

        // Otherwise, we need to map the MS features to the LCMS Features provided.
        // This would mean that we extracted data from an existing database.
        if (msFeatures.Count > 0)
        {
            var map = FeatureDataConverters.MapFeature(features);
            foreach (var msFeature in msFeatures)
            {
                // MS features whose parent ID is not in the map are left unattached.
                if (map.ContainsKey(msFeature.UmcId))
                {
                    map[msFeature.UmcId].AddChildFeature(msFeature);
                }
            }
        }
    }

    // Process the MS/MS data with peptides
    UpdateStatus("Reading List of Peptides");
    var sequenceProvider = PeptideReaderFactory.CreateReader(dataset.SequencePath);
    if (sequenceProvider != null)
    {
        var peptides = sequenceProvider.Read(dataset.SequencePath);
        var peptideList = peptides.ToList();

        // Give every peptide a sequential ID in read order.
        var count = 0;
        peptideList.ForEach(x => x.Id = count++);

        UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");
        var linker = new PeptideMsMsLinker();
        linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
    }

    return features;
}