/// <summary>
/// Reads the ProMex feature files of three datasets through <c>PromexFileReader</c>,
/// clusters all of the features together with <c>PromexClusterer</c>, and writes to the
/// console the number of clusters whose <c>Features</c> collection has more than one entry.
/// </summary>
public void TestPromexClustering()
{
    var providerCache = new ScanSummaryProviderCache();

    // All three scan summary providers are resolved before any feature file is read.
    var firstReader = providerCache.GetScanSummaryProvider(pbf1, 0) as InformedProteomicsReader;
    var secondReader = providerCache.GetScanSummaryProvider(pbf2, 1) as InformedProteomicsReader;
    var thirdReader = providerCache.GetScanSummaryProvider(pbf3, 2) as InformedProteomicsReader;

    // The second constructor argument matches the id used when requesting the provider.
    var firstFeatures = new PromexFileReader(firstReader, 0).ReadFile(ms1ft1);
    var secondFeatures = new PromexFileReader(secondReader, 1).ReadFile(ms1ft2);
    var thirdFeatures = new PromexFileReader(thirdReader, 2).ReadFile(ms1ft3);

    // Pool the features of every dataset into a single list for clustering.
    var pooledFeatures = new List<UMCLight>();
    pooledFeatures.AddRange(firstFeatures);
    pooledFeatures.AddRange(secondFeatures);
    pooledFeatures.AddRange(thirdFeatures);

    var promexClusterer = new PromexClusterer
    {
        Readers = providerCache,
    };
    var clusters = promexClusterer.Cluster(pooledFeatures);

    var multiFeatureClusterCount = clusters.Count(cluster => cluster.Features.Count > 1);
    Console.WriteLine(multiFeatureClusterCount);
}
/// <summary>
/// Clusters two datasets twice - once through the MultiAlign-to-ProMex adapters
/// (<c>PromexFileReader</c> + <c>PromexClusterer</c>) and once with
/// <c>LcMsFeatureAlignment</c> directly - and asserts that both paths yield the same
/// total cluster count and the same count of clusters with more than one member.
/// </summary>
public void CompareClustering()
{
    // ---- Path 1: MultiAlign to ProMex adapters ----
    var providerCache = new ScanSummaryProviderCache();
    var firstReader = providerCache.GetScanSummaryProvider(pbf1, 0) as InformedProteomicsReader;
    var secondReader = providerCache.GetScanSummaryProvider(pbf2, 1) as InformedProteomicsReader;

    var firstFeatures = new PromexFileReader(firstReader, 0).ReadFile(ms1ft1);
    var secondFeatures = new PromexFileReader(secondReader, 1).ReadFile(ms1ft2);

    var pooledFeatures = new List<UMCLight>();
    pooledFeatures.AddRange(firstFeatures);
    pooledFeatures.AddRange(secondFeatures);

    var adapterClusterer = new PromexClusterer
    {
        Readers = providerCache,
    };
    var adapterClusters = adapterClusterer.Cluster(pooledFeatures);

    // Clusters whose UMC list holds more than one feature.
    var adapterMultiMemberCount = adapterClusters.Count(cluster => cluster.UmcList.Count > 1);

    // ---- Path 2: ProMex only ----
    var firstRun = PbfLcMsRun.GetLcMsRun(pbf1);
    var secondRun = PbfLcMsRun.GetLcMsRun(pbf2);

    var comparer = new LcMsFeatureAlignComparer(new Tolerance(10, ToleranceUnit.Ppm));
    var promexAligner = new LcMsFeatureAlignment(comparer);

    var firstPromexFeatures = LcMsFeatureAlignment.LoadProMexResult(0, ms1ft1, firstRun);
    promexAligner.AddDataSet(0, firstPromexFeatures, firstRun);

    var secondPromexFeatures = LcMsFeatureAlignment.LoadProMexResult(1, ms1ft2, secondRun);
    promexAligner.AddDataSet(1, secondPromexFeatures, secondRun);

    promexAligner.AlignFeatures();
    var promexClusters = promexAligner.GetAlignedFeatures();

    // Aligned clusters with more than one non-null entry.
    var promexMultiMemberCount = promexClusters.Count(
        cluster => cluster.Count(member => member != null) > 1);

    // ---- Both paths must agree ----
    Assert.AreEqual(adapterClusters.Count, promexClusters.Count);
    Assert.AreEqual(adapterMultiMemberCount, promexMultiMemberCount);
}
/// <summary>
/// Reads MS features from a comma-delimited file, runs <c>UmcTreeFeatureFinder</c> over
/// them using a scan summary provider for the matching <c>.raw</c> file, and asserts that
/// the number of features found is positive and equal to <paramref name="expectedFeatureCount"/>.
/// </summary>
/// <param name="relativePath">Path of the MS feature file; resolved through <c>GetPath</c>.</param>
/// <param name="expectedFeatureCount">Number of features the finder is expected to produce.</param>
/// <returns>The features produced by the feature finder.</returns>
public IEnumerable<UMCLight> TestUmcFeatures(string relativePath, int expectedFeatureCount)
{
    // Resolve the absolute location of the input file.
    var absolutePath = GetPath(relativePath);

    var msFeatureReader = new MsFeatureLightFileReader
    {
        Delimiter = ','
    };
    var msFeatures = msFeatureReader.ReadFile(absolutePath);

    var featureFinder = new UmcTreeFeatureFinder
    {
        MaximumNet = .005,
        MaximumScan = 50
    };

    var findingOptions = new LcmsFeatureFindingOptions(
        new FeatureTolerances
        {
            Mass = 8,
            Net = .005
        });

    // The raw file path is derived from the feature file path by swapping the suffix.
    var rawFilePath = absolutePath.Replace("_isos.csv", ".raw");
    UpdateStatus("Using raw data to create better features.");

    var providerCache = new ScanSummaryProviderCache();
    IScanSummaryProvider scanProvider = providerCache.GetScanSummaryProvider(rawFilePath, 1);

    var msFeatureList = msFeatures.ToList();
    var features = featureFinder.FindFeatures(msFeatureList, findingOptions, scanProvider);

    // Verify the total feature count.
    Assert.Greater(features.Count, 0);
    Assert.AreEqual(expectedFeatureCount, features.Count);

    return features;
}
/// <summary>
/// Reads the same ProMex feature file through <c>PromexFileReader</c> and through
/// <c>LcMsFeatureAlignment.LoadProMexResult</c>, then asserts that both return the same
/// number of features and that features at the same index agree on monoisotopic mass,
/// NET, first/last scan and abundance.
/// </summary>
public void CompareFileReading()
{
    // Read using the MultiAlign to ProMex adapters.
    var providerCache = new ScanSummaryProviderCache();
    var scanReader = providerCache.GetScanSummaryProvider(pbf1, 0) as InformedProteomicsReader;
    var adapterReader = new PromexFileReader(scanReader, 0);
    var adapterFeatures = adapterReader.ReadFile(ms1ft1).ToList();

    // Read using ProMex directly.
    var run = PbfLcMsRun.GetLcMsRun(pbf1);
    var promexFeatures = LcMsFeatureAlignment.LoadProMexResult(0, ms1ft1, run).ToList();

    Assert.AreEqual(adapterFeatures.Count, promexFeatures.Count);

    for (var index = 0; index < adapterFeatures.Count; index++)
    {
        var adapterFeature = adapterFeatures[index];
        var promexFeature = promexFeatures[index];

        // Note: Mz versus RepresentativeMz is not compared by this test.
        Assert.AreEqual(adapterFeature.MassMonoisotopic, promexFeature.Mass);
        Assert.AreEqual(adapterFeature.Net, promexFeature.Net);
        Assert.AreEqual(adapterFeature.ScanStart, promexFeature.MinScanNum);
        Assert.AreEqual(adapterFeature.ScanEnd, promexFeature.MaxScanNum);
        Assert.AreEqual(adapterFeature.Abundance, promexFeature.Abundance);
    }
}
/// <summary>
/// Load a single dataset from the provider.
/// </summary>
/// <remarks>
/// Existing LCMS features are requested first. When none are returned, LCMS features are
/// created from the dataset's MS features and given ids, NET values and group ids. When
/// features are returned and MS features were loaded, the MS features are attached to
/// their parent LCMS feature by <c>UmcId</c>. Finally, if a sequence file is configured,
/// its identifications are linked to the MS/MS spectra gathered during feature creation.
/// </remarks>
/// <param name="dataset">Dataset to load; its raw, feature and sequence file paths are read here.</param>
/// <param name="msFilteringOptions">Options passed to the MS feature filter before LCMS features are created.</param>
/// <param name="lcmsFindingOptions">Feature finding options forwarded to LCMS feature creation.</param>
/// <param name="lcmsFilteringOptions">Feature filtering options forwarded to LCMS feature creation.</param>
/// <param name="dataLoadOptions">Source of the isos filter options used when MS features are loaded.</param>
/// <param name="providerCache">Cache that supplies the scan summary provider for the raw file.</param>
/// <param name="identificationProviders">Cache that supplies the identification provider for the sequence file.</param>
/// <param name="progress">Optional progress sink.</param>
/// <returns>The LCMS features of the dataset; empty when none were found and none could be created.</returns>
public IList<UMCLight> LoadDataset(DatasetInformation dataset,
                                   MsFeatureFilteringOptions msFilteringOptions,
                                   LcmsFeatureFindingOptions lcmsFindingOptions,
                                   LcmsFeatureFilteringOptions lcmsFilteringOptions,
                                   DataLoadingOptions dataLoadOptions,
                                   ScanSummaryProviderCache providerCache,
                                   IdentificationProviderCache identificationProviders,
                                   IProgress<ProgressData> progress = null)
{
    var progData = new ProgressData(progress);

    // A raw file is optional. RawFile is null-checked the same way SequenceFile is below,
    // so a dataset without one no longer fails with a NullReferenceException.
    IScanSummaryProvider provider = null;
    if (dataset.RawFile != null && !string.IsNullOrWhiteSpace(dataset.RawFile.Path))
    {
        UpdateStatus("Using raw data to create better features.");
        provider = providerCache.GetScanSummaryProvider(dataset.RawFile.Path, dataset.DatasetId);
    }

    progData.StepRange(1);
    progData.Status = "Looking for existing features in the database.";
    UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));

    var datasetId = dataset.DatasetId;
    var features = UmcLoaderFactory.LoadUmcFeatureData(dataset, Providers.FeatureCache, provider);

    // MS features are only loaded when the LCMS features did not come with any.
    var hasMsFeatures = features.Any(f => f.MsFeatures.Any());
    var msFeatures = new List<MSFeatureLight>();
    if (!hasMsFeatures)
    {
        progData.StepRange(2);
        progData.Status = "Loading MS Feature Data.";
        UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));

        var isosFilterOptions = dataLoadOptions.GetIsosFilterOptions();
        msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path, isosFilterOptions);
    }

    // Scan summaries are not loaded in this method; only the progress state is advanced.
    progData.StepRange(3);
    progData.Status = "Loading scan summaries.";

    progData.StepRange(100);

    var msnSpectra = new List<MSSpectra>();

    if (features.Count < 1)
    {
        // No LCMS features were provided, so create them from the MS features.
        msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
        msFeatures = Filter(msFeatures, provider, ref dataset);

        progData.Status = "Creating LCMS features.";
        features = CreateLcmsFeatures(dataset,
                                      msFeatures,
                                      lcmsFindingOptions,
                                      lcmsFilteringOptions,
                                      provider,
                                      new Progress<ProgressData>(pd => progData.Report(pd.Percent)));

        // Max/Min throw on an empty sequence, so only post-process when features were created.
        if (features.Count > 0)
        {
            var maxScan = features.Max(feature => feature.Scan);
            var minScan = features.Min(feature => feature.Scan);

            // The scan-time lookups for the first and last scan do not change per feature.
            var scanTimes = dataset.ScanTimes;
            var firstScanTime = scanTimes[minScan];
            var scanTimeRange = scanTimes[maxScan] - firstScanTime;

            var id = 0;
            foreach (var feature in features)
            {
                feature.Id = id++;

                // NET is the feature's scan time scaled into the range spanned by the first
                // and last feature scan. A zero-width range would divide by zero, so NET is 0 then.
                if (scanTimeRange == 0)
                {
                    feature.Net = 0;
                }
                else
                {
                    feature.Net = (Convert.ToDouble(scanTimes[feature.Scan]) - firstScanTime) / scanTimeRange;
                }

                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                feature.NetAligned = feature.Net;
                feature.GroupId = datasetId;
                feature.SpectralCount = feature.MsFeatures.Count;

                foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                {
                    msFeature.UmcId = feature.Id;
                    msFeature.GroupId = datasetId;
                    msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                    msnSpectra.AddRange(msFeature.MSnSpectra);
                }
            }
        }
    }
    else
    {
        // Features that did not come from an existing feature source get fresh ids here.
        if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
        {
            var i = 0;
            foreach (var feature in features)
            {
                feature.GroupId = datasetId;
                feature.Id = i++;
            }
        }

        // Otherwise, we need to map the MS features to the LCMS Features provided.
        // This would mean that we extracted data from an existing database.
        if (msFeatures.Count > 0)
        {
            var map = FeatureDataConverters.MapFeature(features);
            foreach (var msFeature in msFeatures)
            {
                // MS features whose parent id is not in the map are skipped.
                if (map.ContainsKey(msFeature.UmcId))
                {
                    map[msFeature.UmcId].AddChildFeature(msFeature);
                }
            }
        }
    }

    // Process the MS/MS data with peptides
    UpdateStatus("Reading List of Peptides");
    if (dataset.SequenceFile != null && !string.IsNullOrEmpty(dataset.SequenceFile.Path))
    {
        var idProvider = identificationProviders.GetProvider(dataset.SequenceFile.Path, dataset.DatasetId);
        var peptideList = idProvider.GetAllIdentifications();

        UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");
        var linker = new PeptideMsMsLinker();
        linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
    }

    progData.Report(100);
    return features;
}