/// <summary>
/// Builds LCMS features from the supplied MS features with the tree-based feature finder
/// and then applies the LCMS feature filters to the result.
/// </summary>
/// <param name="information">Dataset description; only used when validating the maximum scan length settings.</param>
/// <param name="msFeatures">MS features to group into LCMS features. Must contain at least one item.</param>
/// <param name="options">Feature finding options handed to the feature finder.</param>
/// <param name="filterOptions">Filtering options applied to the features that were found.</param>
/// <param name="provider">Scan summary provider handed to both the feature finder and the filter.</param>
/// <param name="progress">Optional progress reporter forwarded to the feature finder.</param>
/// <returns>The features that remain after filtering.</returns>
/// <exception cref="Exception">Thrown when <paramref name="msFeatures"/> is empty.</exception>
public List<UMCLight> CreateLcmsFeatures(
    DatasetInformation information,
    List<MSFeatureLight> msFeatures,
    LcmsFeatureFindingOptions options,
    LcmsFeatureFilteringOptions filterOptions,
    IScanSummaryProvider provider,
    IProgress<ProgressData> progress = null)
{
    // Nothing can be built without at least one MS feature.
    var hasMsFeatures = msFeatures.Count >= 1;
    if (!hasMsFeatures)
    {
        throw new Exception("No features were found in the feature files provided.");
    }

    UpdateStatus("Finding features.");
    ValidateFeatureFinderMaxScanLength(information, options, filterOptions);

    // Relay the finder's progress messages through the status channel.
    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    featureFinder.Progress += (source, progressArgs) => UpdateStatus(progressArgs.Message);
    var unfiltered = featureFinder.FindFeatures(msFeatures, options, provider, progress);

    UpdateStatus("Filtering features.");
    List<UMCLight> retained = LcmsFeatureFilters.FilterFeatures(unfiltered, filterOptions, provider);

    var summary = string.Format("Filtered features from: {0} to {1}.", unfiltered.Count, retained.Count);
    UpdateStatus(summary);

    return retained;
}
/// <summary>
/// Loads the MS features stored in <paramref name="featureFile"/> and groups them into
/// LCMS features with the tree-based feature finder, using fixed tolerances (Mass = 8, Net = .005).
/// </summary>
/// <param name="rawFile">Path of the raw file; it is registered with the readers under id 0.</param>
/// <param name="featureFile">Path of the MS feature file to load.</param>
/// <returns>The features produced by the feature finder.</returns>
public List<UMCLight> FindFeatures(string rawFile, string featureFile)
{
    using (ISpectraProvider raw = new InformedProteomicsReader())
    {
        // Read the raw file summary data...
        raw.AddDataFile(rawFile, 0);

        var info = new DatasetInformation();
        info.InputFiles.Add(new InputFile { Path = featureFile, FileType = InputFileType.Features });

        var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
        var tolerances = new FeatureTolerances
        {
            Mass = 8,
            Net = .005
        };
        var options = new LcmsFeatureFindingOptions(tolerances);

        // Load and create features
        var msFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);

        // The file reader is disposable (it is consumed in a using block elsewhere in this file),
        // so release it as soon as feature finding is done instead of leaking it.
        using (var provider = RawLoaderFactory.CreateFileReader(rawFile))
        {
            provider.AddDataFile(rawFile, 0);
            return finder.FindFeatures(msFeatures, options, provider);
        }
    }
}
/// <summary>
/// Finds features in two MS feature files, aligns the second set to the first and writes
/// the alignment plots to disk: a rendered bitmap of the heat-score plot at
/// <paramref name="pngPath"/>, plus SVG files for the heat map and the NET / mass error
/// histograms next to it.
/// </summary>
/// <param name="path1">Path of the baseline MS feature file; resolved through GetPath.</param>
/// <param name="path2">Path of the alignee MS feature file; resolved through GetPath.</param>
/// <param name="pngPath">Path of the bitmap to write; resolved through GetPath. Its directory is created when missing.</param>
/// <exception cref="DirectoryNotFoundException">Thrown when no directory can be determined for <paramref name="pngPath"/>.</exception>
public void TestMsFeatureScatterPlot(string path1, string path2, string pngPath)
{
    // Convert relative paths to absolute paths
    path1 = GetPath(path1);
    path2 = GetPath(path2);
    pngPath = GetPath(pngPath);

    // Make sure the output directory exists before any file is written.
    var fiOutput = new FileInfo(pngPath);
    var didirectory = fiOutput.Directory;
    if (didirectory == null)
    {
        throw new DirectoryNotFoundException(pngPath);
    }

    if (!didirectory.Exists)
    {
        didirectory.Create();
    }

    var aligner = new LcmsWarpFeatureAligner(new LcmsWarpAlignmentOptions());

    var isosFilterOptions = new DeconToolsIsosFilterOptions();
    var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1, isosFilterOptions);
    var aligneeMs = UmcLoaderFactory.LoadMsFeatureData(path2, isosFilterOptions);

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var tolerances = new FeatureTolerances
    {
        FragmentationWindowSize = .5,
        Mass = 13,
        DriftTime = .3,
        Net = .01
    };
    var options = new LcmsFeatureFindingOptions(tolerances);
    options.MaximumNetRange = .002;

    var baseline = finder.FindFeatures(baselineMs, options, null);
    var alignee = finder.FindFeatures(aligneeMs, options, null);
    var alignmentResults = aligner.Align(baseline, alignee);

    // Build the heat-score plot: a color axis plus two linear axes.
    var plotModel1 = new PlotModel
    {
        Subtitle = "Interpolated, cartesian axes",
        Title = "HeatMapSeries"
    };

    var palette = OxyPalettes.Hot(200);
    var linearColorAxis1 = new LinearColorAxis
    {
        InvalidNumberColor = OxyColors.Gray,
        Position = AxisPosition.Right,
        Palette = palette
    };
    plotModel1.Axes.Add(linearColorAxis1);

    var linearAxis1 = new LinearAxis { Position = AxisPosition.Bottom };
    plotModel1.Axes.Add(linearAxis1);

    var linearAxis2 = new LinearAxis();
    plotModel1.Axes.Add(linearAxis2);

    var heatMapSeries1 = new HeatMapSeries
    {
        X0 = 0,
        X1 = 1,
        Y0 = 0,
        Y1 = 1,
        FontSize = .2
    };

    // Copy the heat scores into the series, converting each cell to double.
    var scores = alignmentResults.HeatScores;
    var width = scores.GetLength(0);
    var height = scores.GetLength(1);

    heatMapSeries1.Data = new double[width, height];
    var seriesData = heatMapSeries1.Data;
    for (var i = 0; i < width; i++)
    {
        for (var j = 0; j < height; j++)
        {
            seriesData[i, j] = Convert.ToDouble(scores[i, j]);
        }
    }

    plotModel1.Series.Add(heatMapSeries1);

    // Export the plot as SVG text, then rasterize that SVG and save the bitmap.
    var svg = new SvgExporter();
    var svgString = svg.ExportToString(plotModel1);

    var xml = new XmlDocument();
    xml.LoadXml(svgString);
    var x = SvgDocument.Open(xml);

    // The rendered bitmap is only needed for the save; dispose it right away instead of leaking it.
    using (var bmp = x.Draw())
    {
        bmp.Save(pngPath);
    }

    var heatmap = HeatmapFactory.CreateAlignedHeatmap(alignmentResults.HeatScores, false);
    var netHistogram = HistogramFactory.CreateHistogram(alignmentResults.NetErrorHistogram, "NET Error", "NET Error");
    var massHistogram = HistogramFactory.CreateHistogram(alignmentResults.MassErrorHistogram, "Mass Error", "Mass Error (ppm)");

    // The SVG plots share the bitmap's directory and base file name.
    var baseName = Path.Combine(didirectory.FullName, Path.GetFileNameWithoutExtension(fiOutput.Name));

    var encoder = new SvgEncoder();
    PlotImageUtility.SaveImage(heatmap, baseName + "_heatmap.svg", encoder);
    PlotImageUtility.SaveImage(netHistogram, baseName + "_netHistogram.svg", encoder);
    PlotImageUtility.SaveImage(massHistogram, baseName + "_massHistogram.svg", encoder);
}
/// <summary>
/// Finds features in two MS feature files, aligns the second set to the first and writes
/// the heat-score plot as an SVG file.
/// </summary>
/// <param name="path1">Path of the baseline MS feature file; resolved through GetPath.</param>
/// <param name="path2">Path of the alignee MS feature file; resolved through GetPath.</param>
/// <param name="svgPath">
/// Output name appended to HEATMAP_RESULTS_FOLDER_BASE and resolved through GetPath;
/// ".svg" is appended to form the file that is written.
/// </param>
public void TestLcmsWarpAlignment(string path1, string path2, string svgPath)
{
    // Convert relative paths to absolute paths
    path1 = GetPath(path1);
    path2 = GetPath(path2);
    svgPath = GetPath(HEATMAP_RESULTS_FOLDER_BASE + svgPath);

    var aligner = new LcmsWarpFeatureAligner(new LcmsWarpAlignmentOptions());

    var isosFilterOptions = new DeconToolsIsosFilterOptions();
    var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1, isosFilterOptions);
    var aligneeMs = UmcLoaderFactory.LoadMsFeatureData(path2, isosFilterOptions);

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var tolerances = new FeatureTolerances
    {
        FragmentationWindowSize = .5,
        Mass = 13,
        DriftTime = .3,
        Net = .01
    };
    var options = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = .002
    };

    var baseline = finder.FindFeatures(baselineMs, options, null);
    var alignee = finder.FindFeatures(aligneeMs, options, null);
    var data = aligner.Align(baseline, alignee);

    // Build the heat-score plot: a color axis plus two linear axes.
    var plotModel1 = new PlotModel
    {
        Subtitle = "Interpolated, cartesian axes",
        Title = "HeatMapSeries"
    };

    var palette = OxyPalettes.Hot(200);
    var linearColorAxis1 = new LinearColorAxis
    {
        InvalidNumberColor = OxyColors.Gray,
        Position = AxisPosition.Right,
        Palette = palette
    };
    plotModel1.Axes.Add(linearColorAxis1);

    var linearAxis1 = new LinearAxis { Position = AxisPosition.Bottom };
    plotModel1.Axes.Add(linearAxis1);

    var linearAxis2 = new LinearAxis();
    plotModel1.Axes.Add(linearAxis2);

    var heatMapSeries1 = new HeatMapSeries
    {
        X0 = 0,
        X1 = 1,
        Y0 = 0,
        Y1 = 1,
        FontSize = .2
    };

    // Copy the heat scores into the series, converting each cell to double.
    var scores = data.HeatScores;
    var width = scores.GetLength(0);
    var height = scores.GetLength(1);

    heatMapSeries1.Data = new double[width, height];
    for (var i = 0; i < width; i++)
    {
        for (var j = 0; j < height; j++)
        {
            heatMapSeries1.Data[i, j] = Convert.ToDouble(scores[i, j]);
        }
    }

    plotModel1.Series.Add(heatMapSeries1);

    var svg = new SvgExporter();
    var svgString = svg.ExportToString(plotModel1);

    // File.CreateText fails when the target folder is missing, so create it first
    // (a no-op when the folder already exists).
    var outputFile = svgPath + ".svg";
    var outputDirectory = Path.GetDirectoryName(outputFile);
    if (!string.IsNullOrEmpty(outputDirectory))
    {
        Directory.CreateDirectory(outputDirectory);
    }

    using (var writer = File.CreateText(outputFile))
    {
        writer.Write(svgString);
    }
}
/// <summary>
/// Runs a MultiAlign analysis over the datasets found in <paramref name="directory"/>:
/// the first dataset is the baseline and every other dataset is an alignee.
/// </summary>
/// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
/// <param name="outputPath">Base path of the outputs; ".txt" and "-errors.txt" are appended to it.</param>
/// <param name="alignmentType">Alignment algorithm requested from the aligner factory.</param>
/// <param name="clusterType">Clustering algorithm requested from the cluster factory.</param>
public void TestClustering(
    string directory,
    string outputPath,
    FeatureAlignmentType alignmentType,
    LcmsFeatureClusteringAlgorithmType clusterType)
{
    // Index of the dataset every other dataset is aligned against.
    const int baselineIndex = 0;

    var matchPath = string.Format("{0}.txt", outputPath);
    var errorPath = string.Format("{0}-errors.txt", outputPath);

    // Build one search pattern per supported MultiAlign file type.
    var supportedTypes = DatasetLoader.SupportedFileTypes;
    var extensions = new List<string>();
    foreach (var fileType in supportedTypes)
    {
        extensions.Add("*" + fileType.Extension);
    }

    // Locate the datasets.
    var datasetLoader = new DatasetLoader();
    var datasets = datasetLoader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

    // Alignment and spectral comparison settings.
    var alignmentOptions = new AlignmentOptions();
    var spectralOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .01,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Tolerances come from the LTQ Orbitrap preset, with the mass tolerance widened by 6.
    var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var featureTolerances = new FeatureTolerances
    {
        Mass = instrumentOptions.Mass + 6,
        Net = instrumentOptions.NetTolerance,
        DriftTime = instrumentOptions.DriftTimeTolerance
    };

    var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    // Instantiate the algorithms and hook up their progress notifications.
    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType, alignmentOptions.LCMSWarpOptions, spectralOptions);
    var clusterer = ClusterFactory.Create(clusterType);
    clusterer.Parameters = new FeatureClusterParameters<UMCLight>
    {
        Tolerances = featureTolerances
    };

    RegisterProgressNotifier(aligner);
    RegisterProgressNotifier(finder);
    RegisterProgressNotifier(clusterer);

    var lcmsFilters = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRangeScans = new FilterRange(50, 300)
    };

    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    // Everything except the baseline dataset is aligned to the baseline.
    var aligneeDatasets = datasets
        .Where((dataset, position) => position != baselineIndex)
        .ToList();

    PerformMultiAlignAnalysis(
        datasets[baselineIndex],
        aligneeDatasets,
        featureFindingOptions,
        msFilterOptions,
        lcmsFilters,
        spectralOptions,
        finder,
        aligner,
        clusterer,
        matchPath,
        errorPath);
}
/// <summary>
/// Finds LCMS features in the first dataset of <paramref name="directory"/>, links peptides
/// to them and writes one tab-separated row (charge, m/z, scan, NET) for every MS feature
/// that has MS/MS data and at least one spectrum with a peptide.
/// </summary>
/// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
/// <param name="matchPath">Path of the text file that receives the rows.</param>
public void TestPeptideBands(string directory, string matchPath)
{
    // Build one search pattern per supported MultiAlign file type.
    var supportedTypes = DatasetLoader.SupportedFileTypes;
    var extensions = new List<string>();
    foreach (var fileType in supportedTypes)
    {
        extensions.Add("*" + fileType.Extension);
    }

    // Locate the datasets.
    var datasetLoader = new DatasetLoader();
    var datasets = datasetLoader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

    // Tolerances come straight from the LTQ Orbitrap preset.
    var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var featureTolerances = new FeatureTolerances
    {
        Mass = instrumentOptions.Mass,
        Net = instrumentOptions.NetTolerance,
        DriftTime = instrumentOptions.DriftTimeTolerance
    };

    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    // Only the first dataset is analyzed.
    var baselineDataset = datasets[0];

    UpdateStatus("Loading baseline features.");
    var loadedMsFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
    var msFeatures = LcmsFeatureFilters.FilterMsFeatures(loadedMsFeatures, msFilterOptions);

    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    var peptideOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .05,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Load the baseline reference set
    using (var rawReader = RawLoaderFactory.CreateFileReader(baselineDataset.RawFile.Path))
    {
        rawReader.AddDataFile(baselineDataset.RawFile.Path, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var baselineFeatures = featureFinder.FindFeatures(msFeatures, featureFindingOptions, rawReader);

        LinkPeptidesToFeatures(
            baselineDataset.Sequence.Path,
            baselineFeatures,
            peptideOptions.Fdr,
            peptideOptions.IdScore);

        // Flatten to MS features, keep those with MS/MS data, and order them by m/z.
        var fragmentedFeatures = baselineFeatures
            .SelectMany(lcmsFeature => lcmsFeature.MsFeatures)
            .Where(msFeature => msFeature.HasMsMs())
            .OrderBy(msFeature => msFeature.Mz)
            .ToList();

        // A feature is reported once when any of its spectra carries at least one peptide.
        var peptideList = fragmentedFeatures
            .Where(msFeature => msFeature.MSnSpectra.Any(spectrum => spectrum.Peptides.Any()))
            .ToList();

        using (var writer = File.CreateText(matchPath))
        {
            writer.WriteLine("Charge\tpmz\tscan\tNET\t");
            foreach (var feature in peptideList)
            {
                writer.WriteLine("{0}\t{1}\t{2}\t{3}\t", feature.ChargeState, feature.Mz, feature.Scan, feature.Net);
            }
        }
    }
}
/// <summary>
/// Creates a database at <paramref name="databasePath"/>, finds the features of every dataset
/// located in <paramref name="directoryPath"/> and caches them, then stores the dataset list.
/// </summary>
/// <param name="directoryPath">Directory searched (top level only) for supported dataset files; resolved through GetPath.</param>
/// <param name="databasePath">Path of the database to create; resolved through GetPath.</param>
public void CreateFeatureDatabase(string directoryPath, string databasePath)
{
    var directory = GetPath(directoryPath);
    databasePath = GetPath(databasePath);

    // Build one search pattern per supported MultiAlign file type.
    var supportedTypes = DatasetLoader.SupportedFileTypes;
    var extensions = new List<string>();
    foreach (var fileType in supportedTypes)
    {
        extensions.Add("*" + fileType.Extension);
    }

    // Locate the datasets.
    var datasetLoader = new DatasetLoader();
    var datasets = datasetLoader.GetValidDatasets(directory, extensions, SearchOption.TopDirectoryOnly);

    // Tolerances come from the LTQ Orbitrap preset, with the mass tolerance widened by 6.
    var instrumentOptions = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var featureTolerances = new FeatureTolerances
    {
        Mass = instrumentOptions.Mass + 6,
        Net = instrumentOptions.NetTolerance,
        DriftTime = instrumentOptions.DriftTimeTolerance
    };

    var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    var lcmsFilters = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRangeScans = new FilterRange(50, 300)
    };

    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    var spectralOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .01,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    NHibernateUtil.CreateDatabase(databasePath);

    // Synchronization and IO for serializing all data to the database.
    var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, true);
    var cache = new FeatureLoader
    {
        Providers = providers
    };

    // Number the datasets sequentially from zero and cache the features of each one.
    var nextDatasetId = 0;
    foreach (var dataset in datasets)
    {
        dataset.DatasetId = nextDatasetId;
        nextDatasetId++;

        var datasetFeatures = FindFeatures(
            dataset,
            featureFindingOptions,
            msFilterOptions,
            lcmsFilters,
            spectralOptions,
            finder);
        cache.CacheFeatures(datasetFeatures);
    }

    providers.DatasetCache.AddAll(datasets);
}
/// <summary>
/// Finds features in a baseline and an alignee feature file, clusters their MS/MS spectra
/// and writes the clusters to two timestamped text files in <paramref name="resultPath"/>:
/// "*_scans.txt" lists the scans and mean score of every two-feature cluster, and
/// "*.txt" lists every cluster with its features and peptides.
/// </summary>
/// <param name="name">Prefix of the result file names.</param>
/// <param name="resultPath">Directory that receives the result files.</param>
/// <param name="sequencePath">Sequence file registered with both datasets.</param>
/// <param name="type">Not used by this method.</param>
/// <param name="baseline">Baseline feature file; its raw file path is derived by replacing "_isos.csv" with ".raw".</param>
/// <param name="features">Alignee feature file; its raw file path is derived by replacing "_isos.csv" with ".raw".</param>
/// <param name="percent">Value assigned to the TopPercent property of the spectral comparer.</param>
public void ClusterMsMs(string name, string resultPath, string sequencePath, SequenceFileType type, string baseline, string features, double percent)
{
    var baselineRaw = baseline.Replace("_isos.csv", ".raw");
    var featuresRaw = features.Replace("_isos.csv", ".raw");

    Console.WriteLine("Create Baseline Information");

    var baselineInfo = new DatasetInformation
    {
        DatasetId = 0,
    };
    baselineInfo.InputFiles.Add(new InputFile { Path = baseline, FileType = InputFileType.Features });
    baselineInfo.InputFiles.Add(new InputFile { Path = baselineRaw, FileType = InputFileType.Raw });
    baselineInfo.InputFiles.Add(new InputFile { Path = sequencePath, FileType = InputFileType.Sequence });

    Console.WriteLine("Create Alignee Information");

    var aligneeInfo = new DatasetInformation
    {
        DatasetId = 1,
    };
    aligneeInfo.InputFiles.Add(new InputFile { Path = features, FileType = InputFileType.Features });
    aligneeInfo.InputFiles.Add(new InputFile { Path = featuresRaw, FileType = InputFileType.Raw });
    aligneeInfo.InputFiles.Add(new InputFile { Path = sequencePath, FileType = InputFileType.Sequence });

    var reader = new MsFeatureLightFileReader();

    // Tag every MS feature with the id of the dataset it was read from.
    Console.WriteLine("Reading Baseline Features");
    var baselineMsFeatures = reader.ReadFile(baseline).ToList();
    baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

    Console.WriteLine("Reading Alignee Features");
    var aligneeMsFeatures = reader.ReadFile(features).ToList();
    aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var tolerances = new FeatureTolerances
    {
        Mass = 8,
        Net = .005
    };
    var options = new LcmsFeatureFindingOptions(tolerances);

    Console.WriteLine("Detecting Baseline Features");
    var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

    Console.WriteLine("Detecting Alignee Features");
    var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

    Console.WriteLine("Managing baseline and alignee features");
    baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
    aligneeFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    Console.WriteLine("Clustering MS/MS Spectra");
    var clusterer = new MSMSClusterer();
    clusterer.MzTolerance = .5;
    clusterer.MassTolerance = 6;
    clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
    {
        TopPercent = percent
    };
    clusterer.SimilarityTolerance = .5;
    clusterer.ScanRange = 905;
    clusterer.Progress += clusterer_Progress;

    var allFeatures = new List<UMCLight>();
    allFeatures.AddRange(baselineFeatures);
    allFeatures.AddRange(aligneeFeatures);

    // Register both raw files with the provider cache before clustering.
    var spectraProviderCache = new SpectraProviderCache();
    spectraProviderCache.GetSpectraProvider(baselineInfo.RawFile.Path, baselineInfo.DatasetId);
    spectraProviderCache.GetSpectraProvider(aligneeInfo.RawFile.Path, aligneeInfo.DatasetId);

    List<MsmsCluster> clusters = clusterer.Cluster(allFeatures, spectraProviderCache);

    // Check for a missing result BEFORE touching it; previously Count was read
    // ahead of the null check, which made the check useless.
    if (clusters == null)
    {
        Console.WriteLine("No clusters were returned.");
        return;
    }

    Console.WriteLine("Found {0} Total Clusters", clusters.Count);

    // Both result files share one timestamped prefix. Path.Combine supplies the
    // directory separator instead of a hard-coded backslash.
    var now = DateTime.Now;
    var resultFilePrefix = Path.Combine(
        resultPath,
        string.Format("{0}-results-{1}-{2}-{3}-{4}-{5}-{6}",
            name,
            now.Year,
            now.Month,
            now.Day,
            now.Hour,
            now.Minute,
            now.Second));

    // File 1: scans and mean score of the clusters that contain exactly two features.
    using (TextWriter writer = File.CreateText(resultFilePrefix + "_scans.txt"))
    {
        writer.WriteLine("[Data]");
        writer.WriteLine("{0}", baseline);
        writer.WriteLine("{0}", features);
        writer.WriteLine("[Scans]");
        writer.WriteLine();

        foreach (var cluster in clusters)
        {
            if (cluster.Features.Count != 2)
            {
                continue;
            }

            var scanData = "";
            foreach (var feature in cluster.Features)
            {
                scanData += string.Format("{0},", feature.Scan);
            }

            scanData += string.Format("{0}", cluster.MeanScore);
            writer.WriteLine(scanData);
        }
    }

    // File 2: every cluster, first as one line per cluster, then as a detailed section.
    // The record layout is intentionally left exactly as it was.
    using (TextWriter writer = File.CreateText(resultFilePrefix + ".txt"))
    {
        writer.WriteLine("[Data]");
        writer.WriteLine("{0}", baseline);
        writer.WriteLine("{0}", features);
        writer.WriteLine("[Scans]");

        foreach (var cluster in clusters)
        {
            var scanData = "";
            var data = "";
            foreach (var feature in cluster.Features)
            {
                scanData += string.Format("{0},", feature.Scan);
                data += string.Format("{0},{1},{2},{3},{4},{5}",
                    feature.GroupId,
                    feature.Id,
                    feature.MassMonoisotopic,
                    feature.Mz,
                    feature.ChargeState,
                    feature.Scan);

                foreach (var spectrum in feature.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }
            }

            writer.WriteLine(scanData + "," + data);
        }

        writer.WriteLine("");
        writer.WriteLine("");
        writer.WriteLine("[Clusters]");

        foreach (var cluster in clusters)
        {
            writer.WriteLine("cluster id, cluster score");
            writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
            writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

            foreach (var feature in cluster.Features)
            {
                var data = string.Format("{0},{1},{2},{3},{4},{5}",
                    feature.GroupId,
                    feature.Id,
                    feature.MassMonoisotopic,
                    feature.Mz,
                    feature.ChargeState,
                    feature.Scan);

                foreach (var spectrum in feature.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }

                writer.WriteLine(data);
            }
        }
    }
}