/// <summary>
/// Loads the MS features stored in <paramref name="featureFile"/> and runs the
/// tree-based feature finder over them, using a reader created for
/// <paramref name="rawFile"/>.
/// </summary>
/// <param name="rawFile">Path to the raw data file.</param>
/// <param name="featureFile">Path to the MS feature file to load.</param>
/// <returns>The features returned by the feature finder.</returns>
public List<UMCLight> FindFeatures(string rawFile, string featureFile)
{
    List<UMCLight> features;
    using (ISpectraProvider raw = new ThermoRawDataFileReader())
    {
        // Read the raw file summary data...
        raw.AddDataFile(rawFile, 0);

        var info = new DatasetInformation();
        info.Features = new InputFile();
        info.Features.Path = featureFile;

        var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
        var tolerances = new FeatureTolerances
        {
            Mass = 8,
            Net = .005
        };
        var options = new LcmsFeatureFindingOptions(tolerances);

        // Load and create features
        var msFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);

        // The reader handed to the finder is a second instance, separate from `raw`.
        // It was previously never disposed; release it as soon as the finder is done.
        // NOTE(review): unlike `raw`, this reader never has AddDataFile called on it - confirm
        // that CreateFileReader alone is sufficient for the finder.
        using (var provider = RawLoaderFactory.CreateFileReader(rawFile))
        {
            features = finder.FindFeatures(msFeatures, options, provider);
        }
    }
    return features;
}
/// <summary>
/// Loads the MS features from <paramref name="featureFile"/> and converts them
/// into LC-MS features with the tree-based feature finder.
/// </summary>
/// <param name="rawFile">Path to the raw data file.</param>
/// <param name="featureFile">Path to the MS feature file to load.</param>
/// <returns>The features returned by the feature finder.</returns>
public List<UMCLight> FindFeatures(string rawFile, string featureFile)
{
    using (ISpectraProvider summaryReader = new InformedProteomicsReader())
    {
        // Read the raw file summary data...
        summaryReader.AddDataFile(rawFile, 0);

        // Register the feature file with a dataset description.
        var datasetInfo = new DatasetInformation();
        var featureInput = new InputFile
        {
            Path = featureFile,
            FileType = InputFileType.Features
        };
        datasetInfo.InputFiles.Add(featureInput);

        var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
        var findingOptions = new LcmsFeatureFindingOptions(
            new FeatureTolerances
            {
                Mass = 8,
                Net = .005
            });

        // Load and create features
        var loadedMsFeatures = UmcLoaderFactory.LoadMsFeatureData(datasetInfo.Features.Path);

        var scanProvider = RawLoaderFactory.CreateFileReader(rawFile);
        scanProvider.AddDataFile(rawFile, 0);

        return featureFinder.FindFeatures(loadedMsFeatures, findingOptions, scanProvider);
    }
}
/// <summary>
/// Creates feature finding options around the supplied instrument tolerances,
/// with a maximum scan range of 50 and a maximum NET range of 0.005.
/// </summary>
/// <param name="tolerances">Value stored in <c>InstrumentTolerances</c>.</param>
public LcmsFeatureFindingOptions(FeatureTolerances tolerances)
{
    this.InstrumentTolerances = tolerances;
    this.MaximumScanRange = 50;
    this.MaximumNetRange = 0.005;
}
/// <summary>
/// Reads comma-delimited MS features from <paramref name="path"/>, runs the UMC tree
/// feature finder without a scan provider, and asserts that at least one feature is found.
/// </summary>
/// <param name="path">Path to the MS feature file.</param>
/// <returns>The features that were found.</returns>
public IEnumerable<UMCLight> TestUmcFeatures(string path)
{
    var featureReader = new MsFeatureLightFileReader();
    featureReader.Delimeter = ",";
    var loadedMsFeatures = featureReader.ReadFile(path);

    var treeFinder = new UmcTreeFeatureFinder();
    treeFinder.MaximumNet = .005;
    treeFinder.MaximumScan = 50;

    var findingOptions = new LcmsFeatureFindingOptions(
        new FeatureTolerances
        {
            Mass = 8,
            Net = .005
        });

    var foundFeatures = treeFinder.FindFeatures(loadedMsFeatures.ToList(), findingOptions, null);

    // Work on total feature count here.
    Assert.Greater(foundFeatures.Count, 0);

    return foundFeatures;
}
/// <summary>
/// Restores every clustering parameter to its default: fresh tolerances, the default
/// same-charge-state setting, a weighted Euclidean distance function, the
/// <c>WithinRange</c> range function, and a median centroid representation.
/// </summary>
public virtual void Clear()
{
    this.Tolerances = new FeatureTolerances();
    this.OnlyClusterSameChargeStates = CONST_DEFAULT_ONLY_CLUSTER_SAME_CHARGE_STATES;
    this.DistanceFunction = DistanceFactory<T>.CreateDistanceFunction(DistanceMetric.WeightedEuclidean);
    this.RangeFunction = WithinRange;
    this.CentroidRepresentation = ClusterCentroidRepresentation.Median;
}
/// <summary>
/// Creates the analysis options with default sub-option objects. The same
/// <c>InstrumentTolerances</c> instance is handed to both the LC-MS feature finding
/// options and the LC-MS clustering options.
/// </summary>
public MultiAlignAnalysisOptions()
{
    this.InstrumentTolerances = new FeatureTolerances();
    this.MassTagDatabaseOptions = new MassTagDatabaseOptions();
    this.MsFilteringOptions = new MsFeatureFilteringOptions();
    this.LcmsFindingOptions = new LcmsFeatureFindingOptions(this.InstrumentTolerances);

    // Feature length filter: treated as time rather than scans, range 0 to 20.
    this.LcmsFilteringOptions = new LcmsFeatureFilteringOptions
    {
        TreatAsTimeNotScan = true,
        FeatureLengthRange = new FilterRange(0, 20)
    };

    this.AlignmentOptions = new AlignmentOptions();
    this.LcmsClusteringOptions = new LcmsClusteringOptions(this.InstrumentTolerances);
    this.StacOptions = new StacOptions();

    this.HasMsMs = false;
    this.UsedIonMobility = false;
}
/// <summary>
/// Compares every cluster against every other cluster and records the mass (ppm) and
/// retention-time differences of all pairs whose aligned monoisotopic masses are within
/// <c>tolerances.Mass</c>.
/// </summary>
/// <param name="clusters">Clusters to compare; the list itself is not reordered.</param>
/// <param name="massErrorPpm">Receives the ppm difference of each matching pair.</param>
/// <param name="netError">Receives the retention-time difference of each matching pair.</param>
/// <param name="counts">Receives, per cluster, the number of matching clusters.</param>
/// <param name="tolerances">Supplies the mass tolerance.</param>
/// <param name="ranges">Retention-time differences grouped by the match count of the cluster that produced them.</param>
public void CalculateClusterErrorHistograms(List<UMCClusterLight> clusters, List<double> massErrorPpm, List<double> netError, List<double> counts, FeatureTolerances tolerances, Dictionary<int, List<double>> ranges)
{
    // Work on a mass-ordered copy so the caller's list keeps its order.
    var byMass = new List<UMCClusterLight>(clusters);
    byMass.Sort((a, b) => a.MassMonoisotopic.CompareTo(b.MassMonoisotopic));

    foreach (var current in byMass)
    {
        var matchedNetDiffs = new List<double>();

        foreach (var other in byMass)
        {
            // Never compare a cluster with itself.
            if (current.ID == other.ID)
            {
                continue;
            }

            var ppm = Feature.ComputeMassPPMDifference(other.MassMonoisotopicAligned, current.MassMonoisotopicAligned);
            if (System.Math.Abs(ppm) > tolerances.Mass)
            {
                continue;
            }

            var retentionDiff = current.RetentionTime - other.RetentionTime;
            matchedNetDiffs.Add(retentionDiff);
            massErrorPpm.Add(ppm);
            netError.Add(retentionDiff);
        }

        // One retention-time difference is stored per match, so the list length is the match count.
        var matchCount = matchedNetDiffs.Count;
        counts.Add((double)matchCount);

        List<double> bucket;
        if (!ranges.TryGetValue(matchCount, out bucket))
        {
            bucket = new List<double>();
            ranges.Add(matchCount, bucket);
        }
        bucket.AddRange(matchedNetDiffs);
    }
}
/// <summary>
/// Creates feature finding options with default values: new instrument tolerances,
/// single-linkage first-pass and binary-search-tree second-pass clustering, XIC finding
/// and refinement enabled, and the smoothing, threshold and range settings below.
/// </summary>
public LcmsFeatureFindingOptions()
{
    InstrumentTolerances = new FeatureTolerances();

    // Clustering algorithms
    FirstPassClusterer = MsFeatureClusteringAlgorithmType.SingleLinkage;
    SecondPassClusterer = GenericClusteringAlgorithmType.BinarySearchTree;

    // XIC settings
    FindXics = true;
    RefineXics = true;
    SmoothingWindowSize = 5;
    SmoothingPolynomialOrder = 2;
    XicRelativeIntensityThreshold = 0.05;

    SecondPassClustering = true;

    // Range limits
    MaximumScanRange = 50;
    MaximumNetRange = 0.005;
}
/// <summary>
/// Compares every feature of dataset 0 against every feature of dataset 1 and records
/// the mass (ppm) and retention-time differences of all pairs that fall within both the
/// mass and the retention-time tolerance.
/// </summary>
/// <param name="providers">Data access providers; features are read from <c>FeatureCache</c> for dataset IDs 0 and 1.</param>
/// <param name="massErrorPpm">Receives the ppm difference of each matching pair.</param>
/// <param name="netError">Receives the retention-time difference of each matching pair.</param>
/// <param name="counts">Receives, per dataset-0 feature, the number of matching dataset-1 features.</param>
/// <param name="tolerances">Supplies the mass and retention-time tolerances.</param>
public void CalculateClusterErrorHistograms(FeatureDataAccessProviders providers, List<double> massErrorPpm, List<double> netError, List<double> counts, FeatureTolerances tolerances)
{
    List<UMCLight> featuresA = providers.FeatureCache.FindByDatasetId(0);
    List<UMCLight> featuresB = providers.FeatureCache.FindByDatasetId(1);

    // Both lists are ordered by monoisotopic mass in place; this fixes the order of the output entries.
    featuresA.Sort((x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic));
    featuresB.Sort((x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic));

    foreach (UMCLight featureA in featuresA)
    {
        double count = 0;

        foreach (UMCLight featureB in featuresB)
        {
            double ppmDiff = Feature.ComputeMassPPMDifference(featureB.MassMonoisotopicAligned, featureA.MassMonoisotopicAligned);
            if (Math.Abs(ppmDiff) > tolerances.Mass)
            {
                continue;
            }

            double netDiff = featureA.RetentionTime - featureB.RetentionTime;
            if (Math.Abs(netDiff) > tolerances.RetentionTime)
            {
                continue;
            }

            massErrorPpm.Add(ppmDiff);
            netError.Add(netDiff);

            // Previously the counter was never incremented, so counts only ever received zeros.
            count++;
        }

        counts.Add(count);
    }
}
/// <summary>
/// Pairs each cluster with every other cluster and collects the mass (ppm) and
/// retention-time differences for pairs whose aligned monoisotopic masses differ by no
/// more than <c>tolerances.Mass</c>.
/// </summary>
/// <param name="clusters">Clusters to compare; a sorted copy is used internally.</param>
/// <param name="massErrorPpm">Receives the ppm difference of each matching pair.</param>
/// <param name="netError">Receives the retention-time difference of each matching pair.</param>
/// <param name="counts">Receives, per cluster, the number of matching clusters.</param>
/// <param name="tolerances">Supplies the mass tolerance.</param>
/// <param name="ranges">Retention-time differences keyed by the match count of the cluster that produced them.</param>
public void CalculateClusterErrorHistograms(List<UMCClusterLight> clusters, List<double> massErrorPpm, List<double> netError, List<double> counts, FeatureTolerances tolerances, Dictionary<int, List<double>> ranges)
{
    var ordered = new List<UMCClusterLight>();
    ordered.AddRange(clusters);
    ordered.Sort((left, right) => left.MassMonoisotopic.CompareTo(right.MassMonoisotopic));

    for (var outer = 0; outer < ordered.Count; outer++)
    {
        var reference = ordered[outer];
        var matchedNetDiffs = new List<double>();

        for (var inner = 0; inner < ordered.Count; inner++)
        {
            var candidate = ordered[inner];

            // Skip the cluster itself.
            if (reference.ID == candidate.ID)
            {
                continue;
            }

            var ppmDelta = Feature.ComputeMassPPMDifference(candidate.MassMonoisotopicAligned, reference.MassMonoisotopicAligned);
            var withinMassTolerance = System.Math.Abs(ppmDelta) <= tolerances.Mass;
            if (!withinMassTolerance)
            {
                continue;
            }

            var netDelta = reference.RetentionTime - candidate.RetentionTime;
            matchedNetDiffs.Add(netDelta);
            massErrorPpm.Add(ppmDelta);
            netError.Add(netDelta);
        }

        // Each match contributes exactly one entry, so the list length is the match count.
        var matchCount = matchedNetDiffs.Count;
        counts.Add(Convert.ToDouble(matchCount));

        if (!ranges.ContainsKey(matchCount))
        {
            ranges.Add(matchCount, new List<double>());
        }
        ranges[matchCount].AddRange(matchedNetDiffs);
    }
}
/// <summary>
/// Reads comma-delimited MS features, finds UMC features using a scan summary provider
/// for the matching <c>.raw</c> file, and asserts the expected feature count.
/// </summary>
/// <param name="relativePath">Relative path to the <c>_isos.csv</c> file.</param>
/// <param name="expectedFeatureCount">Number of features the finder must produce.</param>
/// <returns>The features that were found.</returns>
public IEnumerable<UMCLight> TestUmcFeatures(string relativePath, int expectedFeatureCount)
{
    // Get the absolute path
    var isosPath = GetPath(relativePath);

    var featureReader = new MsFeatureLightFileReader();
    featureReader.Delimiter = ',';
    var loadedMsFeatures = featureReader.ReadFile(isosPath);

    var treeFinder = new UmcTreeFeatureFinder
    {
        MaximumNet = .005,
        MaximumScan = 50
    };
    var findingOptions = new LcmsFeatureFindingOptions(
        new FeatureTolerances
        {
            Mass = 8,
            Net = .005
        });

    // The raw file path is derived from the isos file name.
    var rawFilePath = isosPath.Replace("_isos.csv", ".raw");
    UpdateStatus("Using raw data to create better features.");

    var providerCache = new ScanSummaryProviderCache();
    IScanSummaryProvider scanProvider = providerCache.GetScanSummaryProvider(rawFilePath, 1);

    var foundFeatures = treeFinder.FindFeatures(loadedMsFeatures.ToList(), findingOptions, scanProvider);

    // Work on total feature count here.
    Assert.Greater(foundFeatures.Count, 0);
    Assert.AreEqual(expectedFeatureCount, foundFeatures.Count);

    return foundFeatures;
}
/// <summary>
/// Creates the view model with four isotopes, empty scan map, charge and feature
/// collections, and the feature finding and cluster tolerances set below.
/// </summary>
public ClusterDetailViewModel()
{
    m_numberOfIsotopes = 4;
    m_scanMaps = new Dictionary<int, List<MSFeatureLight>>();
    Charges = new ObservableCollection<ChargeStateViewModel>();

    var findingTolerances = new FeatureTolerances();
    findingTolerances.Mass = 10;
    findingTolerances.DriftTime = 3;
    findingTolerances.Net = 50;
    FeatureFindingTolerances = findingTolerances;

    var clusterTolerances = new FeatureTolerances();
    clusterTolerances.Mass = 10;
    clusterTolerances.DriftTime = 3;
    clusterTolerances.Net = .03;
    ClusterTolerances = clusterTolerances;

    Features = new ObservableCollection<UMCTreeViewModel>();
}
/// <summary>
/// Initializes the isotope count, the scan map, the charge and feature collections,
/// and the two tolerance sets used for feature finding and clustering.
/// </summary>
public ClusterDetailViewModel()
{
    this.m_numberOfIsotopes = 4;
    this.m_scanMaps = new Dictionary<int, List<MSFeatureLight>>();
    this.Charges = new ObservableCollection<ChargeStateViewModel>();

    // Tolerances used when finding features.
    this.FeatureFindingTolerances = new FeatureTolerances
    {
        Mass = 10,
        DriftTime = 3,
        Net = 50
    };

    // Tolerances used when clustering.
    this.ClusterTolerances = new FeatureTolerances
    {
        Mass = 10,
        DriftTime = 3,
        Net = .03
    };

    this.Features = new ObservableCollection<UMCTreeViewModel>();
}
/// <summary>
/// Creates the analysis options with default sub-option objects. The same
/// <c>InstrumentTolerances</c> instance is shared by the LC-MS feature finding options
/// and the LC-MS clustering options.
/// </summary>
public MultiAlignAnalysisOptions()
{
    DataLoadOptions = new DataLoadingOptions();
    InstrumentTolerances = new FeatureTolerances();
    MassTagDatabaseOptions = new MassTagDatabaseOptions();
    MsFilteringOptions = new MsFeatureFilteringOptions();
    LcmsFindingOptions = new LcmsFeatureFindingOptions(InstrumentTolerances);

    // LC-MS feature filter defaults: filter on minutes (0-20), at least 3 data points,
    // and a scan length range of 0-2000.
    var lcmsFiltering = new LcmsFeatureFilteringOptions();
    lcmsFiltering.FilterOnMinutes = true;
    lcmsFiltering.FeatureLengthRangeMinutes = new FilterRange(0, 20);
    lcmsFiltering.MinimumDataPoints = 3;
    lcmsFiltering.FeatureLengthRangeScans = new FilterRange(0, 2000);
    LcmsFilteringOptions = lcmsFiltering;

    AlignmentOptions = new AlignmentOptions();
    LcmsClusteringOptions = new LcmsClusteringOptions(InstrumentTolerances);
    StacOptions = new StacOptions();

    HasMsMs = false;
    UsedIonMobility = false;

    ClusterPostProcessingoptions = new ClusterPostProcessingOptions();
}
/// <summary>
/// Loads two MS feature files, finds features in each with the tree-based finder,
/// aligns them with LCMSWarp, and writes the alignment heat scores as a PNG plus
/// SVG heatmap and NET/mass error histograms next to it.
/// </summary>
/// <param name="path1">Relative path to the baseline MS feature file.</param>
/// <param name="path2">Relative path to the alignee MS feature file.</param>
/// <param name="pngPath">Relative path of the PNG to write; its directory is created if missing.</param>
/// <exception cref="DirectoryNotFoundException">The output path has no parent directory.</exception>
public void TestMsFeatureScatterPlot(string path1, string path2, string pngPath)
{
    // Convert relative paths to absolute paths
    path1 = GetPath(path1);
    path2 = GetPath(path2);
    pngPath = GetPath(pngPath);

    var fiOutput = new FileInfo(pngPath);
    var didirectory = fiOutput.Directory;
    if (didirectory == null)
    {
        throw new DirectoryNotFoundException(pngPath);
    }

    if (!didirectory.Exists)
    {
        didirectory.Create();
    }

    var aligner = new LcmsWarpFeatureAligner();

    var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1);
    var aligneeMs = UmcLoaderFactory.LoadMsFeatureData(path2);
    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    var tolerances = new FeatureTolerances
    {
        FragmentationWindowSize = .5,
        Mass = 13,
        DriftTime = .3,
        Net = .01
    };
    var options = new LcmsFeatureFindingOptions(tolerances);
    options.MaximumNetRange = .002;

    var baseline = finder.FindFeatures(baselineMs, options, null);
    var alignee = finder.FindFeatures(aligneeMs, options, null);
    var alignmentResults = aligner.Align(baseline, alignee);

    var plotModel1 = new PlotModel
    {
        Subtitle = "Interpolated, cartesian axes",
        Title = "HeatMapSeries"
    };

    var palette = OxyPalettes.Hot(200);
    var linearColorAxis1 = new LinearColorAxis
    {
        InvalidNumberColor = OxyColors.Gray,
        Position = AxisPosition.Right,
        Palette = palette
    };
    plotModel1.Axes.Add(linearColorAxis1);

    var linearAxis1 = new LinearAxis { Position = AxisPosition.Bottom };
    plotModel1.Axes.Add(linearAxis1);

    var linearAxis2 = new LinearAxis();
    plotModel1.Axes.Add(linearAxis2);

    var heatMapSeries1 = new HeatMapSeries
    {
        X0 = 0,
        X1 = 1,
        Y0 = 0,
        Y1 = 1,
        FontSize = .2
    };

    // Copy the alignment heat scores into the series as doubles.
    var scores = alignmentResults.heatScores;
    var width = scores.GetLength(0);
    var height = scores.GetLength(1);

    heatMapSeries1.Data = new double[width, height];
    var seriesData = heatMapSeries1.Data;
    for (var i = 0; i < width; i++)
    {
        for (var j = 0; j < height; j++)
        {
            seriesData[i, j] = Convert.ToDouble(scores[i, j]);
        }
    }

    plotModel1.Series.Add(heatMapSeries1);

    // Render the plot to SVG text, then rasterize that SVG to the PNG path.
    var svg = new SvgExporter();
    var svgString = svg.ExportToString(plotModel1);

    var xml = new XmlDocument();
    xml.LoadXml(svgString);
    var x = SvgDocument.Open(xml);

    // The rendered bitmap holds native GDI+ resources; release it once it has been saved.
    using (var bmp = x.Draw())
    {
        bmp.Save(pngPath);
    }

    var heatmap = HeatmapFactory.CreateAlignedHeatmap(alignmentResults.heatScores);
    var netHistogram = HistogramFactory.CreateHistogram(alignmentResults.netErrorHistogram, "NET Error", "NET Error");
    var massHistogram = HistogramFactory.CreateHistogram(alignmentResults.massErrorHistogram, "Mass Error", "Mass Error (ppm)");

    // Additional SVG outputs share the PNG's base name.
    var baseName = Path.Combine(didirectory.FullName, Path.GetFileNameWithoutExtension(fiOutput.Name));

    var encoder = new SvgEncoder();
    PlotImageUtility.SaveImage(heatmap, baseName + "_heatmap.svg", encoder);
    PlotImageUtility.SaveImage(netHistogram, baseName + "_netHistogram.svg", encoder);
    PlotImageUtility.SaveImage(massHistogram, baseName + "_massHistogram.svg", encoder);
}
/// <summary>
/// Converts MS features into LC-MS features, then assigns each non-empty feature a NET
/// value, statistics, a sequential ID, and scan start/end values trimmed to the peak.
/// </summary>
/// <param name="msFeatures">MS features to convert.</param>
/// <param name="options">Feature finding options passed to the converter.</param>
/// <param name="provider">Scan summary provider used to look up scan times.</param>
/// <param name="progress">Optional progress reporter passed to the converter.</param>
/// <returns>
/// All features produced by the converter. Features without any MS features are
/// returned as-is, without NET, statistics or ID assigned.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="msFeatures"/>, <paramref name="options"/> or <paramref name="provider"/> is null.
/// </exception>
public List<UMCLight> FindFeatures(List<MSFeatureLight> msFeatures, LcmsFeatureFindingOptions options, IScanSummaryProvider provider, IProgress<ProgressData> progress = null)
{
    if (msFeatures == null)
    {
        throw new ArgumentNullException(nameof(msFeatures));
    }

    if (options == null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    if (provider == null)
    {
        throw new ArgumentNullException(nameof(provider));
    }

    var clusterer = new MsToLcmsFeatures(provider, options);
    var features = clusterer.Convert(msFeatures, progress);

    // With no MS features there is no scan range to normalize against; skip the
    // provider lookups, which would otherwise be made with int.MaxValue / int.MinValue.
    if (msFeatures.Count == 0)
    {
        return features;
    }

    var minScan = int.MaxValue;
    var maxScan = int.MinValue;
    foreach (var msFeature in msFeatures)
    {
        minScan = Math.Min(msFeature.Scan, minScan);
        maxScan = Math.Max(msFeature.Scan, maxScan);
    }

    var minScanTime = provider.GetScanSummary(minScan).Time;
    var maxScanTime = provider.GetScanSummary(maxScan).Time;
    var scanTimeRange = maxScanTime - minScanTime;

    var id = 0;
    foreach (var feature in features)
    {
        // NOTE(review): empty features are skipped here but still returned to the caller - confirm intended.
        if (feature.MsFeatures.Count < 1)
        {
            continue;
        }

        // NET is the feature's scan time normalized to the observed scan-time range.
        // When every MS feature shares one scan time the range is zero; use 0 instead of dividing by zero.
        var elapsed = provider.GetScanSummary(feature.Scan).Time - minScanTime;
        feature.Net = scanTimeRange != 0 ? elapsed / scanTimeRange : 0;

        feature.CalculateStatistics();
        feature.Id = id++;

        // Sets the width of the feature to be the width of the peak, not the width of the tails.
        // Every MS feature is inspected; the previous bound (Count - 1) never considered the last one.
        var maxAbundance = double.MinValue;
        var maxAbundanceIndex = 0;
        for (var msFeatureIndex = 0; msFeatureIndex < feature.MsFeatures.Count; msFeatureIndex++)
        {
            var msFeature = feature.MsFeatures[msFeatureIndex];
            if (msFeature.Abundance > maxAbundance)
            {
                maxAbundance = msFeature.Abundance;
                maxAbundanceIndex = msFeatureIndex;
            }
        }

        // Walk left from the apex to the first point at or below 5% of the apex abundance.
        // The first element is not examined; if no such point is found ScanStart is left untouched.
        for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex > 0; msFeatureIndex--)
        {
            if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
            {
                feature.ScanStart = feature.MsFeatures[msFeatureIndex].Scan;
                break;
            }
        }

        // Walk right from the apex in the same way; the last element is not examined and
        // ScanEnd is left untouched when no point drops to 5% or below.
        for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex < feature.MsFeatures.Count - 1; msFeatureIndex++)
        {
            if (feature.MsFeatures[msFeatureIndex].Abundance / maxAbundance <= 0.05)
            {
                feature.ScanEnd = feature.MsFeatures[msFeatureIndex].Scan;
                break;
            }
        }
    }

    return features;
}
/// <summary>
/// Aligns each consecutive pair of datasets stored in the feature database and writes
/// match and error files for every pair into the output folder.
/// </summary>
/// <param name="relativeDatabasePath">Relative path to the feature database.</param>
/// <param name="relativeName">Relative name of the output folder; created if missing.</param>
/// <param name="name">Prefix used for the generated file names.</param>
/// <param name="alignmentType">Alignment algorithm to create.</param>
/// <param name="clusterType">Clustering algorithm to create.</param>
public void GenerateClusterAlignmentStatistics(string relativeDatabasePath, string relativeName, string name, FeatureAlignmentType alignmentType, LcmsFeatureClusteringAlgorithmType clusterType)
{
    var databasePath = GetPath(relativeDatabasePath);
    var outputFolder = GetOutputPath(relativeName);

    if (!Directory.Exists(outputFolder))
    {
        Directory.CreateDirectory(outputFolder);
    }

    // Connect to the NHibernate database
    var dataProviders = DataAccessFactory.CreateDataAccessProviders(databasePath, false);

    // Setup our alignment options
    var alignmentOptions = new AlignmentOptions();
    var spectralOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .01,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Options setup
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var clusterTolerances = new FeatureTolerances
    {
        Mass = preset.Mass + 6,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };

    UpdateStatus("Retrieving all datasets for test.");
    var datasets = dataProviders.DatasetCache.FindAll();

    // Create our algorithms
    var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType, alignmentOptions.LCMSWarpOptions, spectralOptions);
    var clusterer = ClusterFactory.Create(clusterType);
    clusterer.Parameters = new FeatureClusterParameters<UMCLight>
    {
        Tolerances = clusterTolerances
    };

    RegisterProgressNotifier(aligner);
    RegisterProgressNotifier(clusterer);

    // Walk the datasets pairwise: dataset N is the baseline for dataset N + 1.
    for (var aligneeIndex = 1; aligneeIndex < datasets.Count; aligneeIndex++)
    {
        var baselineIndex = aligneeIndex - 1;

        var matchPath = Path.Combine(outputFolder, string.Format("{0}-{1}-matches.txt", name, baselineIndex));
        var errorPath = Path.Combine(outputFolder, string.Format("{0}-{1}-errors.txt", name, baselineIndex));

        var aligneeDataset = datasets[aligneeIndex];
        var baselineDataset = datasets[baselineIndex];

        // Load the baseline reference set
        using (var baselineReader = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
        {
            baselineReader.AddDataFile(baselineDataset.RawPath, 0);

            // Load the alignee set
            using (var aligneeReader = RawLoaderFactory.CreateFileReader(aligneeDataset.RawPath))
            {
                aligneeReader.AddDataFile(aligneeDataset.RawPath, 0);

                var baselineFeatures = RetrieveFeatures(baselineDataset.DatasetId, dataProviders);
                var aligneeFeatures = RetrieveFeatures(aligneeDataset.DatasetId, dataProviders);

                var baselineSpectra = new CachedFeatureSpectraProvider(baselineReader, baselineFeatures);
                var aligneeSpectra = new CachedFeatureSpectraProvider(aligneeReader, aligneeFeatures);

                AlignDatasets(
                    baselineFeatures,
                    aligneeFeatures,
                    baselineSpectra,
                    aligneeSpectra,
                    aligner,
                    clusterer,
                    matchPath,
                    errorPath);
            }
        }
    }
}
/// <summary>
/// Creates clustering options that use the supplied instrument tolerances.
/// </summary>
/// <param name="instrumentTolerances">Value stored in <c>InstrumentTolerances</c>.</param>
public LcmsClusteringOptions(FeatureTolerances instrumentTolerances)
{
    this.InstrumentTolerances = instrumentTolerances;
}
/// <summary>
/// Loads two MS feature files, finds features in each with the tree-based finder,
/// aligns them with LCMSWarp, and writes the alignment heat scores as an SVG heat map.
/// </summary>
/// <param name="path1">Relative path to the baseline MS feature file.</param>
/// <param name="path2">Relative path to the alignee MS feature file.</param>
/// <param name="svgPath">Output name below the heat map results folder; ".svg" is appended.</param>
public void TestLcmsWarpAlignment(string path1, string path2, string svgPath)
{
    // Convert relative paths to absolute paths
    path1 = GetPath(path1);
    path2 = GetPath(path2);
    svgPath = GetPath(HEATMAP_RESULTS_FOLDER_BASE + svgPath);

    var warpAligner = new LcmsWarpFeatureAligner(new LcmsWarpAlignmentOptions());
    var isosFilter = new DeconToolsIsosFilterOptions();

    var baselineMsFeatures = UmcLoaderFactory.LoadMsFeatureData(path1, isosFilter);
    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(path2, isosFilter);
    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    var findingOptions = new LcmsFeatureFindingOptions(
        new FeatureTolerances
        {
            FragmentationWindowSize = .5,
            Mass = 13,
            DriftTime = .3,
            Net = .01
        })
    {
        MaximumNetRange = .002
    };

    var baselineFeatures = featureFinder.FindFeatures(baselineMsFeatures, findingOptions, null);
    var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures, findingOptions, null);
    var alignment = warpAligner.Align(baselineFeatures, aligneeFeatures);

    // Build the plot: a color axis on the right, a bottom axis, and a default axis.
    var heatPlot = new PlotModel
    {
        Subtitle = "Interpolated, cartesian axes",
        Title = "HeatMapSeries"
    };
    heatPlot.Axes.Add(new LinearColorAxis
    {
        InvalidNumberColor = OxyColors.Gray,
        Position = AxisPosition.Right,
        Palette = OxyPalettes.Hot(200)
    });
    heatPlot.Axes.Add(new LinearAxis { Position = AxisPosition.Bottom });
    heatPlot.Axes.Add(new LinearAxis());

    var heatSeries = new HeatMapSeries
    {
        X0 = 0,
        X1 = 1,
        Y0 = 0,
        Y1 = 1,
        FontSize = .2
    };

    // Copy the alignment heat scores into the series as doubles.
    var heatScores = alignment.HeatScores;
    var columnCount = heatScores.GetLength(0);
    var rowCount = heatScores.GetLength(1);

    heatSeries.Data = new double[columnCount, rowCount];
    for (var column = 0; column < columnCount; column++)
    {
        for (var row = 0; row < rowCount; row++)
        {
            heatSeries.Data[column, row] = Convert.ToDouble(heatScores[column, row]);
        }
    }

    heatPlot.Series.Add(heatSeries);

    // Export the plot as SVG text and write it out.
    var exporter = new SvgExporter();
    var svgText = exporter.ExportToString(heatPlot);
    using (var svgWriter = File.CreateText(svgPath + ".svg"))
    {
        svgWriter.Write(svgText);
    }
}
/// <summary>
/// Finds the supported dataset files in a directory and runs a MultiAlign analysis in
/// which the first dataset is the baseline and all remaining datasets are alignees.
/// </summary>
/// <param name="directory">Directory searched (top level only) for dataset files.</param>
/// <param name="outputPath">Base path for the match (".txt") and error ("-errors.txt") files.</param>
/// <param name="alignmentType">Alignment algorithm to create.</param>
/// <param name="clusterType">Clustering algorithm to create.</param>
public void TestClustering(
    string directory,
    string outputPath,
    FeatureAlignmentType alignmentType,
    LcmsFeatureClusteringAlgorithmType clusterType)
{
    var matchPath = string.Format("{0}.txt", outputPath);
    var errorPath = string.Format("{0}-errors.txt", outputPath);

    // Loads the supported MultiAlign types
    var extensions = new List<string>();
    foreach (var supportedType in DatasetInformation.SupportedFileTypes)
    {
        extensions.Add("*" + supportedType.Extension);
    }

    // Find our datasets
    var inputFiles = DatasetSearcher.FindDatasets(directory, extensions, SearchOption.TopDirectoryOnly);
    var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

    // Setup our alignment options
    var alignmentOptions = new AlignmentOptions();
    var spectralOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .01,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Options setup
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var clusterTolerances = new FeatureTolerances
    {
        Mass = preset.Mass + 6,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };
    var findingOptions = new LcmsFeatureFindingOptions(clusterTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    // Create our algorithms
    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType, alignmentOptions.LCMSWarpOptions, spectralOptions);
    var clusterer = ClusterFactory.Create(clusterType);
    clusterer.Parameters = new FeatureClusterParameters<UMCLight>
    {
        Tolerances = clusterTolerances
    };

    RegisterProgressNotifier(aligner);
    RegisterProgressNotifier(finder);
    RegisterProgressNotifier(clusterer);

    var lcmsFilters = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRange = new FilterRange(50, 300)
    };
    var msFilters = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    // Only the first dataset is used as baseline; every other dataset is an alignee.
    const int baselineIndex = 0;
    var aligneeDatasets = datasets.Where((dataset, position) => position != baselineIndex).ToList();
    PerformMultiAlignAnalysis(
        datasets[0],
        aligneeDatasets,
        findingOptions,
        msFilters,
        lcmsFilters,
        spectralOptions,
        finder,
        aligner,
        clusterer,
        matchPath,
        errorPath);
}
/// <summary>
/// Reads comma-delimited MS features, finds UMC features with a reader for the raw
/// file, and writes each feature's MS features grouped by charge to an
/// <c>_xics.csv</c> file next to the input file.
/// </summary>
/// <param name="relativePath">Relative path to the <c>_isos.csv</c> file.</param>
/// <param name="relativeRawPath">Relative path to the raw data file.</param>
public void TestUmcFeatures(string relativePath, string relativeRawPath)
{
    // Get absolute paths
    var path = GetPath(relativePath);
    var rawPath = GetPath(relativeRawPath);

    var reader = new MsFeatureLightFileReader { Delimiter = ',' };
    var newMsFeatures = reader.ReadFile(path);

    var finder = new UmcTreeFeatureFinder();
    var featureTolerances = new FeatureTolerances
    {
        Mass = 12,
        Net = .04
    };
    var options = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .003,
        MaximumScanRange = 50
    };

    var provider = RawLoaderFactory.CreateFileReader(rawPath);
    provider.AddDataFile(rawPath, 0);

    // Stopwatch is monotonic, unlike DateTime.Now, so the reported duration cannot be
    // skewed by clock adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
    stopwatch.Stop();
    Console.WriteLine(@"Test Took: " + stopwatch.Elapsed.TotalSeconds);

    // Fail through the test framework rather than throwing NullReferenceException by hand.
    Assert.IsNotNull(features, "The feature list came back empty.  This is a problem.");

    var dirPath = Path.GetDirectoryName(path);
    if (dirPath != null)
    {
        var xicPath = Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv"));
        using (var writer = File.CreateText(xicPath))
        {
            foreach (var feature in features)
            {
                writer.WriteLine();
                writer.WriteLine("Feature {0}", feature.Id);

                var chargeMap = feature.CreateChargeMap();
                foreach (var charge in chargeMap.Keys)
                {
                    writer.WriteLine();
                    foreach (var msFeature in chargeMap[charge])
                    {
                        var count = msFeature.MSnSpectra.Count;
                        writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan, msFeature.Abundance, count);
                    }
                }
            }
        }
    }

    // Work on total feature count here.
    Assert.Greater(features.Count(), 0);
}
/// <summary>
/// Not implemented; calling this overload always throws.
/// </summary>
/// <param name="list">Unused.</param>
/// <param name="mass20ppm">Unused.</param>
/// <param name="net20ppm">Unused.</param>
/// <param name="featureCounts20ppm">Unused.</param>
/// <param name="tolerances">Unused.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void CalculateClusterErrorHistograms(
    List<UMCClusterLight> list,
    List<double> mass20ppm,
    List<double> net20ppm,
    List<double> featureCounts20ppm,
    FeatureTolerances tolerances)
{
    throw new NotImplementedException();
}
/// <summary>
/// Finds the supported dataset files in a directory, runs feature finding on each one,
/// and caches the features and the dataset list in a newly created database.
/// </summary>
/// <param name="directoryPath">Relative path of the directory searched (top level only) for dataset files.</param>
/// <param name="databasePath">Relative path of the database to create.</param>
public void CreateFeatureDatabase(string directoryPath, string databasePath)
{
    var sourceDirectory = GetPath(directoryPath);
    databasePath = GetPath(databasePath);

    // Loads the supported MultiAlign types
    var extensions = new List<string>();
    foreach (var supportedType in DatasetInformation.SupportedFileTypes)
    {
        extensions.Add("*" + supportedType.Extension);
    }

    // Find our datasets
    var inputFiles = DatasetSearcher.FindDatasets(sourceDirectory, extensions, SearchOption.TopDirectoryOnly);
    var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

    // Options setup
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var findingOptions = new LcmsFeatureFindingOptions(
        new FeatureTolerances
        {
            Mass = preset.Mass + 6,
            Net = preset.NetTolerance,
            DriftTime = preset.DriftTimeTolerance
        })
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    var lcmsFilters = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRange = new FilterRange(50, 300)
    };
    var msFilters = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };
    var spectralOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .01,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    NHibernateUtil.CreateDatabase(databasePath);

    // Synchronization and IO for serializing all data to the database.
    var dataProviders = DataAccessFactory.CreateDataAccessProviders(databasePath, true);
    var featureCache = new FeatureLoader
    {
        Providers = dataProviders
    };

    // Dataset IDs are assigned sequentially, starting at zero, in enumeration order.
    var nextDatasetId = 0;
    foreach (var dataset in datasets)
    {
        dataset.DatasetId = nextDatasetId;
        nextDatasetId++;

        var datasetFeatures = FindFeatures(dataset, findingOptions, msFilters, lcmsFilters, spectralOptions, finder);
        featureCache.CacheFeatures(datasetFeatures);
    }

    dataProviders.DatasetCache.AddAll(datasets);
}
/// <summary>
/// Aligns each consecutive pair of datasets stored in the feature database and writes
/// match and error files for every pair into the output folder.
/// </summary>
/// <param name="relativeDatabasePath">Relative path to the feature database.</param>
/// <param name="relativeName">Relative name of the output folder; created if missing.</param>
/// <param name="name">Prefix used for the generated file names.</param>
/// <param name="alignmentType">Alignment algorithm to create.</param>
/// <param name="clusterType">Clustering algorithm to create.</param>
public void GenerateClusterAlignmentStatistics(string relativeDatabasePath, string relativeName, string name, FeatureAlignmentType alignmentType, LcmsFeatureClusteringAlgorithmType clusterType)
{
    var databasePath = GetPath(relativeDatabasePath);
    var outputFolder = GetOutputPath(relativeName);

    if (!Directory.Exists(outputFolder))
    {
        Directory.CreateDirectory(outputFolder);
    }

    // Connect to the NHibernate database
    var dataProviders = DataAccessFactory.CreateDataAccessProviders(databasePath, false);

    // Setup our alignment options
    var alignmentOptions = new AlignmentOptions();
    var spectralOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .01,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Options setup
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var clusterTolerances = new FeatureTolerances
    {
        Mass = preset.Mass + 6,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };

    UpdateStatus("Retrieving all datasets for test.");
    var datasets = dataProviders.DatasetCache.FindAll();

    // Create our algorithms
    var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType, alignmentOptions.LCMSWarpOptions, spectralOptions);
    var clusterer = ClusterFactory.Create(clusterType);
    clusterer.Parameters = new FeatureClusterParameters<UMCLight>
    {
        Tolerances = clusterTolerances
    };

    RegisterProgressNotifier(aligner);
    RegisterProgressNotifier(clusterer);

    // Walk the datasets pairwise: dataset N is the baseline for dataset N + 1.
    for (var aligneeIndex = 1; aligneeIndex < datasets.Count; aligneeIndex++)
    {
        var baselineIndex = aligneeIndex - 1;

        var matchPath = Path.Combine(outputFolder, string.Format("{0}-{1}-matches.txt", name, baselineIndex));
        var errorPath = Path.Combine(outputFolder, string.Format("{0}-{1}-errors.txt", name, baselineIndex));

        var aligneeDataset = datasets[aligneeIndex];
        var baselineDataset = datasets[baselineIndex];

        // Load the baseline reference set
        using (var baselineReader = new InformedProteomicsReader())
        {
            baselineReader.AddDataFile(baselineDataset.RawFile.Path, 0);

            // Load the alignee set
            using (var aligneeReader = new InformedProteomicsReader())
            {
                aligneeReader.AddDataFile(aligneeDataset.RawFile.Path, 0);

                var baselineFeatures = RetrieveFeatures(baselineDataset.DatasetId, dataProviders);
                var aligneeFeatures = RetrieveFeatures(aligneeDataset.DatasetId, dataProviders);

                var baselineSpectra = new CachedFeatureSpectraProvider(baselineReader, baselineFeatures);
                var aligneeSpectra = new CachedFeatureSpectraProvider(aligneeReader, aligneeFeatures);

                AlignDatasets(
                    baselineFeatures,
                    aligneeFeatures,
                    baselineSpectra,
                    aligneeSpectra,
                    aligner,
                    clusterer,
                    matchPath,
                    errorPath);
            }
        }
    }
}
/// <summary>
/// Creates the clusterer with new feature tolerances, the constant default scan
/// tolerance, and new LC-MS feature filtering options.
/// </summary>
public MsFeatureTreeClusterer()
{
    this.Tolerances = new FeatureTolerances();
    this.ScanTolerance = CONST_SCAN_TOLERANCE;
    this.FilteringOptions = new LcmsFeatureFilteringOptions();
}
/// <summary>
/// Creates the linker with a mass tolerance of 0.5 and the proton mass as the adduct mass.
/// </summary>
public BoxMSnLinker()
{
    this.Tolerances = new FeatureTolerances
    {
        Mass = .5
    };
    this.AdductMass = SubAtomicParticleLibrary.MASS_PROTON;
}
/// <summary>
/// Finds UMC features and, for every feature with at least two charge states, writes
/// its MS features per charge to an <c>_xics.csv</c> file and verifies that the first
/// scans of any two charge states are no more than <paramref name="maxScanDiff"/> apart.
/// </summary>
/// <param name="path">Path to the <c>_isos.csv</c> file.</param>
/// <param name="rawPath">Path to the raw data file.</param>
/// <param name="maxScanDiff">Largest allowed gap between the minimum scans of two charge states.</param>
public void TestUmcFeaturesMultipleCharges(string path, string rawPath, int maxScanDiff)
{
    var reader = new MsFeatureLightFileReader { Delimeter = "," };
    var newMsFeatures = reader.ReadFile(path);

    var finder = new UmcTreeFeatureFinder();
    var featureTolerances = new FeatureTolerances
    {
        Mass = 12,
        Net = .05
    };
    var options = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    var provider = RawLoaderFactory.CreateFileReader(rawPath);
    provider.AddDataFile(rawPath, 0);

    // Stopwatch is monotonic, unlike DateTime.Now, so the reported duration cannot be
    // skewed by clock adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
    stopwatch.Stop();
    Console.WriteLine(@"Test Took: " + stopwatch.Elapsed.TotalSeconds);

    // Fail through the test framework rather than throwing NullReferenceException by hand.
    Assert.IsNotNull(features, "The feature list came back empty.  This is a problem.");

    var dirPath = Path.GetDirectoryName(path);
    if (dirPath != null)
    {
        var xicPath = Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv"));
        using (var writer = File.CreateText(xicPath))
        {
            foreach (var feature in features)
            {
                writer.WriteLine();
                writer.WriteLine("Feature {0}", feature.Id);

                var chargeMap = feature.CreateChargeMap();

                // Only features observed in several charge states are of interest here.
                if (chargeMap.Keys.Count < 2)
                {
                    continue;
                }

                foreach (var charge in chargeMap.Keys)
                {
                    writer.WriteLine();
                    foreach (var msFeature in chargeMap[charge])
                    {
                        var count = msFeature.MSnSpectra.Count;
                        writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan, msFeature.Abundance, count);
                    }
                }

                // Compare every distinct pair of charge states once. The absolute difference
                // is required: the signed difference silently accepted any pair in which the
                // second charge state started later than the first.
                var charges = chargeMap.Keys.ToList();
                for (var i = 0; i < charges.Count; i++)
                {
                    for (var j = i + 1; j < charges.Count; j++)
                    {
                        var x = chargeMap[charges[i]];
                        var y = chargeMap[charges[j]];
                        var diff = System.Math.Abs(x.MinScan() - y.MinScan());
                        if (diff > maxScanDiff)
                        {
                            Assert.Fail("There is a problem with the feature finder across charge states");
                        }
                    }
                }
            }
        }
    }

    // Work on total feature count here.
    Assert.Greater(features.Count(), 0);
}
/// <summary>
/// Discovers the datasets in <paramref name="directory"/> and runs the MultiAlign analysis
/// with the first dataset as the baseline and every other dataset as an alignee.
/// </summary>
/// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
/// <param name="outputPath">Prefix of the result files: "{outputPath}.txt" and "{outputPath}-errors.txt".</param>
/// <param name="alignmentType">Alignment algorithm passed to the aligner factory.</param>
/// <param name="clusterType">Clustering algorithm passed to the cluster factory.</param>
public void TestClustering(
    string directory,
    string outputPath,
    FeatureAlignmentType alignmentType,
    LcmsFeatureClusteringAlgorithmType clusterType)
{
    var matchPath = string.Format("{0}.txt", outputPath);
    var errorPath = string.Format("{0}-errors.txt", outputPath);

    // Build one "*<extension>" search pattern per supported MultiAlign file type.
    var searchPatterns = DatasetLoader.SupportedFileTypes
        .Select(fileType => "*" + fileType.Extension)
        .ToList();

    // Find our datasets
    var loader = new DatasetLoader();
    var datasets = loader.GetValidDatasets(directory, searchPatterns, SearchOption.TopDirectoryOnly);

    // Alignment and spectral comparison settings
    var alignmentOptions = new AlignmentOptions();
    var spectralOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .01,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Tolerances start from the LTQ-Orbitrap preset, with the mass tolerance widened by 6.
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var featureTolerances = new FeatureTolerances
    {
        Mass = preset.Mass + 6,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };
    var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    // Create our algorithms
    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var aligner = FeatureAlignerFactory.CreateDatasetAligner(
        alignmentType,
        alignmentOptions.LCMSWarpOptions,
        spectralOptions);
    var clusterer = ClusterFactory.Create(clusterType);
    clusterer.Parameters = new FeatureClusterParameters<UMCLight>
    {
        Tolerances = featureTolerances
    };

    RegisterProgressNotifier(aligner);
    RegisterProgressNotifier(finder);
    RegisterProgressNotifier(clusterer);

    var lcmsFilters = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRangeScans = new FilterRange(50, 300)
    };
    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    // Only the first dataset is ever used as the baseline; all the others are alignees.
    var aligneeDatasets = datasets.Skip(1).ToList();
    PerformMultiAlignAnalysis(
        datasets[0],
        aligneeDatasets,
        featureFindingOptions,
        msFilterOptions,
        lcmsFilters,
        spectralOptions,
        finder,
        aligner,
        clusterer,
        matchPath,
        errorPath);
}
/// <summary>
/// Compares every feature of dataset 0 with every feature of dataset 1 and records the
/// mass and retention-time differences of all pairs that fall inside the tolerances.
/// </summary>
/// <param name="providers">Data access providers whose feature cache supplies datasets 0 and 1.</param>
/// <param name="massErrorPpm">Receives the value of <c>Feature.ComputeMassPPMDifference</c> for each accepted pair.</param>
/// <param name="netError">Receives the retention-time difference (dataset 0 minus dataset 1) for each accepted pair.</param>
/// <param name="counts">Receives, for each dataset 0 feature, the number of dataset 1 features accepted for it.</param>
/// <param name="tolerances">Supplies the mass and retention-time limits applied to the absolute differences.</param>
public void CalculateClusterErrorHistograms(FeatureDataAccessProviders providers,
    List<double> massErrorPpm,
    List<double> netError,
    List<double> counts,
    FeatureTolerances tolerances)
{
    List<UMCLight> featuresA = providers.FeatureCache.FindByDatasetId(0);
    List<UMCLight> featuresB = providers.FeatureCache.FindByDatasetId(1);

    // Sorting by monoisotopic mass fixes the order in which pairs are appended to the output lists.
    featuresA.Sort((x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic));
    featuresB.Sort((x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic));

    foreach (UMCLight featureA in featuresA)
    {
        double count = 0;

        foreach (UMCLight featureB in featuresB)
        {
            double ppmDiff = Feature.ComputeMassPPMDifference(featureB.MassMonoisotopicAligned,
                featureA.MassMonoisotopicAligned);
            if (Math.Abs(ppmDiff) > tolerances.Mass)
            {
                continue;
            }

            double netDiff = featureA.RetentionTime - featureB.RetentionTime;
            if (Math.Abs(netDiff) > tolerances.RetentionTime)
            {
                continue;
            }

            massErrorPpm.Add(ppmDiff);
            netError.Add(netDiff);

            // Previously the counter was never advanced, so every entry in counts was zero.
            count++;
        }

        counts.Add(count);
    }
}
/// <summary>
/// Builds LC-MS features for the first dataset found in <paramref name="directory"/>, links
/// peptides to them, and writes the MS features that carry at least one peptide to
/// <paramref name="matchPath"/> as tab-separated charge, m/z, scan and NET columns.
/// </summary>
/// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
/// <param name="matchPath">Path of the text file to create.</param>
public void TestPeptideBands(string directory, string matchPath)
{
    // One "*<extension>" search pattern per supported MultiAlign file type.
    var searchPatterns = DatasetInformation.SupportedFileTypes
        .Select(fileType => "*" + fileType.Extension)
        .ToList();

    // Find our datasets
    var inputFiles = DatasetSearcher.FindDatasets(directory, searchPatterns, SearchOption.TopDirectoryOnly);
    var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

    // Options setup
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var featureTolerances = new FeatureTolerances
    {
        Mass = preset.Mass,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };
    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };
    var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    var baselineDataset = datasets[0];

    UpdateStatus("Loading baseline features.");
    var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
    msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var peptideOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .05,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Load the baseline reference set
    using (var rawProvider = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
    {
        rawProvider.AddDataFile(baselineDataset.RawPath, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var baselineFeatures = featureFinder.FindFeatures(msFeatures, featureFindingOptions, rawProvider);

        LinkPeptidesToFeatures(
            baselineDataset.SequencePath,
            baselineFeatures,
            peptideOptions.Fdr,
            peptideOptions.IdScore);

        // Flatten the LC-MS features into their member MS features.
        var memberFeatures = new List<MSFeatureLight>();
        foreach (var lcmsFeature in baselineFeatures)
        {
            memberFeatures.AddRange(lcmsFeature.MsFeatures);
        }

        // Keep the members with MS/MS data, order them by m/z, then keep those for which
        // at least one spectrum has at least one peptide.
        var peptideList = memberFeatures
            .Where(member => member.HasMsMs())
            .OrderBy(member => member.Mz)
            .Where(member => member.MSnSpectra.Any(spectrum => spectrum.Peptides.Any()))
            .ToList();

        using (var writer = File.CreateText(matchPath))
        {
            writer.WriteLine("Charge\tpmz\tscan\tNET\t");
            foreach (var member in peptideList)
            {
                writer.WriteLine("{0}\t{1}\t{2}\t{3}\t",
                    member.ChargeState,
                    member.Mz,
                    member.Scan,
                    member.Net);
            }
        }
    }
}
/// <summary>
/// Finds UMC features in an MS feature file and checks that the charge states of each
/// multi-charge feature start within <paramref name="maxScanDiff"/> scans of each other.
/// </summary>
/// <remarks>
/// A text file is written beside <paramref name="path"/> (same file name with "_isos.csv"
/// replaced by "_xics.csv"). Every feature gets a "Feature {id}" header; member rows
/// (charge, m/z, scan, abundance, MSn spectra count) are only written for features that
/// have at least two charge states.
/// </remarks>
/// <param name="path">Path of the comma-delimited MS feature file to read.</param>
/// <param name="rawPath">Path of the raw data file given to the raw file reader.</param>
/// <param name="maxScanDiff">Largest allowed difference between the minimum scans of two charge states.</param>
public void TestUmcFeaturesMultipleCharges(string path, string rawPath, int maxScanDiff)
{
    var reader = new MsFeatureLightFileReader
    {
        Delimeter = ","
    };
    var newMsFeatures = reader.ReadFile(path);

    var finder = new UmcTreeFeatureFinder();
    var featureTolerances = new FeatureTolerances
    {
        Mass = 12,
        Net = .05
    };
    var options = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    // The raw file reader is disposable; the using block releases it even when a check below throws.
    using (var provider = RawLoaderFactory.CreateFileReader(rawPath))
    {
        provider.AddDataFile(rawPath, 0);

        var start = DateTime.Now;
        IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
        var end = DateTime.Now;
        Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);

        if (features == null)
        {
            throw new NullReferenceException("The feature list came back empty. This is a problem.");
        }

        var dirPath = Path.GetDirectoryName(path);
        if (dirPath != null)
        {
            var outputName = Path.GetFileName(path).Replace("_isos.csv", "_xics.csv");
            using (var writer = File.CreateText(Path.Combine(dirPath, outputName)))
            {
                foreach (var feature in features)
                {
                    writer.WriteLine();
                    writer.WriteLine("Feature {0}", feature.Id);

                    var chargeMap = feature.CreateChargeMap();
                    if (chargeMap.Keys.Count < 2)
                    {
                        continue;
                    }

                    foreach (var charge in chargeMap.Keys)
                    {
                        writer.WriteLine();
                        foreach (var msFeature in chargeMap[charge])
                        {
                            var count = msFeature.MSnSpectra.Count;
                            writer.WriteLine("{0},{1},{2},{3},{4}",
                                charge,
                                msFeature.Mz,
                                msFeature.Scan,
                                msFeature.Abundance,
                                count);
                        }
                    }

                    var charges = chargeMap.Keys.ToList();
                    for (var i = 0; i < charges.Count; i++)
                    {
                        for (var j = i; j < charges.Count; j++)
                        {
                            var x = chargeMap[charges[i]];
                            var y = chargeMap[charges[j]];

                            // Each pair is visited once, so compare the magnitude: a charge state
                            // that starts much later than its partner is as wrong as one that
                            // starts much earlier.
                            var diff = Math.Abs(x.MinScan() - y.MinScan());
                            if (diff > maxScanDiff)
                            {
                                throw new Exception(
                                    "There is a problem with the feature finder across charge states");
                            }
                        }
                    }
                }
            }
        }

        // Work on total feature count here.
        Assert.Greater(features.Count(), 0);
    }
}
/// <summary>
/// Creates feature finding options that use the supplied instrument tolerances.
/// The parameterless constructor runs first, so every other setting keeps the value it assigns.
/// </summary>
/// <param name="tolerances">Tolerances stored in <c>InstrumentTolerances</c>.</param>
public LcmsFeatureFindingOptions(FeatureTolerances tolerances)
    : this()
{
    this.InstrumentTolerances = tolerances;
}
/// <summary>
/// Detects LC-MS features in a baseline and an alignee MS feature file, clusters their
/// MS/MS spectra, and writes two timestamped result files under <paramref name="resultPath"/>.
/// </summary>
/// <remarks>
/// The "..._scans.txt" file lists, for every cluster with exactly two features, the feature
/// scans followed by the cluster's mean score. The second file lists every cluster twice:
/// once as a single line under "[Scans]" and once feature-by-feature under "[Clusters]".
/// </remarks>
/// <param name="name">Prefix used in the result file names.</param>
/// <param name="resultPath">Directory the result files are written to.</param>
/// <param name="sequencePath">Sequence file registered as an input of both datasets.</param>
/// <param name="type">Not used by this method.</param>
/// <param name="baseline">Baseline MS feature file; its raw file path is derived by replacing "_isos.csv" with ".raw".</param>
/// <param name="features">Alignee MS feature file; its raw file path is derived the same way.</param>
/// <param name="percent">Value assigned to <c>TopPercent</c> of the spectral comparer.</param>
public void ClusterMsMs(string name,
    string resultPath,
    string sequencePath,
    SequenceFileType type,
    string baseline,
    string features,
    double percent)
{
    var baselineRaw = baseline.Replace("_isos.csv", ".raw");
    var featuresRaw = features.Replace("_isos.csv", ".raw");

    Console.WriteLine("Create Baseline Information");
    var baselineInfo = new DatasetInformation
    {
        DatasetId = 0,
    };
    baselineInfo.InputFiles.Add(new InputFile { Path = baseline, FileType = InputFileType.Features });
    baselineInfo.InputFiles.Add(new InputFile { Path = baselineRaw, FileType = InputFileType.Raw });
    baselineInfo.InputFiles.Add(new InputFile { Path = sequencePath, FileType = InputFileType.Sequence });

    Console.WriteLine("Create Alignee Information");
    var aligneeInfo = new DatasetInformation
    {
        DatasetId = 1,
    };
    aligneeInfo.InputFiles.Add(new InputFile { Path = features, FileType = InputFileType.Features });
    aligneeInfo.InputFiles.Add(new InputFile { Path = featuresRaw, FileType = InputFileType.Raw });
    aligneeInfo.InputFiles.Add(new InputFile { Path = sequencePath, FileType = InputFileType.Sequence });

    var reader = new MsFeatureLightFileReader();

    Console.WriteLine("Reading Baseline Features");
    var baselineMsFeatures = reader.ReadFile(baseline).ToList();
    baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

    Console.WriteLine("Reading Alignee Features");
    var aligneeMsFeatures = reader.ReadFile(features).ToList();
    aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var tolerances = new FeatureTolerances
    {
        Mass = 8,
        Net = .005
    };
    var options = new LcmsFeatureFindingOptions(tolerances);

    Console.WriteLine("Detecting Baseline Features");
    var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

    Console.WriteLine("Detecting Alignee Features");
    var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

    Console.WriteLine("Managing baseline and alignee features");
    baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
    aligneeFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    Console.WriteLine("Clustering MS/MS Spectra");
    var clusterer = new MSMSClusterer();
    clusterer.MzTolerance = .5;
    clusterer.MassTolerance = 6;
    clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
    {
        TopPercent = percent
    };
    clusterer.SimilarityTolerance = .5;
    clusterer.ScanRange = 905;
    clusterer.Progress += clusterer_Progress;

    var allFeatures = new List<UMCLight>();
    allFeatures.AddRange(baselineFeatures);
    allFeatures.AddRange(aligneeFeatures);

    // NOTE(review): the return values are discarded; presumably these calls make the cache
    // open a provider for each raw file before clustering - confirm against SpectraProviderCache.
    var spectraProviderCache = new SpectraProviderCache();
    spectraProviderCache.GetSpectraProvider(baselineInfo.RawFile.Path, baselineInfo.DatasetId);
    spectraProviderCache.GetSpectraProvider(aligneeInfo.RawFile.Path, aligneeInfo.DatasetId);

    List<MsmsCluster> clusters = clusterer.Cluster(allFeatures, spectraProviderCache);

    // Check for null BEFORE touching clusters.Count; the count used to be logged first,
    // which made the null check below it unreachable on a null result.
    if (clusters == null)
    {
        return;
    }
    Console.WriteLine("Found {0} Total Clusters", clusters.Count);

    var now = DateTime.Now;
    var testResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}_scans.txt",
        name,
        now.Year,
        now.Month,
        now.Day,
        now.Hour,
        now.Minute,
        now.Second,
        resultPath);

    using (TextWriter writer = File.CreateText(testResultPath))
    {
        writer.WriteLine("[Data]");
        writer.WriteLine("{0}", baseline);
        writer.WriteLine("{0}", features);
        writer.WriteLine("[Scans]");
        writer.WriteLine();

        foreach (var cluster in clusters)
        {
            var scanData = "";
            if (cluster.Features.Count == 2)
            {
                foreach (var feature in cluster.Features)
                {
                    scanData += string.Format("{0},", feature.Scan);
                }
                scanData += string.Format("{0}", cluster.MeanScore);
                writer.WriteLine(scanData);
            }
        }
    }

    testResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}.txt",
        name,
        now.Year,
        now.Month,
        now.Day,
        now.Hour,
        now.Minute,
        now.Second,
        resultPath);

    using (TextWriter writer = File.CreateText(testResultPath))
    {
        writer.WriteLine("[Data]");
        writer.WriteLine("{0}", baseline);
        writer.WriteLine("{0}", features);
        writer.WriteLine("[Scans]");

        foreach (var cluster in clusters)
        {
            var scanData = "";
            var data = "";
            foreach (var feature in cluster.Features)
            {
                scanData += string.Format("{0},", feature.Scan);
                data += string.Format("{0},{1},{2},{3},{4},{5}",
                    feature.GroupId,
                    feature.Id,
                    feature.MassMonoisotopic,
                    feature.Mz,
                    feature.ChargeState,
                    feature.Scan);

                foreach (var spectrum in feature.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }
            }
            // scanData already ends with a comma; the output layout is kept exactly as it was.
            writer.WriteLine(scanData + "," + data);
        }

        writer.WriteLine("");
        writer.WriteLine("");
        writer.WriteLine("[Clusters]");

        foreach (var cluster in clusters)
        {
            writer.WriteLine("cluster id, cluster score");
            writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
            writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

            foreach (var feature in cluster.Features)
            {
                var data = string.Format("{0},{1},{2},{3},{4},{5}",
                    feature.GroupId,
                    feature.Id,
                    feature.MassMonoisotopic,
                    feature.Mz,
                    feature.ChargeState,
                    feature.Scan);

                foreach (var spectrum in feature.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }
                writer.WriteLine(data);
            }
        }
    }
}
/// <summary>
/// Creates clustering options with a freshly constructed set of instrument tolerances.
/// </summary>
public LcmsClusteringOptions()
{
    this.InstrumentTolerances = new FeatureTolerances();
}
/// <summary>
/// Aligns the features of two MS feature files with LCMSWarp and exports the alignment
/// heat scores as an SVG heat map.
/// </summary>
/// <param name="path1">Relative path of the baseline MS feature file.</param>
/// <param name="path2">Relative path of the alignee MS feature file.</param>
/// <param name="svgPath">Output path relative to the heat map results folder; ".svg" is appended.</param>
public void TestLcmsWarpAlignment(string path1, string path2, string svgPath)
{
    // Convert relative paths to absolute paths
    path1 = GetPath(path1);
    path2 = GetPath(path2);
    svgPath = GetPath(HEATMAP_RESULTS_FOLDER_BASE + svgPath);

    var aligner = new LcmsWarpFeatureAligner();

    var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1);
    var aligneeMs = UmcLoaderFactory.LoadMsFeatureData(path2);

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var options = new LcmsFeatureFindingOptions(new FeatureTolerances
    {
        FragmentationWindowSize = .5,
        Mass = 13,
        DriftTime = .3,
        Net = .01
    })
    {
        MaximumNetRange = .002
    };

    var baseline = finder.FindFeatures(baselineMs, options, null);
    var alignee = finder.FindFeatures(aligneeMs, options, null);
    var alignment = aligner.Align(baseline, alignee);

    // Plot: a colour axis on the right, a linear axis at the bottom and a default linear axis.
    var plot = new PlotModel
    {
        Subtitle = "Interpolated, cartesian axes",
        Title = "HeatMapSeries"
    };
    plot.Axes.Add(new LinearColorAxis
    {
        InvalidNumberColor = OxyColors.Gray,
        Position = AxisPosition.Right,
        Palette = OxyPalettes.Hot(200)
    });
    plot.Axes.Add(new LinearAxis {Position = AxisPosition.Bottom});
    plot.Axes.Add(new LinearAxis());

    // Copy the heat scores into a double matrix of the same dimensions.
    var heatScores = alignment.heatScores;
    var rowCount = heatScores.GetLength(0);
    var columnCount = heatScores.GetLength(1);
    var heatData = new double[rowCount, columnCount];
    for (var row = 0; row < rowCount; row++)
    {
        for (var column = 0; column < columnCount; column++)
        {
            heatData[row, column] = Convert.ToDouble(heatScores[row, column]);
        }
    }

    plot.Series.Add(new HeatMapSeries
    {
        X0 = 0,
        X1 = 1,
        Y0 = 0,
        Y1 = 1,
        FontSize = .2,
        Data = heatData
    });

    var svgText = new SvgExporter().ExportToString(plot);
    File.WriteAllText(svgPath + ".svg", svgText);
}
/// <summary>
/// Detects LC-MS features in a baseline and an alignee MS feature file, clusters their
/// MS/MS spectra using the Thermo raw files, and writes two timestamped result files under
/// <paramref name="resultPath"/>.
/// </summary>
/// <remarks>
/// The "..._scans.txt" file lists, for every cluster with exactly two features, the feature
/// scans followed by the cluster's mean score. The second file lists every cluster twice:
/// once as a single line under "[Scans]" and once feature-by-feature under "[Clusters]".
/// </remarks>
/// <param name="name">Prefix used in the result file names.</param>
/// <param name="resultPath">Directory the result files are written to.</param>
/// <param name="sequencePath">Sequence file recorded on both dataset descriptions.</param>
/// <param name="type">Not used by this method.</param>
/// <param name="baseline">Baseline MS feature file; its raw file path is derived by replacing "_isos.csv" with ".raw".</param>
/// <param name="features">Alignee MS feature file; its raw file path is derived the same way.</param>
/// <param name="percent">Value assigned to <c>TopPercent</c> of the spectral comparer.</param>
public void ClusterMsMs(string name,
    string resultPath,
    string sequencePath,
    SequenceFileType type,
    string baseline,
    string features,
    double percent)
{
    var baselineRaw = baseline.Replace("_isos.csv", ".raw");
    var featuresRaw = features.Replace("_isos.csv", ".raw");

    Console.WriteLine("Create Baseline Information");
    var baselineInfo = new DatasetInformation
    {
        DatasetId = 0,
        Features = new InputFile {Path = baseline},
        Raw = new InputFile {Path = baselineRaw},
        Sequence = new InputFile {Path = sequencePath}
    };

    Console.WriteLine("Create Alignee Information");
    var aligneeInfo = new DatasetInformation
    {
        DatasetId = 1,
        Features = new InputFile {Path = features},
        Raw = new InputFile {Path = featuresRaw},
        Sequence = new InputFile {Path = sequencePath}
    };

    var reader = new MsFeatureLightFileReader();

    Console.WriteLine("Reading Baseline Features");
    var baselineMsFeatures = reader.ReadFile(baseline).ToList();
    baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

    Console.WriteLine("Reading Alignee Features");
    var aligneeMsFeatures = reader.ReadFile(features).ToList();
    aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var tolerances = new FeatureTolerances
    {
        Mass = 8,
        Net = .005
    };
    var options = new LcmsFeatureFindingOptions(tolerances);

    Console.WriteLine("Detecting Baseline Features");
    var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

    Console.WriteLine("Detecting Alignee Features");
    var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

    Console.WriteLine("Managing baseline and alignee features");
    baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
    aligneeFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    Console.WriteLine("Clustering MS/MS Spectra");
    var clusterer = new MSMSClusterer();
    clusterer.MzTolerance = .5;
    clusterer.MassTolerance = 6;
    clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
    {
        TopPercent = percent
    };
    clusterer.SimilarityTolerance = .5;
    clusterer.ScanRange = 905;
    clusterer.Progress += clusterer_Progress;

    var allFeatures = new List<UMCLight>();
    allFeatures.AddRange(baselineFeatures);
    allFeatures.AddRange(aligneeFeatures);

    List<MsmsCluster> clusters = null;
    using (var rawReader = new ThermoRawDataFileReader())
    {
        rawReader.AddDataFile(baselineInfo.Raw.Path, baselineInfo.DatasetId);
        rawReader.AddDataFile(aligneeInfo.Raw.Path, aligneeInfo.DatasetId);

        clusters = clusterer.Cluster(allFeatures, rawReader);
    }

    // Check for null BEFORE touching clusters.Count; the count used to be logged first,
    // which made the null check below it unreachable on a null result.
    if (clusters == null)
    {
        return;
    }
    Console.WriteLine("Found {0} Total Clusters", clusters.Count);

    var now = DateTime.Now;
    var testResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}_scans.txt",
        name,
        now.Year,
        now.Month,
        now.Day,
        now.Hour,
        now.Minute,
        now.Second,
        resultPath);

    using (TextWriter writer = File.CreateText(testResultPath))
    {
        writer.WriteLine("[Data]");
        writer.WriteLine("{0}", baseline);
        writer.WriteLine("{0}", features);
        writer.WriteLine("[Scans]");
        writer.WriteLine();

        foreach (var cluster in clusters)
        {
            var scanData = "";
            if (cluster.Features.Count == 2)
            {
                foreach (var feature in cluster.Features)
                {
                    scanData += string.Format("{0},", feature.Scan);
                }
                scanData += string.Format("{0}", cluster.MeanScore);
                writer.WriteLine(scanData);
            }
        }
    }

    testResultPath = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}.txt",
        name,
        now.Year,
        now.Month,
        now.Day,
        now.Hour,
        now.Minute,
        now.Second,
        resultPath);

    using (TextWriter writer = File.CreateText(testResultPath))
    {
        writer.WriteLine("[Data]");
        writer.WriteLine("{0}", baseline);
        writer.WriteLine("{0}", features);
        writer.WriteLine("[Scans]");

        foreach (var cluster in clusters)
        {
            var scanData = "";
            var data = "";
            foreach (var feature in cluster.Features)
            {
                scanData += string.Format("{0},", feature.Scan);
                data += string.Format("{0},{1},{2},{3},{4},{5}",
                    feature.GroupId,
                    feature.Id,
                    feature.MassMonoisotopic,
                    feature.Mz,
                    feature.ChargeState,
                    feature.Scan);

                foreach (var spectrum in feature.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }
            }
            // scanData already ends with a comma; the output layout is kept exactly as it was.
            writer.WriteLine(scanData + "," + data);
        }

        writer.WriteLine("");
        writer.WriteLine("");
        writer.WriteLine("[Clusters]");

        foreach (var cluster in clusters)
        {
            writer.WriteLine("cluster id, cluster score");
            writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
            writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

            foreach (var feature in cluster.Features)
            {
                var data = string.Format("{0},{1},{2},{3},{4},{5}",
                    feature.GroupId,
                    feature.Id,
                    feature.MassMonoisotopic,
                    feature.Mz,
                    feature.ChargeState,
                    feature.Scan);

                foreach (var spectrum in feature.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }
                writer.WriteLine(data);
            }
        }
    }
}
/// <summary>
/// Creates peak matcher options with a freshly constructed set of tolerances and a
/// Dalton shift of zero.
/// </summary>
public PeakMatcherOptions()
{
    this.Tolerances = new FeatureTolerances();
    this.DaltonShift = 0;
}
/// <summary>
/// Clusters the filtered MS features of one file into LC-MS features, filters them by
/// feature length, and writes one delimited row per remaining feature.
/// </summary>
/// <param name="relativePath">Relative path of the MS feature file to read.</param>
/// <param name="outputPath">Relative path of the text file to create.</param>
public void CreateFeaturesTest(string relativePath, string outputPath)
{
    var inputPath = GetPath(relativePath);

    var clusterTolerances = new FeatureTolerances
    {
        Mass = 13,
        Net = .01,
        DriftTime = 30,
        FragmentationWindowSize = .5
    };

    // Read the MS features and drop those rejected by the charge/intensity/deisotoping settings.
    var rawFeatures = new MsFeatureLightFileReader().ReadFile(inputPath);
    rawFeatures = LcmsFeatureFilters.FilterMsFeatures(
        rawFeatures,
        new MsFeatureFilteringOptions
        {
            ChargeRange = new FilterRange(1, 6),
            MinimumIntensity = 200000,
            ShouldUseDeisotopingFilter = true,
            ShouldUseIntensityFilter = true
        });

    var finder = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>
    {
        Tolerances = clusterTolerances
    };
    finder.Progress += (sender, args) => Console.WriteLine(args.Message);

    var features = finder.Cluster(rawFeatures.ToList());

    var lengthFilter = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRange = new FilterRange
        {
            Maximum = 30,
            Minimum = 10
        }
    };
    features = LcmsFeatureFilters.FilterFeatures(features, lengthFilter);

    Console.WriteLine(@"Found - {0} features", features.Count);

    using (var writer = File.CreateText(GetPath(outputPath)))
    {
        // Ids are reassigned sequentially, in output order, before the statistics are computed.
        var nextId = 0;
        foreach (var feature in features)
        {
            feature.Id = nextId;
            nextId++;
            feature.CalculateStatistics(ClusterCentroidRepresentation.Mean);

            writer.WriteLine("{1}{0}{2}{0}{3}{0}{4}{0}{5}{0}{6}{0}{7}{0}{8}{0}{9}{0}{10}",
                TextDelimiter,
                feature.Net,
                feature.ChargeState,
                feature.Mz,
                feature.Scan,
                feature.MassMonoisotopic,
                feature.MassMonoisotopicAligned,
                feature.Id,
                feature.ScanStart,
                feature.ScanEnd,
                feature.ScanAligned);
        }
    }
}
/// <summary>
/// Creates a database at <paramref name="databasePath"/>, finds the features of every
/// dataset in <paramref name="directoryPath"/>, caches them, and stores the dataset list.
/// </summary>
/// <param name="directoryPath">Relative path of the directory searched (top level only) for datasets.</param>
/// <param name="databasePath">Relative path of the database to create.</param>
public void CreateFeatureDatabase(string directoryPath, string databasePath)
{
    var directory = GetPath(directoryPath);
    databasePath = GetPath(databasePath);

    // One "*<extension>" search pattern per supported MultiAlign file type.
    var searchPatterns = DatasetLoader.SupportedFileTypes
        .Select(fileType => "*" + fileType.Extension)
        .ToList();

    // Find our datasets
    var loader = new DatasetLoader();
    var datasets = loader.GetValidDatasets(directory, searchPatterns, SearchOption.TopDirectoryOnly);

    // Tolerances start from the LTQ-Orbitrap preset, with the mass tolerance widened by 6.
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var featureFindingOptions = new LcmsFeatureFindingOptions(new FeatureTolerances
    {
        Mass = preset.Mass + 6,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    })
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    var lcmsFilters = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRangeScans = new FilterRange(50, 300)
    };
    var msFilterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };
    var spectralOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .01,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    NHibernateUtil.CreateDatabase(databasePath);

    // Synchronization and IO for serializing all data to the database.
    var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, true);
    var featureCache = new FeatureLoader
    {
        Providers = providers
    };

    // Dataset ids are assigned sequentially, in discovery order, starting at zero.
    var nextDatasetId = 0;
    foreach (var dataset in datasets)
    {
        dataset.DatasetId = nextDatasetId;
        nextDatasetId++;

        var datasetFeatures = FindFeatures(
            dataset,
            featureFindingOptions,
            msFilterOptions,
            lcmsFilters,
            spectralOptions,
            finder);
        featureCache.CacheFeatures(datasetFeatures);
    }

    providers.DatasetCache.AddAll(datasets);
}
/// <summary>
/// Clusters the filtered MS features of one file into LC-MS features, filters them by
/// feature length in scans, and writes one delimited row per remaining feature.
/// </summary>
/// <param name="relativePath">Relative path of the MS feature file to read.</param>
/// <param name="outputPath">Relative path of the text file to create.</param>
public void CreateFeaturesTest(string relativePath, string outputPath)
{
    var inputPath = GetPath(relativePath);

    var clusterTolerances = new FeatureTolerances
    {
        Mass = 13,
        Net = .01,
        DriftTime = 30,
        FragmentationWindowSize = .5
    };

    // Read the MS features and drop those rejected by the charge/intensity/deisotoping settings.
    var rawFeatures = new MsFeatureLightFileReader().ReadFile(inputPath);
    rawFeatures = LcmsFeatureFilters.FilterMsFeatures(
        rawFeatures,
        new MsFeatureFilteringOptions
        {
            ChargeRange = new FilterRange(1, 6),
            MinimumIntensity = 200000,
            ShouldUseDeisotopingFilter = true,
            ShouldUseIntensityFilter = true
        });

    var finder = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>
    {
        Tolerances = clusterTolerances
    };
    finder.Progress += (sender, args) => Console.WriteLine(args.Message);

    var features = finder.Cluster(rawFeatures.ToList());

    var lengthFilter = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRangeScans = new FilterRange
        {
            Maximum = 30,
            Minimum = 10
        }
    };
    features = LcmsFeatureFilters.FilterFeatures(features, lengthFilter);

    Console.WriteLine(@"Found - {0} features", features.Count);

    using (var writer = File.CreateText(GetPath(outputPath)))
    {
        // Ids are reassigned sequentially, in output order, before the statistics are computed.
        var nextId = 0;
        foreach (var feature in features)
        {
            feature.Id = nextId;
            nextId++;
            feature.CalculateStatistics(ClusterCentroidRepresentation.Mean);

            writer.WriteLine("{1}{0}{2}{0}{3}{0}{4}{0}{5}{0}{6}{0}{7}{0}{8}{0}{9}{0}{10}",
                TextDelimiter,
                feature.Net,
                feature.ChargeState,
                feature.Mz,
                feature.Scan,
                feature.MassMonoisotopic,
                feature.MassMonoisotopicAligned,
                feature.Id,
                feature.ScanStart,
                feature.ScanEnd,
                feature.ScanAligned);
        }
    }
}
/// <summary>
/// Finds UMC features in an MS feature file, writes the per-charge-state members of every
/// feature to a text file beside the input, and asserts that at least one feature was found.
/// </summary>
/// <remarks>
/// The output file has the input's file name with "_isos.csv" replaced by "_xics.csv"; each
/// member row holds charge, m/z, scan, abundance and MSn spectra count.
/// </remarks>
/// <param name="relativePath">Relative path of the comma-delimited MS feature file.</param>
/// <param name="relativeRawPath">Relative path of the raw data file given to the raw file reader.</param>
public void TestUmcFeatures(string relativePath, string relativeRawPath)
{
    // Get absolute paths
    var path = GetPath(relativePath);
    var rawPath = GetPath(relativeRawPath);

    var reader = new MsFeatureLightFileReader
    {
        Delimiter = ','
    };
    var newMsFeatures = reader.ReadFile(path);

    var finder = new UmcTreeFeatureFinder();
    var featureTolerances = new FeatureTolerances
    {
        Mass = 12,
        Net = .04
    };
    var options = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .003,
        MaximumScanRange = 50
    };

    // The raw file reader is disposable; the using block releases it even when a check below throws.
    using (var provider = RawLoaderFactory.CreateFileReader(rawPath, 0))
    {
        var start = DateTime.Now;
        IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
        var end = DateTime.Now;
        Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);

        if (features == null)
        {
            throw new NullReferenceException("The feature list came back empty. This is a problem.");
        }

        var dirPath = Path.GetDirectoryName(path);
        if (dirPath != null)
        {
            var outputName = Path.GetFileName(path).Replace("_isos.csv", "_xics.csv");
            using (var writer = File.CreateText(Path.Combine(dirPath, outputName)))
            {
                foreach (var feature in features)
                {
                    writer.WriteLine();
                    writer.WriteLine("Feature {0}", feature.Id);

                    var chargeMap = feature.CreateChargeMap();
                    foreach (var charge in chargeMap.Keys)
                    {
                        writer.WriteLine();
                        foreach (var msFeature in chargeMap[charge])
                        {
                            var count = msFeature.MSnSpectra.Count;
                            writer.WriteLine("{0},{1},{2},{3},{4}",
                                charge,
                                msFeature.Mz,
                                msFeature.Scan,
                                msFeature.Abundance,
                                count);
                        }
                    }
                }
            }
        }

        // Work on total feature count here.
        Assert.Greater(features.Count(), 0);
    }
}
/// <summary>
/// Reads a comma-delimited MS feature file, runs the UMC tree feature finder without a raw
/// data provider, and asserts that at least one feature was produced.
/// </summary>
/// <param name="path">Path of the MS feature file to read.</param>
/// <returns>The features returned by the finder.</returns>
public IEnumerable<UMCLight> TestUmcFeatures(string path)
{
    var featureReader = new MsFeatureLightFileReader {Delimeter = ","};
    var msFeatures = featureReader.ReadFile(path);

    var featureFinder = new UmcTreeFeatureFinder
    {
        MaximumNet = .005,
        MaximumScan = 50
    };
    var findingOptions = new LcmsFeatureFindingOptions(new FeatureTolerances
    {
        Mass = 8,
        Net = .005
    });

    // No raw data provider is supplied (null).
    var found = featureFinder.FindFeatures(msFeatures.ToList(), findingOptions, null);

    // Work on total feature count here.
    Assert.Greater(found.Count, 0);

    return found;
}
/// <summary>
/// Cluster-list overload of the error histogram calculation. It is not implemented and
/// always throws.
/// </summary>
/// <param name="list">Unused.</param>
/// <param name="mass20ppm">Unused.</param>
/// <param name="net20ppm">Unused.</param>
/// <param name="featureCounts20ppm">Unused.</param>
/// <param name="tolerances">Unused.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void CalculateClusterErrorHistograms(
    List<UMCClusterLight> list,
    List<double> mass20ppm,
    List<double> net20ppm,
    List<double> featureCounts20ppm,
    FeatureTolerances tolerances)
{
    throw new NotImplementedException();
}
/// <summary>
/// Aligns the features of two MS feature files with LCMSWarp, renders the alignment heat
/// scores to a PNG, and saves heat map and error histogram plots as SVG files.
/// </summary>
/// <remarks>
/// The SVG files are written to the PNG's directory and named after the PNG's file name
/// without extension, suffixed with "_heatmap.svg", "_netHistogram.svg" and
/// "_massHistogram.svg". The output directory is created when it does not exist.
/// </remarks>
/// <param name="path1">Relative path of the baseline MS feature file.</param>
/// <param name="path2">Relative path of the alignee MS feature file.</param>
/// <param name="pngPath">Relative path of the PNG file to write.</param>
/// <exception cref="DirectoryNotFoundException">The output path has no parent directory.</exception>
public void TestMsFeatureScatterPlot(string path1, string path2, string pngPath)
{
    // Convert relative paths to absolute paths
    path1 = GetPath(path1);
    path2 = GetPath(path2);
    pngPath = GetPath(pngPath);

    var fiOutput = new FileInfo(pngPath);
    var didirectory = fiOutput.Directory;
    if (didirectory == null)
    {
        throw new DirectoryNotFoundException(pngPath);
    }
    if (!didirectory.Exists)
    {
        didirectory.Create();
    }

    var aligner = new LcmsWarpFeatureAligner(new LcmsWarpAlignmentOptions());

    var isosFilterOptions = new DeconToolsIsosFilterOptions();
    var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1, isosFilterOptions);
    var aligneeMs = UmcLoaderFactory.LoadMsFeatureData(path2, isosFilterOptions);

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var tolerances = new FeatureTolerances
    {
        FragmentationWindowSize = .5,
        Mass = 13,
        DriftTime = .3,
        Net = .01
    };
    var options = new LcmsFeatureFindingOptions(tolerances);
    options.MaximumNetRange = .002;

    var baseline = finder.FindFeatures(baselineMs, options, null);
    var alignee = finder.FindFeatures(aligneeMs, options, null);
    var alignmentResults = aligner.Align(baseline, alignee);

    var plotModel1 = new PlotModel
    {
        Subtitle = "Interpolated, cartesian axes",
        Title = "HeatMapSeries"
    };

    var palette = OxyPalettes.Hot(200);
    var linearColorAxis1 = new LinearColorAxis
    {
        InvalidNumberColor = OxyColors.Gray,
        Position = AxisPosition.Right,
        Palette = palette
    };
    plotModel1.Axes.Add(linearColorAxis1);

    var linearAxis1 = new LinearAxis { Position = AxisPosition.Bottom };
    plotModel1.Axes.Add(linearAxis1);

    var linearAxis2 = new LinearAxis();
    plotModel1.Axes.Add(linearAxis2);

    var heatMapSeries1 = new HeatMapSeries
    {
        X0 = 0,
        X1 = 1,
        Y0 = 0,
        Y1 = 1,
        FontSize = .2
    };

    // Copy the heat scores into a double matrix of the same dimensions.
    var scores = alignmentResults.HeatScores;
    var width = scores.GetLength(0);
    var height = scores.GetLength(1);

    heatMapSeries1.Data = new double[width, height];
    var seriesData = heatMapSeries1.Data;
    for (var i = 0; i < width; i++)
    {
        for (var j = 0; j < height; j++)
        {
            seriesData[i, j] = Convert.ToDouble(scores[i, j]);
        }
    }

    plotModel1.Series.Add(heatMapSeries1);

    // Export the plot to SVG text, then rasterize that SVG to produce the PNG.
    var svg = new SvgExporter();
    var svgString = svg.ExportToString(plotModel1);

    var xml = new XmlDocument();
    xml.LoadXml(svgString);
    var x = SvgDocument.Open(xml);

    // The rendered bitmap is disposable; release it as soon as the file has been saved.
    using (var bmp = x.Draw())
    {
        bmp.Save(pngPath);
    }

    var heatmap = HeatmapFactory.CreateAlignedHeatmap(alignmentResults.HeatScores, false);
    var netHistogram = HistogramFactory.CreateHistogram(alignmentResults.NetErrorHistogram, "NET Error", "NET Error");
    var massHistogram = HistogramFactory.CreateHistogram(alignmentResults.MassErrorHistogram, "Mass Error", "Mass Error (ppm)");

    var baseName = Path.Combine(didirectory.FullName, Path.GetFileNameWithoutExtension(fiOutput.Name));

    var encoder = new SvgEncoder();
    PlotImageUtility.SaveImage(heatmap, baseName + "_heatmap.svg", encoder);
    PlotImageUtility.SaveImage(netHistogram, baseName + "_netHistogram.svg", encoder);
    PlotImageUtility.SaveImage(massHistogram, baseName + "_massHistogram.svg", encoder);
}
/// <summary>
/// Takes the first valid dataset found in <paramref name="directory"/>, builds LC-MS
/// features from its filtered MS features, links them to the dataset's sequence file via
/// LinkPeptidesToFeatures, and writes every MS feature that has an MS/MS spectrum with at
/// least one peptide to a tab-delimited text file.
/// </summary>
/// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
/// <param name="matchPath">Path of the tab-delimited output file that is created.</param>
public void TestPeptideBands(string directory, string matchPath)
{
    // One "*<extension>" search pattern per supported MultiAlign file type
    var searchPatterns = DatasetLoader.SupportedFileTypes
                                      .Select(fileType => "*" + fileType.Extension)
                                      .ToList();

    // Locate the datasets
    var loader = new DatasetLoader();
    var datasets = loader.GetValidDatasets(directory, searchPatterns, SearchOption.TopDirectoryOnly);

    // Tolerances are taken from the LTQ Orbitrap instrument preset
    var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
    var tolerances = new FeatureTolerances
    {
        Mass = preset.Mass,
        Net = preset.NetTolerance,
        DriftTime = preset.DriftTimeTolerance
    };

    var filterOptions = new MsFeatureFilteringOptions
    {
        MinimumIntensity = 5000,
        ChargeRange = new FilterRange(1, 6),
        ShouldUseChargeFilter = true,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };

    var findingOptions = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    // Only the first dataset is processed
    var baselineDataset = datasets[0];

    UpdateStatus("Loading baseline features.");
    var msFeatures = LcmsFeatureFilters.FilterMsFeatures(
        UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path),
        filterOptions);

    var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

    // Only Fdr and IdScore of these options are consumed below
    var peptideOptions = new SpectralOptions
    {
        ComparerType = SpectralComparison.CosineDotProduct,
        Fdr = .05,
        IdScore = 1e-09,
        MzBinSize = .5,
        MzTolerance = .5,
        NetTolerance = .1,
        RequiredPeakCount = 32,
        SimilarityCutoff = .75,
        TopIonPercent = .8
    };

    // Load the baseline reference set
    using (var rawProvider = RawLoaderFactory.CreateFileReader(baselineDataset.RawFile.Path))
    {
        rawProvider.AddDataFile(baselineDataset.RawFile.Path, 0);

        UpdateStatus("Creating Baseline LCMS Features.");
        var baselineFeatures = featureFinder.FindFeatures(msFeatures, findingOptions, rawProvider);

        LinkPeptidesToFeatures(baselineDataset.Sequence.Path,
                               baselineFeatures,
                               peptideOptions.Fdr,
                               peptideOptions.IdScore);

        // Flatten to MS features, keep those with MS/MS data, ordered by m/z
        var fragmentedFeatures = baselineFeatures.SelectMany(lcmsFeature => lcmsFeature.MsFeatures)
                                                 .Where(msFeature => msFeature.HasMsMs())
                                                 .OrderBy(msFeature => msFeature.Mz)
                                                 .ToList();

        // Keep each feature once if any of its spectra carries at least one peptide
        var identifiedFeatures = fragmentedFeatures
            .Where(msFeature => msFeature.MSnSpectra.Any(spectrum => spectrum.Peptides.Any()))
            .ToList();

        using (var output = File.CreateText(matchPath))
        {
            output.WriteLine("Charge\tpmz\tscan\tNET\t");
            foreach (var match in identifiedFeatures)
            {
                output.WriteLine("{0}\t{1}\t{2}\t{3}\t", match.ChargeState, match.Mz, match.Scan, match.Net);
            }
        }
    }
}