/// <summary>
/// Reads MS features from a comma-delimited file, runs <c>UmcTreeFeatureFinder</c> over them
/// without a raw data provider, and asserts that at least one feature was found.
/// </summary>
/// <param name="path">Path of the MS feature file passed to the reader.</param>
/// <returns>The features returned by the finder.</returns>
public IEnumerable<UMCLight> TestUmcFeatures(string path)
{
    var featureReader = new MsFeatureLightFileReader
    {
        Delimeter = ","
    };
    var msFeatures = featureReader.ReadFile(path);

    var featureFinder = new UmcTreeFeatureFinder
    {
        MaximumNet = .005,
        MaximumScan = 50
    };

    var findingOptions = new LcmsFeatureFindingOptions(
        new FeatureTolerances
        {
            Mass = 8,
            Net = .005
        });

    // No raw data provider is supplied for this run.
    var umcFeatures = featureFinder.FindFeatures(msFeatures.ToList(), findingOptions, null);

    // Work on total feature count here.
    Assert.Greater(umcFeatures.Count, 0);

    return umcFeatures;
}
/// <summary>
/// Loads MS features from the file at <paramref name="path"/>. Any extension other than
/// .PEK is read as a comma-delimited file.
/// </summary>
/// <param name="path">Path of the MS feature file to read.</param>
/// <returns>
/// The MS features read from the file, or an empty list when no extension can be
/// determined because <c>Path.GetExtension</c> returned null.
/// </returns>
/// <exception cref="NotImplementedException">The file has a .PEK extension.</exception>
public static List<MSFeatureLight> LoadMsFeatureData(string path)
{
    var msFeatures = new List<MSFeatureLight>();

    var extension = Path.GetExtension(path);
    if (extension == null)
    {
        return msFeatures;
    }

    // File extensions are identifiers, not linguistic text: compare them ordinally and
    // case-insensitively so the result never depends on the current culture.
    if (string.Equals(extension, ".PEK", StringComparison.OrdinalIgnoreCase))
    {
        // PEK reading is not wired up; fail loudly rather than fall through to the CSV reader.
        throw new NotImplementedException("Support for .PEK files is not available at this time");
    }

    var reader = new MsFeatureLightFileReader { Delimiter = ',' };
    msFeatures.AddRange(reader.ReadFile(path));
    UpdateStatus("Loaded features from the CSV files.");

    return msFeatures;
}
/// <summary>
/// Loads MS features from the file at <paramref name="path"/>, handing the supplied isos
/// filter options to the reader. Any extension other than .PEK is read as a
/// comma-delimited file.
/// </summary>
/// <param name="path">Path of the MS feature file to read.</param>
/// <param name="isosFilterOptions">Filter options assigned to the reader before reading.</param>
/// <returns>
/// The MS features read from the file, or an empty list when no extension can be
/// determined because <c>Path.GetExtension</c> returned null.
/// </returns>
/// <exception cref="NotImplementedException">The file has a .PEK extension.</exception>
public static List<MSFeatureLight> LoadMsFeatureData(string path, DeconToolsIsosFilterOptions isosFilterOptions)
{
    var msFeatures = new List<MSFeatureLight>();

    var extension = Path.GetExtension(path);
    if (extension == null)
    {
        return msFeatures;
    }

    // File extensions are identifiers, not linguistic text: compare them ordinally and
    // case-insensitively so the result never depends on the current culture.
    if (string.Equals(extension, ".PEK", StringComparison.OrdinalIgnoreCase))
    {
        // PEK reading is not wired up; fail loudly rather than fall through to the CSV reader.
        throw new NotImplementedException("Support for .PEK files is not available at this time");
    }

    var reader = new MsFeatureLightFileReader
    {
        Delimiter = ',',
        IsosFilteroptions = isosFilterOptions
    };
    msFeatures.AddRange(reader.ReadFile(path));
    UpdateStatus("Loaded features from the CSV files.");

    return msFeatures;
}
/// <summary>
/// Reads MS features from a comma-delimited file, finds UMC features using a scan summary
/// provider created for the matching ".raw" file, and asserts the number of features found.
/// </summary>
/// <param name="relativePath">Relative path of the feature file; resolved through <c>GetPath</c>.</param>
/// <param name="expectedFeatureCount">Exact number of features the finder must return.</param>
/// <returns>The features returned by the finder.</returns>
public IEnumerable<UMCLight> TestUmcFeatures(string relativePath, int expectedFeatureCount)
{
    // Get the absolute path
    var featureFilePath = GetPath(relativePath);

    var featureReader = new MsFeatureLightFileReader { Delimiter = ',' };
    var msFeatures = featureReader.ReadFile(featureFilePath);

    var featureFinder = new UmcTreeFeatureFinder
    {
        MaximumNet = .005,
        MaximumScan = 50
    };
    var findingOptions = new LcmsFeatureFindingOptions(new FeatureTolerances { Mass = 8, Net = .005 });

    // The raw file path is derived from the feature file path by swapping the suffix.
    var rawDataPath = featureFilePath.Replace("_isos.csv", ".raw");
    UpdateStatus("Using raw data to create better features.");

    var summaryProviderCache = new ScanSummaryProviderCache();
    IScanSummaryProvider summaryProvider = summaryProviderCache.GetScanSummaryProvider(rawDataPath, 1);

    var umcFeatures = featureFinder.FindFeatures(msFeatures.ToList(), findingOptions, summaryProvider);

    // Work on total feature count here.
    Assert.Greater(umcFeatures.Count, 0);
    Assert.AreEqual(expectedFeatureCount, umcFeatures.Count);

    return umcFeatures;
}
/// <summary>
/// Finds UMC features from a comma-delimited MS feature file using raw data, writes the
/// MS features of every multi-charge feature to a "_xics.csv" file next to the input, and
/// checks that the charge states of a feature start within <paramref name="maxScanDiff"/>
/// scans of each other.
/// </summary>
/// <param name="path">Path of the MS feature file; also determines the output file location.</param>
/// <param name="rawPath">Path of the raw data file handed to the raw file reader.</param>
/// <param name="maxScanDiff">Largest allowed difference between the minimum scans of two charge states.</param>
/// <exception cref="NullReferenceException">The feature finder returned null.</exception>
/// <exception cref="Exception">Two charge states of one feature start more than <paramref name="maxScanDiff"/> scans apart.</exception>
public void TestUmcFeaturesMultipleCharges(string path, string rawPath, int maxScanDiff)
{
    var reader = new MsFeatureLightFileReader { Delimeter = "," };
    var newMsFeatures = reader.ReadFile(path);

    var finder = new UmcTreeFeatureFinder();
    var featureTolerances = new FeatureTolerances
    {
        Mass = 12,
        Net = .05
    };
    var options = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    var provider = RawLoaderFactory.CreateFileReader(rawPath);
    provider.AddDataFile(rawPath, 0);

    var start = DateTime.Now;
    IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
    var end = DateTime.Now;
    Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);

    if (features == null)
    {
        throw new NullReferenceException("The feature list came back empty. This is a problem.");
    }

    var dirPath = Path.GetDirectoryName(path);
    if (dirPath != null)
    {
        var xicPath = Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv"));
        using (var writer = File.CreateText(xicPath))
        {
            foreach (var feature in features)
            {
                writer.WriteLine();
                writer.WriteLine("Feature {0}", feature.Id);

                var chargeMap = feature.CreateChargeMap();

                // Only features observed in more than one charge state are of interest here.
                if (chargeMap.Keys.Count < 2)
                {
                    continue;
                }

                foreach (var charge in chargeMap.Keys)
                {
                    writer.WriteLine();
                    foreach (var msFeature in chargeMap[charge])
                    {
                        var count = msFeature.MSnSpectra.Count;
                        writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                            msFeature.Abundance, count);
                    }
                }

                var charges = chargeMap.Keys.ToList();
                for (var i = 0; i < charges.Count; i++)
                {
                    var x = chargeMap[charges[i]];

                    // Start at i + 1: comparing a charge state with itself is meaningless.
                    for (var j = i + 1; j < charges.Count; j++)
                    {
                        var y = chargeMap[charges[j]];

                        // Each unordered pair is visited once, so the gap must be taken as an
                        // absolute value; a signed difference would miss the case where the
                        // second charge state starts long after the first.
                        var diff = Math.Abs(x.MinScan() - y.MinScan());
                        if (diff > maxScanDiff)
                        {
                            throw new Exception(
                                "There is a problem with the feature finder across charge states");
                        }
                    }
                }
            }
        }
    }

    // Work on total feature count here.
    Assert.Greater(features.Count(), 0);
}
/// <summary>
/// Loads MS features from a comma-delimited file and clusters them with
/// <c>UmcTreeFeatureFinder</c>, passing no raw data provider. Fails the test when the
/// finder returns no features.
/// </summary>
/// <param name="path">Path of the MS feature file to read.</param>
/// <returns>The features produced by the finder.</returns>
public IEnumerable<UMCLight> TestUmcFeatures(string path)
{
    var csvReader = new MsFeatureLightFileReader { Delimeter = "," };
    var loadedMsFeatures = csvReader.ReadFile(path);

    var treeFinder = new UmcTreeFeatureFinder { MaximumNet = .005, MaximumScan = 50 };

    var massNetTolerances = new FeatureTolerances { Mass = 8, Net = .005 };
    var finderOptions = new LcmsFeatureFindingOptions(massNetTolerances);

    var foundFeatures = treeFinder.FindFeatures(
        loadedMsFeatures.ToList(),
        finderOptions,
        null);

    // Work on total feature count here.
    Assert.Greater(foundFeatures.Count, 0);
    return foundFeatures;
}
/// <summary>
/// Reads MS features, filters them, clusters them into LC-MS features with
/// <c>MsFeatureTreeClusterer</c>, filters the clusters by feature length, and writes one
/// delimited row per remaining feature to the output file.
/// </summary>
/// <param name="relativePath">Relative path of the MS feature file; resolved through <c>GetPath</c>.</param>
/// <param name="outputPath">Relative path of the output file; resolved through <c>GetPath</c>.</param>
public void CreateFeaturesTest(string relativePath, string outputPath)
{
    // Column layout of one output row; {0} is the delimiter placed between the ten values.
    const string rowFormat = "{1}{0}{2}{0}{3}{0}{4}{0}{5}{0}{6}{0}{7}{0}{8}{0}{9}{0}{10}";

    var inputPath = GetPath(relativePath);
    var clusterTolerances = new FeatureTolerances
    {
        Mass = 13,
        Net = .01,
        DriftTime = 30,
        FragmentationWindowSize = .5
    };

    var featureReader = new MsFeatureLightFileReader();
    var msFeatures = featureReader.ReadFile(inputPath);

    var msFeatureFilter = new MsFeatureFilteringOptions
    {
        ChargeRange = new FilterRange(1, 6),
        MinimumIntensity = 200000,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };
    msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFeatureFilter);

    var clusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>
    {
        Tolerances = clusterTolerances
    };
    clusterer.Progress += (sender, args) => Console.WriteLine(args.Message);

    var lcmsFeatures = clusterer.Cluster(msFeatures.ToList());

    var lengthFilter = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRange = new FilterRange
        {
            Maximum = 30,
            Minimum = 10
        }
    };
    lcmsFeatures = LcmsFeatureFilters.FilterFeatures(lcmsFeatures, lengthFilter);

    Console.WriteLine(@"Found - {0} features", lcmsFeatures.Count);

    using (var output = File.CreateText(GetPath(outputPath)))
    {
        var nextId = 0;
        foreach (var lcmsFeature in lcmsFeatures)
        {
            // Features are renumbered sequentially in output order.
            lcmsFeature.Id = nextId;
            nextId++;

            lcmsFeature.CalculateStatistics(ClusterCentroidRepresentation.Mean);

            output.WriteLine(rowFormat,
                TextDelimiter,
                lcmsFeature.Net,
                lcmsFeature.ChargeState,
                lcmsFeature.Mz,
                lcmsFeature.Scan,
                lcmsFeature.MassMonoisotopic,
                lcmsFeature.MassMonoisotopicAligned,
                lcmsFeature.Id,
                lcmsFeature.ScanStart,
                lcmsFeature.ScanEnd,
                lcmsFeature.ScanAligned);
        }
    }
}
/// <summary>
/// Finds LC-MS features in a baseline and an alignee dataset, clusters their MS/MS spectra
/// across both datasets, and writes two timestamped result files into
/// <paramref name="resultPath"/>: a "_scans.txt" file listing the scans of two-member
/// clusters, and a ".txt" file with the full cluster details.
/// </summary>
/// <param name="name">Test name used as the prefix of the result file names.</param>
/// <param name="resultPath">Directory the result files are written to.</param>
/// <param name="sequencePath">Path stored as the sequence file of both datasets.</param>
/// <param name="type">Sequence file type; not used by this method body.</param>
/// <param name="baseline">Path of the baseline "_isos.csv" feature file.</param>
/// <param name="features">Path of the alignee "_isos.csv" feature file.</param>
/// <param name="percent">Value assigned to <c>TopPercent</c> of the spectral comparer.</param>
public void ClusterMsMs(string name, string resultPath, string sequencePath, SequenceFileType type, string baseline, string features, double percent)
{
    // Raw file paths are derived from the feature file paths by swapping the suffix.
    var baselineRaw = baseline.Replace("_isos.csv", ".raw");
    var featuresRaw = features.Replace("_isos.csv", ".raw");

    Console.WriteLine("Create Baseline Information");
    var baselineInfo = new DatasetInformation
    {
        DatasetId = 0,
        Features = new InputFile { Path = baseline },
        Raw = new InputFile { Path = baselineRaw },
        Sequence = new InputFile { Path = sequencePath }
    };

    Console.WriteLine("Create Alignee Information");
    var aligneeInfo = new DatasetInformation
    {
        DatasetId = 1,
        Features = new InputFile { Path = features },
        Raw = new InputFile { Path = featuresRaw },
        Sequence = new InputFile { Path = sequencePath }
    };

    var reader = new MsFeatureLightFileReader();

    Console.WriteLine("Reading Baseline Features");
    var baselineMsFeatures = reader.ReadFile(baseline).ToList();
    baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

    Console.WriteLine("Reading Alignee Features");
    var aligneeMsFeatures = reader.ReadFile(features).ToList();
    aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var tolerances = new FeatureTolerances
    {
        Mass = 8,
        Net = .005
    };
    var options = new LcmsFeatureFindingOptions(tolerances);

    Console.WriteLine("Detecting Baseline Features");
    var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

    Console.WriteLine("Detecting Alignee Features");
    var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

    Console.WriteLine("Managing baseline and alignee features");
    baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
    aligneeFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    Console.WriteLine("Clustering MS/MS Spectra");
    var clusterer = new MSMSClusterer();
    clusterer.MzTolerance = .5;
    clusterer.MassTolerance = 6;
    clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
    {
        TopPercent = percent
    };
    clusterer.SimilarityTolerance = .5;
    clusterer.ScanRange = 905;
    clusterer.Progress += clusterer_Progress;

    var allFeatures = new List<UMCLight>();
    allFeatures.AddRange(baselineFeatures);
    allFeatures.AddRange(aligneeFeatures);

    List<MsmsCluster> clusters;
    using (var rawReader = new ThermoRawDataFileReader())
    {
        rawReader.AddDataFile(baselineInfo.Raw.Path, baselineInfo.DatasetId);
        rawReader.AddDataFile(aligneeInfo.Raw.Path, aligneeInfo.DatasetId);

        clusters = clusterer.Cluster(allFeatures, rawReader);

        // Guard before touching Count: the null check must come before the first dereference.
        if (clusters != null)
        {
            Console.WriteLine("Found {0} Total Clusters", clusters.Count);
        }
    }

    if (clusters == null)
    {
        return;
    }

    // Both result files share one timestamped name stem. Path.Combine supplies the
    // platform's directory separator instead of a hard-coded backslash.
    var now = DateTime.Now;
    var resultStem = string.Format("{0}-results-{1}-{2}-{3}-{4}-{5}-{6}",
        name,
        now.Year,
        now.Month,
        now.Day,
        now.Hour,
        now.Minute,
        now.Second);

    var testResultPath = Path.Combine(resultPath, resultStem + "_scans.txt");
    using (TextWriter writer = File.CreateText(testResultPath))
    {
        writer.WriteLine("[Data]");
        writer.WriteLine("{0}", baseline);
        writer.WriteLine("{0}", features);
        writer.WriteLine("[Scans]");
        writer.WriteLine();

        foreach (var cluster in clusters)
        {
            // Only clusters with exactly two member features are listed in the scans file.
            if (cluster.Features.Count != 2)
            {
                continue;
            }

            var scanData = "";
            foreach (var feature in cluster.Features)
            {
                scanData += string.Format("{0},", feature.Scan);
            }
            scanData += string.Format("{0}", cluster.MeanScore);
            writer.WriteLine(scanData);
        }
    }

    testResultPath = Path.Combine(resultPath, resultStem + ".txt");
    using (TextWriter writer = File.CreateText(testResultPath))
    {
        writer.WriteLine("[Data]");
        writer.WriteLine("{0}", baseline);
        writer.WriteLine("{0}", features);
        writer.WriteLine("[Scans]");

        foreach (var cluster in clusters)
        {
            var scanData = "";
            var data = "";
            foreach (var feature in cluster.Features)
            {
                scanData += string.Format("{0},", feature.Scan);
                data += string.Format("{0},{1},{2},{3},{4},{5}",
                    feature.GroupId,
                    feature.Id,
                    feature.MassMonoisotopic,
                    feature.Mz,
                    feature.ChargeState,
                    feature.Scan);

                foreach (var spectrum in feature.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }
            }

            // NOTE(review): scanData already ends with ',' and per-feature data is appended
            // without a separator between features; row layout kept as-is - confirm intent.
            writer.WriteLine(scanData + "," + data);
        }

        writer.WriteLine("");
        writer.WriteLine("");
        writer.WriteLine("[Clusters]");

        foreach (var cluster in clusters)
        {
            writer.WriteLine("cluster id, cluster score");
            writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
            writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

            foreach (var feature in cluster.Features)
            {
                var data = string.Format("{0},{1},{2},{3},{4},{5}",
                    feature.GroupId,
                    feature.Id,
                    feature.MassMonoisotopic,
                    feature.Mz,
                    feature.ChargeState,
                    feature.Scan);

                foreach (var spectrum in feature.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }
                writer.WriteLine(data);
            }
        }
    }
}
/// <summary>
/// Finds UMC features from a comma-delimited MS feature file using raw data, reports the
/// elapsed time, and writes the MS features of every feature, grouped by charge, to a
/// "_xics.csv" file next to the input file.
/// </summary>
/// <param name="relativePath">Relative path of the MS feature file; resolved through <c>GetPath</c>.</param>
/// <param name="relativeRawPath">Relative path of the raw data file; resolved through <c>GetPath</c>.</param>
/// <exception cref="NullReferenceException">The feature finder returned null.</exception>
public void TestUmcFeatures(string relativePath, string relativeRawPath)
{
    // Get absolute paths
    var featurePath = GetPath(relativePath);
    var rawDataPath = GetPath(relativeRawPath);

    var featureReader = new MsFeatureLightFileReader { Delimiter = ',' };
    var msFeatures = featureReader.ReadFile(featurePath);

    var featureFinder = new UmcTreeFeatureFinder();
    var findingOptions = new LcmsFeatureFindingOptions(new FeatureTolerances { Mass = 12, Net = .04 })
    {
        MaximumNetRange = .003,
        MaximumScanRange = 50
    };

    var rawProvider = RawLoaderFactory.CreateFileReader(rawDataPath, 0);

    var startTime = DateTime.Now;
    IEnumerable<UMCLight> umcFeatures = featureFinder.FindFeatures(msFeatures.ToList(), findingOptions, rawProvider);
    var elapsed = DateTime.Now.Subtract(startTime);
    Console.WriteLine(@"Test Took: " + elapsed.TotalSeconds);

    if (umcFeatures == null)
    {
        throw new NullReferenceException("The feature list came back empty. This is a problem.");
    }

    var outputDirectory = Path.GetDirectoryName(featurePath);
    if (outputDirectory != null)
    {
        var xicFileName = Path.GetFileName(featurePath).Replace("_isos.csv", "_xics.csv");
        using (var xicWriter = File.CreateText(Path.Combine(outputDirectory, xicFileName)))
        {
            foreach (var umc in umcFeatures)
            {
                xicWriter.WriteLine();
                xicWriter.WriteLine("Feature {0}", umc.Id);

                // One blank-line-separated group of rows per charge state.
                var msFeaturesByCharge = umc.CreateChargeMap();
                foreach (var chargeState in msFeaturesByCharge.Keys)
                {
                    xicWriter.WriteLine();
                    foreach (var member in msFeaturesByCharge[chargeState])
                    {
                        var spectrumCount = member.MSnSpectra.Count;
                        xicWriter.WriteLine("{0},{1},{2},{3},{4}",
                            chargeState,
                            member.Mz,
                            member.Scan,
                            member.Abundance,
                            spectrumCount);
                    }
                }
            }
        }
    }

    // Work on total feature count here.
    Assert.Greater(umcFeatures.Count(), 0);
}
/// <summary>
/// Finds UMC features from a comma-delimited MS feature file using raw data, writes the
/// MS features of every multi-charge feature to a "_xics.csv" file next to the input, and
/// checks that the charge states of a feature start within <paramref name="maxScanDiff"/>
/// scans of each other.
/// </summary>
/// <param name="path">Path of the MS feature file; also determines the output file location.</param>
/// <param name="rawPath">Path of the raw data file handed to the raw file reader.</param>
/// <param name="maxScanDiff">Largest allowed difference between the minimum scans of two charge states.</param>
/// <exception cref="NullReferenceException">The feature finder returned null.</exception>
/// <exception cref="Exception">Two charge states of one feature start more than <paramref name="maxScanDiff"/> scans apart.</exception>
public void TestUmcFeaturesMultipleCharges(string path, string rawPath, int maxScanDiff)
{
    var reader = new MsFeatureLightFileReader { Delimeter = "," };
    var newMsFeatures = reader.ReadFile(path);

    var finder = new UmcTreeFeatureFinder();
    var featureTolerances = new FeatureTolerances
    {
        Mass = 12,
        Net = .05
    };
    var options = new LcmsFeatureFindingOptions(featureTolerances)
    {
        MaximumNetRange = .002,
        MaximumScanRange = 50
    };

    var provider = RawLoaderFactory.CreateFileReader(rawPath);
    provider.AddDataFile(rawPath, 0);

    var start = DateTime.Now;
    IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
    var end = DateTime.Now;
    Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);

    if (features == null)
    {
        throw new NullReferenceException("The feature list came back empty. This is a problem.");
    }

    var dirPath = Path.GetDirectoryName(path);
    if (dirPath != null)
    {
        var xicPath = Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv"));
        using (var writer = File.CreateText(xicPath))
        {
            foreach (var feature in features)
            {
                writer.WriteLine();
                writer.WriteLine("Feature {0}", feature.Id);

                var chargeMap = feature.CreateChargeMap();

                // Only features observed in more than one charge state are of interest here.
                if (chargeMap.Keys.Count < 2)
                {
                    continue;
                }

                foreach (var charge in chargeMap.Keys)
                {
                    writer.WriteLine();
                    foreach (var msFeature in chargeMap[charge])
                    {
                        var count = msFeature.MSnSpectra.Count;
                        writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                            msFeature.Abundance, count);
                    }
                }

                var charges = chargeMap.Keys.ToList();
                for (var i = 0; i < charges.Count; i++)
                {
                    var x = chargeMap[charges[i]];

                    // Start at i + 1: comparing a charge state with itself is meaningless.
                    for (var j = i + 1; j < charges.Count; j++)
                    {
                        var y = chargeMap[charges[j]];

                        // Each unordered pair is visited once, so the gap must be taken as an
                        // absolute value; a signed difference would miss the case where the
                        // second charge state starts long after the first.
                        var diff = Math.Abs(x.MinScan() - y.MinScan());
                        if (diff > maxScanDiff)
                        {
                            throw new Exception(
                                "There is a problem with the feature finder across charge states");
                        }
                    }
                }
            }
        }
    }

    // Work on total feature count here.
    Assert.Greater(features.Count(), 0);
}
/// <summary>
/// Compares, cluster by cluster, the identifications obtained from MS1 AMT matching (STAC)
/// against the MS/MS identifications read from the LIQUID results file, and prints the IDs
/// found by only one of the two approaches together with the total overlap.
/// </summary>
/// <param name="liquidResultsPath">Path of the LIQUID results file; used both as the mass tag database and as the source of MS/MS identifications.</param>
/// <param name="isosFile">Path of the isos feature file to read MS features from.</param>
/// <param name="rawFile">Path of the raw data file used to create the spectra provider.</param>
public void CompareMs2IdsToMs1Ids(string liquidResultsPath, string isosFile, string rawFile)
{
    // Read mass tags.
    var massTagReader = new LiquidResultsFileLoader(liquidResultsPath);
    var massTags = massTagReader.LoadDatabase();

    // Get identifications - this rereads the liquid results file, but I'm leaving it that way
    // for now because this is just a test.
    var scansToIds = this.GetIds(liquidResultsPath);

    // Read raw data file.
    var spectraProviderCache = new SpectraProviderCache();
    var spectraProvider = spectraProviderCache.GetSpectraProvider(rawFile);

    // Read isos features
    var isosReader = new MsFeatureLightFileReader();
    isosReader.IsosFilteroptions = new DeconToolsIsosFilterOptions { MaximumIsotopicFit = 0.15 };
    var msFeatures = isosReader.ReadFile(isosFile).ToList();

    // Get LCMS features
    var msFeatureClusterer = new MsToLcmsFeatures(spectraProvider);
    var lcmsFeatures = msFeatureClusterer.Convert(msFeatures);

    // No alignment is performed here, so the aligned values are simply the observed ones.
    lcmsFeatures.ForEach(feature =>
    {
        feature.NetAligned = feature.Net;
        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
    });

    // Create clusters - Since this is only working on a single dataset, there should be a 1:1 mapping
    // between LCMS features and clusters.
    var clusters = new List<UMCClusterLight>(lcmsFeatures.Count);
    foreach (var lcmsFeature in lcmsFeatures)
    {
        var cluster = new UMCClusterLight(lcmsFeature);
        cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);
        clusters.Add(cluster);
    }

    // Do STAC AMT matching
    var stacAdapter = new STACAdapter<UMCClusterLight>
    {
        Options = new FeatureMatcherParameters
        {
            ShouldCalculateShiftFDR = false,
            UsePriors = true,
            UseEllipsoid = true,
            UseDriftTime = false,
            ShouldCalculateSTAC = true,
        }
    };
    var amtMatches = stacAdapter.PerformPeakMatching(clusters, massTags);

    // Group AMT matches by cluster, convert MassTags to Protein objects (represents lipid ID,
    // rather than Protein ID here) for simplicity in comparing them to the MS/MS IDs.
    var ms1Matches = clusters.ToDictionary(cluster => cluster, cluster => new List<Protein>());
    foreach (var amtMatch in amtMatches)
    {
        var cluster = amtMatch.Observed;
        var massTag = amtMatch.Target;
        ms1Matches[cluster].Add(new Protein
        {
            Name = massTag.ProteinName,
            Sequence = massTag.PeptideSequence,
            ChemicalFormula = massTag.PeptideSequence
        });
    }

    // Now we need to backtrack MS/MS identifications -> clusters
    var ms2Matches = new Dictionary<UMCClusterLight, List<Protein>>();
    foreach (var cluster in clusters)
    {
        var clusterMs2Ids = new List<Protein>();
        ms2Matches.Add(cluster, clusterMs2Ids);

        foreach (var lcmsFeature in cluster.UmcList)
        {
            foreach (var msFeature in lcmsFeature.MsFeatures)
            {
                foreach (var msmsFeature in msFeature.MSnSpectra)
                {
                    if (scansToIds.ContainsKey(msmsFeature.Scan))
                    {
                        clusterMs2Ids.AddRange(scansToIds[msmsFeature.Scan]);
                    }
                }
            }
        }
    }

    // How many clusters have IDs from MS1 matching and from MS/MS?
    var clusterMs1IdCount = ms1Matches.Values.Count(value => value.Any());
    var clusterMs2IdCount = ms2Matches.Values.Count(value => value.Any());
    Console.WriteLine("Clusters with MS1 IDs: {0}", clusterMs1IdCount);
    Console.WriteLine("Clusters with MS2 IDs: {0}", clusterMs2IdCount);

    int overlapCount = 0; // Number of IDs that overlapped between MS1 and MS2 identifications.

    // Finally compare the MS1 IDs to the MS2 IDs.
    foreach (var cluster in clusters)
    {
        // For now only comparing by name. Materialize the name lists once so the queries
        // below do not re-run the projections on every Contains call.
        var ms1Lipids = ms1Matches[cluster].Select(id => id.Name).ToList();
        var ms2Lipids = ms2Matches[cluster].Select(id => id.Name).ToList();

        // Compare MS1 IDs for the cluster vs MS2 IDs for the cluster.
        var ms1OnlyIds = ms1Lipids.Where(lipid => !ms2Lipids.Contains(lipid)).ToList();
        var ms2OnlyIds = ms2Lipids.Where(lipid => !ms1Lipids.Contains(lipid)).ToList();

        // The overlap is the set of names present on BOTH sides. Intersecting the two
        // "only" lists (which are disjoint by construction) would always yield zero.
        overlapCount += ms1Lipids.Intersect(ms2Lipids).Count();

        // Write Results
        if (ms1OnlyIds.Count == 0 && ms2OnlyIds.Count == 0)
        {
            continue;
        }

        Console.WriteLine("Cluster {0}:", cluster.Id);
        if (ms1OnlyIds.Count > 0)
        {
            Console.WriteLine("\tMs1 Only IDs:");
            foreach (var id in ms1OnlyIds)
            {
                Console.WriteLine("\t\t{0}", id);
            }
        }

        if (ms2OnlyIds.Count > 0)
        {
            Console.WriteLine("\tMs2 Only IDs:");
            foreach (var id in ms2OnlyIds)
            {
                Console.WriteLine("\t\t{0}", id);
            }
        }
    }

    Console.WriteLine("Overlap: {0}", overlapCount);
}
/// <summary>
/// Runs <c>UmcTreeFeatureFinder</c> on a comma-delimited MS feature file with a raw file
/// reader as provider, prints how long the search took, and dumps each feature's MS
/// features per charge state into a "_xics.csv" file beside the input.
/// </summary>
/// <param name="relativePath">Relative path of the MS feature file; resolved through <c>GetPath</c>.</param>
/// <param name="relativeRawPath">Relative path of the raw data file; resolved through <c>GetPath</c>.</param>
/// <exception cref="NullReferenceException">The feature finder returned null.</exception>
public void TestUmcFeatures(string relativePath, string relativeRawPath)
{
    // Get absolute paths
    var isosPath = GetPath(relativePath);
    var rawFilePath = GetPath(relativeRawPath);

    var isosReader = new MsFeatureLightFileReader { Delimiter = ',' };
    var isosFeatures = isosReader.ReadFile(isosPath);

    var umcFinder = new UmcTreeFeatureFinder();
    var tolerances = new FeatureTolerances { Mass = 12, Net = .04 };
    var finderOptions = new LcmsFeatureFindingOptions(tolerances)
    {
        MaximumNetRange = .003,
        MaximumScanRange = 50
    };

    var rawFileReader = RawLoaderFactory.CreateFileReader(rawFilePath);
    rawFileReader.AddDataFile(rawFilePath, 0);

    var began = DateTime.Now;
    IEnumerable<UMCLight> foundFeatures = umcFinder.FindFeatures(isosFeatures.ToList(), finderOptions, rawFileReader);
    var finished = DateTime.Now;
    Console.WriteLine(@"Test Took: " + finished.Subtract(began).TotalSeconds);

    if (foundFeatures == null)
    {
        throw new NullReferenceException("The feature list came back empty. This is a problem.");
    }

    var isosDirectory = Path.GetDirectoryName(isosPath);
    if (isosDirectory != null)
    {
        var xicsPath = Path.Combine(
            isosDirectory,
            Path.GetFileName(isosPath).Replace("_isos.csv", "_xics.csv"));

        using (var xicsWriter = File.CreateText(xicsPath))
        {
            foreach (var found in foundFeatures)
            {
                xicsWriter.WriteLine();
                xicsWriter.WriteLine("Feature {0}", found.Id);

                var byCharge = found.CreateChargeMap();
                foreach (var charge in byCharge.Keys)
                {
                    // A blank line separates the rows of consecutive charge states.
                    xicsWriter.WriteLine();
                    foreach (var isosFeature in byCharge[charge])
                    {
                        var msnCount = isosFeature.MSnSpectra.Count;
                        xicsWriter.WriteLine("{0},{1},{2},{3},{4}",
                            charge,
                            isosFeature.Mz,
                            isosFeature.Scan,
                            isosFeature.Abundance,
                            msnCount);
                    }
                }
            }
        }
    }

    // Work on total feature count here.
    Assert.Greater(foundFeatures.Count(), 0);
}
/// <summary>
/// Builds LC-MS features from an MS feature file: the MS features are filtered, clustered
/// with <c>MsFeatureTreeClusterer</c>, filtered again by feature length in scans, and each
/// surviving feature is written as one delimited row to the output file.
/// </summary>
/// <param name="relativePath">Relative path of the MS feature file; resolved through <c>GetPath</c>.</param>
/// <param name="outputPath">Relative path of the output file; resolved through <c>GetPath</c>.</param>
public void CreateFeaturesTest(string relativePath, string outputPath)
{
    // Column layout of one output row; {0} is the delimiter placed between the ten values.
    const string rowFormat = "{1}{0}{2}{0}{3}{0}{4}{0}{5}{0}{6}{0}{7}{0}{8}{0}{9}{0}{10}";

    var sourcePath = GetPath(relativePath);
    var treeTolerances = new FeatureTolerances
    {
        Mass = 13,
        Net = .01,
        DriftTime = 30,
        FragmentationWindowSize = .5
    };

    var msReader = new MsFeatureLightFileReader();
    var candidateMsFeatures = msReader.ReadFile(sourcePath);

    var msOptions = new MsFeatureFilteringOptions
    {
        ChargeRange = new FilterRange(1, 6),
        MinimumIntensity = 200000,
        ShouldUseDeisotopingFilter = true,
        ShouldUseIntensityFilter = true
    };
    candidateMsFeatures = LcmsFeatureFilters.FilterMsFeatures(candidateMsFeatures, msOptions);

    var treeClusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>
    {
        Tolerances = treeTolerances
    };
    treeClusterer.Progress += (sender, args) => Console.WriteLine(args.Message);

    var umcs = treeClusterer.Cluster(candidateMsFeatures.ToList());

    var umcOptions = new LcmsFeatureFilteringOptions
    {
        FeatureLengthRangeScans = new FilterRange
        {
            Maximum = 30,
            Minimum = 10
        }
    };
    umcs = LcmsFeatureFilters.FilterFeatures(umcs, umcOptions);

    Console.WriteLine(@"Found - {0} features", umcs.Count);

    using (var resultWriter = File.CreateText(GetPath(outputPath)))
    {
        var sequentialId = 0;
        foreach (var umc in umcs)
        {
            // Features are renumbered sequentially in output order.
            umc.Id = sequentialId;
            sequentialId++;

            umc.CalculateStatistics(ClusterCentroidRepresentation.Mean);

            resultWriter.WriteLine(rowFormat,
                TextDelimiter,
                umc.Net,
                umc.ChargeState,
                umc.Mz,
                umc.Scan,
                umc.MassMonoisotopic,
                umc.MassMonoisotopicAligned,
                umc.Id,
                umc.ScanStart,
                umc.ScanEnd,
                umc.ScanAligned);
        }
    }
}
/// <summary>
/// Finds LC-MS features in a baseline and an alignee dataset, clusters their MS/MS spectra
/// across both datasets using a spectra provider cache, and writes two timestamped result
/// files into <paramref name="resultPath"/>: a "_scans.txt" file listing the scans of
/// two-member clusters, and a ".txt" file with the full cluster details.
/// </summary>
/// <param name="name">Test name used as the prefix of the result file names.</param>
/// <param name="resultPath">Directory the result files are written to.</param>
/// <param name="sequencePath">Path registered as the sequence input file of both datasets.</param>
/// <param name="type">Sequence file type; not used by this method body.</param>
/// <param name="baseline">Path of the baseline "_isos.csv" feature file.</param>
/// <param name="features">Path of the alignee "_isos.csv" feature file.</param>
/// <param name="percent">Value assigned to <c>TopPercent</c> of the spectral comparer.</param>
public void ClusterMsMs(string name, string resultPath, string sequencePath, SequenceFileType type, string baseline, string features, double percent)
{
    // Raw file paths are derived from the feature file paths by swapping the suffix.
    var baselineRaw = baseline.Replace("_isos.csv", ".raw");
    var featuresRaw = features.Replace("_isos.csv", ".raw");

    Console.WriteLine("Create Baseline Information");
    var baselineInfo = new DatasetInformation
    {
        DatasetId = 0,
    };
    baselineInfo.InputFiles.Add(new InputFile { Path = baseline, FileType = InputFileType.Features });
    baselineInfo.InputFiles.Add(new InputFile { Path = baselineRaw, FileType = InputFileType.Raw });
    baselineInfo.InputFiles.Add(new InputFile { Path = sequencePath, FileType = InputFileType.Sequence });

    Console.WriteLine("Create Alignee Information");
    var aligneeInfo = new DatasetInformation
    {
        DatasetId = 1,
    };
    aligneeInfo.InputFiles.Add(new InputFile { Path = features, FileType = InputFileType.Features });
    aligneeInfo.InputFiles.Add(new InputFile { Path = featuresRaw, FileType = InputFileType.Raw });
    aligneeInfo.InputFiles.Add(new InputFile { Path = sequencePath, FileType = InputFileType.Sequence });

    var reader = new MsFeatureLightFileReader();

    Console.WriteLine("Reading Baseline Features");
    var baselineMsFeatures = reader.ReadFile(baseline).ToList();
    baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

    Console.WriteLine("Reading Alignee Features");
    var aligneeMsFeatures = reader.ReadFile(features).ToList();
    aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var tolerances = new FeatureTolerances
    {
        Mass = 8,
        Net = .005
    };
    var options = new LcmsFeatureFindingOptions(tolerances);

    Console.WriteLine("Detecting Baseline Features");
    var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

    Console.WriteLine("Detecting Alignee Features");
    var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

    Console.WriteLine("Managing baseline and alignee features");
    baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
    aligneeFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    Console.WriteLine("Clustering MS/MS Spectra");
    var clusterer = new MSMSClusterer();
    clusterer.MzTolerance = .5;
    clusterer.MassTolerance = 6;
    clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
    {
        TopPercent = percent
    };
    clusterer.SimilarityTolerance = .5;
    clusterer.ScanRange = 905;
    clusterer.Progress += clusterer_Progress;

    var allFeatures = new List<UMCLight>();
    allFeatures.AddRange(baselineFeatures);
    allFeatures.AddRange(aligneeFeatures);

    // Request a provider for each raw file up front; the return values are not needed
    // here because the cache itself is handed to the clusterer.
    var spectraProviderCache = new SpectraProviderCache();
    spectraProviderCache.GetSpectraProvider(baselineInfo.RawFile.Path, baselineInfo.DatasetId);
    spectraProviderCache.GetSpectraProvider(aligneeInfo.RawFile.Path, aligneeInfo.DatasetId);

    List<MsmsCluster> clusters = clusterer.Cluster(allFeatures, spectraProviderCache);

    // Guard before touching Count: the null check must come before the first dereference.
    if (clusters == null)
    {
        return;
    }
    Console.WriteLine("Found {0} Total Clusters", clusters.Count);

    // Both result files share one timestamped name stem. Path.Combine supplies the
    // platform's directory separator instead of a hard-coded backslash.
    var now = DateTime.Now;
    var resultStem = string.Format("{0}-results-{1}-{2}-{3}-{4}-{5}-{6}",
        name,
        now.Year,
        now.Month,
        now.Day,
        now.Hour,
        now.Minute,
        now.Second);

    var testResultPath = Path.Combine(resultPath, resultStem + "_scans.txt");
    using (TextWriter writer = File.CreateText(testResultPath))
    {
        writer.WriteLine("[Data]");
        writer.WriteLine("{0}", baseline);
        writer.WriteLine("{0}", features);
        writer.WriteLine("[Scans]");
        writer.WriteLine();

        foreach (var cluster in clusters)
        {
            // Only clusters with exactly two member features are listed in the scans file.
            if (cluster.Features.Count != 2)
            {
                continue;
            }

            var scanData = "";
            foreach (var feature in cluster.Features)
            {
                scanData += string.Format("{0},", feature.Scan);
            }
            scanData += string.Format("{0}", cluster.MeanScore);
            writer.WriteLine(scanData);
        }
    }

    testResultPath = Path.Combine(resultPath, resultStem + ".txt");
    using (TextWriter writer = File.CreateText(testResultPath))
    {
        writer.WriteLine("[Data]");
        writer.WriteLine("{0}", baseline);
        writer.WriteLine("{0}", features);
        writer.WriteLine("[Scans]");

        foreach (var cluster in clusters)
        {
            var scanData = "";
            var data = "";
            foreach (var feature in cluster.Features)
            {
                scanData += string.Format("{0},", feature.Scan);
                data += string.Format("{0},{1},{2},{3},{4},{5}",
                    feature.GroupId,
                    feature.Id,
                    feature.MassMonoisotopic,
                    feature.Mz,
                    feature.ChargeState,
                    feature.Scan);

                foreach (var spectrum in feature.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }
            }

            // NOTE(review): scanData already ends with ',' and per-feature data is appended
            // without a separator between features; row layout kept as-is - confirm intent.
            writer.WriteLine(scanData + "," + data);
        }

        writer.WriteLine("");
        writer.WriteLine("");
        writer.WriteLine("[Clusters]");

        foreach (var cluster in clusters)
        {
            writer.WriteLine("cluster id, cluster score");
            writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
            writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

            foreach (var feature in cluster.Features)
            {
                var data = string.Format("{0},{1},{2},{3},{4},{5}",
                    feature.GroupId,
                    feature.Id,
                    feature.MassMonoisotopic,
                    feature.Mz,
                    feature.ChargeState,
                    feature.Scan);

                foreach (var spectrum in feature.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }
                writer.WriteLine(data);
            }
        }
    }
}