/// <summary>
/// Loads the MS features stored in <paramref name="featureFile"/> and runs the tree-based
/// feature finder over them.
/// </summary>
/// <param name="rawFile">
/// Path of the raw spectra file. It is registered with a Thermo raw reader and is also used to
/// pick the spectra provider that is handed to the feature finder.
/// </param>
/// <param name="featureFile">Path of the MS feature file passed to <c>UmcLoaderFactory.LoadMsFeatureData</c>.</param>
/// <returns>The features produced by the feature finder.</returns>
public List<UMCLight> FindFeatures(string rawFile, string featureFile)
{
    List<UMCLight> features;

    using (ISpectraProvider raw = new ThermoRawDataFileReader())
    {
        // Read the raw file summary data...
        // NOTE(review): this reader is opened but not handed to the finder below; it is kept
        // so the original call sequence stays the same.
        raw.AddDataFile(rawFile, 0);

        var info = new DatasetInformation();
        info.Features = new InputFile();
        info.Features.Path = featureFile;

        var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
        var tolerances = new FeatureTolerances
        {
            Mass = 8,
            Net = .005
        };
        var options = new LcmsFeatureFindingOptions(tolerances);

        // Load and create features
        var msFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);

        // The provider is created here, so it is released here as well. A using statement
        // tolerates a null resource, which covers the case where the factory returns null.
        using (var provider = RawLoaderFactory.CreateFileReader(rawFile))
        {
            features = finder.FindFeatures(msFeatures, options, provider);
        }
    }

    return features;
}
/// <summary>
/// Constructs a raw data file reader for reading the instrument (or equivalent) mass spectra,
/// chosen from the file extension of <paramref name="name"/>.
/// </summary>
/// <param name="name">Path or file name whose extension selects the reader.</param>
/// <returns>
/// A reader matching the extension; <c>null</c> when <paramref name="name"/> is <c>null</c> or
/// when the file ends in ".gz" without being an ".mzml.gz" file.
/// </returns>
public static ISpectraProvider CreateFileReader(string name)
{
    if (name == null)
    {
        return null;
    }

    // Extensions are identifiers, not linguistic text, so match them independent of the
    // current culture.
    var extension = Path.GetExtension(name).ToLowerInvariant();

    switch (extension)
    {
        case ".raw":
            return new ThermoRawDataFileReader();

        case ".mzxml":
            return new MzXMLReader();

        case ".mzml":
            return new InformedProteomicsReader();

        case ".gz":
            // Only gzipped mzML is handled; any other ".gz" file yields no reader.
            if (name.EndsWith(".mzml.gz", StringComparison.OrdinalIgnoreCase))
            {
                return new InformedProteomicsReader();
            }
            return null;

        default:
            // Every other extension (including none) falls back to this reader.
            return new InformedProteomicsReader();
    }
}
/// <summary>
/// Finds features in a baseline and an alignee dataset, clusters their MS/MS spectra, and
/// writes two timestamped text reports into <paramref name="resultPath"/>: a "_scans" file
/// listing the scans and mean score of every two-feature cluster, and a detailed file
/// listing every cluster with its features and peptides.
/// </summary>
/// <param name="name">Prefix used in the names of the result files.</param>
/// <param name="resultPath">Directory the result files are written to.</param>
/// <param name="sequencePath">Sequence file path recorded in both dataset descriptions.</param>
/// <param name="type">Sequence file type; not used by this method.</param>
/// <param name="baseline">Path of the baseline MS feature file.</param>
/// <param name="features">Path of the alignee MS feature file.</param>
/// <param name="percent">Value assigned to the spectral comparer's <c>TopPercent</c>.</param>
public void ClusterMsMs(string name, string resultPath, string sequencePath, SequenceFileType type, string baseline, string features, double percent)
{
    // The raw file paths are derived from the feature file paths by replacing "_isos.csv"
    // with ".raw".
    var baselineRaw = baseline.Replace("_isos.csv", ".raw");
    var featuresRaw = features.Replace("_isos.csv", ".raw");

    Console.WriteLine("Create Baseline Information");
    var baselineInfo = new DatasetInformation
    {
        DatasetId = 0,
        Features = new InputFile {Path = baseline},
        Raw = new InputFile {Path = baselineRaw},
        Sequence = new InputFile {Path = sequencePath}
    };

    Console.WriteLine("Create Alignee Information");
    var aligneeInfo = new DatasetInformation
    {
        DatasetId = 1,
        Features = new InputFile {Path = features},
        Raw = new InputFile {Path = featuresRaw},
        Sequence = new InputFile {Path = sequencePath}
    };

    var reader = new MsFeatureLightFileReader();

    // Tag every MS feature with the id of the dataset it was read from.
    Console.WriteLine("Reading Baseline Features");
    var baselineMsFeatures = reader.ReadFile(baseline).ToList();
    baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

    Console.WriteLine("Reading Alignee Features");
    var aligneeMsFeatures = reader.ReadFile(features).ToList();
    aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
    var tolerances = new FeatureTolerances
    {
        Mass = 8,
        Net = .005
    };
    var options = new LcmsFeatureFindingOptions(tolerances);

    Console.WriteLine("Detecting Baseline Features");
    var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

    Console.WriteLine("Detecting Alignee Features");
    var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

    Console.WriteLine("Managing baseline and alignee features");
    baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
    aligneeFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

    Console.WriteLine("Clustering MS/MS Spectra");
    var clusterer = new MSMSClusterer();
    clusterer.MzTolerance = .5;
    clusterer.MassTolerance = 6;
    clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
    {
        TopPercent = percent
    };
    clusterer.SimilarityTolerance = .5;
    clusterer.ScanRange = 905;
    clusterer.Progress += clusterer_Progress;

    var allFeatures = new List<UMCLight>();
    allFeatures.AddRange(baselineFeatures);
    allFeatures.AddRange(aligneeFeatures);

    List<MsmsCluster> clusters = null;
    using (var rawReader = new ThermoRawDataFileReader())
    {
        rawReader.AddDataFile(baselineInfo.Raw.Path, baselineInfo.DatasetId);
        rawReader.AddDataFile(aligneeInfo.Raw.Path, aligneeInfo.DatasetId);

        clusters = clusterer.Cluster(allFeatures, rawReader);

        // Guarded so that a null result reaches the early return below instead of failing here.
        if (clusters != null)
        {
            Console.WriteLine("Found {0} Total Clusters", clusters.Count);
        }
    }

    if (clusters == null)
    {
        return;
    }

    // Both result files share one timestamped prefix and differ only in their suffix.
    var now = DateTime.Now;
    var resultPrefix = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}",
        name,
        now.Year,
        now.Month,
        now.Day,
        now.Hour,
        now.Minute,
        now.Second,
        resultPath
        );

    // Report 1: one line per two-feature cluster - both scans followed by the mean score.
    var testResultPath = resultPrefix + "_scans.txt";
    using (TextWriter writer = File.CreateText(testResultPath))
    {
        writer.WriteLine("[Data]");
        writer.WriteLine("{0}", baseline);
        writer.WriteLine("{0}", features);
        writer.WriteLine("[Scans]");
        writer.WriteLine();

        foreach (var cluster in clusters)
        {
            if (cluster.Features.Count != 2)
            {
                continue;
            }

            var scanData = "";
            foreach (var feature in cluster.Features)
            {
                scanData += string.Format("{0},", feature.Scan);
            }
            scanData += string.Format("{0}", cluster.MeanScore);
            writer.WriteLine(scanData);
        }
    }

    // Report 2: scan/feature summary per cluster, followed by a detailed cluster listing.
    testResultPath = resultPrefix + ".txt";
    using (TextWriter writer = File.CreateText(testResultPath))
    {
        writer.WriteLine("[Data]");
        writer.WriteLine("{0}", baseline);
        writer.WriteLine("{0}", features);
        writer.WriteLine("[Scans]");

        foreach (var cluster in clusters)
        {
            var scanData = "";
            var data = "";
            foreach (var feature in cluster.Features)
            {
                scanData += string.Format("{0},", feature.Scan);

                // Separate consecutive feature records; without this the last field of one
                // feature runs straight into the dataset id of the next.
                if (data.Length > 0)
                {
                    data += ",";
                }

                data += string.Format("{0},{1},{2},{3},{4},{5}",
                    feature.GroupId,
                    feature.Id,
                    feature.MassMonoisotopic,
                    feature.Mz,
                    feature.ChargeState,
                    feature.Scan);

                foreach (var spectrum in feature.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }
            }

            // scanData already ends with ',' so an empty column sits between the scan list
            // and the feature records (unchanged from the original layout).
            writer.WriteLine(scanData + "," + data);
        }

        writer.WriteLine("");
        writer.WriteLine("");
        writer.WriteLine("[Clusters]");

        foreach (var cluster in clusters)
        {
            writer.WriteLine("cluster id, cluster score");
            writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
            writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

            foreach (var feature in cluster.Features)
            {
                var data = string.Format("{0},{1},{2},{3},{4},{5}",
                    feature.GroupId,
                    feature.Id,
                    feature.MassMonoisotopic,
                    feature.Mz,
                    feature.ChargeState,
                    feature.Scan);

                foreach (var spectrum in feature.MSnSpectra)
                {
                    foreach (var peptide in spectrum.Peptides)
                    {
                        data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                    }
                }

                writer.WriteLine(data);
            }
        }
    }
}
/// <summary>
/// Opens <paramref name="path"/> with a Thermo raw data reader and returns the spectrum for
/// the requested scan.
/// </summary>
/// <param name="path">Path of the raw file; registered with the reader under id 0.</param>
/// <param name="scan">Scan number passed on to the reader-based <c>GetSpectrum</c> overload.</param>
/// <returns>The spectrum returned by the reader-based overload.</returns>
private MSSpectra GetSpectrum(string path, int scan)
{
    // The reader is created for this single lookup, so release it once the spectrum is read.
    using (ISpectraProvider reader = new ThermoRawDataFileReader())
    {
        reader.AddDataFile(path, 0);
        return GetSpectrum(reader, scan, 0);
    }
}