/// <summary>
/// Reads the spectrum for a scan and keeps only the points whose X value lies
/// strictly between <paramref name="minMz"/> and <paramref name="maxMz"/>.
/// </summary>
/// <param name="path">Path handed to <c>GetProvider</c> to obtain a spectra provider.</param>
/// <param name="scan">Scan number to read.</param>
/// <param name="minMz">Exclusive lower bound on the X value of a point.</param>
/// <param name="maxMz">Exclusive upper bound on the X value of a point.</param>
/// <returns>
/// The points inside the window, or null when no provider is available, the
/// read throws, or the provider returns no spectrum.
/// </returns>
public static List<XYData> GetParentSpectrum(string path, int scan, double minMz, double maxMz)
{
    ISpectraProvider provider = GetProvider(path);
    if (provider == null)
    {
        return null;
    }

    List<XYData> rawPoints;
    try
    {
        // The summary is a required out argument but is not needed here.
        ScanSummary ignoredSummary;
        rawPoints = provider.GetRawSpectra(scan, 1, out ignoredSummary);
    }
    catch
    {
        Logger.PrintMessage("Could not load the raw spectra");
        return null;
    }

    if (rawPoints == null)
    {
        return null;
    }

    return rawPoints.Where(point => point.X > minMz && point.X < maxMz).ToList();
}
/// <summary>
/// Reads the spectrum for a scan and returns the points whose X value falls
/// strictly inside the window (<paramref name="minMz"/>, <paramref name="maxMz"/>).
/// </summary>
/// <param name="path">Path handed to <c>GetProvider</c> to obtain a spectra provider.</param>
/// <param name="scan">Scan number to read.</param>
/// <param name="minMz">Exclusive lower bound on the X value of a point.</param>
/// <param name="maxMz">Exclusive upper bound on the X value of a point.</param>
/// <returns>
/// A new list with the points inside the window, or null when no provider is
/// available, the read throws, or the provider returns no spectrum.
/// </returns>
public static List<XYData> GetParentSpectrum(string path, int scan, double minMz, double maxMz)
{
    ISpectraProvider provider = GetProvider(path);
    if (provider == null)
    {
        return null;
    }

    List<XYData> allPoints;
    try
    {
        // The summary is a required out argument but is not needed here.
        ScanSummary ignoredSummary;
        allPoints = provider.GetRawSpectra(scan, 0, 1, out ignoredSummary);
    }
    catch
    {
        Logger.PrintMessage("Could not load the raw spectra");
        return null;
    }

    if (allPoints == null)
    {
        return null;
    }

    var windowed = new List<XYData>();
    foreach (var point in allPoints)
    {
        if (point.X > minMz && point.X < maxMz)
        {
            windowed.Add(point);
        }
    }

    return windowed;
}
/// <summary>
/// Filters the list of MS Features that may be from MS/MS deisotoped data,
/// keeping only the features whose scan is reported with MS level 1.
/// As a side effect the scan times of the dataset are refreshed from the provider.
/// </summary>
/// <param name="msFeatures">Features to filter.</param>
/// <param name="provider">Source of per-scan summaries; may be null.</param>
/// <param name="dataset">
/// Dataset whose raw file path is inspected and whose <c>ScanTimes</c> are updated.
/// </param>
/// <returns>
/// <paramref name="msFeatures"/> itself when the dataset has no raw file path;
/// otherwise a new list holding only the features found in MS level 1 scans.
/// When <paramref name="provider"/> is null no scan can be verified, so the
/// new list is empty.
/// </returns>
public List<MSFeatureLight> Filter(List<MSFeatureLight> msFeatures, IScanSummaryProvider provider, ref DatasetInformation dataset)
{
    // A dataset without raw-file information is treated like one without a
    // path: nothing can be checked, so the features pass through untouched.
    var rawFile = dataset.RawFile;
    string rawPath = rawFile == null ? null : rawFile.Path;
    if (string.IsNullOrWhiteSpace(rawPath))
    {
        return msFeatures;
    }

    // Collect every distinct scan number referenced by the features.
    var uniqueScans = new HashSet<int>();
    foreach (var feature in msFeatures)
    {
        uniqueScans.Add(feature.Scan);
    }

    // Scans confirmed by the provider as MS level 1.
    var fullScans = new HashSet<int>();
    var scanTimes = dataset.ScanTimes;
    var rawFileName = System.IO.Path.GetFileName(rawPath);

    if (provider == null)
    {
        // NOTE(review): with no provider every feature is filtered out below;
        // confirm this is intended rather than returning the input unchanged.
        UpdateStatus(string.Format("Warning: Raw file not found ({0}); scan times are not available!", rawFileName));
    }
    else
    {
        UpdateStatus(string.Format("Reading scan info from {0}", rawFileName));

        foreach (var scan in uniqueScans)
        {
            ScanSummary summary = provider.GetScanSummary(scan);
            if (summary == null)
            {
                continue;
            }

            if (summary.MsLevel == 1)
            {
                fullScans.Add(scan);
            }

            // The indexer adds the entry or overwrites an existing one.
            scanTimes[scan] = summary.Time;
        }

        dataset.ScanTimes = scanTimes;
    }

    return msFeatures.Where(x => fullScans.Contains(x.Scan)).ToList();
}
/// <summary>
/// Reads the raw peaks for a scan and wraps them in a new <c>MSSpectra</c>.
/// </summary>
/// <param name="reader">Provider the peaks are read from.</param>
/// <param name="scan">Scan number to read.</param>
/// <param name="group">Not used by this method.</param>
/// <param name="mzTolerance">Not used by this method.</param>
/// <returns>A new spectrum whose <c>Peaks</c> is whatever the reader returned.</returns>
private MSSpectra GetSpectrum(ISpectraProvider reader, int scan, int group, double mzTolerance = .5)
{
    // The summary is a required out argument but is not needed here.
    ScanSummary ignoredSummary;
    var rawPeaks = reader.GetRawSpectra(scan, 2, out ignoredSummary);

    return new MSSpectra
    {
        Peaks = rawPeaks
    };
}
/// <summary>
/// Builds the summary for a scan from the LC-MS run, stores it in the scan
/// metadata of <c>_summary</c> under the scan number, and returns it.
/// </summary>
/// <param name="scan">Scan number to load.</param>
/// <returns>The newly created summary.</returns>
private ScanSummary LoadScanSummary(int scan)
{
    var run = this.LcMsRun;

    // The spectrum is requested with its second argument set to true; the
    // original author notes the peaks are needed for the total ion current.
    var spectrum = run.GetSpectrum(scan, true);

    var firstElution = run.GetElutionTime(run.MinLcScan);
    var lastElution = run.GetElutionTime(run.MaxLcScan);
    var elutionSpan = lastElution - firstElution;

    var result = new ScanSummary();
    result.MsLevel = spectrum.MsLevel;
    // Elution time rescaled by the span between the run's first and last scan.
    result.Net = (spectrum.ElutionTime - firstElution) / elutionSpan;
    result.Time = spectrum.ElutionTime;
    result.Scan = spectrum.ScanNum;
    result.TotalIonCurrent = Convert.ToInt64(spectrum.TotalIonCurrent);
    // Defaults; overwritten below for product spectra.
    result.PrecursorMz = 0;
    result.CollisionType = CollisionType.Other;
    result.DatasetId = this.GroupId;

    var product = spectrum as ProductSpectrum;
    if (product != null)
    {
        if (product.IsolationWindow.MonoisotopicMass != null)
        {
            result.PrecursorMz = product.IsolationWindow.MonoisotopicMass.Value;
        }

        switch (product.ActivationMethod)
        {
            case ActivationMethod.CID:
                result.CollisionType = CollisionType.Cid;
                break;
            case ActivationMethod.HCD:
                result.CollisionType = CollisionType.Hcd;
                break;
            case ActivationMethod.ETD:
                result.CollisionType = CollisionType.Etd;
                break;
            case ActivationMethod.ECD:
                result.CollisionType = CollisionType.Ecd;
                break;
            case ActivationMethod.PQD:
                // No mapping is applied for PQD; the default (Other) is kept.
                break;
        }
    }

    this._summary.ScanMetaData.Add(scan, result);
    return result;
}
/// <summary>
/// Loads a spectrum with its peaks from the reader and, when
/// <c>ShouldLogScale</c> is set, replaces every peak's Y value with its
/// base-2 logarithm.
/// </summary>
/// <param name="reader">Provider the spectrum is read from.</param>
/// <param name="scan">Scan number to read.</param>
/// <param name="group">Group identifier forwarded to the reader.</param>
/// <param name="mzTolerance">Not used by this method.</param>
/// <returns>The spectrum returned by the reader, possibly log-scaled in place.</returns>
public static MSSpectra GetSpectrum(ISpectraProvider reader, int scan, int group, double mzTolerance = .5)
{
    // The summary is a required out argument but is not needed here.
    ScanSummary ignoredSummary;
    var loaded = reader.GetSpectrum(scan, group, 2, out ignoredSummary, true);

    if (!ShouldLogScale)
    {
        return loaded;
    }

    foreach (var point in loaded.Peaks)
    {
        point.Y = Math.Log(point.Y, 2);
    }

    return loaded;
}
/// <summary>
/// Returns the spectrum for a scan, preferring the entry held in
/// <c>m_spectraMap</c> and falling back to the wrapped reader otherwise.
/// </summary>
/// <param name="scan">Scan number to look up.</param>
/// <param name="group">Group identifier forwarded to the underlying calls.</param>
/// <param name="scanLevel">Scan level forwarded to the underlying calls.</param>
/// <param name="summary">
/// Receives the cached spectrum's metadata, or the summary produced by the
/// raw read / the wrapped reader when those are invoked.
/// </param>
/// <param name="loadPeaks">When true the peaks are (re)loaded.</param>
/// <returns>The cached spectrum, or the one produced by the wrapped reader.</returns>
public MSSpectra GetSpectrum(int scan, int group, int scanLevel, out ScanSummary summary, bool loadPeaks)
{
    if (!m_spectraMap.ContainsKey(scan))
    {
        return m_reader.GetSpectrum(scan, group, scanLevel, out summary, loadPeaks);
    }

    var cached = m_spectraMap[scan];
    summary = cached.ScanMetaData;

    if (loadPeaks)
    {
        // The raw read also replaces the summary taken from the cache above.
        cached.Peaks = GetRawSpectra(scan, group, scanLevel, out summary);
    }

    return cached;
}
/// <summary>
/// Creates an XIC from the m/z values provided: for every scan in
/// [<paramref name="minScan"/>, <paramref name="maxScan"/>) the Y values of
/// the points inside the m/z window are summed into one feature.
/// </summary>
/// <param name="mz">Target m/z the window is built around.</param>
/// <param name="massError">
/// Error passed (as-is and negated) to
/// <c>FeatureLight.ComputeDaDifferenceFromPPM</c> to obtain the window bounds.
/// </param>
/// <param name="minScan">First scan to read (inclusive).</param>
/// <param name="maxScan">Scan at which reading stops (exclusive).</param>
/// <param name="provider">Provider the raw spectra are read from.</param>
/// <returns>
/// One feature per scan that yielded a spectrum; scans whose read throws or
/// returns null are skipped.
/// </returns>
public IEnumerable<MSFeatureLight> CreateXic(double mz, double massError, int minScan, int maxScan, ISpectraProvider provider)
{
    var xic = new List<MSFeatureLight>();

    var windowLow = FeatureLight.ComputeDaDifferenceFromPPM(mz, massError);
    var windowHigh = FeatureLight.ComputeDaDifferenceFromPPM(mz, -massError);

    for (var scan = minScan; scan < maxScan; scan++)
    {
        List<XYData> points = null;
        try
        {
            // The summary is a required out argument but is not needed here.
            ScanSummary ignoredSummary;
            points = provider.GetRawSpectra(scan, 0, 1, out ignoredSummary);
        }
        catch
        {
            // Best effort: a scan that cannot be read is skipped by the null check below.
        }

        if (points == null)
        {
            continue;
        }

        var summedIntensity = points
            .Where(point => point.X > windowLow && point.X < windowHigh)
            .Sum(point => point.Y);

        // The scan number is stored in both Scan and Net.
        xic.Add(new MSFeatureLight
        {
            Scan = scan,
            Net = scan,
            Abundance = Convert.ToInt64(summedIntensity)
        });
    }

    return xic;
}
/// <summary>
/// Searches every immediate sub-folder of <paramref name="rootFolder"/> for
/// project files, aggregates the results by framework and language, and
/// optionally writes the projects to a CSV file.
/// </summary>
/// <param name="progress">Optional sink notified whenever the number of projects found changes.</param>
/// <param name="cancellationToken">Checked before each sub-folder is searched and before aggregation.</param>
/// <param name="rootFolder">Folder whose immediate sub-folders are searched.</param>
/// <param name="includeTotal">Forwarded to the framework and language aggregation.</param>
/// <param name="outputFile">When not null or empty, the projects are written to this CSV file.</param>
/// <returns>The project count together with the framework and language summaries.</returns>
/// <exception cref="OperationCanceledException">
/// Thrown when <paramref name="cancellationToken"/> is cancelled during the scan.
/// </exception>
public ScanSummary ScanRepo(IProgress<ProgressMessage> progress, CancellationToken cancellationToken, string rootFolder, bool includeTotal = true, string outputFile = null)
{
    // Scan all projects, one root sub-folder at a time.
    List<Project> projects = new List<Project>();
    int projectCount = 0;
    int foldersVisited = 0;

    foreach (DirectoryInfo folder in new DirectoryInfo(rootFolder).GetDirectories())
    {
        // Honour cancellation between folders; previously the token was ignored.
        cancellationToken.ThrowIfCancellationRequested();

        foldersVisited++;
        projects.AddRange(SearchFolderForProjectFiles(folder.FullName));

        // Only report when this folder actually contributed new projects.
        if (projectCount != projects.Count)
        {
            projectCount = projects.Count;
            progress?.Report(new ProgressMessage
            {
                ProjectsProcessed = projectCount,
                RootProjectsProcessed = foldersVisited
            });
        }
    }

    // Do not aggregate or write output for a scan that was cancelled.
    cancellationToken.ThrowIfCancellationRequested();

    // Aggregate results
    List<FrameworkSummary> frameworkSummary = AggregateFrameworks(projects, includeTotal);
    List<LanguageSummary> languageSummary = AggregateLanguages(projects, includeTotal);

    // Create an output CSV file
    if (!string.IsNullOrEmpty(outputFile))
    {
        OutputDataToCSVFile(projects, outputFile);
    }

    // Setup the scan summary
    return new ScanSummary
    {
        ProjectCount = projectCount,
        FrameworkSummary = frameworkSummary,
        LanguageSummary = languageSummary
    };
}
/// <summary>
/// Returns the raw spectrum for a scan, optionally restricted to a given MS level.
/// </summary>
/// <param name="scan">Scan number to read.</param>
/// <param name="scanLevel">
/// Required MS level; values of zero or less disable the level check.
/// </param>
/// <param name="summary">Receives the summary of the scan.</param>
/// <returns>
/// The loaded spectrum, or null when a positive <paramref name="scanLevel"/>
/// does not match the MS level of the scan.
/// </returns>
public List<XYData> GetRawSpectra(int scan, int scanLevel, out ScanSummary summary)
{
    // Open the run on first use.
    if (this._lcmsRun == null)
    {
        this.LoadLcmsRun();
    }

    summary = this.GetScanSummary(scan);

    if (summary.MsLevel == scanLevel || scanLevel <= 0)
    {
        return this.LoadSpectra(scan);
    }

    return null;
}
/// <summary>
/// If scan summary is not known from the scans file/database,
/// calculate the retention time by interpolating between the two
/// nearest scans.
/// </summary>
/// <param name="scanNumber">The target scan number.</param>
/// <returns>
/// The stored summary when <paramref name="scanNumber"/> is itself a known scan
/// or lies outside the known range (the first or last known summary is used in
/// that case); otherwise a new summary with a linearly interpolated time.
/// </returns>
private ScanSummary InterpolateScanSummary(int scanNumber)
{
    var searchResult = this.knownScanNumbers.BinarySearch(scanNumber);
    if (searchResult >= 0)
    {
        // Exact match: a non-negative result is the index of the scan itself.
        // Complementing it (as was done unconditionally before) produced a
        // negative index and the first known scan's summary was returned.
        return this.summary.ScanMetaData[this.knownScanNumbers[searchResult]];
    }

    // No exact match: the complement is the index of the first known scan
    // that is larger than the target (or Count when there is none).
    var indexNearest = ~searchResult;

    if (indexNearest <= 0)
    {
        // Target precedes every known scan.
        return this.summary.ScanMetaData[this.knownScanNumbers[0]];
    }

    if (indexNearest >= this.knownScanNumbers.Count)
    {
        // Target follows every known scan.
        return this.summary.ScanMetaData[this.knownScanNumbers[this.knownScanNumbers.Count - 1]];
    }

    var lowScanSummary = this.summary.ScanMetaData[this.knownScanNumbers[indexNearest - 1]];
    var highScanSummary = this.summary.ScanMetaData[this.knownScanNumbers[indexNearest]];

    var lowScan = lowScanSummary.Scan;
    var highScan = highScanSummary.Scan;
    var lowRt = lowScanSummary.Time;
    var highRt = highScanSummary.Time;

    // Linear interpolation of the time between the two neighbouring scans.
    var fraction = (double)(scanNumber - lowScan) / (highScan - lowScan);
    var retentionTime = lowRt + (highRt - lowRt) * fraction;

    return new ScanSummary
    {
        DatasetId = this.GroupId,
        Scan = scanNumber,
        Time = retentionTime,
        Net = this.CalculateNet(retentionTime),
        MsLevel = 1,
        CollisionType = CollisionType.None
    };
}
/// <summary>
/// Builds an <c>MSSpectra</c> for a scan from its scan summary, optionally
/// loading the peaks.
/// </summary>
/// <param name="scan">Scan number to read.</param>
/// <param name="scanLevel">Not used by this method.</param>
/// <param name="summary">Receives the summary of the scan.</param>
/// <param name="loadPeaks">When true the peaks are loaded into the spectrum.</param>
/// <returns>A new spectrum populated from the summary.</returns>
public MSSpectra GetSpectrum(int scan, int scanLevel, out ScanSummary summary, bool loadPeaks)
{
    summary = this.GetScanSummary(scan);

    var result = new MSSpectra();
    result.MsLevel = summary.MsLevel;
    result.RetentionTime = summary.Time;
    result.Scan = scan;
    result.PrecursorMz = summary.PrecursorMz;
    result.TotalIonCurrent = summary.TotalIonCurrent;
    result.CollisionType = summary.CollisionType;

    if (!loadPeaks)
    {
        return result;
    }

    result.Peaks = this.LoadSpectra(scan);
    return result;
}
/// <summary>
/// Reads the raw spectrum for a scan through the provider obtained for
/// <paramref name="path"/>.
/// </summary>
/// <param name="path">Path handed to <c>GetProvider</c> to obtain a spectra provider.</param>
/// <param name="scan">Scan number to read.</param>
/// <returns>
/// Whatever the provider returns, or null when no provider is available or
/// the read throws.
/// </returns>
public static List<XYData> GetDaughterSpectrum(string path, int scan)
{
    ISpectraProvider provider = GetProvider(path);
    if (provider == null)
    {
        return null;
    }

    try
    {
        // The summary is a required out argument but is not needed here.
        ScanSummary ignoredSummary;
        return provider.GetRawSpectra(scan, 0, 2, out ignoredSummary);
    }
    catch
    {
        Logger.PrintMessage("Could not load the raw spectra");
        return null;
    }
}
/// <summary>
/// Reads a tab-delimited cache file into a dictionary keyed by scan number.
/// The first <c>CONST_HEADER_SIZE</c> lines are skipped; every remaining line
/// that splits into exactly three fields (scan, precursor m/z, MS level) adds
/// one entry, and all other lines are ignored.
/// </summary>
/// <param name="path">Path of the cache file.</param>
/// <returns>The scan summaries read from the file, keyed by scan number.</returns>
public static Dictionary<int, ScanSummary> ReadCache(string path)
{
    var cache = new Dictionary<int, ScanSummary>();
    var lines = File.ReadAllLines(path);

    for (var lineIndex = CONST_HEADER_SIZE; lineIndex < lines.Length; lineIndex++)
    {
        // Surrounding whitespace and all embedded spaces are removed before splitting.
        var fields = lines[lineIndex].Trim().Replace(" ", "").Split('\t');
        if (fields.Length != 3)
        {
            continue;
        }

        // NOTE(review): the Convert calls parse with the current culture - confirm
        // that matches whatever wrote the cache file.
        var entry = new ScanSummary
        {
            Scan = Convert.ToInt32(fields[0]),
            PrecursorMz = Convert.ToDouble(fields[1]),
            MsLevel = Convert.ToInt32(fields[2])
        };

        cache.Add(entry.Scan, entry);
    }

    return cache;
}
/// <summary>
/// Loads the scan summary cache stored in a tab-delimited text file.
/// Lines before index <c>CONST_HEADER_SIZE</c> are skipped, and only lines
/// with exactly three tab-separated fields (scan, precursor m/z, MS level)
/// produce an entry.
/// </summary>
/// <param name="path">Path of the cache file.</param>
/// <returns>The scan summaries read from the file, keyed by scan number.</returns>
public static Dictionary<int, ScanSummary> ReadCache(string path)
{
    var summariesByScan = new Dictionary<int, ScanSummary>();
    var allLines = File.ReadAllLines(path);

    var row = CONST_HEADER_SIZE;
    while (row < allLines.Length)
    {
        // Surrounding whitespace and all embedded spaces are removed before splitting.
        var cleaned = allLines[row].Trim().Replace(" ", "");
        var columns = cleaned.Split('\t');

        if (columns.Length == 3)
        {
            // NOTE(review): the Convert calls parse with the current culture - confirm
            // that matches whatever wrote the cache file.
            var parsed = new ScanSummary();
            parsed.Scan = Convert.ToInt32(columns[0]);
            parsed.PrecursorMz = Convert.ToDouble(columns[1]);
            parsed.MsLevel = Convert.ToInt32(columns[2]);

            summariesByScan.Add(parsed.Scan, parsed);
        }

        row++;
    }

    return summariesByScan;
}
/// <summary>
/// Reads the raw spectrum for a scan through the provider obtained for
/// <paramref name="path"/>.
/// </summary>
/// <param name="path">Path handed to <c>GetProvider</c> to obtain a spectra provider.</param>
/// <param name="scan">Scan number to read.</param>
/// <returns>
/// Whatever the provider returns, or null when no provider is available or
/// the read throws.
/// </returns>
public static List<XYData> GetDaughterSpectrum(string path, int scan)
{
    ISpectraProvider provider = GetProvider(path);
    if (provider == null)
    {
        return null;
    }

    List<XYData> daughterPoints;
    try
    {
        // The summary is a required out argument but is not needed here.
        ScanSummary ignoredSummary;
        daughterPoints = provider.GetRawSpectra(scan, 2, out ignoredSummary);
    }
    catch
    {
        Logger.PrintMessage("Could not load the raw spectra");
        daughterPoints = null;
    }

    return daughterPoints;
}
/// <summary>
/// Forwards the request for a raw spectrum to the wrapped reader.
/// </summary>
/// <param name="scan">Scan number to read.</param>
/// <param name="group">Group identifier forwarded to the reader.</param>
/// <param name="summary">Receives the summary produced by the reader.</param>
/// <returns>The spectrum returned by the wrapped reader.</returns>
public List<XYData> GetRawSpectra(int scan, int group, out ScanSummary summary)
{
    var rawSpectrum = m_reader.GetRawSpectra(scan, group, out summary);
    return rawSpectrum;
}
/// <summary>
/// Builds an <c>MSSpectra</c> for a scan from the scan header of the group's
/// raw file reader, optionally loading the peaks.
/// </summary>
/// <param name="scan">Scan number to read.</param>
/// <param name="groupId">Group whose reader is used.</param>
/// <param name="scanLevel">Not used by this method.</param>
/// <param name="summary">Receives the summary of the scan.</param>
/// <param name="loadPeaks">When true the raw peaks are loaded into the spectrum.</param>
/// <returns>A new spectrum populated from the scan header and summary.</returns>
public MSSpectra GetSpectrum(int scan, int groupId, int scanLevel, out ScanSummary summary, bool loadPeaks)
{
    // Get the RawFileReader for this group
    var rawReader = GetReaderForGroup(groupId);
    ValidateScanNumber(scan, rawReader);

    FinniganFileReaderBaseClass.udtScanHeaderInfoType scanHeader;
    summary = GetScanSummary(scan, rawReader, out scanHeader);

    var result = new MSSpectra();
    result.MsLevel = scanHeader.MSLevel;
    result.RetentionTime = scanHeader.RetentionTime;
    result.Scan = scan;
    result.PrecursorMz = scanHeader.ParentIonMZ;
    result.TotalIonCurrent = scanHeader.TotalIonCurrent;
    // The collision type is the only field taken from the summary rather than the header.
    result.CollisionType = summary.CollisionType;

    if (!loadPeaks)
    {
        return result;
    }

    result.Peaks = LoadRawSpectra(rawReader, scan);
    return result;
}
/// <summary>
/// Gets the raw data from the data file by delegating to the overload that
/// also takes a scan level, passing -1 for that level.
/// </summary>
/// <param name="scan">Scan number to read.</param>
/// <param name="groupId">File Group ID</param>
/// <param name="summary">Receives the summary of the scan.</param>
/// <returns>The spectrum returned by the scan-level overload.</returns>
public List<XYData> GetRawSpectra(int scan, int groupId, out ScanSummary summary)
{
    // presumably -1 disables MS-level filtering - confirm against the four-argument overload
    const int unspecifiedScanLevel = -1;

    return GetRawSpectra(scan, groupId, unspecifiedScanLevel, out summary);
}
/// <summary>
/// Reads the spectrum information for a scan from the mzXML file registered
/// for a group and derives a scan summary from it. The reader for the group
/// is created on first use and kept for later calls.
/// </summary>
/// <param name="scan">Scan number to read.</param>
/// <param name="groupId">Group-dataset ID the data file was registered under.</param>
/// <param name="summary">
/// Receives a summary built from the base peak, MS level, parent ion m/z and
/// total ion current of the spectrum information.
/// </param>
/// <returns>The spectrum information filled in by the reader.</returns>
/// <exception cref="Exception">No data file is registered for <paramref name="groupId"/>.</exception>
/// <exception cref="IOException">The mzXML file could not be opened.</exception>
private clsSpectrumInfo GetScanInfo(int scan, int groupId, out ScanSummary summary)
{
    if (!m_dataFiles.ContainsKey(groupId))
    {
        throw new Exception("The group-dataset ID provided was not found.");
    }

    // If we dont have a reader, then create one for this group
    // next time, it will be available and we won't have to waste time
    // opening the file.
    if (!m_readers.ContainsKey(groupId))
    {
        var path = m_dataFiles[groupId];
        var reader = new clsMzXMLFileAccessor();

        if (!reader.OpenFile(path))
        {
            throw new IOException("Could not open the mzXML file " + path);
        }

        // Cache only after a successful open; caching first left an unopened
        // reader behind that every later call would silently reuse.
        m_readers.Add(groupId, reader);
    }

    var rawReader = m_readers[groupId];

    var info = new clsSpectrumInfo();
    rawReader.GetSpectrumByScanNumber(scan, ref info);

    summary = new ScanSummary
    {
        Bpi = Convert.ToInt64(info.BasePeakIntensity),
        BpiMz = info.BasePeakMZ,
        MsLevel = info.MSLevel,
        PrecursorMz = info.ParentIonMZ,
        TotalIonCurrent = Convert.ToInt64(info.TotalIonCurrent)
    };

    return info;
}
/// <summary>
/// Returns the m/z-intensity pairs for a scan, optionally restricted to a
/// given MS level.
/// </summary>
/// <param name="scan">Scan number to read.</param>
/// <param name="groupId">Group whose data file is read.</param>
/// <param name="scanLevel">
/// Required MS level; values below 1 disable the level check.
/// </param>
/// <param name="summary">Receives the summary of the scan.</param>
/// <returns>
/// The points of the scan, or null when no scan information is available or
/// the MS level does not match.
/// </returns>
public List<XYData> GetRawSpectra(int scan, int groupId, int scanLevel, out ScanSummary summary)
{
    var info = GetScanInfo(scan, groupId, out summary);
    if (info == null)
    {
        return null;
    }

    var levelAccepted = info.MSLevel == scanLevel || scanLevel < 1;
    if (!levelAccepted)
    {
        return null;
    }

    // Pair each m/z value with the intensity stored at the same index.
    var points = new List<XYData>();
    var position = 0;
    while (position < info.MZList.Length)
    {
        points.Add(new XYData(info.MZList[position], info.IntensityList[position]));
        position++;
    }

    return points;
}
/// <summary>
/// Creates SIC's mapped by charge state for the MS Features in the feature.
/// Without a provider each SIC holds one point per MS feature (scan,
/// abundance, m/z) sorted by scan. With a provider each SIC is rebuilt from
/// the raw data: for every scan from 20 before the first MS feature up to
/// (but excluding) 20 after the last, the intensity of the peak closest to
/// the median m/z of that charge state is recorded.
/// </summary>
/// <param name="feature">Feature whose MS features are grouped by charge.</param>
/// <param name="provider">Object that can read data from a raw file or data source; may be null.</param>
/// <returns>The SIC points keyed by charge state.</returns>
public static Dictionary<int, List<XYZData>> CreateChargeSIC(this UMCLight feature, ISpectraProvider provider)
{
    const int scanPadding = 20;

    var chargeMap = feature.CreateChargeMap();
    var sicMap = new Dictionary<int, List<XYZData>>();

    foreach (var charge in chargeMap.Keys)
    {
        var msFeatures = chargeMap[charge];
        msFeatures.Sort((x, y) => x.Scan.CompareTo(y.Scan));
        sicMap.Add(charge, msFeatures.ConvertAll(x => new XYZData(x.Scan, x.Abundance, x.Mz)));
    }

    if (provider == null)
    {
        return sicMap;
    }

    // Iterate over a snapshot of the keys: the map is assigned to inside the
    // loop, which is not allowed while enumerating its live key collection.
    foreach (var charge in new List<int>(sicMap.Keys))
    {
        var data = sicMap[charge];
        if (data.Count == 0)
        {
            // Nothing to derive a scan range or median m/z from.
            continue;
        }

        var minScan = int.MaxValue;
        var maxScan = int.MinValue;
        var mzValues = new List<double>();
        foreach (var point in data)
        {
            mzValues.Add(point.Z);
            minScan = Math.Min(minScan, Convert.ToInt32(point.X));
            maxScan = Math.Max(maxScan, Convert.ToInt32(point.X));
        }

        // Use the (upper) median m/z as the target for this charge state.
        mzValues.Sort();
        var mz = mzValues[mzValues.Count / 2];

        // NOTE(review): the padded range is not clamped to the scans that exist
        // in the raw file - confirm the provider tolerates out-of-range scans.
        minScan -= scanPadding;
        maxScan += scanPadding;

        // Build the SIC
        var intensities = new List<XYZData>();
        for (var scan = minScan; scan < maxScan; scan++)
        {
            ScanSummary summary;
            var spectrum = provider.GetRawSpectra(scan, 1, out summary);

            // A scan with no spectrum or no peaks contributes a zero intensity.
            double intensity = 0;
            if (spectrum != null)
            {
                var minDistance = double.MaxValue;
                foreach (var peak in spectrum)
                {
                    // Absolute distance: the signed difference used before always
                    // favoured the peak with the lowest m/z, not the closest one.
                    var distance = Math.Abs(peak.X - mz);
                    if (distance < minDistance)
                    {
                        minDistance = distance;
                        intensity = peak.Y;
                    }
                }
            }

            intensities.Add(new XYZData(scan, intensity, mz));
        }

        sicMap[charge] = intensities;
    }

    return sicMap;
}
/// <summary>
/// Looks up the raw file reader for a group, validates the scan number
/// against it and produces the summary of the scan.
/// </summary>
/// <param name="scan">Scan number to summarise.</param>
/// <param name="groupId">Group whose reader is used.</param>
/// <param name="summary">Receives the summary of the scan.</param>
/// <returns>The reader that was used, so the caller can keep reading from it.</returns>
private XRawFileIO GetScanSummaryAndReader(int scan, int groupId, out ScanSummary summary)
{
    // Get the RawFileReader for this group
    var groupReader = GetReaderForGroup(groupId);
    ValidateScanNumber(scan, groupReader);

    // The header is a required out argument but is not needed here.
    FinniganFileReaderBaseClass.udtScanHeaderInfoType ignoredHeader;
    summary = GetScanSummary(scan, groupReader, out ignoredHeader);

    return groupReader;
}
/// <summary>
/// Walk-through of how to rebuild clusters from a database: optionally
/// indexes the database, loads every cluster, reconstructs its features and
/// visits the MS/MS spectra of each MS feature. Reading the raw spectra is
/// switched off by a local flag.
/// </summary>
/// <param name="databasePath">Path of the database to read.</param>
/// <param name="indexDatabase">When true the cluster and feature indexes are built first.</param>
public void CreateUMCClusterLight(string databasePath, bool indexDatabase)
{
    // Indexing has to happen before the session to the database is opened.
    if (indexDatabase)
    {
        DatabaseIndexer.IndexClusters(databasePath);
        DatabaseIndexer.IndexFeatures(databasePath);
    }

    // Factory method that creates the set of data access providers used throughout MultiAlign.
    var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, false);

    // If only the clusters were needed, one could instead:
    //  1. connect to the database:  NHibernateUtil.ConnectToDatabase(databasePath, false);
    //  2. extract all of the clusters:
    //       IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
    //       List<UMCClusterLight> clusters = clusterCache.FindAll();
    var clusters = providers.ClusterCache.FindAll();

    var shouldGetMsFeatures = true;
    var shouldGetMsMsFeatures = true;
    var shouldGetRawData = false;

    // Map every dataset by its ID; only needed when the raw data is wanted.
    var datasetMap = new Dictionary<int, DatasetInformation>();
    foreach (var dataset in providers.DatasetCache.FindAll())
    {
        datasetMap.Add(dataset.DatasetId, dataset);
    }

    foreach (var cluster in clusters)
    {
        cluster.ReconstructUMCCluster(providers, true, false, shouldGetMsFeatures, shouldGetMsMsFeatures);

        foreach (var feature in cluster.Features)
        {
            foreach (var msFeature in feature.Features)
            {
                foreach (var spectrumMetaData in msFeature.MSnSpectra)
                {
                    // This is where the MS/MS spectra can be used. Given the path to the
                    // raw file, a reader can be created to extract the MS/MS spectra
                    // (mzXML and Thermo .RAW are supported, based on the file extension).
                    if (!shouldGetRawData)
                    {
                        continue;
                    }

                    DatasetInformation info;
                    if (!datasetMap.TryGetValue(spectrumMetaData.GroupId, out info))
                    {
                        continue;
                    }

                    if (info.RawFile == null)
                    {
                        continue;
                    }

                    // The reader acts as a bridge: MS/MS spectra can be accessed from
                    // PNNLOmics without referencing the Thermo DLLs, and spectra are
                    // only loaded on demand when analyzing large datasets.
                    var rawReader = new InformedProteomicsReader(spectrumMetaData.GroupId, info.RawFile.Path);

                    // Then grab the actual spectrum...
                    ScanSummary summary;
                    var spectrum = rawReader.GetRawSpectra(spectrumMetaData.Scan, 2, out summary);

                    // ...and do whatever is needed with it.
                }
            }
        }
    }
}
/// <summary>
/// Loads the features of a dataset and stitches the related records together:
/// peptides are attached to their MS/MS spectra, and each MS/MS spectrum is
/// attached to its parent MS feature, which in turn is attached to its LC-MS feature.
/// </summary>
/// <param name="datasetId">Dataset whose records are loaded.</param>
/// <param name="providers">Caches the records are read from.</param>
/// <returns>The features of the dataset, with their child objects linked in place.</returns>
/// <exception cref="ArgumentNullException">The MS/MS spectra lookup returned null.</exception>
private IEnumerable<UMCLight> RetrieveFeatures(int datasetId, FeatureDataAccessProviders providers)
{
    var features = providers.FeatureCache.FindByDatasetId(datasetId);
    var spectra = providers.MSnFeatureCache.FindByDatasetId(datasetId);
    if (spectra == null)
    {
        throw new ArgumentNullException(@"There were no MS/MS spectra in the database");
    }

    var sequences = providers.DatabaseSequenceCache.FindAll();
    var sequenceMaps = providers.SequenceMsnMapCache.FindByDatasetId(datasetId);
    var spectraMaps = providers.MSFeatureToMSnFeatureCache.FindByDatasetId(datasetId);
    var msFeatures = providers.MSFeatureCache.FindByDatasetId(datasetId);

    // Index every list by ID once, then use the map tables to join them.
    var peptidesById = new Dictionary<int, Peptide>();
    foreach (var sequence in sequences)
    {
        // Sequences are loaded for all datasets; keep only this one's.
        if (sequence.GroupId == datasetId)
        {
            var peptide = new Peptide
            {
                Sequence = sequence.Sequence,
                Id = sequence.Id,
            };
            peptidesById.Add(peptide.Id, peptide);
        }
    }

    var msFeaturesById = new Dictionary<int, MSFeatureLight>();
    foreach (var msFeature in msFeatures)
    {
        msFeaturesById.Add(msFeature.Id, msFeature);
    }

    var featuresById = new Dictionary<int, UMCLight>();
    foreach (var feature in features)
    {
        featuresById.Add(feature.Id, feature);
    }

    var spectraById = new Dictionary<int, MSSpectra>();
    foreach (var spectrum in spectra)
    {
        spectraById.Add(spectrum.Id, spectrum);
    }

    // Map the peptides onto their MS/MS spectra.
    var peptidesMapped = 0;
    foreach (var map in sequenceMaps)
    {
        MSSpectra spectrum;
        Peptide peptide;
        var hasSpectrum = spectraById.TryGetValue(map.MsnFeatureId, out spectrum);
        var hasPeptide = peptidesById.TryGetValue(map.SequenceId, out peptide);
        if (!hasSpectrum || !hasPeptide)
        {
            continue;
        }

        spectrum.Peptides.Add(peptide);
        peptide.Spectrum = spectrum;
        peptidesMapped++;
    }
    Console.WriteLine("Mapped {0} peptides to spectra", peptidesMapped);

    // Map the spectra onto their parent features.
    var spectraMapped = 0;
    foreach (var map in spectraMaps)
    {
        UMCLight feature;
        MSSpectra spectrum;
        MSFeatureLight msFeature;
        var hasFeature = featuresById.TryGetValue(map.LCMSFeatureID, out feature);
        var hasSpectrum = spectraById.TryGetValue(map.MSMSFeatureID, out spectrum);
        var hasMsFeature = msFeaturesById.TryGetValue(map.MSFeatureID, out msFeature);
        if (hasFeature && hasSpectrum && hasMsFeature)
        {
            spectrum.ScanMetaData = new ScanSummary
            {
                MsLevel = 2,
                PrecursorMz = spectrum.PrecursorMz,
                Scan = spectrum.Scan
            };

            msFeature.MSnSpectra.Add(spectrum);
            spectrum.ParentFeature = msFeature;
            feature.AddChildFeature(msFeature);
            msFeature.Umc = feature;
            spectraMapped++;
        }
    }
    Console.WriteLine("Mapped {0} spectra to parent Features", spectraMapped);

    return features;
}
/// <summary>
/// Looks up the reader for a group, validates the scan number against it and
/// produces the summary for the validated scan.
/// </summary>
/// <param name="scan">Requested scan number; the value returned by validation is the one summarised.</param>
/// <param name="groupId">Group whose reader is used.</param>
/// <param name="summary">Receives the summary of the validated scan.</param>
/// <returns>The reader that was used, so the caller can keep reading from it.</returns>
private LcMsRun GetScanSummaryAndReader(int scan, int groupId, out ScanSummary summary)
{
    // Get the RawFileReader for this group
    var groupReader = GetReaderForGroup(groupId);

    int validatedScan = ValidateScanNumber(scan, groupReader);
    summary = GetScanSummary(validatedScan, groupReader);

    return groupReader;
}
/// <summary>
/// Builds the summary of a scan from the spectrum the reader returns for it.
/// </summary>
/// <param name="scan">Scan number to summarise.</param>
/// <param name="ipbReader">Run the spectrum is read from.</param>
/// <returns>
/// A new summary. The total ion current is the sum of the peak intensities,
/// each truncated to a long. The precursor m/z and collision type are only
/// filled in for product spectra and default to 0 and Other.
/// </returns>
private ScanSummary GetScanSummary(int scan, LcMsRun ipbReader)
{
    var spectrum = ipbReader.GetSpectrum(scan, true);

    var result = new ScanSummary();
    result.MsLevel = spectrum.MsLevel;
    result.Time = spectrum.ElutionTime;
    result.Scan = spectrum.ScanNum;
    // Per the original note, only used in PNNLOmics.Algorithms.Chromatograms.XicCreator.CreateXic(...).
    result.TotalIonCurrent = spectrum.Peaks.Sum(peak => (long)peak.Intensity);
    // Defaults; overwritten below for product spectra.
    result.PrecursorMz = 0;
    result.CollisionType = CollisionType.Other;

    var product = spectrum as ProductSpectrum;
    if (product == null)
    {
        return result;
    }

    if (product.IsolationWindow.MonoisotopicMass != null)
    {
        result.PrecursorMz = product.IsolationWindow.MonoisotopicMass.Value;
    }

    switch (product.ActivationMethod)
    {
        case ActivationMethod.CID:
            result.CollisionType = CollisionType.Cid;
            break;
        case ActivationMethod.HCD:
            result.CollisionType = CollisionType.Hcd;
            break;
        case ActivationMethod.ETD:
            result.CollisionType = CollisionType.Etd;
            break;
        case ActivationMethod.ECD:
            result.CollisionType = CollisionType.Ecd;
            break;
        case ActivationMethod.PQD:
            // No mapping is applied for PQD; the default (Other) is kept.
            break;
    }

    return result;
}
/// <summary>
/// Builds an <c>MSSpectra</c> for a scan from its scan summary, optionally
/// loading the peaks from the group's reader.
/// </summary>
/// <param name="scan">Requested scan number; the value returned by validation is the one used.</param>
/// <param name="groupId">Group whose reader is used.</param>
/// <param name="scanLevel">Not used by this method.</param>
/// <param name="summary">Receives the summary of the validated scan.</param>
/// <param name="loadPeaks">When true the peaks are loaded into the spectrum.</param>
/// <returns>A new spectrum populated from the summary.</returns>
public MSSpectra GetSpectrum(int scan, int groupId, int scanLevel, out ScanSummary summary, bool loadPeaks)
{
    // Get the RawFileReader for this group
    var groupReader = GetReaderForGroup(groupId);

    int validatedScan = ValidateScanNumber(scan, groupReader);
    summary = GetScanSummary(validatedScan, groupReader);

    var result = new MSSpectra();
    result.MsLevel = summary.MsLevel;
    result.RetentionTime = summary.Time;
    result.Scan = validatedScan;
    result.PrecursorMz = summary.PrecursorMz;
    result.TotalIonCurrent = summary.TotalIonCurrent;
    result.CollisionType = summary.CollisionType;

    if (!loadPeaks)
    {
        return result;
    }

    result.Peaks = LoadSpectra(groupReader, validatedScan);
    return result;
}
/// <summary>
/// Forwards the request for a raw spectrum to the wrapped provider.
/// </summary>
/// <param name="scan">Scan number to read.</param>
/// <param name="group">Group identifier forwarded to the provider.</param>
/// <param name="scanLevel">Scan level forwarded to the provider.</param>
/// <param name="summary">Receives the summary produced by the provider.</param>
/// <returns>The spectrum returned by the wrapped provider.</returns>
public List<XYData> GetRawSpectra(int scan, int group, int scanLevel, out ScanSummary summary)
{
    var rawSpectrum = m_provider.GetRawSpectra(scan, group, scanLevel, out summary);
    return rawSpectrum;
}
/// <summary>
/// Reads the raw peaks for a scan of a group and wraps them in a new <c>MSSpectra</c>.
/// </summary>
/// <param name="reader">Provider the peaks are read from.</param>
/// <param name="scan">Scan number to read.</param>
/// <param name="group">Group identifier forwarded to the reader.</param>
/// <param name="mzTolerance">Not used by this method.</param>
/// <returns>A new spectrum whose <c>Peaks</c> is whatever the reader returned.</returns>
private MSSpectra GetSpectrum(ISpectraProvider reader, int scan, int group, double mzTolerance = .5)
{
    // The summary is a required out argument but is not needed here.
    ScanSummary ignoredSummary;
    var rawPeaks = reader.GetRawSpectra(scan, group, 2, out ignoredSummary);

    return new MSSpectra
    {
        Peaks = rawPeaks
    };
}
/// <summary>
/// Delegates the raw spectrum request to the wrapped provider unchanged.
/// </summary>
/// <param name="scan">Scan number to read.</param>
/// <param name="group">Group identifier forwarded to the provider.</param>
/// <param name="scanLevel">Scan level forwarded to the provider.</param>
/// <param name="summary">Receives the summary produced by the provider.</param>
/// <returns>The spectrum returned by the wrapped provider.</returns>
public List<XYData> GetRawSpectra(int scan, int group, int scanLevel, out ScanSummary summary)
{
    var providerResult = m_provider.GetRawSpectra(scan, group, scanLevel, out summary);
    return providerResult;
}
/// <summary>
/// Entry point: parses the command line and, when a folder was supplied,
/// scans it for project files and prints the framework and language summaries.
/// </summary>
/// <param name="args">Command line arguments.</param>
static void Main(string[] args)
{
    // Process arguments; the callbacks handle the parsed options or the parse errors.
    CommandLine.Parser.Default.ParseArguments<Options>(args)
        .WithParsed(RunOptions)
        .WithNotParsed(HandleParseError);

    // Nothing to do without a folder to scan.
    if (string.IsNullOrEmpty(_folder))
    {
        return;
    }

    // Initialization / start the timer!
    Stopwatch timer = new();
    timer.Start();
    RepoScanner repo = new();
    IProgress<ProgressMessage> progress = new Progress<ProgressMessage>(ReportProgress);
    using CancellationTokenSource tokenSource = new();
    ScanSummary scanSummary = null;

    // Setup the progress bar: one tick per immediate sub-folder.
    int totalProgressBarTicks = new DirectoryInfo(_folder).GetDirectories().Length;
    ProgressBarOptions options = new()
    {
        ProgressCharacter = '─',
        ProgressBarOnBottom = true
    };
    progressBar = new ProgressBar(totalProgressBarTicks, "Searching for project files...", options);

    // Start processing the work
    try
    {
        scanSummary = repo.ScanRepo(progress, tokenSource.Token, _folder, _includeTotals, _outputFile);
    }
    catch (OperationCanceledException ex)
    {
        // The message must not be passed as the format argument: braces in it
        // would throw, and the label would never be printed.
        Console.WriteLine("Canceled: " + ex.Message);
    }
    catch (Exception ex)
    {
        Console.WriteLine("Error: " + ex.Message);
    }

    // Show the results
    ReportProgress(new ProgressMessage());
    Console.WriteLine("Processed in " + timer.Elapsed.ToString());
    Console.WriteLine("GitHub repo scanned: " + _GitHubOrganization);

    if (scanSummary == null)
    {
        return;
    }

    Console.WriteLine("Project files found: " + scanSummary.ProjectCount);

    // NOTE(review): one is subtracted from each count, presumably for a totals
    // row - confirm the counts are still right when totals are not included.
    Console.WriteLine("======================================");
    Console.WriteLine("Unique frameworks: " + (scanSummary.FrameworkSummary.Count - 1).ToString());
    ConsoleTable
        .From<FrameworkSummary>(scanSummary.FrameworkSummary)
        .Configure(o => o.NumberAlignment = Alignment.Right)
        .Write(Format.Minimal);

    Console.WriteLine("======================================");
    Console.WriteLine("Unique languages: " + (scanSummary.LanguageSummary.Count - 1).ToString());
    ConsoleTable
        .From<LanguageSummary>(scanSummary.LanguageSummary)
        .Configure(o => o.NumberAlignment = Alignment.Right)
        .Write(Format.Minimal);
}
/// <summary>
/// Delegates the raw spectrum request to the wrapped reader unchanged.
/// </summary>
/// <param name="scan">Scan number to read.</param>
/// <param name="group">Group identifier forwarded to the reader.</param>
/// <param name="summary">Receives the summary produced by the reader.</param>
/// <returns>The spectrum returned by the wrapped reader.</returns>
public List<XYData> GetRawSpectra(int scan, int group, out ScanSummary summary)
{
    var readerResult = m_reader.GetRawSpectra(scan, group, out summary);
    return readerResult;
}
/// <summary>
/// Loads the features of a dataset and joins the related records: peptides
/// are linked to their MS/MS spectra, and each MS/MS spectrum is linked to
/// its parent MS feature and that MS feature to its LC-MS feature.
/// </summary>
/// <param name="datasetId">Dataset whose records are loaded.</param>
/// <param name="providers">Caches the records are read from.</param>
/// <returns>The features of the dataset, with their child objects linked in place.</returns>
/// <exception cref="ArgumentNullException">The MS/MS spectra lookup returned null.</exception>
private IEnumerable<UMCLight> RetrieveFeatures(int datasetId, FeatureDataAccessProviders providers)
{
    var features = providers.FeatureCache.FindByDatasetId(datasetId);
    var spectra = providers.MSnFeatureCache.FindByDatasetId(datasetId);
    if (spectra == null)
    {
        throw new ArgumentNullException(@"There were no MS/MS spectra in the database");
    }

    var sequences = providers.DatabaseSequenceCache.FindAll();
    var sequenceMaps = providers.SequenceMsnMapCache.FindByDatasetId(datasetId);
    var spectraMaps = providers.MSFeatureToMSnFeatureCache.FindByDatasetId(datasetId);
    var msFeatures = providers.MSFeatureCache.FindByDatasetId(datasetId);

    // One pass over each list to build ID lookups; the map tables join them afterwards.
    var peptideLookup = new Dictionary<int, Peptide>();
    foreach (var sequence in sequences)
    {
        // Sequences are loaded for all datasets; skip the ones of other datasets.
        if (sequence.GroupId != datasetId)
        {
            continue;
        }

        var peptide = new Peptide();
        peptide.Sequence = sequence.Sequence;
        peptide.Id = sequence.Id;
        peptideLookup.Add(peptide.Id, peptide);
    }

    var msFeatureLookup = new Dictionary<int, MSFeatureLight>();
    foreach (var item in msFeatures)
    {
        msFeatureLookup.Add(item.Id, item);
    }

    var featureLookup = new Dictionary<int, UMCLight>();
    foreach (var item in features)
    {
        featureLookup.Add(item.Id, item);
    }

    var spectrumLookup = new Dictionary<int, MSSpectra>();
    foreach (var item in spectra)
    {
        spectrumLookup.Add(item.Id, item);
    }

    // Map the MSMS
    var linked = 0;
    foreach (var map in sequenceMaps)
    {
        MSSpectra spectrum;
        Peptide peptide;
        var foundSpectrum = spectrumLookup.TryGetValue(map.MsnFeatureId, out spectrum);
        var foundPeptide = peptideLookup.TryGetValue(map.SequenceId, out peptide);
        if (foundSpectrum && foundPeptide)
        {
            spectrum.Peptides.Add(peptide);
            peptide.Spectrum = spectrum;
            linked++;
        }
    }
    Console.WriteLine("Mapped {0} peptides to spectra", linked);

    // Map the spectra to their parent features.
    linked = 0;
    foreach (var map in spectraMaps)
    {
        UMCLight feature;
        MSSpectra spectrum;
        MSFeatureLight msFeature;
        var foundFeature = featureLookup.TryGetValue(map.LCMSFeatureID, out feature);
        var foundSpectrum = spectrumLookup.TryGetValue(map.MSMSFeatureID, out spectrum);
        var foundMsFeature = msFeatureLookup.TryGetValue(map.MSFeatureID, out msFeature);
        if (!(foundFeature && foundSpectrum && foundMsFeature))
        {
            continue;
        }

        var metaData = new ScanSummary();
        metaData.MsLevel = 2;
        metaData.PrecursorMz = spectrum.PrecursorMz;
        metaData.Scan = spectrum.Scan;
        spectrum.ScanMetaData = metaData;

        msFeature.MSnSpectra.Add(spectrum);
        spectrum.ParentFeature = msFeature;
        feature.AddChildFeature(msFeature);
        msFeature.Umc = feature;
        linked++;
    }
    Console.WriteLine("Mapped {0} spectra to parent Features", linked);

    return features;
}
/// <summary>
/// Clusters spectra together based on similarity. Every feature in
/// [<paramref name="start"/>, <paramref name="stop"/>) starts in its own
/// cluster; pairs from different datasets that are close in scan and m/z have
/// the first MS/MS spectrum of each compared, and when the score reaches
/// <paramref name="similarityTolerance"/> the first feature's cluster is
/// merged into the second's.
/// </summary>
/// <param name="start">Index of the first feature to cluster (inclusive).</param>
/// <param name="stop">Index at which clustering stops (exclusive).</param>
/// <param name="features">Features to cluster; the peaks of their first MS/MS spectrum are cleared before returning.</param>
/// <param name="provider">Provider used to load peaks for spectra that have none.</param>
/// <param name="similarityTolerance">Minimum score for two spectra to be merged.</param>
/// <returns>The clusters remaining after merging.</returns>
private List<MsmsCluster> Cluster(int start, int stop, List<MSFeatureLight> features, ISpectraProvider provider, double similarityTolerance)
{
    // Maps the feature to a cluster ID.
    var clusterIdByFeature = new Dictionary<MSFeatureLight, int>();
    // Maps the cluster ID to a cluster.
    var clusterById = new Dictionary<int, MsmsCluster>();

    // Create singleton clusters.
    var nextId = 0;
    for (var index = start; index < stop; index++)
    {
        var seed = features[index];

        var singleton = new MsmsCluster();
        singleton.Id = nextId++;
        singleton.MeanScore = 0;
        singleton.Features.Add(seed);

        clusterIdByFeature.Add(seed, singleton.Id);
        clusterById.Add(singleton.Id, singleton);
    }

    var adductMass = AdductMass;

    // Then iterate and cluster.
    for (var i = start; i < stop; i++)
    {
        var featureI = features[i];
        // NOTE(review): clusterI is resolved once per outer iteration and is not
        // refreshed after it has been merged away below - confirm that is intended.
        var clusterI = clusterById[clusterIdByFeature[featureI]];

        for (var j = i + 1; j < stop; j++)
        {
            var featureJ = features[j];
            var clusterJ = clusterById[clusterIdByFeature[featureJ]];

            // Don't cluster the same thing
            if (clusterI.Id == clusterJ.Id)
            {
                continue;
            }

            // Don't cluster from the same dataset. Let the linkage algorithm decide if they
            // belong in the same cluster, and later, go back and determine if the cluster is valid or not.
            if (featureI.GroupId == featureJ.GroupId)
            {
                continue;
            }

            // Check the scan difference. If it fits then we are within range.
            var scanDiff = Math.Abs(featureI.Scan - featureJ.Scan);
            if (!(scanDiff <= ScanRange))
            {
                continue;
            }

            // Use the most abundant mass because it had a higher chance of being fragmented.
            var mzI = (featureI.MassMonoisotopicMostAbundant / featureI.ChargeState) + adductMass;
            var mzJ = (featureJ.MassMonoisotopicMostAbundant / featureJ.ChargeState) + adductMass;
            var mzDiff = Math.Abs(mzI - mzJ);
            // Written as a negated <= so that a NaN difference is rejected, as in a plain <= test.
            if (!(mzDiff <= MzTolerance))
            {
                continue;
            }

            // Load and bin the peaks of the first MS/MS spectrum of each feature
            // (I first, then J) when they are not already present.
            foreach (var candidate in new[] { featureI, featureJ })
            {
                var msn = candidate.MSnSpectra[0];
                if (msn.Peaks.Count <= 0)
                {
                    ScanSummary ignoredSummary;
                    msn.Peaks = provider.GetRawSpectra(msn.Scan, candidate.GroupId, out ignoredSummary);
                    msn.Peaks = XYData.Bin(msn.Peaks, 0, 2000, MzTolerance);
                }
            }

            // Compute similarity
            var score = SpectralComparer.CompareSpectra(featureI.MSnSpectra[0], featureJ.MSnSpectra[0]);
            if (!(score >= similarityTolerance))
            {
                continue;
            }

            // Merge cluster I into cluster J.
            clusterJ.MeanScore += score;
            foreach (var member in clusterI.Features)
            {
                clusterJ.Features.Add(member);
                clusterIdByFeature[member] = clusterJ.Id;
            }
            clusterById.Remove(clusterI.Id);
        }
    }

    var clusters = new List<MsmsCluster>(clusterById.Values);

    // Release the peaks that were loaded for the comparison.
    for (var index = start; index < stop; index++)
    {
        features[index].MSnSpectra[0].Peaks.Clear();
    }

    // NOTE(review): a cluster with a single feature divides by zero here - confirm
    // how callers treat the resulting score.
    foreach (var cluster in clusters)
    {
        cluster.MeanScore /= (cluster.Features.Count - 1);
    }

    return clusters;
}
/// <summary>
/// Not implemented by this type; every call throws.
/// </summary>
/// <param name="scan">Unused.</param>
/// <param name="group">Unused.</param>
/// <param name="scanLevel">Unused.</param>
/// <param name="summary">Never assigned, because the method always throws.</param>
/// <param name="loadPeaks">Unused.</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public MSSpectra GetSpectrum(
    int scan,
    int group,
    int scanLevel,
    out ScanSummary summary,
    bool loadPeaks)
{
    throw new NotImplementedException();
}
/// <summary>
/// Exports the MS/MS spectra of every feature in the cluster through <paramref name="writer"/>.
/// Raw peaks are loaded from the raw file of the dataset each feature belongs to. Features whose
/// dataset is unknown, has no raw file information, or whose raw file does not exist are skipped.
/// </summary>
/// <param name="cluster">Cluster whose features are exported.</param>
/// <param name="path">Output path handed to the writer.</param>
/// <param name="datasets">Datasets used to resolve a feature's GroupId to a raw file path.</param>
/// <param name="writer">Writer used to create the output (first spectra set) and append to it (the rest).</param>
public static void ExportMsMs(this UMCClusterLight cluster, string path, List<DatasetInformation> datasets, IMsMsSpectraWriter writer)
{
    // Let's map the datasets first.
    var readers = new Dictionary<int, ISpectraProvider>();
    var information = new Dictionary<int, DatasetInformation>();
    datasets.ForEach(x => information.Add(x.DatasetId, x));

    // We are only loading what datasets we have to here!
    // Each feature may have come from a different raw data source, and since the data is not
    // all kept in memory we have to fetch it from the appropriate source.
    foreach (var feature in cluster.Features)
    {
        if (readers.ContainsKey(feature.GroupId))
        {
            continue;
        }

        DatasetInformation singleInfo;
        if (!information.TryGetValue(feature.GroupId, out singleInfo))
        {
            continue;
        }

        if (singleInfo.Raw == null || singleInfo.RawPath == null)
        {
            continue;
        }

        // Make sure that we have a file.
        if (!File.Exists(singleInfo.RawPath))
        {
            continue;
        }

        // Create a reader for the file and key it to the dataset, so that it can be
        // looked up directly while walking the cluster below.
        var provider = RawLoaderFactory.CreateFileReader(singleInfo.RawPath);
        provider.AddDataFile(singleInfo.RawPath, feature.GroupId);
        readers.Add(feature.GroupId, provider);
    }

    // The first write overwrites any existing file; every later write appends.
    var firstWrite = true;
    foreach (var feature in cluster.Features)
    {
        ISpectraProvider provider;
        if (!readers.TryGetValue(feature.GroupId, out provider))
        {
            continue;
        }

        foreach (var msFeature in feature.MsFeatures)
        {
            foreach (var spectrum in msFeature.MSnSpectra)
            {
                ScanSummary summary;
                spectrum.Peaks = provider.GetRawSpectra(spectrum.Scan, spectrum.GroupId, out summary);
                spectrum.ScanMetaData = summary;
            }

            if (firstWrite)
            {
                writer.Write(path, msFeature.MSnSpectra);
                // Clear the flag so later spectra are appended instead of overwriting the file.
                firstWrite = false;
            }
            else
            {
                writer.Append(path, msFeature.MSnSpectra);
            }
        }
    }
}
/// <summary>
/// Exports the MS/MS spectra of every feature in the cluster through <paramref name="writer"/>.
/// Raw peaks are loaded from the raw file of the dataset each feature belongs to. Features whose
/// dataset is unknown, has no RawFile entry, or whose raw file does not exist are skipped.
/// </summary>
/// <param name="cluster">Cluster whose features are exported.</param>
/// <param name="path">Output path handed to the writer.</param>
/// <param name="datasets">Datasets used to resolve a feature's GroupId to a raw file path.</param>
/// <param name="writer">Writer used to create the output (first spectra set) and append to it (the rest).</param>
public static void ExportMsMs(this UMCClusterLight cluster, string path, List<DatasetInformation> datasets, IMsMsSpectraWriter writer)
{
    // Let's map the datasets first.
    var readers = new Dictionary<int, ISpectraProvider>();
    var information = new Dictionary<int, DatasetInformation>();
    datasets.ForEach(x => information.Add(x.DatasetId, x));

    // We are only loading what datasets we have to here!
    // Each feature may have come from a different raw data source, and since the data is not
    // all kept in memory we have to fetch it from the appropriate source.
    foreach (var feature in cluster.Features)
    {
        if (readers.ContainsKey(feature.GroupId))
        {
            continue;
        }

        DatasetInformation singleInfo;
        if (!information.TryGetValue(feature.GroupId, out singleInfo))
        {
            continue;
        }

        if (singleInfo.RawFile == null)
        {
            continue;
        }

        // Make sure that we have a file.
        var rawPath = singleInfo.RawFile.Path;
        if (!File.Exists(rawPath))
        {
            continue;
        }

        // Create a reader for the file and key it to the dataset, so that it can be
        // looked up directly while walking the cluster below.
        var provider = RawLoaderFactory.CreateFileReader(rawPath);
        provider.AddDataFile(rawPath, feature.GroupId);
        readers.Add(feature.GroupId, provider);
    }

    // The first write overwrites any existing file; every later write appends.
    var firstWrite = true;
    foreach (var feature in cluster.Features)
    {
        ISpectraProvider provider;
        if (!readers.TryGetValue(feature.GroupId, out provider))
        {
            continue;
        }

        foreach (var msFeature in feature.MsFeatures)
        {
            foreach (var spectrum in msFeature.MSnSpectra)
            {
                ScanSummary summary;
                spectrum.Peaks = provider.GetRawSpectra(spectrum.Scan, spectrum.GroupId, out summary);
                spectrum.ScanMetaData = summary;
            }

            if (firstWrite)
            {
                writer.Write(path, msFeature.MSnSpectra);
                // Clear the flag so later spectra are appended instead of overwriting the file.
                firstWrite = false;
            }
            else
            {
                writer.Append(path, msFeature.MSnSpectra);
            }
        }
    }
}
/// <summary>
/// Example walk-through: loads every cluster from the database, reconstructs its features and
/// MS/MS spectrum metadata, and (only when the local raw-data switch is enabled) reads the raw
/// spectrum for each MS/MS entry from the dataset's raw file.
/// </summary>
/// <param name="databasePath">Path to the database to read.</param>
/// <param name="indexDatabase">When true, cluster and feature indexes are built before the database is opened.</param>
public void CreateUMCClusterLight(string databasePath, bool indexDatabase)
{
    // Indexing has to happen before the session to the database is opened.
    if (indexDatabase)
    {
        DatabaseIndexer.IndexClusters(databasePath);
        DatabaseIndexer.IndexFeatures(databasePath);
    }

    // Factory method that creates the set of data access providers.
    var dataAccess = DataAccessFactory.CreateDataAccessProviders(databasePath, false);

    // If only the clusters were needed, one could instead connect to the database directly
    // and call FindAll on a cluster DAO.
    var allClusters = dataAccess.ClusterCache.FindAll();

    var loadMsFeatures = true;
    var loadMsMsFeatures = true;
    var loadRawData = false;

    // Dataset information keyed by dataset ID; only needed when raw data is wanted.
    var allDatasets = dataAccess.DatasetCache.FindAll();
    var datasetsById = new Dictionary<int, DatasetInformation>();
    foreach (var dataset in allDatasets)
    {
        datasetsById.Add(dataset.DatasetId, dataset);
    }

    foreach (var cluster in allClusters)
    {
        cluster.ReconstructUMCCluster(dataAccess, loadMsFeatures, loadMsMsFeatures);

        foreach (var feature in cluster.Features)
        {
            foreach (var msFeature in feature.Features)
            {
                foreach (var spectrumMetaData in msFeature.MSnSpectra)
                {
                    // This is the place to work with the MS/MS spectrum metadata.
                    // Reading the raw data is optional and needs the path to the raw file.
                    if (!loadRawData)
                    {
                        continue;
                    }

                    DatasetInformation info = null;
                    if (!datasetsById.TryGetValue(spectrumMetaData.GroupId, out info))
                    {
                        continue;
                    }

                    if (info.Raw == null)
                    {
                        continue;
                    }

                    // The reader acts as a bridge: the spectra can be read without this code
                    // referencing the vendor libraries, and without preloading every spectrum.
                    var rawPath = info.Raw.Path;
                    var rawReader = RawLoaderFactory.CreateFileReader(rawPath);
                    rawReader.AddDataFile(rawPath, spectrumMetaData.GroupId);

                    // Then grab the actual spectrum, restricted to MS level 2.
                    var summary = new ScanSummary();
                    var peaks = rawReader.GetRawSpectra(spectrumMetaData.Scan, spectrumMetaData.GroupId, 2, out summary);

                    // Then do what you want with the peaks.
                }
            }
        }
    }
}
/// <summary>
/// Loads the raw spectrum for a scan, optionally requiring a specific MS level.
/// </summary>
/// <param name="scan">Scan to load.</param>
/// <param name="groupId">Group ID passed on to the reader lookup.</param>
/// <param name="msLevel">Required MS level; a value of zero or less disables the level check.</param>
/// <param name="summary">Receives the scan summary produced by the reader lookup.</param>
/// <returns>
/// The raw spectrum, or null when no reader is available or the scan's MS level does not
/// match a positive <paramref name="msLevel"/>.
/// </returns>
public List<XYData> GetRawSpectra(int scan, int groupId, int msLevel, out ScanSummary summary)
{
    var reader = GetScanSummaryAndReader(scan, groupId, out summary);
    if (reader != null)
    {
        var levelDiffers = summary.MsLevel != msLevel;
        var rejected = levelDiffers && msLevel > 0;
        if (!rejected)
        {
            return LoadRawSpectra(reader, scan);
        }
    }

    return null;
}
/// <summary>
/// Forwards the request unchanged to the wrapped provider.
/// </summary>
/// <param name="scan">Scan to fetch.</param>
/// <param name="group">Group ID forwarded to the provider.</param>
/// <param name="scanLevel">Scan level forwarded to the provider.</param>
/// <param name="summary">Receives the summary produced by the provider.</param>
/// <param name="loadPeaks">Forwarded to the provider.</param>
/// <returns>Whatever the wrapped provider returns.</returns>
public MSSpectra GetSpectrum(int scan, int group, int scanLevel, out ScanSummary summary, bool loadPeaks)
{
    var spectrum = m_provider.GetSpectrum(scan, group, scanLevel, out summary, loadPeaks);
    return spectrum;
}
/// <summary>
/// Reads the header of a scan from the raw reader and converts it into a ScanSummary.
/// </summary>
/// <param name="scan">Scan whose header is read.</param>
/// <param name="rawReader">Reader the header is requested from.</param>
/// <param name="header">Receives the header returned by the reader.</param>
/// <returns>
/// A summary populated from the header. The collision type is CollisionType.Other unless the
/// header's collision mode is exactly "cid", "hcd", "etd", "ecd" or "hid".
/// </returns>
private ScanSummary GetScanSummary(int scan, XRawFileIO rawReader, out FinniganFileReaderBaseClass.udtScanHeaderInfoType header)
{
    rawReader.GetScanInfo(scan, out header);

    var result = new ScanSummary();
    result.MsLevel = header.MSLevel;
    result.Time = header.RetentionTime;
    result.Scan = scan;
    result.TotalIonCurrent = Convert.ToInt64(header.TotalIonCurrent);
    result.PrecursorMz = header.ParentIonMZ;
    result.CollisionType = CollisionType.Other;

    // Map the collision mode text onto the enum; anything unrecognized stays as Other.
    var mode = header.CollisionMode;
    if (mode == "cid")
    {
        result.CollisionType = CollisionType.Cid;
    }
    else if (mode == "hcd")
    {
        result.CollisionType = CollisionType.Hcd;
    }
    else if (mode == "etd")
    {
        result.CollisionType = CollisionType.Etd;
    }
    else if (mode == "ecd")
    {
        result.CollisionType = CollisionType.Ecd;
    }
    else if (mode == "hid")
    {
        result.CollisionType = CollisionType.Hid;
    }

    return result;
}
/// <summary>
/// Gets the raw data for a scan by calling the group-aware overload with a group ID of -1.
/// </summary>
/// <param name="scan">Scan to load.</param>
/// <param name="summary">Receives the scan summary produced by the overload.</param>
/// <returns>Whatever the group-aware overload returns.</returns>
public List<XYData> GetRawSpectra(int scan, out ScanSummary summary)
{
    const int groupId = -1;
    var peaks = GetRawSpectra(scan, groupId, out summary);
    return peaks;
}
/// <summary>
/// Returns the spectrum for a scan, preferring the entry held in the spectra map and
/// falling back to the underlying reader when the scan is not in the map.
/// </summary>
/// <param name="scan">Scan to fetch.</param>
/// <param name="group">Group ID used when peaks are loaded or the request is forwarded.</param>
/// <param name="scanLevel">Scan level used when peaks are loaded or the request is forwarded.</param>
/// <param name="summary">
/// Receives the mapped entry's metadata; when peaks are loaded it is overwritten by the
/// summary produced while loading them.
/// </param>
/// <param name="loadPeaks">When true, the peaks of a mapped entry are (re)loaded from the raw data.</param>
/// <returns>The mapped spectrum, or the reader's result when the scan is not mapped.</returns>
public MSSpectra GetSpectrum(int scan, int group, int scanLevel, out ScanSummary summary, bool loadPeaks)
{
    if (!m_spectraMap.ContainsKey(scan))
    {
        return m_reader.GetSpectrum(scan, group, scanLevel, out summary, loadPeaks);
    }

    var cached = m_spectraMap[scan];
    summary = cached.ScanMetaData;

    if (loadPeaks)
    {
        cached.Peaks = GetRawSpectra(scan, group, scanLevel, out summary);
    }

    return cached;
}
/// <summary>
/// Delegates to the wrapped provider without altering any argument.
/// </summary>
/// <param name="scan">Scan to fetch.</param>
/// <param name="group">Group ID passed through to the provider.</param>
/// <param name="scanLevel">Scan level passed through to the provider.</param>
/// <param name="summary">Receives the summary produced by the provider.</param>
/// <param name="loadPeaks">Passed through to the provider.</param>
/// <returns>The provider's result.</returns>
public MSSpectra GetSpectrum(int scan, int group, int scanLevel, out ScanSummary summary, bool loadPeaks)
{
    var result = m_provider.GetSpectrum(scan, group, scanLevel, out summary, loadPeaks);
    return result;
}