/// <summary>
/// Hands one scan to every registered scan consumer, decoding its binary arrays first
/// when binary parsing is switched on.
/// </summary>
/// <param name="scanAndTempProperties">The scan, its owning run and the still-encoded m/z and intensity arrays.</param>
private void ProcessScan(ScanAndTempProperties<TScan, TRun> scanAndTempProperties)
{
    // Both arrays stay null when binary parsing is off or the scan carries no such array.
    float[]? intensities = null;
    float[]? mzs = null;

    if (parseBinaryData)
    {
        intensities = scanAndTempProperties.Intensities?.ExtractFloatArray();
        mzs = scanAndTempProperties.Mzs?.ExtractFloatArray();
    }

    foreach (var consumer in scanConsumers)
    {
        consumer.Notify(scanAndTempProperties.Scan, mzs, intensities, scanAndTempProperties.Run);
    }
}
/// <summary>
/// Registers the scan with its run, then either processes it immediately or enqueues it,
/// depending on whether a queue has been configured.
/// </summary>
/// <param name="scanAndTempProperties">The scan together with its run and undecoded binary data.</param>
private void ProcessScanThreadedOrNot(ScanAndTempProperties<TScan, TRun> scanAndTempProperties)
{
    AddScanToRun(scanAndTempProperties.Scan, scanAndTempProperties.Run);

    // No queue configured: do the work inline on the calling thread.
    if (queue == null)
    {
        ProcessScan(scanAndTempProperties);
        return;
    }

    queue.Enqueue(scanAndTempProperties);
}
/// <summary>
/// Searches one spectrum for the product ions of every peptide in the run's iRT library and
/// records a <c>CandidateHit</c> on <c>run.IRTHits</c> for each peptide with enough matching transitions.
/// </summary>
/// <param name="scan">Scan being searched; its <c>ScanStartTime</c> becomes the hit's retention time.</param>
/// <param name="run">Run that supplies the analysis settings and iRT library and receives the hits.</param>
/// <param name="spectrum">Spectrum points of the scan.</param>
private static void FindIrtPeptideCandidates(ScanAndTempProperties scan, Run run, List<SpectrumPoint> spectrum)
{
    var settings = run.AnalysisSettings;
    var library = settings.IrtLibrary;

    foreach (Library.Peptide peptide in library.PeptideList.Values)
    {
        // Materialise once: the filter used to be re-run for Count() and again for the loop.
        var peptideTransitions = library.TransitionList.Values
            .OfType<Library.Transition>()
            .Where(x => x.PeptideId == peptide.Id)
            .ToList();

        var irtIntensities = new List<float>();
        var irtMzs = new List<float>();
        int transitionsLeftToSearch = peptideTransitions.Count;

        foreach (Library.Transition t in peptideTransitions)
        {
            // Give up early once the remaining transitions cannot lift us to the required minimum.
            if (irtIntensities.Count + transitionsLeftToSearch < settings.IrtMinPeptides)
            {
                break;
            }

            // Materialise once: the filter used to be re-run for Any(), Max() and First().
            var matches = spectrum
                .Where(x => x.Intensity > settings.IrtMinIntensity
                            && Math.Abs(x.Mz - t.ProductMz) < settings.IrtMassTolerance)
                .ToList();

            if (matches.Count > 0)
            {
                // Strict '>' keeps the first point among equal maxima, matching the
                // previous Max(...) followed by First(...) lookup.
                var strongest = matches[0];
                foreach (var candidate in matches)
                {
                    if (candidate.Intensity > strongest.Intensity)
                    {
                        strongest = candidate;
                    }
                }

                irtIntensities.Add(strongest.Intensity);
                irtMzs.Add(strongest.Mz);
            }

            transitionsLeftToSearch--;
        }

        if (irtIntensities.Count >= settings.IrtMinPeptides)
        {
            run.IRTHits.Add(new CandidateHit()
            {
                PeptideSequence = peptide.Sequence,
                Intensities = irtIntensities,
                ActualMzs = irtMzs,
                RetentionTime = scan.Scan.ScanStartTime,
                PrecursorTargetMz = peptide.AssociatedTransitions.First().PrecursorMz,
                ProductTargetMzs = peptide.AssociatedTransitions.Select(x => x.ProductMz).ToList()
            });
        }
    }
}
/// <summary>
/// Reads one <c>binaryDataArray</c> element: collects its compression, precision and array-kind
/// cvParams plus the base64 payload, and stores them on <paramref name="scan"/> as either the
/// intensity or the m/z array.
/// </summary>
/// <param name="reader">Reader positioned on the <c>binaryDataArray</c> start element. On return it is
/// positioned on the matching end element (or at end of input if that element never closes).</param>
/// <param name="scan">Receives the undecoded array in <c>Intensities</c> or <c>Mzs</c>.</param>
private static void GetBinaryData(XmlReader reader, ScanAndTempProperties<TScan, TRun> scan)
{
    string base64 = string.Empty;
    Bitness bitness = Bitness.NotSet;
    Compression compression = Compression.Uncompressed;
    bool isIntensityArray = false;
    bool isMzArray = false;
    bool binaryDataArrayRead = false;

    // Test the flag BEFORE reading. With the operands the other way round the reader was advanced
    // one node past </binaryDataArray>; in XML without whitespace between elements that node is
    // the next element's start tag, which the caller then never sees.
    while (!binaryDataArrayRead && reader.Read())
    {
        if (reader.IsStartElement())
        {
            if ("cvParam".Equals(reader.LocalName))
            {
                switch (reader.GetAttribute("accession"))
                {
                    case "MS:1000574":
                        if ("zlib compression".Equals(reader.GetAttribute("name")))
                        {
                            compression = Compression.Zlib;
                        }
                        break;
                    case "MS:1000521": // 32 bit float
                        bitness = Bitness.IEEE754FloatLittleEndian;
                        break;
                    case "MS:1000523": // 64 bit float
                        bitness = Bitness.IEEE754DoubleLittleEndian;
                        break;
                    case "MS:1000515": // intensity array
                        isIntensityArray = true;
                        break;
                    case "MS:1000514": // mz array
                        isMzArray = true;
                        break;
                }
            }
            else if ("binary".Equals(reader.LocalName))
            {
                // A self-closing <binary/> has no content; ReadStartElement would step into
                // whatever node follows it, so leave base64 empty instead.
                if (!reader.IsEmptyElement)
                {
                    reader.ReadStartElement();
                    base64 = reader.ReadContentAsString();
                }
            }
        }

        // Deliberately not an 'else': IsStartElement and the content read above can both leave
        // the reader on an end element, and the closing tag must not slip past unnoticed.
        if (reader.NodeType == XmlNodeType.EndElement && "binaryDataArray".Equals(reader.LocalName))
        {
            binaryDataArrayRead = true;
            Base64StringAndDecodingHints base64StringAndDecodingHints =
                new Base64StringAndDecodingHints(base64, compression, bitness);

            if (isIntensityArray)
            {
                scan.Intensities = base64StringAndDecodingHints;
            }
            else if (isMzArray)
            {
                scan.Mzs = base64StringAndDecodingHints;
            }
        }
    }
}
// Cycle-numbering state that has to survive from one spectrum to the next. These used to be
// locals of ReadSpectrum, so they were reset on every call and no cycle could ever be counted.
private object? cycleStateOwner;          // run the state below belongs to
private int runningCycle;                 // last cycle number handed out
private bool runningHasAtLeastOneMS1;     // true once an MS1 scan has been seen in this run
private double runningPreviousTargetMz;   // isolation window target m/z of the previous spectrum

/// <summary>
/// Reads one <c>spectrum</c> element into a new scan: cvParam metadata, the binary arrays and a
/// cycle number. The finished scan is passed to <c>ProcessScanThreadedOrNot</c>.
/// </summary>
/// <param name="reader">Reader positioned on the <c>spectrum</c> start element. On return it is
/// positioned on the matching end element (or at end of input if that element never closes).</param>
/// <param name="run">Run the scan belongs to; its <c>StartTime</c> and <c>LastScanTime</c> are updated.</param>
public void ReadSpectrum(XmlReader reader, TRun run)
{
    TScan scan = ScanFactory.CreateScan();

    // Start counting cycles afresh when a different run instance is being read.
    if (!ReferenceEquals(cycleStateOwner, run))
    {
        cycleStateOwner = run;
        runningCycle = 0;
        runningHasAtLeastOneMS1 = false;
        runningPreviousTargetMz = 0;
    }

    // For wiff-derived files the cycle number sits inside the id attribute, a space separated
    // key=value string such as "sample=1 period=1 cycle=1 experiment=1".
    // This has only been tested on Sciex converted data (Paul Brack 2019/04/03).
    bool cycleInfoInId = false;
    string sourceFileType = run.SourceFileTypes[0];
    if (sourceFileType.EndsWith("wiff", StringComparison.InvariantCultureIgnoreCase)
        || sourceFileType.EndsWith("scan", StringComparison.InvariantCultureIgnoreCase))
    {
        // FirstOrDefault/TryParse instead of Single/Parse: an id without a usable cycle token
        // falls back to counting cycles below rather than throwing.
        string? cycleToken = reader.GetAttribute("id")?.Split(' ').FirstOrDefault(x => x.Contains("cycle"));
        if (cycleToken != null
            && int.TryParse(cycleToken.Split('=').Last(), NumberStyles.Integer, CultureInfo.InvariantCulture, out int cycleFromId))
        {
            scan.Cycle = cycleFromId;
            // Some wiffs don't carry real cycle info (value 0), so check before trusting it.
            cycleInfoInId = cycleFromId != 0;
        }
    }

    bool spectrumRead = false;
    ScanAndTempProperties<TScan, TRun> scanAndTempProperties = new ScanAndTempProperties<TScan, TRun>(scan, run);

    // Test the flag BEFORE reading so the reader stops on </spectrum> rather than consuming the
    // node after it (which, in XML without inter-element whitespace, is the next <spectrum>).
    while (!spectrumRead && reader.Read())
    {
        if (reader.IsStartElement())
        {
            if (reader.LocalName == "cvParam")
            {
                switch (reader.GetAttribute("accession"))
                {
                    case "MS:1000511":
                        scan.MsLevel = int.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                    case "MS:1000285":
                        scan.TotalIonCurrent = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                    case "MS:1000016":
                        scan.ScanStartTime = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        run.StartTime = Math.Min(run.StartTime, scan.ScanStartTime);
                        // Technically this is the start time of the last scan, not its completion time.
                        run.LastScanTime = Math.Max(run.LastScanTime, scan.ScanStartTime);
                        break;
                    case "MS:1000829":
                        scan.IsolationWindowUpperOffset = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                    case "MS:1000828":
                        scan.IsolationWindowLowerOffset = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                    case "MS:1000827":
                        scan.IsolationWindowTargetMz = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                }
            }
            else if (reader.LocalName == "binaryDataArray")
            {
                GetBinaryData(reader, scanAndTempProperties);
            }

            // No explicit MS level yet: derive it from the referenced param group.
            if (scan.MsLevel == null && reader.LocalName == "referenceableParamGroupRef")
            {
                scan.MsLevel = reader.GetAttribute("ref") == SurveyScanReferenceableParamGroupId ? 1 : 2;
            }
        }
        else if (reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "spectrum")
        {
            if (!cycleInfoInId)
            {
                if (scan.MsLevel == 1)
                {
                    // Every MS1 scan opens a new cycle.
                    runningCycle++;
                    runningHasAtLeastOneMS1 = true;
                }
                else if (!runningHasAtLeastOneMS1 && runningPreviousTargetMz >= scan.IsolationWindowTargetMz)
                {
                    // No MS1 seen so far: a new cycle starts when the isolation window target
                    // stops increasing.
                    runningCycle++;
                }

                scan.Cycle = runningCycle;
            }

            runningPreviousTargetMz = scan.IsolationWindowTargetMz;
            ProcessScanThreadedOrNot(scanAndTempProperties);
            spectrumRead = true;
        }
    }
}
/// <summary>
/// Decodes the scan's base64 m/z and intensity arrays and derives the per-scan metrics from them:
/// the filtered spectrum, proportion of predicted singly charged ions, TIC fallback, isolation window
/// boundaries, density and base peak. The scan is then added to the run, the run's base peak list is
/// updated and, when requested, the iRT peptide search is run.
/// </summary>
/// <param name="scan">Scan plus its undecoded binary arrays and decoding hints.</param>
/// <param name="run">Run that owns the scan; its MissingScans and BasePeaks are updated here.</param>
/// <param name="threading">When true, the shared countdown event is signalled once this scan is finished.</param>
/// <param name="irt">When true, the spectrum is searched for iRT peptide candidates.</param>
private static void ParseBase64Data(ScanAndTempProperties scan, Run run, bool threading, bool irt)
{
    // NOTE(review): this method mutates run-level state (MissingScans, BasePeaks) and is handed the
    // 'threading' flag, but no synchronization is visible here - confirm callers serialise access.
    try
    {
        float[] intensities = ExtractFloatArray(scan.Base64IntensityArray, scan.IntensityZlibCompressed, scan.IntensityBitLength);
        float[] mzs = ExtractFloatArray(scan.Base64MzArray, scan.MzZlibCompressed, scan.MzBitLength);

        if (intensities.Length == 0)
        {
            intensities = FillZeroArray(intensities);
            mzs = FillZeroArray(mzs);
            logger.Debug("Empty binary array for a MS{0} scan in cycle number: {1}. The empty scans have been filled with zero values.", scan.Scan.MsLevel, scan.Scan.Cycle);
            run.MissingScans++;
        }

        var spectrum = intensities
            .Select((x, i) => new SpectrumPoint(x, mzs[i], (float)scan.Scan.ScanStartTime))
            .Where(x => x.Intensity >= run.AnalysisSettings.MinimumIntensity)
            .ToList();

        // Predicted singly charged proportion:
        // The theory is that an M and M+1 pair are singly charged, so we simply look for occurrences
        // where two ions are 1 m/z apart (+- the tolerance below). cusums accumulates, for every ion,
        // the distance to each following ion. Once an ion's cusum overshoots 1 + tolerance we stop
        // adding to it; if it lands inside the window we count it and stop adding to it.
        List<int> indexes = new List<int>();
        float[] cusums = new float[mzs.Length];
        int movingPoint = 0;
        double minimum = 1 - 0.001;
        double maximum = 1 + 0.001;

        for (int i = 1; i < mzs.Length; i++)
        {
            float distance = mzs[i] - mzs[i - 1];
            bool matchedWithLower = false;
            for (int ii = movingPoint; ii < i; ii++)
            {
                cusums[ii] += distance;
                if (cusums[ii] < minimum)
                {
                    continue;
                }
                else if (cusums[ii] > minimum && cusums[ii] < maximum)
                {
                    // Only the first match per ion is recorded, to minimise false positives where,
                    // for example, 351.14, 351.15, 352.14 would otherwise all three get chosen.
                    if (!matchedWithLower)
                    {
                        indexes.Add(i);
                        indexes.Add(movingPoint);
                    }
                    movingPoint += 1;
                    matchedWithLower = true;
                    continue;
                }
                else if (cusums[ii] > maximum)
                {
                    movingPoint += 1;
                }
            }
        }

        int distinct = indexes.Distinct().Count();
        int len = mzs.Length;
        scan.Scan.ProportionChargeStateOne = (double)distinct / (double)len;

        // No TIC reported in the file: fall back to the summed intensities and remember that we did.
        if (scan.Scan.TotalIonCurrent == 0)
        {
            scan.Scan.TotalIonCurrent = intensities.Sum();
            TicNotFound = true;
        }

        scan.Scan.Spectrum = new Spectrum() { SpectrumPoints = spectrum };
        scan.Scan.IsolationWindowLowerBoundary = scan.Scan.IsolationWindowTargetMz - scan.Scan.IsolationWindowLowerOffset;
        scan.Scan.IsolationWindowUpperBoundary = scan.Scan.IsolationWindowTargetMz + scan.Scan.IsolationWindowUpperOffset;
        scan.Scan.Density = spectrum.Count;

        // Base peak: find the maximum and its position once and reuse them below. Max() throws on an
        // empty array, exactly as before, so everything after this line may assume at least one point.
        float basepeakIntensity = intensities.Max();
        int maxIndex = Array.IndexOf(intensities, basepeakIntensity);
        scan.Scan.BasePeakIntensity = basepeakIntensity;
        scan.Scan.BasePeakMz = mzs[maxIndex];

        AddScanToRun(scan.Scan, run);

        // Extract info for base peak chromatograms.
        double mz = mzs[maxIndex];
        var matchingBasePeaks = run.BasePeaks
            .Where(x => Math.Abs(x.Mz - mz) < run.AnalysisSettings.MassTolerance)
            .ToList();

        if (matchingBasePeaks.Count == 0)
        {
            // A base peak with this m/z doesn't exist yet: add it.
            run.BasePeaks.Add(new BasePeak(mz, scan.Scan.ScanStartTime, basepeakIntensity));
        }
        else
        {
            // We do have an m/z match; now work out whether this scan falls within the RT tolerance
            // of an occurrence that was already recorded.
            foreach (BasePeak thisbp in matchingBasePeaks)
            {
                bool found = thisbp.BpkRTs.Any(rt => Math.Abs(rt - scan.Scan.ScanStartTime) < run.AnalysisSettings.RtTolerance);
                if (!found)
                {
                    // Considered to be a new instance of this base peak.
                    thisbp.BpkRTs.Add(scan.Scan.ScanStartTime);
                    thisbp.Intensities.Add(basepeakIntensity);
                }
            }
        }

        if (irt)
        {
            FindIrtPeptideCandidates(scan, run, spectrum);
        }
    }
    finally
    {
        // Signal even when parsing failed, so whoever waits on the countdown is not left waiting
        // for a scan that will never complete.
        if (threading)
        {
            cde.Signal();
        }
    }
}
/// <summary>
/// Reads one <c>binaryDataArray</c> element: collects its compression, precision and array-kind
/// cvParams plus the base64 payload, and stores them on <paramref name="scan"/> as either the
/// intensity or the m/z array. The payload's line, position and length are recorded on the scan.
/// </summary>
/// <param name="reader">Reader positioned on the <c>binaryDataArray</c> start element. On return it is
/// positioned on the matching end element (or at end of input if that element never closes).</param>
/// <param name="scan">Receives the base64 string, bit length and compression flag.</param>
private static void GetBinaryData(XmlReader reader, ScanAndTempProperties scan)
{
    string base64 = String.Empty;
    int bits = 0;
    bool intensityArray = false;
    bool mzArray = false;
    bool binaryDataArrayRead = false;
    bool IsZlibCompressed = false;

    // Test the flag BEFORE reading. With the operands the other way round the reader was advanced
    // one node past </binaryDataArray>; in XML without whitespace between elements that node is
    // the next element's start tag, which the caller then never sees.
    while (!binaryDataArrayRead && reader.Read())
    {
        if (reader.IsStartElement())
        {
            if (reader.LocalName == "cvParam")
            {
                switch (reader.GetAttribute("accession"))
                {
                    case "MS:1000574":
                        if (reader.GetAttribute("name") == "zlib compression")
                        {
                            IsZlibCompressed = true;
                        }
                        break;
                    case "MS:1000521": // 32 bit float
                        bits = 32;
                        break;
                    case "MS:1000523": // 64 bit float
                        bits = 64;
                        break;
                    case "MS:1000515": // intensity array
                        intensityArray = true;
                        break;
                    case "MS:1000514": // mz array
                        mzArray = true;
                        break;
                }
            }
            else if (reader.LocalName == "binary")
            {
                if (reader.IsEmptyElement)
                {
                    // Self-closing <binary/>: there is no payload. ReadStartElement would step into
                    // the following node and its whitespace would be picked up as "content".
                    scan.Scan.SpectrumXmlBase64Line = Xli.LineNumber;
                    scan.Scan.SpectrumXmlBase64LinePos = Xli.LinePosition;
                    scan.Scan.SpectrumXmlBase64Length = 0;
                }
                else
                {
                    reader.ReadStartElement();
                    // Record where the payload starts before consuming it.
                    scan.Scan.SpectrumXmlBase64Line = Xli.LineNumber;
                    scan.Scan.SpectrumXmlBase64LinePos = Xli.LinePosition;
                    base64 = reader.ReadContentAsString();
                    scan.Scan.SpectrumXmlBase64Length = base64.Length;
                }
            }
        }

        // Deliberately not an 'else': the reads above may leave the reader on an end element.
        if (reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "binaryDataArray")
        {
            binaryDataArrayRead = true;
            if (intensityArray)
            {
                scan.Base64IntensityArray = base64;
                scan.IntensityBitLength = bits;
                scan.IntensityZlibCompressed = IsZlibCompressed;
            }
            else if (mzArray)
            {
                scan.Base64MzArray = base64;
                scan.MzBitLength = bits;
                scan.MzZlibCompressed = IsZlibCompressed;
            }
        }
    }
}
/// <summary>
/// Reads one <c>spectrum</c> element into a new scan: cvParam metadata, the binary arrays and a
/// cycle number. The scan is then either parsed (inline or on the thread pool) or, when binary
/// parsing is off, added straight to the run.
/// </summary>
/// <param name="reader">Reader positioned on the <c>spectrum</c> start element. On return it is
/// positioned on the matching end element (or at end of input if that element never closes).</param>
/// <param name="run">Run the scan belongs to; its <c>StartTime</c> and <c>LastScanTime</c> are updated.</param>
/// <param name="irt">Passed through to <c>ParseBase64Data</c> to enable the iRT peptide search.</param>
public void ReadSpectrum(XmlReader reader, Run run, bool irt)
{
    ScanAndTempProperties scan = new ScanAndTempProperties(run.AnalysisSettings.CacheSpectraToDisk);

    // For wiff-derived files the cycle number sits inside the id attribute, a space separated
    // key=value string such as "sample=1 period=1 cycle=1 experiment=1".
    // This has only been tested on Sciex converted data (Paul Brack 2019/04/03).
    bool cycleInfoInId = false;
    string sourceFileType = run.SourceFileTypes[0];
    if (sourceFileType.EndsWith("wiff", StringComparison.InvariantCultureIgnoreCase)
        || sourceFileType.EndsWith("scan", StringComparison.InvariantCultureIgnoreCase))
    {
        // FirstOrDefault/TryParse instead of Single/Parse: an id without a usable cycle token
        // falls back to counting cycles below rather than throwing.
        string cycleToken = reader.GetAttribute("id")?.Split(' ').FirstOrDefault(x => x.Contains("cycle"));
        if (cycleToken != null
            && int.TryParse(cycleToken.Split('=').Last(), NumberStyles.Integer, CultureInfo.InvariantCulture, out int cycleFromId))
        {
            scan.Scan.Cycle = cycleFromId;
            // Some wiffs don't carry real cycle info (value 0), so check before trusting it.
            cycleInfoInId = cycleFromId != 0;
        }
    }

    bool spectrumRead = false;

    // Test the flag BEFORE reading so the reader stops on </spectrum> rather than consuming the
    // node after it (which, in XML without inter-element whitespace, is the next <spectrum>).
    while (!spectrumRead && reader.Read())
    {
        if (reader.IsStartElement())
        {
            if (reader.LocalName == "cvParam")
            {
                switch (reader.GetAttribute("accession"))
                {
                    case "MS:1000511":
                        scan.Scan.MsLevel = int.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                    case "MS:1000285":
                        scan.Scan.TotalIonCurrent = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                    case "MS:1000016":
                        scan.Scan.ScanStartTime = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        run.StartTime = Math.Min(run.StartTime, scan.Scan.ScanStartTime);
                        // Technically this is the start time of the last scan, not its completion time.
                        run.LastScanTime = Math.Max(run.LastScanTime, scan.Scan.ScanStartTime);
                        break;
                    case "MS:1000829":
                        scan.Scan.IsolationWindowUpperOffset = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                    case "MS:1000828":
                        scan.Scan.IsolationWindowLowerOffset = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                    case "MS:1000827":
                        scan.Scan.IsolationWindowTargetMz = double.Parse(reader.GetAttribute("value"), CultureInfo.InvariantCulture);
                        break;
                }
            }
            else if (reader.LocalName == "binaryDataArray")
            {
                GetBinaryData(reader, scan);
            }

            // No explicit MS level yet: derive it from the referenced param group.
            if (scan.Scan.MsLevel == null && reader.LocalName == "referenceableParamGroupRef")
            {
                scan.Scan.MsLevel = reader.GetAttribute("ref") == SurveyScanReferenceableParamGroupId ? 1 : 2;
            }
        }
        else if (reader.NodeType == XmlNodeType.EndElement && reader.LocalName == "spectrum")
        {
            if (!cycleInfoInId)
            {
                if (scan.Scan.MsLevel == 1)
                {
                    // Every MS1 scan opens a new cycle.
                    currentCycle++;
                    MS1 = true;
                }
                else if (!MS1 && !(previousTargetMz < scan.Scan.IsolationWindowTargetMz))
                {
                    // No MS1 seen so far: a new cycle starts when the isolation window target
                    // stops increasing.
                    currentCycle++;
                }

                scan.Scan.Cycle = currentCycle;
            }

            previousTargetMz = scan.Scan.IsolationWindowTargetMz;

            if (!ParseBinaryData)
            {
                AddScanToRun(scan.Scan, run);
            }
            else if (!Threading)
            {
                ParseBase64Data(scan, run, Threading, irt);
            }
            else
            {
                cde.AddCount();

                // Caching spectra to disk exists largely to restrict RAM use, so don't let the
                // backlog of queued scans grow too big: wait until the workers have caught up.
                if (run.AnalysisSettings.CacheSpectraToDisk)
                {
                    while (cde.CurrentCount > MaxQueueSize)
                    {
                        Thread.Sleep(1000);
                    }
                }

                ThreadPool.QueueUserWorkItem(state => ParseBase64Data(scan, run, Threading, irt));
            }

            spectrumRead = true;
        }
    }
}