/// <summary>
/// Resets this cluster's charge and scan bounds, allocates an empty charge-by-scan grid of
/// envelope slots, and stores each supplied envelope in the slot for its charge and scan.
/// </summary>
/// <param name="minCharge">Lowest charge of the grid (row 0); assigned to <c>MinCharge</c>.</param>
/// <param name="maxCharge">Highest charge of the grid; assigned to <c>MaxCharge</c>.</param>
/// <param name="minScanNum">Scan number of the first grid column; assigned to <c>MinScanNum</c>.</param>
/// <param name="maxScanNum">Scan number of the last grid column; assigned to <c>MaxScanNum</c>.</param>
/// <param name="envelopes">
/// Envelopes to place into the grid. When null the grid stays empty. Envelopes whose charge or
/// scan column lies outside the grid are ignored.
/// </param>
public void AddEnvelopes(int minCharge, int maxCharge, int minScanNum, int maxScanNum, IList<ObservedIsotopeEnvelope> envelopes = null)
{
    // Columns are MS1 scan indices as reported by the run, not raw scan numbers.
    var scanToColumn = _run.GetMs1ScanNumToIndex();
    var firstColumn = scanToColumn[minScanNum];
    var lastColumn = scanToColumn[maxScanNum];
    var rowCount = maxCharge - minCharge + 1;
    var columnCount = lastColumn - firstColumn + 1;

    MinCharge = minCharge;
    MaxCharge = maxCharge;
    MinScanNum = minScanNum;
    MaxScanNum = maxScanNum;

    Envelopes = new ObservedIsotopeEnvelope[rowCount][];
    for (var row = 0; row < rowCount; row++)
    {
        Envelopes[row] = new ObservedIsotopeEnvelope[columnCount];
    }

    if (envelopes != null)
    {
        foreach (var item in envelopes)
        {
            var row = item.Charge - MinCharge;
            var column = scanToColumn[item.ScanNum] - firstColumn;

            var rowInGrid = row >= 0 && row < rowCount;
            var columnInGrid = column >= 0 && column < columnCount;
            if (rowInGrid && columnInGrid)
            {
                Envelopes[row][column] = item;
            }
        }
    }
}
/// <summary>
/// Checks whether the charge assigned to an observed isotope envelope is plausible by looking,
/// among the spectrum peaks spanned by the envelope, for peak spacings that fit a higher charge.
/// </summary>
/// <param name="envelope">
/// Envelope to check. Its charge, number of peaks and the spectrum indices of its lowest and
/// highest m/z peaks are read; peaks are looked up in <c>Spectrum.Peaks</c>.
/// </param>
/// <returns>
/// <c>true</c> when the charge is above 20, when the envelope's own peaks make up more than 70%
/// of the peaks in its span, or when no candidate higher charge accumulates enough matching
/// gaps. <c>false</c> when the span holds fewer than 10 peaks or a candidate charge does
/// accumulate enough gaps.
/// </returns>
public bool CheckChargeState(ObservedIsotopeEnvelope envelope)
{
    var checkCharge = envelope.Charge;
    if (checkCharge > 20)
    {
        return true; // high charge (> +20), just pass
    }

    var peakStartIndex = envelope.MinMzPeak.IndexInSpectrum;
    var peakEndIndex = envelope.MaxMzPeak.IndexInSpectrum;
    var nPeaks = peakEndIndex - peakStartIndex + 1;

    if (nPeaks < 10)
    {
        return false;
    }

    if (envelope.NumberOfPeaks > nPeaks * 0.7)
    {
        return true;
    }

    var tolerance = new Tolerance(5);
    var threshold = nPeaks * 0.5;
    var mzTol = tolerance.GetToleranceAsTh(Spectrum.Peaks[peakStartIndex].Mz);

    // Candidate charges range from roughly 2x to 5x the assigned charge, bounded to [4, 60].
    var minCheckCharge = Math.Max(checkCharge * 2 - 1, 4);
    var maxCheckCharge = Math.Min(checkCharge * 5 + 1, 60);
    // Largest peak spacing that can still belong to the smallest candidate charge.
    var maxDeltaMz = Constants.C13MinusC12 / minCheckCharge + mzTol;
    var nChargeGaps = new int[maxCheckCharge - minCheckCharge + 1];

    for (var i = peakStartIndex; i <= peakEndIndex; i++)
    {
        for (var j = i + 1; j <= peakEndIndex; j++)
        {
            var deltaMz = Spectrum.Peaks[j].Mz - Spectrum.Peaks[i].Mz;
            if (deltaMz > maxDeltaMz)
            {
                break;
            }

            // Clamp the charges consistent with this spacing to the checked range up front.
            // The raw upper bound 1 / (deltaMz - mzTol) is +Infinity (or enormous) when the
            // spacing equals (or barely exceeds) the tolerance; iterating up to it would never
            // finish. Only charges inside [minCheckCharge, maxCheckCharge] were ever counted,
            // so clamping does not change the tallies.
            var firstCharge = Math.Max(Math.Round(1 / (deltaMz + mzTol)), minCheckCharge);
            var lastCharge = Math.Min(Math.Round(1 / (deltaMz - mzTol)), maxCheckCharge);

            for (var c = firstCharge; c <= lastCharge; c++)
            {
                var k = (int)c - minCheckCharge;
                nChargeGaps[k]++;

                // gaps + 1 = number of peaks implied by that many gaps
                if (nChargeGaps[k] + 1 > threshold && nChargeGaps[k] + 1 > 1.25 * envelope.NumberOfPeaks)
                {
                    return false;
                }
            }
        }
    }

    return true;
}
/// <summary>
/// Replaces the cluster's envelopes with randomly drawn ones for every charge in the requested
/// range and every MS1 column between <c>MinScanNum</c> and <c>MaxScanNum</c>, then rescores the
/// cluster with the p-value check switched off.
/// </summary>
/// <param name="ms1Spectra">MS1 spectra, indexed by MS1 column.</param>
/// <param name="targetMinCharge">Lowest charge to populate; becomes <c>MinCharge</c>.</param>
/// <param name="targetMaxCharge">Highest charge to populate; becomes <c>MaxCharge</c>.</param>
public void UpdateWithDecoyScore(List<Ms1Spectrum> ms1Spectra, int targetMinCharge, int targetMaxCharge)
{
    var scanToColumn = _run.GetMs1ScanNumToIndex();
    var columnToScan = _run.GetMs1ScanVector();
    var firstColumn = scanToColumn[MinScanNum];
    var lastColumn = scanToColumn[MaxScanNum];

    MinCharge = targetMinCharge;
    MaxCharge = targetMaxCharge;

    var random = new Random();
    var binning = new MzComparerWithBinning(28);
    var topIsotope = TheoreticalEnvelope.IndexOrderByRanking[0];

    var rowCount = MaxCharge - MinCharge + 1;
    var columnCount = lastColumn - firstColumn + 1;

    Envelopes = new ObservedIsotopeEnvelope[rowCount][];
    for (var row = 0; row < rowCount; row++)
    {
        Envelopes[row] = new ObservedIsotopeEnvelope[columnCount];
    }

    for (var charge = targetMinCharge; charge <= targetMaxCharge; charge++)
    {
        var topIsotopeMz = TheoreticalEnvelope.GetIsotopeMz(charge, topIsotope);

        // Charges whose most abundant isotope falls outside the run's MS1 m/z range keep empty rows.
        var outsideMs1Range = _run.MaxMs1Mz < topIsotopeMz || topIsotopeMz < _run.MinMs1Mz;
        if (outsideMs1Range)
        {
            continue;
        }

        for (var column = firstColumn; column <= lastColumn; column++)
        {
            var spectrum = ms1Spectra[column];
            var window = spectrum.GetLocalMzWindow(topIsotopeMz);
            var binCount = binning.GetBinNumber(window.MaxMz) - binning.GetBinNumber(window.MinMz) + 1;

            // One random draw per isotope slot over the window's m/z bins; a draw only yields a
            // peak when it lands below the window's peak count, otherwise the slot stays null.
            var decoyPeaks = new Ms1Peak[TheoreticalEnvelope.Size];
            for (var slot = 0; slot < decoyPeaks.Length; slot++)
            {
                var draw = random.Next(0, binCount);
                if (draw < window.PeakCount)
                {
                    decoyPeaks[slot] = (Ms1Peak)spectrum.Peaks[draw + window.PeakStartIndex];
                }
            }

            var decoy = new ObservedIsotopeEnvelope(Mass, charge, columnToScan[column], decoyPeaks, TheoreticalEnvelope);
            Envelopes[charge - MinCharge][column - firstColumn] = decoy;
        }
    }

    UpdateScore(ms1Spectra, false);
}
/// <summary>
/// Widens the cluster's scan-number and charge bounds so that they include the given envelope.
/// A bound that is currently negative is overwritten unconditionally (negative presumably means
/// "not set yet" - confirm against the constructor).
/// </summary>
/// <param name="envelope">Envelope whose <c>ScanNum</c> and <c>Charge</c> must fall inside the bounds.</param>
internal void Expand(ObservedIsotopeEnvelope envelope)
{
    var scanNum = envelope.ScanNum;
    var charge = envelope.Charge;

    var raisesMaxScan = MaxScanNum < 0 || scanNum > MaxScanNum;
    if (raisesMaxScan)
    {
        MaxScanNum = scanNum;
    }

    var lowersMinScan = MinScanNum < 0 || scanNum < MinScanNum;
    if (lowersMinScan)
    {
        MinScanNum = scanNum;
    }

    var raisesMaxCharge = MaxCharge < 0 || charge > MaxCharge;
    if (raisesMaxCharge)
    {
        MaxCharge = charge;
    }

    var lowersMinCharge = MinCharge < 0 || charge < MinCharge;
    if (lowersMinCharge)
    {
        MinCharge = charge;
    }
}
/// <summary>
/// Decides whether an envelope's assigned charge is believable, by counting peak-to-peak
/// spacings inside the envelope's m/z span that would fit a higher charge state.
/// </summary>
/// <param name="envelope">
/// Envelope under test; its charge, peak count and the spectrum indices of its first and last
/// peak are used, and peaks are read from <c>Spectrum.Peaks</c>.
/// </param>
/// <returns>
/// <c>true</c> if the charge exceeds 20, if the envelope accounts for more than 70% of the peaks
/// in its span, or if no higher candidate charge gathers enough evidence; <c>false</c> if the
/// span has fewer than 10 peaks or a higher candidate charge does gather enough evidence.
/// </returns>
public bool CheckChargeState(ObservedIsotopeEnvelope envelope)
{
    var checkCharge = envelope.Charge;
    if (checkCharge > 20) return true; // high charge (> +20), just pass

    var peakStartIndex = envelope.MinMzPeak.IndexInSpectrum;
    var peakEndIndex = envelope.MaxMzPeak.IndexInSpectrum;
    var nPeaks = peakEndIndex - peakStartIndex + 1;

    if (nPeaks < 10) return false;
    if (envelope.NumberOfPeaks > nPeaks * 0.7) return true;

    var tolerance = new Tolerance(5);
    var threshold = nPeaks * 0.5;
    var mzTol = tolerance.GetToleranceAsTh(Spectrum.Peaks[peakStartIndex].Mz);

    // Candidate charges: about 2x to 5x the assigned charge, bounded to [4, 60].
    var minCheckCharge = Math.Max(checkCharge * 2 - 1, 4);
    var maxCheckCharge = Math.Min(checkCharge * 5 + 1, 60);
    // Spacings wider than this cannot belong to any candidate charge.
    var maxDeltaMz = Constants.C13MinusC12 / minCheckCharge + mzTol;
    var nChargeGaps = new int[maxCheckCharge - minCheckCharge + 1];

    for (var i = peakStartIndex; i <= peakEndIndex; i++)
    {
        for (var j = i + 1; j <= peakEndIndex; j++)
        {
            var deltaMz = Spectrum.Peaks[j].Mz - Spectrum.Peaks[i].Mz;
            if (deltaMz > maxDeltaMz) break;

            // Peaks closer than the tolerance carry no usable spacing. Without this guard a
            // spacing exactly equal to the tolerance makes the upper bound below +Infinity and
            // the charge loop never ends; spacings below the tolerance produced no iterations
            // anyway (negative upper bound).
            if (deltaMz <= mzTol) continue;

            var upperCharge = Math.Round(1 / (deltaMz - mzTol));
            for (var c = Math.Round(1 / (deltaMz + mzTol)); c <= upperCharge; c++)
            {
                if (c < minCheckCharge) continue;
                // Stop instead of skipping: the upper bound can be huge when the spacing is
                // only marginally above the tolerance.
                if (c > maxCheckCharge) break;

                var k = (int)c - minCheckCharge;
                nChargeGaps[k]++;

                // gaps + 1 = number of peaks implied by that many gaps
                if (nChargeGaps[k] + 1 > threshold && nChargeGaps[k] + 1 > 1.25 * envelope.NumberOfPeaks) return false;
            }
        }
    }

    return true;
}
/// <summary>
/// Re-initialises the cluster's envelope grid for the given charge and scan range and fills it
/// with the supplied envelopes.
/// </summary>
/// <param name="minCharge">Charge mapped to grid row 0; stored in <c>MinCharge</c>.</param>
/// <param name="maxCharge">Charge mapped to the last grid row; stored in <c>MaxCharge</c>.</param>
/// <param name="minScanNum">Scan number mapped to grid column 0; stored in <c>MinScanNum</c>.</param>
/// <param name="maxScanNum">Scan number mapped to the last grid column; stored in <c>MaxScanNum</c>.</param>
/// <param name="envelopes">
/// Optional envelopes to store. Entries falling outside the grid are skipped; null leaves the
/// grid empty.
/// </param>
public void AddEnvelopes(int minCharge, int maxCharge, int minScanNum, int maxScanNum, IList<ObservedIsotopeEnvelope> envelopes = null)
{
    var indexOfScan = _run.GetMs1ScanNumToIndex();
    var columnOffset = indexOfScan[minScanNum];
    var columnEnd = indexOfScan[maxScanNum];
    var chargeSpan = maxCharge - minCharge + 1;
    var scanSpan = columnEnd - columnOffset + 1;

    MinCharge = minCharge;
    MaxCharge = maxCharge;
    MinScanNum = minScanNum;
    MaxScanNum = maxScanNum;

    // One row per charge, one column per MS1 scan index in range; all slots start out null.
    Envelopes = new ObservedIsotopeEnvelope[chargeSpan][];
    for (var r = 0; r < chargeSpan; ++r)
    {
        Envelopes[r] = new ObservedIsotopeEnvelope[scanSpan];
    }

    if (envelopes == null)
    {
        return;
    }

    foreach (var candidate in envelopes)
    {
        var r = candidate.Charge - MinCharge;
        var c = indexOfScan[candidate.ScanNum] - columnOffset;

        var insideGrid = 0 <= r && r < chargeSpan && 0 <= c && c < scanSpan;
        if (!insideGrid)
        {
            continue;
        }

        Envelopes[r][c] = candidate;
    }
}
/// <summary>
/// Creates a cluster from a single observed envelope by delegating to another constructor
/// overload of this class.
/// </summary>
/// <param name="run">Run passed through unchanged to the other overload.</param>
/// <param name="observedEnvelope">
/// Envelope whose <c>TheoreticalEnvelope</c>, <c>MonoMass</c>, <c>Charge</c>,
/// <c>RepresentativePeak.Mz</c>, <c>ScanNum</c> and <c>Abundance</c> are forwarded, in that
/// order. Must not be null and must have a non-null <c>RepresentativePeak</c>, since both are
/// dereferenced here.
/// </param>
public LcMsPeakCluster(LcMsRun run, ObservedIsotopeEnvelope observedEnvelope) : this(run, observedEnvelope.TheoreticalEnvelope, observedEnvelope.MonoMass, observedEnvelope.Charge, observedEnvelope.RepresentativePeak.Mz, observedEnvelope.ScanNum, observedEnvelope.Abundance) { }
/// <summary>
/// Builds a peak cluster from the feature-matrix cells in the given row (charge) and column
/// (MS1 scan index) rectangle, re-extracting isotope peaks for cells that are empty, that sit at
/// a different mass than <paramref name="targetMass"/>, or for every cell when requested.
/// </summary>
/// <param name="targetMass">Monoisotopic mass to collect peaks for.</param>
/// <param name="minRow">First matrix row (charge minus <c>_targetMinCharge</c>), inclusive.</param>
/// <param name="maxRow">Last matrix row, inclusive.</param>
/// <param name="minCol">First MS1 column, inclusive.</param>
/// <param name="maxCol">Last MS1 column, inclusive.</param>
/// <param name="reCollectAllPeaks">When true every cell is reset and re-extracted.</param>
/// <returns>
/// The cluster seeded with the accepted envelope of lowest Bhattacharyya distance that has its
/// most abundant isotope peak, holding all accepted envelopes; <c>null</c> when no such
/// envelope exists.
/// </returns>
private LcMsPeakCluster CollectLcMsPeaks(double targetMass, int minRow, int maxRow, int minCol, int maxCol, bool reCollectAllPeaks = false)
{
    var ms1ScanNums = Run.GetMs1ScanVector();
    var envelopes = new List<ObservedIsotopeEnvelope>();
    var bestBcDist = 100d;
    ObservedIsotopeEnvelope bestEnvelope = null;

    var mostAbuInternalIndex = _theoreticalEnvelope.IndexOrderByRanking[0];
    var tolerance = new Tolerance(Comparer.Ppm * 0.5);
    var massTol = tolerance.GetToleranceAsTh(targetMass);
    var nPeaksCutoff = NumberOfPeaksCutoff;
    var bcCutoff = GetSeedBcDistThreshold();
    var corrCutoff = GetSeedCorrThreshold();

    // Cells are always addressed as _featureMatrix[i][j] rather than through a local copy, so
    // the writes below reach the matrix even if the cell type turns out to be a struct.
    for (var i = minRow; i <= maxRow; i++)
    {
        var charge = i + _targetMinCharge;

        for (var j = minCol; j <= maxCol; j++)
        {
            if (reCollectAllPeaks)
            {
                _featureMatrix[i][j].Init();
            }

            var needsCollection = reCollectAllPeaks
                                  || !_featureMatrix[i][j].Exist
                                  || Math.Abs(_featureMatrix[i][j].AccurateMass - targetMass) > massTol;
            if (needsCollection)
            {
                var peaks = Ms1Spectra[j].GetAllIsotopePeaks(targetMass, charge, _theoreticalEnvelope, tolerance);

                // Any() stops at the first hit; the previous Count(...) > 0 scanned every slot.
                if (peaks.Any(p => p != null))
                {
                    _featureMatrix[i][j].DivergenceDist = _theoreticalEnvelope.GetBhattacharyyaDistance(peaks);
                    _featureMatrix[i][j].AccurateMass = targetMass;
                    _featureMatrix[i][j].CorrelationCoeff = _theoreticalEnvelope.GetPearsonCorrelation(peaks);
                    Array.Copy(peaks, _featureMatrix[i][j].EnvelopePeaks, peaks.Length);
                }
            }

            if (!_featureMatrix[i][j].Exist) continue;
            if (_featureMatrix[i][j].CountActivePeaks < nPeaksCutoff) continue;

            // exclude outliers: rejected only when BOTH similarity measures miss their cutoff
            if (_featureMatrix[i][j].DivergenceDist > bcCutoff && _featureMatrix[i][j].CorrelationCoeff < corrCutoff) continue;

            var envelope = new ObservedIsotopeEnvelope(_featureMatrix[i][j].AccurateMass, charge, ms1ScanNums[j], _featureMatrix[i][j].EnvelopePeaks, _theoreticalEnvelope);
            envelopes.Add(envelope);

            // The representative envelope must contain the most abundant isotope peak.
            if (_featureMatrix[i][j].EnvelopePeaks[mostAbuInternalIndex] != null && _featureMatrix[i][j].DivergenceDist < bestBcDist)
            {
                bestBcDist = _featureMatrix[i][j].DivergenceDist;
                bestEnvelope = envelope;
            }
        }
    }

    if (bestEnvelope == null)
    {
        return null;
    }

    var cluster = new LcMsPeakCluster(Run, bestEnvelope);
    cluster.AddEnvelopes(minRow + _targetMinCharge, maxRow + _targetMinCharge, ms1ScanNums[minCol], ms1ScanNums[maxCol], envelopes);
    return cluster;
}
/// <summary>
/// Finds peak clusters for the mass bin <paramref name="binNumber"/>: rebuilds the feature
/// matrix for the bin's average mass, then grows a cluster around each seed envelope by
/// absorbing neighbouring matrix cells, refines it via <c>GetLcMsPeakCluster</c>, and keeps the
/// refined cluster when it passes the scorer (or when no scorer is configured).
/// </summary>
/// <param name="binNumber">Bin number understood by <c>Comparer</c>; its average m/z value is used as the target mass.</param>
/// <returns>The accepted clusters; empty when fewer than two rows or no columns were observed.</returns>
private IList<LcMsPeakCluster> GetLcMs1PeakClusters(int binNumber)
{
    // Maximum row (charge) distance from a cell within which neighbours are examined.
    const int chargeNeighborGap = 4;
    var targetMass = Comparer.GetMzAverage(binNumber);
    BuildFeatureMatrix(targetMass); // should be called first
    var clusters = new List<LcMsPeakCluster>();
    // todo : bottom up dataset??
    if (_rows.Length < 2 || _cols.Length < 1) return clusters;

    // tempEnvelope accumulates the summed envelope of the growing cluster;
    // tempEnvelope2 is a snapshot used to roll back a rejected addition.
    var tempEnvelope = new double[_theoreticalEnvelope.Size];
    var tempEnvelope2 = new double[_theoreticalEnvelope.Size];
    var ms1ScanNums = Run.GetMs1ScanVector();
    var ms1ScanNumToIndex = Run.GetMs1ScanNumToIndex();
    var mostAbuInternalIndex = _theoreticalEnvelope.IndexOrderByRanking[0];
    var tolerance = new Tolerance(Comparer.Ppm*0.5);

    // Seeds are visited in ascending key order (BuildFeatureMatrix stores the Bhattacharyya
    // distance as the key, so the best-matching seeds come first).
    foreach (var seed in _seedEnvelopes.OrderBy(s=>s.Key).Select(s=> s.Value))
    {
        var row = seed.Charge - _targetMinCharge;
        var col = ms1ScanNumToIndex[seed.ScanNum];
        // Skip seeds whose cell has already been claimed by an earlier cluster.
        if (_featureMatrix[row][col].CheckedOutFlag) continue;

        var mostAbuMz = _theoreticalEnvelope.GetIsotopeMz(seed.Charge, mostAbuInternalIndex);
        var seedLocalWin = Ms1Spectra[col].GetLocalMzWindow(mostAbuMz);
        var poissonPvalue = seedLocalWin.GetPoissonTestPvalue(_featureMatrix[row][col].EnvelopePeaks, _theoreticalEnvelope.Size);
        var rankSumPvalue = seedLocalWin.GetRankSumTestPvalue(_featureMatrix[row][col].EnvelopePeaks, _theoreticalEnvelope.Size);
        // A seed needs to pass at least one of the two significance tests at the 0.01 level.
        var goodEnvelope = (rankSumPvalue < 0.01 || poissonPvalue < 0.01);
        if (!goodEnvelope) continue;

        var chargeCheck = CorrectChargeState(seed, Ms1Spectra[col]);
        if (!chargeCheck) continue;

        var seedMass = _featureMatrix[row][col].AccurateMass;
        var massTol = tolerance.GetToleranceAsTh(seedMass);
        var newCluster = new LcMsPeakCluster(Run, seed);
        Array.Clear(tempEnvelope, 0, tempEnvelope.Length);
        seed.Peaks.SumEnvelopeTo(tempEnvelope);

        // Breadth-first growth: every accepted envelope is queued so that its own
        // neighbourhood gets examined as well.
        var neighbors = new Queue<ObservedIsotopeEnvelope>();
        neighbors.Enqueue(seed); // pick a seed
        _featureMatrix[row][col].CheckedOutFlag = true;
        // Similarity of the summed cluster envelope to the theoretical one, updated on each acceptance.
        var summedBcDist = _featureMatrix[row][col].DivergenceDist;
        var summedCorr = _featureMatrix[row][col].CorrelationCoeff;

        while (neighbors.Count > 0)
        {
            var cell = neighbors.Dequeue();
            var charge = cell.Charge;
            // Row window around the current cell, clipped to the observed rows.
            var minRw = (int)Math.Max(charge - _targetMinCharge - chargeNeighborGap, _rows.First());
            var maxRw = (int)Math.Min(charge - _targetMinCharge + chargeNeighborGap, _rows.Last());
            var currCol = ms1ScanNumToIndex[cell.ScanNum];

            // k = 0..4 visits column offsets 0, +1, +2, -1, -2 in that order.
            for (var k = 0; k < 5; k++)
            {
                var j = currCol;
                if (k < 3) j += k;
                else j -= (k - 2);

                if (j < _cols.First() || j > _cols.Last()) continue;

                for (var i = minRw; i <= maxRw; i++)
                {
                    if (_featureMatrix[i][j].CheckedOutFlag) continue;
                    // Written as !(x > 0) so that NaN masses are skipped too.
                    if (!(_featureMatrix[i][j].AccurateMass > 0)) continue;
                    if (Math.Abs(seedMass - _featureMatrix[i][j].AccurateMass) > massTol) continue;

                    // Tentatively add the cell's peaks to the running sum, keeping a snapshot for rollback.
                    Array.Copy(tempEnvelope, tempEnvelope2, tempEnvelope2.Length);
                    _featureMatrix[i][j].EnvelopePeaks.SumEnvelopeTo(tempEnvelope);
                    var newDivergence = _theoreticalEnvelope.GetBhattacharyyaDistance(tempEnvelope);
                    var newCorrelation = _theoreticalEnvelope.GetPearsonCorrelation(tempEnvelope);

                    // Accept when the cell is good on its own or when it improves the summed envelope.
                    if (_featureMatrix[i][j].DivergenceDist < 0.02 ||_featureMatrix[i][j].CorrelationCoeff > 0.7 || newDivergence < summedBcDist || newCorrelation > summedCorr)
                    {
                        var envelope = new ObservedIsotopeEnvelope(_featureMatrix[i][j].AccurateMass, i + _targetMinCharge, ms1ScanNums[j], _featureMatrix[i][j].EnvelopePeaks, _theoreticalEnvelope);
                        neighbors.Enqueue(envelope);
                        newCluster.Expand(envelope);
                        _featureMatrix[i][j].CheckedOutFlag = true;
                        summedBcDist = newDivergence;
                        summedCorr = newCorrelation;
                    }
                    else
                    {
                        // Rejected: restore the running sum from the snapshot.
                        Array.Copy(tempEnvelope2, tempEnvelope, tempEnvelope.Length);
                    }
                }
            }
        }

        LcMsPeakCluster refinedCluster = null;
        if (summedCorr > 0.5 || summedBcDist < 0.15)
        {
            // re-update check-out map
            // (flags over the grown region are cleared before the refinement call)
            SetCheckOutFlag(newCluster.MinCharge - _targetMinCharge, newCluster.MaxCharge - _targetMinCharge, ms1ScanNumToIndex[newCluster.MinScanNum], ms1ScanNumToIndex[newCluster.MaxScanNum], false);
            refinedCluster = GetLcMsPeakCluster(newCluster.RepresentativeMass, newCluster.RepresentativeCharge, newCluster.MinScanNum, newCluster.MaxScanNum, true);
        }

        // Without a scorer every refined cluster is kept; with one, the cluster must be flagged
        // good enough and reach the scorer's threshold.
        if (refinedCluster != null && (_scorer == null || (_scorer != null && refinedCluster.GoodEnougth && refinedCluster.Score >= _scorer.ScoreThreshold)))
        {
            // Accepted: check out all observed rows over the refined cluster's scan range.
            SetCheckOutFlag(_rows.First(), _rows.Last(), ms1ScanNumToIndex[refinedCluster.MinScanNum], ms1ScanNumToIndex[refinedCluster.MaxScanNum], true);
            clusters.Add(refinedCluster);
        }
        else
        {
            // Rejected: check out only the region that was grown, so it is not retried.
            SetCheckOutFlag(newCluster.MinCharge - _targetMinCharge, newCluster.MaxCharge - _targetMinCharge, ms1ScanNumToIndex[newCluster.MinScanNum], ms1ScanNumToIndex[newCluster.MaxScanNum], true);
        }
    }

    return clusters;
}
//public const double SNRthreshold = 1.4826;

/// <summary>
/// Rebuilds the feature matrix for <paramref name="targetMass"/>. Rows (charges) are processed
/// in parallel: MS1 peaks are assigned to the isotope slots of each cell, cells lacking the most
/// abundant isotope borrow a mass from nearby columns, envelope similarity scores are computed,
/// and seed envelopes are collected into <c>_seedEnvelopes</c>. Finally <c>_rows</c> and
/// <c>_cols</c> are replaced by the indices of rows and columns that hold at least one cell with
/// enough active peaks.
/// </summary>
/// <param name="targetMass">Monoisotopic mass whose bin (per <c>Comparer</c>) is scanned.</param>
private void BuildFeatureMatrix(double targetMass)
{
    InitFeatureMatrix();
    SetTargetMass(targetMass);

    // Plain bool arrays instead of BitArray: the flags are written from several threads inside
    // Parallel.ForEach. BitArray packs 32 flags into one int and updates it with a non-atomic
    // read-modify-write, so concurrent writers could lose each other's bits. Each bool element
    // is written independently, and every writer stores the same value (true).
    var observedRows = new bool[NRows];
    var observedCols = new bool[NColumns];

    var mostAbuInternalIdx = _theoreticalEnvelope.IndexOrderByRanking[0];

    // Neighbourhood (in columns) used to infer a mass for cells without a most abundant peak:
    // 0.3% of the elution length, bounded to [0.5, 5.0] time units, and at least 5 columns.
    var totalElutionLength = Run.GetElutionTime(Run.MaxLcScan);
    var elutionSamplingHalfLen = Math.Max(Math.Min(totalElutionLength * 0.003, 5.0), 0.5);
    var neighborHalfColumns = (int) Math.Max((elutionSamplingHalfLen/totalElutionLength)*NColumns, 5);

    var targetMassBinNum = Comparer.GetBinNumber(targetMass);
    var tolerance = new Tolerance(Comparer.Ppm*0.5);

    // _ms1PeakList is binary-searched by m/z below, so it is expected to be sorted by m/z.
    var minMs1Mz = _ms1PeakList.First().Mz;
    var maxMs1Mz = _ms1PeakList.Last().Mz;

    var nPeaksCutoff = NumberOfPeaksCutoff;
    var bcSeedCutoff = GetSeedBcDistThreshold();
    var corrSeedCutoff = GetSeedCorrThreshold();

    var ms1ScanNums = Run.GetMs1ScanVector();

    var options = new ParallelOptions();
    if (_maxThreadCount > 0) options.MaxDegreeOfParallelism = _maxThreadCount;

    _seedEnvelopes.Clear();

    Parallel.ForEach(_rows, options, row =>
    {
        var charge = row + _targetMinCharge;

        for (var col = 0; col < NColumns; col++) _featureMatrix[row][col].Init();

        // Isotopes are visited from most to least abundant (k = 0 is the most abundant one).
        for (var k = 0; k < _theoreticalEnvelope.Size; k++)
        {
            var i = _theoreticalEnvelope.IndexOrderByRanking[k];
            var isotopeIndex = _theoreticalEnvelope.Isotopes[i].Index;

            // The most abundant isotope is searched within the target bin only; the others use
            // a window between the averages of the two adjacent bins.
            var isotopeMzLb = (k == 0)
                ? Ion.GetIsotopeMz(Comparer.GetMzStart(targetMassBinNum), charge, isotopeIndex)
                : Ion.GetIsotopeMz(Comparer.GetMzAverage(targetMassBinNum - 1), charge, isotopeIndex);
            var isotopeMzUb = (k == 0)
                ? Ion.GetIsotopeMz(Comparer.GetMzEnd(targetMassBinNum), charge, isotopeIndex)
                : Ion.GetIsotopeMz(Comparer.GetMzAverage(targetMassBinNum + 1), charge, isotopeIndex);

            if (isotopeMzLb < minMs1Mz || isotopeMzUb > maxMs1Mz) continue;

            var st = _ms1PeakList.BinarySearch(new Ms1Peak(isotopeMzLb, 0, 0));
            if (st < 0) st = ~st; // not found: complement gives the first peak at or above the lower bound

            for (var j = st; j < _ms1PeakList.Count; j++)
            {
                var ms1Peak = _ms1PeakList[j];
                if (ms1Peak.Mz > isotopeMzUb) break;
                var col = ms1Peak.Ms1SpecIndex;

                if (k == 0) // most abundant peak
                {
                    // Keep the most intense candidate and derive the cell's mass from it.
                    if (_featureMatrix[row][col].EnvelopePeaks[i] == null || ms1Peak.Intensity > _featureMatrix[row][col].EnvelopePeaks[i].Intensity)
                    {
                        _featureMatrix[row][col].EnvelopePeaks[i] = ms1Peak;
                        _featureMatrix[row][col].AccurateMass = Ion.GetMonoIsotopicMass(ms1Peak.Mz, charge, isotopeIndex);
                    }
                }
                else
                {
                    // Written as !(x > 0) so that NaN masses are skipped too.
                    if (!(_featureMatrix[row][col].AccurateMass > 0)) continue;
                    var expectedPeakMz = Ion.GetIsotopeMz(_featureMatrix[row][col].AccurateMass, charge, isotopeIndex);
                    if (Math.Abs(expectedPeakMz - ms1Peak.Mz) > tolerance.GetToleranceAsTh(ms1Peak.Mz)) continue;

                    // in case of existing isotope peaks, select peaks maximizing envelope similarity
                    if (_featureMatrix[row][col].EnvelopePeaks[i] != null)
                    {
                        if (_featureMatrix[row][col].CountActivePeaks == 1)
                        {
                            if (ms1Peak.Intensity > _featureMatrix[row][col].EnvelopePeaks[i].Intensity) _featureMatrix[row][col].EnvelopePeaks[i] = ms1Peak;
                        }
                        else
                        {
                            // Try the new peak in place; revert if the old one gave the smaller distance.
                            var tmpPeak = _featureMatrix[row][col].EnvelopePeaks[i];
                            var bc1 = _theoreticalEnvelope.GetBhattacharyyaDistance(_featureMatrix[row][col].EnvelopePeaks);
                            _featureMatrix[row][col].EnvelopePeaks[i] = ms1Peak;
                            var bc2 = _theoreticalEnvelope.GetBhattacharyyaDistance(_featureMatrix[row][col].EnvelopePeaks);
                            if (bc1 < bc2) _featureMatrix[row][col].EnvelopePeaks[i] = tmpPeak;
                        }
                    }
                    else
                    {
                        _featureMatrix[row][col].EnvelopePeaks[i] = ms1Peak;
                    }
                }
            }

            if (k == 0)
            {
                // for cells missing most abundant peaks
                for (var col = 0; col < NColumns; col++)
                {
                    if (_featureMatrix[row][col].Exist) continue;

                    var highestIntensity = 0d;
                    var inferredAccurateMass = 0d;
                    // find the most intense abundant peak from neighboring cells
                    for (var j = Math.Max(col - neighborHalfColumns, 0); j <= Math.Min(col + neighborHalfColumns, NColumns - 1); j++)
                    {
                        var mostAbuPeak = _featureMatrix[row][j].EnvelopePeaks[mostAbuInternalIdx];
                        if (mostAbuPeak != null && mostAbuPeak.Intensity > highestIntensity)
                        {
                            highestIntensity = mostAbuPeak.Intensity;
                            inferredAccurateMass = _featureMatrix[row][j].AccurateMass;
                        }
                    }
                    _featureMatrix[row][col].AccurateMass = inferredAccurateMass;
                }
            }
        }

        for (var col = 0; col < NColumns; col++)
        {
            if (!(_featureMatrix[row][col].Exist)) continue;

            if (_featureMatrix[row][col].CountActivePeaks >= nPeaksCutoff)
            {
                var corr = _theoreticalEnvelope.GetPearsonCorrelation(_featureMatrix[row][col].EnvelopePeaks);
                var bcDist = _theoreticalEnvelope.GetBhattacharyyaDistance(_featureMatrix[row][col].EnvelopePeaks);
                _featureMatrix[row][col].CorrelationCoeff = corr;
                _featureMatrix[row][col].DivergenceDist = bcDist;

                observedRows[row] = true;
                observedCols[col] = true;

                // collect seed envelopes
                // NOTE(review): "corr < corrSeedCutoff" accepts LOW correlation, whereas
                // CollectLcMsPeaks treats correlation below its cutoff as the outlier side.
                // Kept as found; confirm whether ">" was intended.
                var mostAbuPeak = _featureMatrix[row][col].EnvelopePeaks[mostAbuInternalIdx];
                if (mostAbuPeak != null && (bcDist < bcSeedCutoff || corr < corrSeedCutoff))
                {
                    var signalToNoiseRatio = mostAbuPeak.Intensity / Ms1Spectra[col].MedianIntensity;
                    if (signalToNoiseRatio > 3)
                    {
                        var seed = new ObservedIsotopeEnvelope(_featureMatrix[row][col].AccurateMass, row + _targetMinCharge, ms1ScanNums[col], _featureMatrix[row][col].EnvelopePeaks, _theoreticalEnvelope);
                        // _seedEnvelopes is shared by all row workers.
                        lock (_seedEnvelopes)
                        {
                            _seedEnvelopes.Add(new KeyValuePair<double, ObservedIsotopeEnvelope>(bcDist, seed));
                        }
                    }
                }
            }
            else
            {
                // Too few active peaks: zero the mass so later steps skip the cell.
                _featureMatrix[row][col].AccurateMass = 0d;
            }
        }
    }// end of row for-loop
    );

    var temp = new List<int>();
    for (var i = 0; i < observedRows.Length; i++)
    {
        if (observedRows[i]) temp.Add(i);
    }
    _rows = temp.ToArray();

    temp.Clear();
    for (var i = 0; i < observedCols.Length; i++)
    {
        if (observedCols[i]) temp.Add(i);
    }
    _cols = temp.ToArray();
}
/// <summary>
/// Tests whether an envelope's assigned charge is believable, by checking whether the
/// sufficiently intense peaks inside the envelope's m/z span show spacings that fit a higher
/// charge state instead.
/// </summary>
/// <param name="envelope">Envelope under test.</param>
/// <param name="spectrum">Spectrum whose peaks are indexed by the envelope's peak indices.</param>
/// <returns>
/// <c>true</c> when the charge exceeds 20, when the envelope accounts for more than 70% of the
/// intense peaks in its span, or when no candidate higher charge collects enough matching
/// spacings; <c>false</c> otherwise.
/// </returns>
private bool CorrectChargeState(ObservedIsotopeEnvelope envelope, Ms1Spectrum spectrum)
{
    var assignedCharge = envelope.Charge;
    if (assignedCharge > 20)
    {
        return true; // high charge (> +20), just pass
    }

    var peaks = spectrum.Peaks;
    var firstIndex = envelope.MinMzPeak.IndexInSpectrum;
    var lastIndex = envelope.MaxMzPeak.IndexInSpectrum;

    // Only peaks above 15% of the envelope's highest intensity take part in the check.
    var intensityFloor = envelope.HighestIntensity * 0.15;

    var intensePeakCount = 0;
    for (var index = firstIndex; index <= lastIndex; index++)
    {
        if (peaks[index].Intensity > intensityFloor)
        {
            intensePeakCount++;
        }
    }

    if (envelope.NumberOfPeaks > intensePeakCount * 0.7)
    {
        return true;
    }

    //var tolerance = new Tolerance(5);
    var tolerance = new Tolerance(Comparer.Ppm * 0.5);
    var spanThreshold = intensePeakCount * 0.5;
    var envelopeThreshold = envelope.NumberOfPeaks + (envelope.TheoreticalEnvelope.Size - 1) * 0.7;
    var mzTol = tolerance.GetToleranceAsTh(peaks[firstIndex].Mz);

    // Candidate charges: about 2x to 5x the assigned charge, bounded to [4, 60].
    var lowestCandidate = Math.Max(assignedCharge * 2 - 1, 4);
    var highestCandidate = Math.Min(assignedCharge * 5 + 1, 60);
    var widestSpacing = Constants.C13MinusC12 / lowestCandidate + mzTol;
    var gapCounts = new int[highestCandidate - lowestCandidate + 1];

    for (var left = firstIndex; left <= lastIndex; left++)
    {
        if (peaks[left].Intensity > intensityFloor)
        {
            for (var right = left + 1; right <= lastIndex; right++)
            {
                if (peaks[right].Intensity > intensityFloor)
                {
                    var spacing = peaks[right].Mz - peaks[left].Mz;
                    if (spacing > widestSpacing)
                    {
                        break;
                    }

                    // Peaks are too close together; the charge bound below would divide by ~0.
                    var tooClose = Math.Abs(spacing - mzTol) < float.Epsilon;
                    if (!tooClose)
                    {
                        // Charges consistent with this spacing, restricted to the candidate range.
                        var fromCharge = Math.Max(Math.Round(1 / (spacing + mzTol)), lowestCandidate);
                        var toCharge = Math.Min(Math.Round(1 / (spacing - mzTol)), highestCandidate);

                        for (var candidate = fromCharge; candidate <= toCharge; candidate++)
                        {
                            var slot = (int)candidate - lowestCandidate;
                            gapCounts[slot]++;

                            // gaps + 1 = number of peaks implied by that many gaps
                            var impliedPeaks = gapCounts[slot] + 1;
                            if (impliedPeaks > spanThreshold && impliedPeaks > envelopeThreshold)
                            {
                                return false;
                            }
                        }
                    }
                }
            }
        }
    }

    return true;
}
/// <summary>
/// Scores how unlikely it is that the envelope's peaks arose by chance within the local m/z
/// window around its most abundant isotope, using a rank-sum test and a Poisson test.
/// </summary>
/// <param name="envelope">Observed envelope to evaluate.</param>
/// <returns>
/// Window boundaries, peak counts and the two scores. Each score is <c>-log2(p-value)</c>, or 50
/// when the p-value is not positive.
/// </returns>
public IsotopeEnvelopeStatisticalInfo PreformStatisticalSignificanceTest(ObservedIsotopeEnvelope envelope)
{
    int windowStartIndex;
    Tuple<double, double> windowBounds;

    // Centre the local window on the observed most abundant peak, or on its theoretical m/z
    // when that peak was not observed.
    var topIsotope = envelope.TheoreticalEnvelope.IndexOrderByRanking[0];
    var topPeak = envelope.Peaks[topIsotope];
    double centerMz = (topPeak != null)
        ? topPeak.Mz
        : envelope.TheoreticalEnvelope.GetIsotopeMz(envelope.Charge, topIsotope);

    var rankings = GetLocalRankings(centerMz, out windowStartIndex, out windowBounds);

    var windowStart = windowBounds.Item1;
    var windowEnd = windowBounds.Item2;

    var ret = new IsotopeEnvelopeStatisticalInfo
    {
        LocalMzStart = windowStart,
        LocalMzEnd = windowEnd,
        NumberOfLocalPeaks = rankings.Length,
        // 100 possible positions per Th, i.e. a smallest delta m/z of 0.01 Th
        // (marked as an open question by the original author)
        NumberOfPossiblePeaks = (int)Math.Ceiling(100 * (windowEnd - windowStart)),
        NumberOfIsotopePeaks = envelope.Size,
    };

    // Rank-sum test: add up the local ranks of the active envelope peaks inside the window.
    var rankTotal = 0;
    var rankedPeakCount = 0;
    for (var slot = 0; slot < envelope.Size; slot++)
    {
        var peak = envelope.Peaks[slot];
        if (peak != null && peak.Active)
        {
            ret.NumberOfMatchedIsotopePeaks++;

            var localIndex = peak.IndexInSpectrum - windowStartIndex;
            var insideWindow = localIndex >= 0 && localIndex < rankings.Length;
            if (insideWindow)
            {
                rankTotal += rankings[localIndex];
                rankedPeakCount++;
            }
        }
    }

    var pvalue = FitScoreCalculator.GetRankSumPvalue(ret.NumberOfLocalPeaks, rankedPeakCount, rankTotal);
    if (pvalue > 0)
    {
        ret.RankSumScore = -Math.Log(pvalue, 2);
    }
    else
    {
        ret.RankSumScore = 50;
    }

    // Poisson test: expected number of chance matches given the window's peak density.
    var possiblePeaks = ret.NumberOfPossiblePeaks;
    var isotopePeaks = ret.NumberOfIsotopePeaks;        // # of theoretical isotope ions of the mass within the local window
    var detectedPeaks = ret.NumberOfLocalPeaks;         // # of detected ions within the local window
    var matchedPeaks = ret.NumberOfMatchedIsotopePeaks; // # of matched ions generating isotope envelope profile

    var lambda = ((double)detectedPeaks / (double)possiblePeaks) * isotopePeaks;
    pvalue = 1 - Poisson.CDF(lambda, matchedPeaks);
    if (pvalue > 0)
    {
        ret.PoissonScore = -Math.Log(pvalue, 2);
    }
    else
    {
        ret.PoissonScore = 50;
    }

    return ret;
}
/// <summary>
/// Recomputes the cluster's scores from the current <c>Envelopes</c> grid. Scores are kept
/// separately for even and odd charges (indexed by <c>EvenCharge</c> / <c>OddCharge</c>). Also
/// refreshes the representative summed envelope, the abundance split between the two parities,
/// the XIC correlations, and the representative charge, m/z and scan number.
/// </summary>
/// <param name="ms1Spectra">MS1 spectra, indexed by MS1 column.</param>
/// <param name="pValueCheck">
/// When true, an envelope only contributes to the "best" per-parity scores if both its rank-sum
/// and Poisson p-values are below 0.01.
/// </param>
public void UpdateScore(List <Ms1Spectrum> ms1Spectra, bool pValueCheck = true)
{
    var nRows = MaxCharge - MinCharge + 1;
    var ms1ScanNumToIndex = _run.GetMs1ScanNumToIndex();
    var minCol = ms1ScanNumToIndex[MinScanNum];
    var maxCol = ms1ScanNumToIndex[MaxScanNum];
    var nCols = maxCol - minCol + 1;
    var mostAbuIdx = TheoreticalEnvelope.IndexOrderByRanking[0];

    ClearScore();

    // Best (lowest) summed-envelope distance seen so far for each charge parity.
    var bestChargeDist = new double[] { 10.0d, 10.0d };
    // sum envelopes at each charge
    var summedIntensity = new double[TheoreticalEnvelope.Size];

    // XIC buffers carry 9 extra entries before and after the nCols real columns.
    var xicLen = nCols + 18;
    var xicStartIdx = 9;

    /*
     * if (nCols < 13)
     * {
     *     xicLen = 13;
     *     xicStartIdx = (int) Math.Floor((xicLen - nCols)*0.5);
     * }*/

    // xic2: abundance of good envelopes per column, pooled per charge parity.
    var xic2 = new double[2][];
    xic2[0] = new double[xicLen];
    xic2[1] = new double[xicLen];
    // chargeXic: abundance of good envelopes per column, one array per charge row.
    var chargeXic = new double[nRows][];

    var tempBestBcDist = 10.0d;
    // "Level one" representative: best envelope among those passing the p-value gate.
    var repEnvelopeBcDist = 10.0d;
    ObservedIsotopeEnvelope repEnvelope = null;
    // "Level two" representative: best envelope regardless of the p-value gate (fallback).
    var repEnvelopeBcDist2 = 10.0d;
    ObservedIsotopeEnvelope repEnvelope2 = null;

    // Ungated counterparts of the Best*ScoreAcrossCharge members, used as fallback below.
    var tempBestDistanceScoreAcrossCharge = new double[2] { 10, 10 };
    var tempBestIntensityScoreAcrossCharge = new double[2];
    var tempBestCorrelationScoreAcrossCharge = new double[2];

    for (var i = 0; i < nRows; i++)
    {
        var charge = i + MinCharge;
        var mostAbuMz = TheoreticalEnvelope.GetIsotopeMz(charge, mostAbuIdx);
        Array.Clear(summedIntensity, 0, summedIntensity.Length);

        chargeXic[i] = new double[xicLen];

        var chargeIdx = (charge % 2 == 0) ? EvenCharge : OddCharge;
        var summedMostAbuIsotopeIntensity = 0d;
        var summedReferenceIntensity = 0d;

        for (var j = 0; j < nCols; j++)
        {
            var envelope = Envelopes[i][j];
            var col = minCol + j;

            // Note: the local window is fetched even for empty slots, before the null check.
            var localWin = ms1Spectra[col].GetLocalMzWindow(mostAbuMz);

            if (envelope == null)
            {
                continue;
            }

            envelope.Peaks.SumEnvelopeTo(summedIntensity);
            var mostAbuPeak = envelope.Peaks[mostAbuIdx];

            if (mostAbuPeak != null && mostAbuPeak.Active)
            {
                summedMostAbuIsotopeIntensity += mostAbuPeak.Intensity;
                summedReferenceIntensity += localWin.HighestIntensity;
            }

            AbundanceDistributionAcrossCharge[chargeIdx] += envelope.Abundance;

            var newBcDist = TheoreticalEnvelope.GetBhattacharyyaDistance(envelope.Peaks);
            var newCorr = TheoreticalEnvelope.GetPearsonCorrelation(envelope.Peaks);

            // Only envelopes resembling the theoretical profile contribute to the XICs.
            var goodEnvelope = (newBcDist <0.07 || newCorr> 0.7);

            if (goodEnvelope)
            {
                xic2[chargeIdx][xicStartIdx + j] += envelope.Abundance;
                chargeXic[i][xicStartIdx + j] = envelope.Abundance;
            }

            var levelOneEnvelope = true;
            // Always true: the only assignment that could clear it is commented out below.
            var levelTwoEnvelope = true;

            if (pValueCheck)
            {
                var poissonPvalue = localWin.GetPoissonTestPvalue(envelope.Peaks, TheoreticalEnvelope.Size);
                var rankSumPvalue = localWin.GetRankSumTestPvalue(envelope.Peaks, TheoreticalEnvelope.Size);
                levelOneEnvelope = (rankSumPvalue < 0.01 && poissonPvalue < 0.01);
                //levelTwoEnvelope = (rankSumPvalue < 0.05 || poissonPvalue < 0.05);
            }

            if (levelOneEnvelope)
            {
                if (newBcDist < BestDistanceScoreAcrossCharge[chargeIdx])
                {
                    BestDistanceScoreAcrossCharge[chargeIdx] = newBcDist;
                    // NOTE(review): the guard tests MedianIntensity but the divisor is
                    // HighestIntensity - confirm this is intended.
                    if (localWin.MedianIntensity > 0)
                    {
                        BestIntensityScoreAcrossCharge[chargeIdx] = envelope.HighestIntensity / localWin.HighestIntensity;
                    }
                    else
                    {
                        BestIntensityScoreAcrossCharge[chargeIdx] = 1.0d;
                    }
                }

                BestCorrelationScoreAcrossCharge[chargeIdx] = Math.Max(BestCorrelationScoreAcrossCharge[chargeIdx], newCorr);

                if (newBcDist < repEnvelopeBcDist)
                {
                    repEnvelopeBcDist = newBcDist;
                    repEnvelope = envelope;
                }

                // in the initial scoring, classify major and minor envelopes
                if (!_initScore && goodEnvelope)
                {
                    envelope.GoodEnough = true;
                }
            }

            if (levelTwoEnvelope)
            {
                if (newBcDist < tempBestDistanceScoreAcrossCharge[chargeIdx])
                {
                    tempBestDistanceScoreAcrossCharge[chargeIdx] = newBcDist;
                    if (localWin.MedianIntensity > 0)
                    {
                        tempBestIntensityScoreAcrossCharge[chargeIdx] = envelope.HighestIntensity / localWin.HighestIntensity;
                    }
                    else
                    {
                        tempBestIntensityScoreAcrossCharge[chargeIdx] = 1.0d;
                    }
                }

                tempBestCorrelationScoreAcrossCharge[chargeIdx] = Math.Max(tempBestCorrelationScoreAcrossCharge[chargeIdx], newCorr);

                if (newBcDist < repEnvelopeBcDist2)
                {
                    repEnvelopeBcDist2 = newBcDist;
                    repEnvelope2 = envelope;
                }
            }
        }

        // Scores of the envelope summed over all columns of this charge.
        var bcDist = TheoreticalEnvelope.GetBhattacharyyaDistance(summedIntensity);
        EnvelopeDistanceScoreAcrossCharge[chargeIdx] = Math.Min(bcDist, EnvelopeDistanceScoreAcrossCharge[chargeIdx]);
        EnvelopeCorrelationScoreAcrossCharge[chargeIdx] = Math.Max(TheoreticalEnvelope.GetPearsonCorrelation(summedIntensity), EnvelopeCorrelationScoreAcrossCharge[chargeIdx]);

        // Track the best charge per parity; a value below 1 is treated as "no best charge yet".
        if (BestCharge[chargeIdx] < 1 || bcDist < bestChargeDist[chargeIdx])
        {
            BestCharge[chargeIdx] = charge;
            bestChargeDist[chargeIdx] = bcDist;
            if (summedReferenceIntensity > 0)
            {
                EnvelopeIntensityScoreAcrossCharge[chargeIdx] = summedMostAbuIsotopeIntensity / summedReferenceIntensity;
            }
            //if (summedMedianIntensity > 0) EnvelopeIntensityScoreAcrossCharge[chargeIdx] = Math.Min(1.0, 0.1*(summedMostAbuIsotopeIntensity / summedMedianIntensity));
        }

        // The summed envelope with the lowest distance over all charges becomes the representative one.
        if (bcDist < tempBestBcDist)
        {
            tempBestBcDist = bcDist;
            Array.Copy(summedIntensity, RepresentativeSummedEnvelop, RepresentativeSummedEnvelop.Length);
        }
    }

    // when good envelope is observed at only either even or odd charge...
    // ...the weaker parity falls back to its ungated (level two) scores.
    if (BestCorrelationScoreAcrossCharge[0] > 0.7 && BestCorrelationScoreAcrossCharge[1] < 0.5)
    {
        const int i = 1;
        BestCorrelationScoreAcrossCharge[i] = tempBestCorrelationScoreAcrossCharge[i];
        BestIntensityScoreAcrossCharge[i] = tempBestIntensityScoreAcrossCharge[i];
        BestDistanceScoreAcrossCharge[i] = tempBestDistanceScoreAcrossCharge[i];
    }

    if (BestCorrelationScoreAcrossCharge[1] > 0.7 && BestCorrelationScoreAcrossCharge[0] < 0.5)
    {
        const int i = 0;
        BestCorrelationScoreAcrossCharge[i] = tempBestCorrelationScoreAcrossCharge[i];
        BestIntensityScoreAcrossCharge[i] = tempBestIntensityScoreAcrossCharge[i];
        BestDistanceScoreAcrossCharge[i] = tempBestDistanceScoreAcrossCharge[i];
    }

    // normalize abundance across charges
    var s = AbundanceDistributionAcrossCharge[0] + AbundanceDistributionAcrossCharge[1];
    if (s > 0)
    {
        for (var chargeIdx = 0; chargeIdx < 2; chargeIdx++)
        {
            AbundanceDistributionAcrossCharge[chargeIdx] = AbundanceDistributionAcrossCharge[chargeIdx] / s;
        }
    }

    if (nCols > 1)
    {
        // NOTE(review): if one parity never received a best charge (e.g. a single charge row),
        // the index computed here depends on what ClearScore() leaves in BestCharge and may
        // fall outside chargeXic - confirm against ClearScore.
        var evenChargeIdx = BestCharge[EvenCharge] - MinCharge;
        var oddChargeIdx = BestCharge[OddCharge] - MinCharge;
        XicCorrelationBetweenBestCharges[0] = FitScoreCalculator.GetPearsonCorrelation(Smoother.Smooth(chargeXic[evenChargeIdx]), Smoother.Smooth(chargeXic[oddChargeIdx]));
        XicCorrelationBetweenBestCharges[1] = FitScoreCalculator.GetPearsonCorrelation(Smoother.Smooth(xic2[EvenCharge]), Smoother.Smooth(xic2[OddCharge]));
    }

    // Fall back to the ungated representative when no envelope passed the p-value gate.
    if (repEnvelope == null && repEnvelope2 != null)
    {
        repEnvelope = repEnvelope2;
    }

    if (repEnvelope != null)
    {
        // set representative charge, mz and scanNum
        RepresentativeCharge = repEnvelope.Charge;
        RepresentativeMz = repEnvelope.RepresentativePeak.Mz;
        RepresentativeScanNum = repEnvelope.ScanNum;
    }

    // From now on envelopes are no longer (re)classified as GoodEnough.
    _initScore = true;
}
/// <summary>
/// Fills the envelope grid with randomly sampled envelopes over the requested charge range and
/// the cluster's current scan range, then recomputes the scores without the p-value check.
/// </summary>
/// <param name="ms1Spectra">MS1 spectra, indexed by MS1 column.</param>
/// <param name="targetMinCharge">First charge to populate; assigned to <c>MinCharge</c>.</param>
/// <param name="targetMaxCharge">Last charge to populate; assigned to <c>MaxCharge</c>.</param>
public void UpdateWithDecoyScore(List<Ms1Spectrum> ms1Spectra, int targetMinCharge, int targetMaxCharge)
{
    var indexOfScan = _run.GetMs1ScanNumToIndex();
    var scanOfIndex = _run.GetMs1ScanVector();
    var colBegin = indexOfScan[MinScanNum];
    var colEnd = indexOfScan[MaxScanNum];

    MinCharge = targetMinCharge;
    MaxCharge = targetMaxCharge;

    var rng = new Random();
    var mzBinner = new MzComparerWithBinning(28);
    var mostAbundantIsotope = TheoreticalEnvelope.IndexOrderByRanking[0];

    var chargeSpan = MaxCharge - MinCharge + 1;
    var scanSpan = colEnd - colBegin + 1;

    Envelopes = new ObservedIsotopeEnvelope[chargeSpan][];
    for (var r = 0; r < chargeSpan; ++r)
    {
        Envelopes[r] = new ObservedIsotopeEnvelope[scanSpan];
    }

    for (var z = targetMinCharge; z <= targetMaxCharge; z++)
    {
        var referenceMz = TheoreticalEnvelope.GetIsotopeMz(z, mostAbundantIsotope);

        // Rows stay empty when the most abundant isotope lies outside the run's MS1 m/z range.
        if (_run.MaxMs1Mz < referenceMz || referenceMz < _run.MinMs1Mz)
        {
            continue;
        }

        for (var c = colBegin; c <= colEnd; c++)
        {
            var localWindow = ms1Spectra[c].GetLocalMzWindow(referenceMz);
            var highBin = mzBinner.GetBinNumber(localWindow.MaxMz);
            var lowBin = mzBinner.GetBinNumber(localWindow.MinMz);
            var binsInWindow = highBin - lowBin + 1;

            // Each isotope slot gets one random bin pick; picks beyond the window's peak count
            // leave the slot empty.
            var sampled = new Ms1Peak[TheoreticalEnvelope.Size];
            var slot = 0;
            while (slot < sampled.Length)
            {
                var pick = rng.Next(0, binsInWindow);
                if (pick < localWindow.PeakCount)
                {
                    sampled[slot] = (Ms1Peak)ms1Spectra[c].Peaks[pick + localWindow.PeakStartIndex];
                }
                slot++;
            }

            var randomEnvelope = new ObservedIsotopeEnvelope(Mass, z, scanOfIndex[c], sampled, TheoreticalEnvelope);
            Envelopes[z - MinCharge][c - colBegin] = randomEnvelope;
        }
    }

    UpdateScore(ms1Spectra, false);
}
/// <summary>
/// Runs a rank-sum test and a Poisson test for an observed envelope against the peaks in the
/// local m/z window around the envelope's most abundant isotope.
/// </summary>
/// <param name="envelope">Envelope to test.</param>
/// <returns>
/// Statistics for the window plus both test scores, each expressed as <c>-log2(p)</c> and set to
/// 50 when the p-value is zero or negative.
/// </returns>
public IsotopeEnvelopeStatisticalInfo PreformStatisticalSignificanceTest(ObservedIsotopeEnvelope envelope)
{
    int firstLocalPeakIndex;
    Tuple<double, double> mzRange;

    var theoretical = envelope.TheoreticalEnvelope;
    var mostAbundantIndex = theoretical.IndexOrderByRanking[0];

    // Prefer the observed m/z of the most abundant isotope; fall back to the theoretical value.
    double referenceMz;
    var observedTop = envelope.Peaks[mostAbundantIndex];
    if (observedTop == null)
    {
        referenceMz = theoretical.GetIsotopeMz(envelope.Charge, mostAbundantIndex);
    }
    else
    {
        referenceMz = observedTop.Mz;
    }

    var localRanks = GetLocalRankings(referenceMz, out firstLocalPeakIndex, out mzRange);

    var ret = new IsotopeEnvelopeStatisticalInfo
    {
        LocalMzStart = mzRange.Item1,
        LocalMzEnd = mzRange.Item2,
        NumberOfLocalPeaks = localRanks.Length,
        // window width times 100, i.e. one possible peak per 0.01 Th
        // (the original author marked this resolution as an open question)
        NumberOfPossiblePeaks = (int)Math.Ceiling(100 * (mzRange.Item2 - mzRange.Item1)),
        NumberOfIsotopePeaks = envelope.Size,
    };

    // --- rank-sum test ---
    var sumOfRanks = 0;
    var countedRanks = 0;
    for (var idx = 0; idx < envelope.Size; idx++)
    {
        var current = envelope.Peaks[idx];
        if (current == null || !current.Active)
        {
            continue;
        }

        ret.NumberOfMatchedIsotopePeaks++;

        // Matched peaks outside the local window count as matched but carry no rank.
        var offset = current.IndexInSpectrum - firstLocalPeakIndex;
        if (offset < 0 || offset >= localRanks.Length)
        {
            continue;
        }

        sumOfRanks += localRanks[offset];
        countedRanks++;
    }

    var pvalue = FitScoreCalculator.GetRankSumPvalue(ret.NumberOfLocalPeaks, countedRanks, sumOfRanks);
    var rankSumSignificant = pvalue > 0;
    ret.RankSumScore = rankSumSignificant ? -Math.Log(pvalue, 2) : 50;

    // --- Poisson test ---
    var n = ret.NumberOfPossiblePeaks;
    var k = ret.NumberOfIsotopePeaks;         // # of theoretical isotope ions of the mass within the local window
    var n1 = ret.NumberOfLocalPeaks;          // # of detected ions within the local window
    var k1 = ret.NumberOfMatchedIsotopePeaks; // # of matched ions generating isotope envelope profile

    // Expected number of isotope positions hit by chance at the window's peak density.
    var peakDensity = (double)n1 / (double)n;
    var lambda = peakDensity * k;

    pvalue = 1 - Poisson.CDF(lambda, k1);
    var poissonSignificant = pvalue > 0;
    ret.PoissonScore = poissonSignificant ? -Math.Log(pvalue, 2) : 50;

    return ret;
}