Ejemplo n.º 1
0
        /// <summary>
        /// Resets the charge and scan bounds of this object, allocates a fresh envelope grid
        /// (one row per charge, one column per MS1 scan index in the range) and optionally fills it.
        /// </summary>
        /// <param name="minCharge">Charge mapped to the first grid row.</param>
        /// <param name="maxCharge">Charge mapped to the last grid row.</param>
        /// <param name="minScanNum">Scan number mapped to the first grid column.</param>
        /// <param name="maxScanNum">Scan number mapped to the last grid column.</param>
        /// <param name="envelopes">Envelopes to place into the grid; <c>null</c> leaves the grid empty.</param>
        public void AddEnvelopes(int minCharge, int maxCharge, int minScanNum, int maxScanNum,
            IList<ObservedIsotopeEnvelope> envelopes = null)
        {
            var scanToColumn = _run.GetMs1ScanNumToIndex();
            var firstCol = scanToColumn[minScanNum];
            var lastCol = scanToColumn[maxScanNum];

            var rowCount = maxCharge - minCharge + 1;
            var colCount = lastCol - firstCol + 1;

            MinCharge = minCharge;
            MaxCharge = maxCharge;
            MinScanNum = minScanNum;
            MaxScanNum = maxScanNum;

            Envelopes = new ObservedIsotopeEnvelope[rowCount][];
            for (var row = 0; row < rowCount; row++)
            {
                Envelopes[row] = new ObservedIsotopeEnvelope[colCount];
            }

            if (envelopes != null)
            {
                foreach (var item in envelopes)
                {
                    var gridRow = item.Charge - MinCharge;
                    var gridCol = scanToColumn[item.ScanNum] - firstCol;

                    // Only envelopes that fall inside the grid are stored; the rest are ignored.
                    var insideGrid = gridRow >= 0 && gridRow < rowCount && gridCol >= 0 && gridCol < colCount;
                    if (insideGrid)
                    {
                        Envelopes[gridRow][gridCol] = item;
                    }
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Heuristic check of the charge assigned to an observed isotope envelope: counts pairs of spectrum
        /// peaks, within the envelope's m/z span, whose spacing matches a higher candidate charge, and rejects
        /// the envelope when one candidate charge collects too many such pairs.
        /// </summary>
        /// <param name="envelope">Envelope to test; its min/max m/z peaks delimit the range of spectrum peaks examined.</param>
        /// <returns>
        /// <c>true</c> when the charge exceeds 20, when the envelope's peak count exceeds 70% of the peaks in
        /// the span, or when no candidate charge crosses both thresholds; <c>false</c> when the span holds
        /// fewer than 10 peaks or a candidate charge crosses both thresholds.
        /// </returns>
        public bool CheckChargeState(ObservedIsotopeEnvelope envelope)
        {
            var checkCharge = envelope.Charge;

            if (checkCharge > 20)
            {
                return true; // high charge (> +20), just pass
            }

            var peakStartIndex = envelope.MinMzPeak.IndexInSpectrum;
            var peakEndIndex   = envelope.MaxMzPeak.IndexInSpectrum;
            var nPeaks         = peakEndIndex - peakStartIndex + 1;

            if (nPeaks < 10)
            {
                return false;
            }

            if (envelope.NumberOfPeaks > nPeaks * 0.7)
            {
                return true;
            }

            var tolerance = new Tolerance(5); // NOTE(review): presumably 5 ppm - confirm against Tolerance
            var threshold = nPeaks * 0.5;
            var mzTol     = tolerance.GetToleranceAsTh(Spectrum.Peaks[peakStartIndex].Mz);

            var minCheckCharge = Math.Max(checkCharge * 2 - 1, 4);
            var maxCheckCharge = Math.Min(checkCharge * 5 + 1, 60);
            var maxDeltaMz     = Constants.C13MinusC12 / minCheckCharge + mzTol;
            var nChargeGaps    = new int[maxCheckCharge - minCheckCharge + 1];

            for (var i = peakStartIndex; i <= peakEndIndex; i++)
            {
                for (var j = i + 1; j <= peakEndIndex; j++)
                {
                    var deltaMz = Spectrum.Peaks[j].Mz - Spectrum.Peaks[i].Mz;

                    if (deltaMz > maxDeltaMz)
                    {
                        break;
                    }

                    // Candidate charges c with 1/c within mzTol of deltaMz, clamped to the checked range.
                    // Clamping is essential: when deltaMz is at or just above mzTol, 1/(deltaMz - mzTol) is
                    // enormous (or +Infinity) and an unclamped loop would spin for billions of iterations
                    // or never terminate.
                    var firstCharge = Math.Max(Math.Round(1 / (deltaMz + mzTol)), minCheckCharge);
                    var lastCharge  = Math.Min(Math.Round(1 / (deltaMz - mzTol)), maxCheckCharge);

                    for (var c = firstCharge; c <= lastCharge; c++)
                    {
                        var k = (int)c - minCheckCharge;
                        nChargeGaps[k]++;

                        if (nChargeGaps[k] + 1 > threshold && nChargeGaps[k] + 1 > 1.25 * envelope.NumberOfPeaks)
                        {
                            return false;
                        }
                    }
                }
            }

            return true;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Replaces the envelope grid with randomly drawn decoy envelopes for every charge in the target
        /// range and every MS1 column between the current min and max scan, then calls
        /// <c>UpdateScore(ms1Spectra, false)</c>.
        /// </summary>
        /// <param name="ms1Spectra">MS1 spectra, indexed by MS1 column.</param>
        /// <param name="targetMinCharge">Lowest charge to generate; also assigned to <c>MinCharge</c>.</param>
        /// <param name="targetMaxCharge">Highest charge to generate; also assigned to <c>MaxCharge</c>.</param>
        public void UpdateWithDecoyScore(List<Ms1Spectrum> ms1Spectra, int targetMinCharge, int targetMaxCharge)
        {
            var columnOfScan = _run.GetMs1ScanNumToIndex();
            var scanOfColumn = _run.GetMs1ScanVector();
            var firstCol = columnOfScan[MinScanNum];
            var lastCol = columnOfScan[MaxScanNum];

            MinCharge = targetMinCharge;
            MaxCharge = targetMaxCharge;

            var random = new Random();
            var binner = new MzComparerWithBinning(28);
            var topIsotopeIndex = TheoreticalEnvelope.IndexOrderByRanking[0];

            var rowCount = MaxCharge - MinCharge + 1;
            var colCount = lastCol - firstCol + 1;

            Envelopes = new ObservedIsotopeEnvelope[rowCount][];
            var row = 0;
            while (row < rowCount)
            {
                Envelopes[row] = new ObservedIsotopeEnvelope[colCount];
                row++;
            }

            for (var z = targetMinCharge; z <= targetMaxCharge; z++)
            {
                var topIsotopeMz = TheoreticalEnvelope.GetIsotopeMz(z, topIsotopeIndex);

                // Charges whose most abundant isotope lies outside the run's MS1 m/z range get no decoys.
                var outsideMs1Range = _run.MaxMs1Mz < topIsotopeMz || topIsotopeMz < _run.MinMs1Mz;
                if (!outsideMs1Range)
                {
                    for (var column = firstCol; column <= lastCol; column++)
                    {
                        var window = ms1Spectra[column].GetLocalMzWindow(topIsotopeMz);
                        var binCount = binner.GetBinNumber(window.MaxMz) - binner.GetBinNumber(window.MinMz) + 1;
                        var decoyPeaks = new Ms1Peak[TheoreticalEnvelope.Size];

                        for (var slot = 0; slot < decoyPeaks.Length; slot++)
                        {
                            // Draw a bin index; only draws below the window's peak count select a real peak,
                            // otherwise the slot stays null.
                            var draw = random.Next(0, binCount);
                            if (draw < window.PeakCount)
                            {
                                decoyPeaks[slot] = (Ms1Peak)ms1Spectra[column].Peaks[draw + window.PeakStartIndex];
                            }
                        }

                        Envelopes[z - MinCharge][column - firstCol] =
                            new ObservedIsotopeEnvelope(Mass, z, scanOfColumn[column], decoyPeaks, TheoreticalEnvelope);
                    }
                }
            }

            UpdateScore(ms1Spectra, false);
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Widens the scan-number and charge bounds so that they include the given envelope.
 /// A negative bound is treated as "not set yet" and is replaced unconditionally.
 /// </summary>
 /// <param name="envelope">Envelope whose scan number and charge must be covered by the bounds.</param>
 internal void Expand(ObservedIsotopeEnvelope envelope)
 {
     var scan = envelope.ScanNum;
     var charge = envelope.Charge;

     var raiseMaxScan = MaxScanNum < 0 || scan > MaxScanNum;
     if (raiseMaxScan)
     {
         MaxScanNum = scan;
     }

     var lowerMinScan = MinScanNum < 0 || scan < MinScanNum;
     if (lowerMinScan)
     {
         MinScanNum = scan;
     }

     var raiseMaxCharge = MaxCharge < 0 || charge > MaxCharge;
     if (raiseMaxCharge)
     {
         MaxCharge = charge;
     }

     var lowerMinCharge = MinCharge < 0 || charge < MinCharge;
     if (lowerMinCharge)
     {
         MinCharge = charge;
     }
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Tests whether the charge of an observed isotope envelope is plausible by counting, among the
        /// spectrum peaks spanned by the envelope, peak pairs whose m/z spacing corresponds to a higher
        /// candidate charge.
        /// </summary>
        /// <param name="envelope">Envelope under test; its min/max m/z peaks give the spectrum index range scanned.</param>
        /// <returns>
        /// <c>false</c> if the span contains fewer than 10 peaks, or if some candidate charge gathers more
        /// matching gaps than both thresholds allow; otherwise <c>true</c> (including the shortcut cases of
        /// charge above 20 and an envelope covering more than 70% of the peaks in the span).
        /// </returns>
        public bool CheckChargeState(ObservedIsotopeEnvelope envelope)
        {
            var checkCharge = envelope.Charge;
            if (checkCharge > 20)
            {
                return true; // high charge (> +20), just pass
            }

            var peakStartIndex = envelope.MinMzPeak.IndexInSpectrum;
            var peakEndIndex = envelope.MaxMzPeak.IndexInSpectrum;
            var nPeaks = peakEndIndex - peakStartIndex + 1;

            if (nPeaks < 10)
            {
                return false;
            }

            if (envelope.NumberOfPeaks > nPeaks * 0.7)
            {
                return true;
            }

            var tolerance = new Tolerance(5); // NOTE(review): presumably 5 ppm - confirm against Tolerance
            var threshold = nPeaks * 0.5;
            var mzTol = tolerance.GetToleranceAsTh(Spectrum.Peaks[peakStartIndex].Mz);

            var minCheckCharge = Math.Max(checkCharge * 2 - 1, 4);
            var maxCheckCharge = Math.Min(checkCharge * 5 + 1, 60);
            var maxDeltaMz = Constants.C13MinusC12 / minCheckCharge + mzTol;
            var nChargeGaps = new int[maxCheckCharge - minCheckCharge + 1];

            for (var i = peakStartIndex; i <= peakEndIndex; i++)
            {
                for (var j = i + 1; j <= peakEndIndex; j++)
                {
                    var deltaMz = Spectrum.Peaks[j].Mz - Spectrum.Peaks[i].Mz;
                    if (deltaMz > maxDeltaMz)
                    {
                        break;
                    }

                    // Range of charges c for which 1/c is within mzTol of deltaMz. The bounds are clamped
                    // to [minCheckCharge, maxCheckCharge] up front: if deltaMz is equal to, or barely above,
                    // mzTol the raw upper bound is +Infinity or astronomically large, and walking it one
                    // charge at a time would effectively hang.
                    var lowCharge = Math.Max(Math.Round(1 / (deltaMz + mzTol)), minCheckCharge);
                    var highCharge = Math.Min(Math.Round(1 / (deltaMz - mzTol)), maxCheckCharge);

                    for (var c = lowCharge; c <= highCharge; c++)
                    {
                        var k = (int)c - minCheckCharge;
                        nChargeGaps[k]++;

                        if (nChargeGaps[k] + 1 > threshold && nChargeGaps[k] + 1 > 1.25 * envelope.NumberOfPeaks)
                        {
                            return false;
                        }
                    }
                }
            }

            return true;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Stores the given charge/scan bounds, rebuilds the envelope grid sized to those bounds and,
        /// when a list is supplied, drops each envelope into the cell for its charge and scan.
        /// </summary>
        /// <param name="minCharge">Lower charge bound (grid row 0).</param>
        /// <param name="maxCharge">Upper charge bound (last grid row).</param>
        /// <param name="minScanNum">Lower scan-number bound (grid column 0).</param>
        /// <param name="maxScanNum">Upper scan-number bound (last grid column).</param>
        /// <param name="envelopes">Optional envelopes to insert; entries outside the bounds are skipped.</param>
        public void AddEnvelopes(int minCharge, int maxCharge, int minScanNum, int maxScanNum,
                                 IList<ObservedIsotopeEnvelope> envelopes = null)
        {
            var indexOfScan = _run.GetMs1ScanNumToIndex();
            var columnOffset = indexOfScan[minScanNum];
            var columnLimit = indexOfScan[maxScanNum];

            var chargeSpan = maxCharge - minCharge + 1;
            var scanSpan = columnLimit - columnOffset + 1;

            MinCharge = minCharge;
            MaxCharge = maxCharge;
            MinScanNum = minScanNum;
            MaxScanNum = maxScanNum;

            Envelopes = new ObservedIsotopeEnvelope[chargeSpan][];
            var nextRow = 0;
            while (nextRow < chargeSpan)
            {
                Envelopes[nextRow] = new ObservedIsotopeEnvelope[scanSpan];
                nextRow++;
            }

            if (envelopes == null)
            {
                return;
            }

            foreach (var candidate in envelopes)
            {
                var r = candidate.Charge - MinCharge;
                var c = indexOfScan[candidate.ScanNum] - columnOffset;

                var rowOk = r >= 0 && r < chargeSpan;
                var colOk = c >= 0 && c < scanSpan;
                if (rowOk && colOk)
                {
                    Envelopes[r][c] = candidate;
                }
            }
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Grows the current scan-number and charge range to cover <paramref name="envelope"/>.
 /// Bounds that are still negative are simply overwritten with the envelope's values.
 /// </summary>
 /// <param name="envelope">Envelope to be covered by the range.</param>
 internal void Expand(ObservedIsotopeEnvelope envelope)
 {
     // Each bound is kept only when it is already set (non-negative) and already covers the envelope.
     if (!(MaxScanNum >= 0 && envelope.ScanNum <= MaxScanNum))
     {
         MaxScanNum = envelope.ScanNum;
     }

     if (!(MinScanNum >= 0 && envelope.ScanNum >= MinScanNum))
     {
         MinScanNum = envelope.ScanNum;
     }

     if (!(MaxCharge >= 0 && envelope.Charge <= MaxCharge))
     {
         MaxCharge = envelope.Charge;
     }

     if (!(MinCharge >= 0 && envelope.Charge >= MinCharge))
     {
         MinCharge = envelope.Charge;
     }
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Creates a cluster from a single observed envelope by forwarding the envelope's theoretical envelope,
 /// monoisotopic mass, charge, representative-peak m/z, scan number and abundance to the main constructor.
 /// </summary>
 /// <param name="run">Run passed through to the main constructor.</param>
 /// <param name="observedEnvelope">Envelope that supplies all remaining constructor arguments.</param>
 public LcMsPeakCluster(LcMsRun run, ObservedIsotopeEnvelope observedEnvelope)
     : this(
         run,
         observedEnvelope.TheoreticalEnvelope,
         observedEnvelope.MonoMass,
         observedEnvelope.Charge,
         observedEnvelope.RepresentativePeak.Mz,
         observedEnvelope.ScanNum,
         observedEnvelope.Abundance)
 {
 }
Ejemplo n.º 9
0
        /// <summary>
        /// Gathers observed envelopes for <paramref name="targetMass"/> from the feature-matrix cells in the
        /// given row/column rectangle and wraps them in a cluster built around the best envelope found.
        /// </summary>
        /// <param name="targetMass">Mass whose isotope peaks are collected.</param>
        /// <param name="minRow">First feature-matrix row (charge offset from <c>_targetMinCharge</c>), inclusive.</param>
        /// <param name="maxRow">Last feature-matrix row, inclusive.</param>
        /// <param name="minCol">First feature-matrix column (MS1 spectrum index), inclusive.</param>
        /// <param name="maxCol">Last feature-matrix column, inclusive.</param>
        /// <param name="reCollectAllPeaks">
        /// When <c>true</c>, every cell in the rectangle is re-initialised and its peaks are collected again;
        /// otherwise only cells that are empty or whose mass is farther than the tolerance from
        /// <paramref name="targetMass"/> are refreshed.
        /// </param>
        /// <returns>
        /// The cluster, or <c>null</c> when no accepted envelope contains the most abundant isotope peak.
        /// </returns>
        private LcMsPeakCluster CollectLcMsPeaks(double targetMass, int minRow, int maxRow, int minCol, int maxCol, bool reCollectAllPeaks = false)
        {
            var ms1ScanNums = Run.GetMs1ScanVector();
            var envelopes = new List<ObservedIsotopeEnvelope>();
            var bestBcDist = 100d;
            ObservedIsotopeEnvelope bestEnvelope = null;
            var mostAbuInternalIndex = _theoreticalEnvelope.IndexOrderByRanking[0];
            var tolerance = new Tolerance(Comparer.Ppm * 0.5);
            var massTol = tolerance.GetToleranceAsTh(targetMass);
            var nPeaksCutoff = NumberOfPeaksCutoff;

            var bcCutoff = GetSeedBcDistThreshold();
            var corrCutoff = GetSeedCorrThreshold();

            for (var i = minRow; i <= maxRow; i++)
            {
                for (var j = minCol; j <= maxCol; j++)
                {
                    if (reCollectAllPeaks)
                    {
                        _featureMatrix[i][j].Init();
                    }

                    if (reCollectAllPeaks || !_featureMatrix[i][j].Exist || Math.Abs(_featureMatrix[i][j].AccurateMass - targetMass) > massTol)
                    {
                        var peaks = Ms1Spectra[j].GetAllIsotopePeaks(targetMass, i + _targetMinCharge, _theoreticalEnvelope, tolerance);

                        // Any() stops at the first non-null peak instead of counting all of them.
                        if (peaks.Any(p => p != null))
                        {
                            _featureMatrix[i][j].DivergenceDist = _theoreticalEnvelope.GetBhattacharyyaDistance(peaks);
                            _featureMatrix[i][j].AccurateMass = targetMass;
                            _featureMatrix[i][j].CorrelationCoeff = _theoreticalEnvelope.GetPearsonCorrelation(peaks);
                            Array.Copy(peaks, _featureMatrix[i][j].EnvelopePeaks, peaks.Length);
                        }
                    }

                    if (!_featureMatrix[i][j].Exist)
                    {
                        continue;
                    }

                    if (_featureMatrix[i][j].CountActivePeaks < nPeaksCutoff)
                    {
                        continue;
                    }

                    // Exclude outliers: cells that fail both the distance and the correlation cutoff.
                    if (_featureMatrix[i][j].DivergenceDist > bcCutoff && _featureMatrix[i][j].CorrelationCoeff < corrCutoff)
                    {
                        continue;
                    }

                    var envelope = new ObservedIsotopeEnvelope(_featureMatrix[i][j].AccurateMass, i + _targetMinCharge, ms1ScanNums[j], _featureMatrix[i][j].EnvelopePeaks, _theoreticalEnvelope);
                    envelopes.Add(envelope);

                    // Track the envelope with the smallest distance among those that contain the most abundant isotope.
                    if (_featureMatrix[i][j].EnvelopePeaks[mostAbuInternalIndex] != null && _featureMatrix[i][j].DivergenceDist < bestBcDist)
                    {
                        bestBcDist = _featureMatrix[i][j].DivergenceDist;
                        bestEnvelope = envelope;
                    }
                }
            }

            if (bestEnvelope == null)
            {
                return null;
            }

            var cluster = new LcMsPeakCluster(Run, bestEnvelope);
            cluster.AddEnvelopes(minRow + _targetMinCharge, maxRow + _targetMinCharge, ms1ScanNums[minCol], ms1ScanNums[maxCol], envelopes);

            return cluster;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds the feature matrix for the mass of the given bin, then grows a peak cluster outward from
        /// each seed envelope and collects the refined clusters that pass the acceptance test.
        /// </summary>
        /// <param name="binNumber">Bin whose average value (from <c>Comparer.GetMzAverage</c>) is used as the target mass.</param>
        /// <returns>
        /// The accepted refined clusters; an empty list when fewer than two rows or no columns were observed.
        /// </returns>
        private IList<LcMsPeakCluster> GetLcMs1PeakClusters(int binNumber)
        {
            // Maximum row (charge) distance from the current cell that is searched for neighbours.
            const int chargeNeighborGap = 4;
            var targetMass = Comparer.GetMzAverage(binNumber);
            BuildFeatureMatrix(targetMass); // should be called first

            var clusters = new List<LcMsPeakCluster>();

            // todo : bottom up dataset??
            if (_rows.Length < 2 || _cols.Length < 1) return clusters;

            // Scratch buffers: tempEnvelope holds the running summed envelope, tempEnvelope2 its backup
            // taken before each trial addition.
            var tempEnvelope = new double[_theoreticalEnvelope.Size];
            var tempEnvelope2 = new double[_theoreticalEnvelope.Size];
            
            var ms1ScanNums = Run.GetMs1ScanVector();
            var ms1ScanNumToIndex = Run.GetMs1ScanNumToIndex();
            var mostAbuInternalIndex = _theoreticalEnvelope.IndexOrderByRanking[0];
            var tolerance = new Tolerance(Comparer.Ppm*0.5);

            // Seeds are visited in ascending order of their key.
            foreach (var seed in _seedEnvelopes.OrderBy(s=>s.Key).Select(s=> s.Value))
            {
                var row = seed.Charge - _targetMinCharge;
                var col = ms1ScanNumToIndex[seed.ScanNum];
                
                // Skip seeds whose cell has already been checked out.
                if (_featureMatrix[row][col].CheckedOutFlag) continue;

                var mostAbuMz = _theoreticalEnvelope.GetIsotopeMz(seed.Charge, mostAbuInternalIndex);
                var seedLocalWin = Ms1Spectra[col].GetLocalMzWindow(mostAbuMz);
                var poissonPvalue = seedLocalWin.GetPoissonTestPvalue(_featureMatrix[row][col].EnvelopePeaks, _theoreticalEnvelope.Size);
                var rankSumPvalue = seedLocalWin.GetRankSumTestPvalue(_featureMatrix[row][col].EnvelopePeaks, _theoreticalEnvelope.Size);

                // A seed is kept when at least one of the two p-values is below 0.01.
                var goodEnvelope = (rankSumPvalue < 0.01 || poissonPvalue < 0.01);
                if (!goodEnvelope) continue;

                var chargeCheck = CorrectChargeState(seed, Ms1Spectra[col]);
                if (!chargeCheck) continue;
                
                var seedMass = _featureMatrix[row][col].AccurateMass;
                var massTol = tolerance.GetToleranceAsTh(seedMass);
                var newCluster = new LcMsPeakCluster(Run, seed);

                // Start the running summed envelope from the seed's peaks.
                Array.Clear(tempEnvelope, 0, tempEnvelope.Length);
                seed.Peaks.SumEnvelopeTo(tempEnvelope);

                // Breadth-first expansion: the queue holds envelopes whose neighbourhood is still to be examined.
                var neighbors = new Queue<ObservedIsotopeEnvelope>();
                neighbors.Enqueue(seed); // pick a seed
                _featureMatrix[row][col].CheckedOutFlag = true;

                var summedBcDist = _featureMatrix[row][col].DivergenceDist;
                var summedCorr = _featureMatrix[row][col].CorrelationCoeff;

                while (neighbors.Count > 0)
                {
                    var cell = neighbors.Dequeue();
                    var charge = cell.Charge;

                    // Row window: +/- chargeNeighborGap around the current row, limited to the observed rows.
                    var minRw = (int)Math.Max(charge - _targetMinCharge - chargeNeighborGap, _rows.First());
                    var maxRw = (int)Math.Min(charge - _targetMinCharge + chargeNeighborGap, _rows.Last());
                    var currCol = ms1ScanNumToIndex[cell.ScanNum];

                    // Column offsets visited, in order: 0, +1, +2, -1, -2.
                    for (var k = 0; k < 5; k++)
                    {
                        var j = currCol;
                        if (k < 3) j += k;
                        else j -= (k - 2);

                        if (j < _cols.First() || j > _cols.Last()) continue;

                        for (var i = minRw; i <= maxRw; i++)
                        {
                            // Candidate must be unclaimed, have a positive mass, and lie within tolerance of the seed mass.
                            if (_featureMatrix[i][j].CheckedOutFlag) continue;
                            if (!(_featureMatrix[i][j].AccurateMass > 0)) continue;
                            if (Math.Abs(seedMass - _featureMatrix[i][j].AccurateMass) > massTol) continue;
                            
                            // Back up the running sum, then tentatively add this cell's peaks to it.
                            Array.Copy(tempEnvelope, tempEnvelope2, tempEnvelope2.Length);

                            _featureMatrix[i][j].EnvelopePeaks.SumEnvelopeTo(tempEnvelope);
                            var newDivergence = _theoreticalEnvelope.GetBhattacharyyaDistance(tempEnvelope);
                            var newCorrelation = _theoreticalEnvelope.GetPearsonCorrelation(tempEnvelope);
                            
                            // Accept when the cell's own scores pass the fixed cutoffs, or when adding it
                            // improves either score of the summed envelope.
                            if (_featureMatrix[i][j].DivergenceDist < 0.02 ||_featureMatrix[i][j].CorrelationCoeff > 0.7 || newDivergence < summedBcDist || newCorrelation > summedCorr)
                            {
                                var envelope = new ObservedIsotopeEnvelope(_featureMatrix[i][j].AccurateMass,
                                    i + _targetMinCharge, ms1ScanNums[j], _featureMatrix[i][j].EnvelopePeaks,
                                    _theoreticalEnvelope);

                                neighbors.Enqueue(envelope);
                                newCluster.Expand(envelope);
                                _featureMatrix[i][j].CheckedOutFlag = true;

                                summedBcDist = newDivergence;
                                summedCorr = newCorrelation;
                            }
                            else
                            {
                                // Rejected: restore the running sum from the backup.
                                Array.Copy(tempEnvelope2, tempEnvelope, tempEnvelope.Length);
                            }
                        }
                    }
                }

                LcMsPeakCluster refinedCluster = null;
                if (summedCorr > 0.5 || summedBcDist < 0.15)
                {
                    // re-update check-out map
                    // (flags inside the grown cluster's bounds are cleared before the refined cluster is built)
                    SetCheckOutFlag(newCluster.MinCharge - _targetMinCharge, newCluster.MaxCharge - _targetMinCharge, ms1ScanNumToIndex[newCluster.MinScanNum], ms1ScanNumToIndex[newCluster.MaxScanNum], false);
                    refinedCluster = GetLcMsPeakCluster(newCluster.RepresentativeMass, newCluster.RepresentativeCharge, newCluster.MinScanNum, newCluster.MaxScanNum, true);
                }

                // Without a scorer every refined cluster is accepted; with one, the cluster must be flagged
                // good enough and reach the scorer's threshold.
                if (refinedCluster != null && (_scorer == null || (_scorer != null && refinedCluster.GoodEnougth && refinedCluster.Score >= _scorer.ScoreThreshold)))
                {
                    // Accepted: check out every observed row across the refined cluster's scan range.
                    SetCheckOutFlag(_rows.First(), _rows.Last(), ms1ScanNumToIndex[refinedCluster.MinScanNum], ms1ScanNumToIndex[refinedCluster.MaxScanNum], true);
                    clusters.Add(refinedCluster);
                }
                else
                {
                    // Rejected: check out only the region covered by the grown (unrefined) cluster.
                    SetCheckOutFlag(newCluster.MinCharge - _targetMinCharge, newCluster.MaxCharge - _targetMinCharge, ms1ScanNumToIndex[newCluster.MinScanNum], ms1ScanNumToIndex[newCluster.MaxScanNum], true);
                }
            }
            return clusters;            
        }
Ejemplo n.º 11
0
        //public const double SNRthreshold = 1.4826;
        /// <summary>
        /// Fills the feature matrix (rows = charges, columns = MS1 spectra) with the isotope peaks that match
        /// <paramref name="targetMass"/>, scores each populated cell against the theoretical envelope,
        /// collects seed envelopes, and records which rows and columns contain at least one scored cell.
        /// </summary>
        /// <param name="targetMass">Mass for which the matrix is built.</param>
        /// <remarks>
        /// Rows are processed in parallel. Each iteration writes only to the cells of its own row;
        /// <c>_seedEnvelopes</c> is guarded by a lock, and the observed-row/column flags are plain
        /// <c>bool</c> arrays so that concurrent writers cannot clobber each other's flags.
        /// </remarks>
        private void BuildFeatureMatrix(double targetMass)
        {
            InitFeatureMatrix();

            SetTargetMass(targetMass);

            // bool[] instead of BitArray: these flags are set from several Parallel.ForEach workers at once.
            // BitArray packs 32 flags per int and its setter is an unsynchronised read-modify-write, so two
            // workers setting different bits of the same word can lose one of the updates. Distinct array
            // elements have no such coupling.
            var observedRows        = new bool[NRows];
            var observedCols        = new bool[NColumns];

            var mostAbuInternalIdx = _theoreticalEnvelope.IndexOrderByRanking[0];
            var totalElutionLength = Run.GetElutionTime(Run.MaxLcScan);
            // 0.3% of the total elution length, kept within [0.5, 5.0].
            var elutionSamplingHalfLen = Math.Max(Math.Min(totalElutionLength * 0.003, 5.0), 0.5);
            // The same half-length expressed in columns, but never fewer than 5.
            var neighborHalfColumns = (int) Math.Max((elutionSamplingHalfLen/totalElutionLength)*NColumns, 5);

            var targetMassBinNum = Comparer.GetBinNumber(targetMass);
            var tolerance = new Tolerance(Comparer.Ppm*0.5);

            // NOTE(review): assumes _ms1PeakList is sorted by m/z (it is binary-searched below) - confirm.
            var minMs1Mz = _ms1PeakList.First().Mz;
            var maxMs1Mz = _ms1PeakList.Last().Mz;

            var nPeaksCutoff = NumberOfPeaksCutoff;
            var bcSeedCutoff = GetSeedBcDistThreshold();
            var corrSeedCutoff = GetSeedCorrThreshold();
            var ms1ScanNums = Run.GetMs1ScanVector();
            var ms1ScanNumToIndex = Run.GetMs1ScanNumToIndex();

            var options = new ParallelOptions();
            if (_maxThreadCount > 0) options.MaxDegreeOfParallelism = _maxThreadCount;
            _seedEnvelopes.Clear();

            Parallel.ForEach(_rows, options, row =>
            {
                var charge = row + _targetMinCharge;

                for (var col = 0; col < NColumns; col++) _featureMatrix[row][col].Init();

                // Isotopes are visited in ranking order; k == 0 is the top-ranked (most abundant) isotope.
                for (var k = 0; k < _theoreticalEnvelope.Size; k++)
                {
                    var i = _theoreticalEnvelope.IndexOrderByRanking[k];
                    var isotopeIndex = _theoreticalEnvelope.Isotopes[i].Index;
                    // m/z search window: the target bin's own start/end for the top isotope, the averages of
                    // the two adjacent bins for all other isotopes.
                    var isotopeMzLb = (k == 0) ? Ion.GetIsotopeMz(Comparer.GetMzStart(targetMassBinNum), charge, isotopeIndex) : Ion.GetIsotopeMz(Comparer.GetMzAverage(targetMassBinNum - 1), charge, isotopeIndex);
                    var isotopeMzUb = (k == 0) ? Ion.GetIsotopeMz(Comparer.GetMzEnd(targetMassBinNum), charge, isotopeIndex) : Ion.GetIsotopeMz(Comparer.GetMzAverage(targetMassBinNum + 1), charge, isotopeIndex);

                    if (isotopeMzLb < minMs1Mz || isotopeMzUb > maxMs1Mz) continue;
                    var st = _ms1PeakList.BinarySearch(new Ms1Peak(isotopeMzLb, 0, 0));

                    // A negative result is the bitwise complement of the insertion point.
                    if (st < 0) st = ~st;

                    for (var j = st; j < _ms1PeakList.Count; j++)
                    {
                        var ms1Peak         = _ms1PeakList[j];
                        if (ms1Peak.Mz > isotopeMzUb) break;
                        var col = ms1Peak.Ms1SpecIndex;

                        if (k == 0) // most abundant peak
                        {
                            // Keep the most intense candidate and derive the cell's accurate mass from it.
                            if (_featureMatrix[row][col].EnvelopePeaks[i] == null || ms1Peak.Intensity > _featureMatrix[row][col].EnvelopePeaks[i].Intensity)
                            {
                                _featureMatrix[row][col].EnvelopePeaks[i] = ms1Peak;
                                _featureMatrix[row][col].AccurateMass = Ion.GetMonoIsotopicMass(ms1Peak.Mz, charge, isotopeIndex);
                            }
                        }
                        else
                        {
                            // Other isotopes must match the m/z expected from the cell's accurate mass.
                            if (!(_featureMatrix[row][col].AccurateMass > 0)) continue;
                            var expectedPeakMz = Ion.GetIsotopeMz(_featureMatrix[row][col].AccurateMass, charge, isotopeIndex);
                            if (Math.Abs(expectedPeakMz - ms1Peak.Mz) > tolerance.GetToleranceAsTh(ms1Peak.Mz)) continue;

                            // in case of existing isotope peaks, select peaks maximizing envelope similarity
                            if (_featureMatrix[row][col].EnvelopePeaks[i] != null)
                            {
                                if (_featureMatrix[row][col].CountActivePeaks == 1)
                                {
                                    if (ms1Peak.Intensity > _featureMatrix[row][col].EnvelopePeaks[i].Intensity) _featureMatrix[row][col].EnvelopePeaks[i] = ms1Peak;
                                }
                                else
                                {
                                    // Try the new peak in place of the old one; revert if the old one gave the smaller distance.
                                    var tmpPeak = _featureMatrix[row][col].EnvelopePeaks[i];
                                    var bc1 = _theoreticalEnvelope.GetBhattacharyyaDistance(_featureMatrix[row][col].EnvelopePeaks);
                                    _featureMatrix[row][col].EnvelopePeaks[i] = ms1Peak;
                                    var bc2 = _theoreticalEnvelope.GetBhattacharyyaDistance(_featureMatrix[row][col].EnvelopePeaks);
                                    if (bc1 < bc2) _featureMatrix[row][col].EnvelopePeaks[i] = tmpPeak;
                                }
                            }
                            else
                            {
                                _featureMatrix[row][col].EnvelopePeaks[i] = ms1Peak;
                            }
                        }
                    }

                    if (k == 0)
                    {
                        // for cells missing most abundant peaks
                        for (var col = 0; col < NColumns; col++)
                        {
                            if (_featureMatrix[row][col].Exist) continue;
                            var highestIntensity = 0d;
                            var inferredAccurateMass = 0d;
                            // find the most intense abundant peak from neighboring cells
                            for (var j = Math.Max(col - neighborHalfColumns, 0); j <= Math.Min(col + neighborHalfColumns, NColumns - 1); j++)
                            {
                                var mostAbuPeak = _featureMatrix[row][j].EnvelopePeaks[mostAbuInternalIdx];
                                if (mostAbuPeak != null && mostAbuPeak.Intensity > highestIntensity)
                                {
                                    highestIntensity = mostAbuPeak.Intensity;
                                    inferredAccurateMass = _featureMatrix[row][j].AccurateMass;
                                }
                            }
                            _featureMatrix[row][col].AccurateMass = inferredAccurateMass;
                        }
                    }

                }

                // Score the populated cells of this row and collect seeds.
                for (var col = 0; col < NColumns; col++)
                {
                    if (!(_featureMatrix[row][col].Exist)) continue;

                    if (_featureMatrix[row][col].CountActivePeaks >= nPeaksCutoff)
                    {
                        var corr = _theoreticalEnvelope.GetPearsonCorrelation(_featureMatrix[row][col].EnvelopePeaks);
                        var bcDist = _theoreticalEnvelope.GetBhattacharyyaDistance(_featureMatrix[row][col].EnvelopePeaks);
                        _featureMatrix[row][col].CorrelationCoeff = corr;
                        _featureMatrix[row][col].DivergenceDist = bcDist;

                        if (!observedRows[row]) observedRows[row] = true;
                        if (!observedCols[col]) observedCols[col] = true;

                        // collect seed envelopes
                        // NOTE(review): "corr < corrSeedCutoff" admits low-correlation cells as seeds, whereas
                        // CollectLcMsPeaks rejects cells with low correlation; this may be inverted - confirm
                        // before changing, since it affects which seeds are produced.
                        var mostAbuPeak = _featureMatrix[row][col].EnvelopePeaks[mostAbuInternalIdx];
                        if (mostAbuPeak != null && (bcDist < bcSeedCutoff || corr < corrSeedCutoff))
                        {
                            var signalToNoiseRatio = mostAbuPeak.Intensity / Ms1Spectra[col].MedianIntensity;
                            if (signalToNoiseRatio > 3)
                            {
                                var seed = new ObservedIsotopeEnvelope(_featureMatrix[row][col].AccurateMass, row + _targetMinCharge, ms1ScanNums[col], _featureMatrix[row][col].EnvelopePeaks, _theoreticalEnvelope);

                                lock (_seedEnvelopes)
                                {
                                    _seedEnvelopes.Add(new KeyValuePair<double, ObservedIsotopeEnvelope>(bcDist, seed));
                                }
                            }
                        }
                    }
                    else
                    {
                        // Too few active peaks: clear the mass so later passes ignore the cell.
                        _featureMatrix[row][col].AccurateMass = 0d;
                    }
                }
            } // end of row loop
            );

            // Compact the flags into sorted index arrays.
            var temp = new List<int>();
            for (var i = 0; i < observedRows.Length; i++) if (observedRows[i]) temp.Add(i);
            _rows = temp.ToArray();

            temp.Clear();
            for (var i = 0; i < observedCols.Length; i++) if (observedCols[i]) temp.Add(i);
            _cols = temp.ToArray();
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Checks whether the envelope's charge looks right by counting, among the sufficiently intense
        /// spectrum peaks spanned by the envelope, peak pairs whose spacing matches a higher candidate charge.
        /// </summary>
        /// <param name="envelope">Envelope whose charge is tested.</param>
        /// <param name="spectrum">Spectrum supplying the peaks between the envelope's min and max m/z peaks.</param>
        /// <returns>
        /// <c>false</c> when some candidate charge accumulates more matching gaps than both thresholds;
        /// <c>true</c> otherwise, including charges above 20 and envelopes that already cover more than 70%
        /// of the intense peaks in the span.
        /// </returns>
        private bool CorrectChargeState(ObservedIsotopeEnvelope envelope, Ms1Spectrum spectrum)
        {
            if (envelope.Charge > 20)
            {
                return true; // high charge (> +20), just pass
            }

            var spectrumPeaks = spectrum.Peaks;
            var firstIndex = envelope.MinMzPeak.IndexInSpectrum;
            var lastIndex = envelope.MaxMzPeak.IndexInSpectrum;
            // Peaks at or below 15% of the envelope's highest intensity are ignored throughout.
            var minIntensity = envelope.HighestIntensity * 0.15;

            var intensePeakCount = 0;
            for (var idx = firstIndex; idx <= lastIndex; idx++)
            {
                intensePeakCount += spectrumPeaks[idx].Intensity > minIntensity ? 1 : 0;
            }

            if (envelope.NumberOfPeaks > intensePeakCount * 0.7)
            {
                return true;
            }

            var tolerance = new Tolerance(Comparer.Ppm * 0.5);
            var gapLimitFromSpan = intensePeakCount * 0.5;
            var gapLimitFromEnvelope = envelope.NumberOfPeaks + (envelope.TheoreticalEnvelope.Size - 1) * 0.7;

            var mzTol = tolerance.GetToleranceAsTh(spectrumPeaks[firstIndex].Mz);

            var minCheckCharge = Math.Max(envelope.Charge * 2 - 1, 4);
            var maxCheckCharge = Math.Min(envelope.Charge * 5 + 1, 60);
            var widestGap = Constants.C13MinusC12 / minCheckCharge + mzTol;
            var gapsPerCharge = new int[maxCheckCharge - minCheckCharge + 1];

            for (var left = firstIndex; left <= lastIndex; left++)
            {
                if (!(spectrumPeaks[left].Intensity > minIntensity))
                {
                    continue;
                }

                for (var right = left + 1; right <= lastIndex; right++)
                {
                    if (!(spectrumPeaks[right].Intensity > minIntensity))
                    {
                        continue;
                    }

                    var gap = spectrumPeaks[right].Mz - spectrumPeaks[left].Mz;
                    if (gap > widestGap)
                    {
                        break;
                    }

                    // Peaks are too close together: gap - mzTol is (almost) zero, so skip the pair.
                    if (Math.Abs(gap - mzTol) < float.Epsilon)
                    {
                        continue;
                    }

                    var lowestCandidate = Math.Round(1 / (gap + mzTol));
                    var highestCandidate = Math.Round(1 / (gap - mzTol));

                    for (var candidate = lowestCandidate; candidate <= highestCandidate; candidate++)
                    {
                        if (candidate > maxCheckCharge)
                        {
                            break;
                        }

                        if (candidate < minCheckCharge)
                        {
                            continue;
                        }

                        var slot = (int)candidate - minCheckCharge;
                        var gapCount = ++gapsPerCharge[slot];

                        if (gapCount + 1 > gapLimitFromSpan && gapCount + 1 > gapLimitFromEnvelope)
                        {
                            return false;
                        }
                    }
                }
            }

            return true;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Evaluates how significant an observed isotope envelope is relative to the other peaks
        /// in the local m/z window around its most abundant isotope.
        /// </summary>
        /// <param name="envelope">Observed isotope envelope to evaluate.</param>
        /// <returns>
        /// Local window statistics plus a rank-sum score and a Poisson score; each score is
        /// -log2(p-value), or 50 when the p-value is not positive.
        /// </returns>
        public IsotopeEnvelopeStatisticalInfo PreformStatisticalSignificanceTest(ObservedIsotopeEnvelope envelope)
        {
            // Centre the local window on the most abundant isotope: the observed peak when there
            // is one, the theoretical m/z otherwise.
            var topIsotopeIndex = envelope.TheoreticalEnvelope.IndexOrderByRanking[0];
            double windowCenterMz = envelope.Peaks[topIsotopeIndex] != null
                ? envelope.Peaks[topIsotopeIndex].Mz
                : envelope.TheoreticalEnvelope.GetIsotopeMz(envelope.Charge, topIsotopeIndex);

            int windowStartIndex;
            Tuple<double, double> windowMzRange;
            var localRanks = GetLocalRankings(windowCenterMz, out windowStartIndex, out windowMzRange);

            var windowMzStart = windowMzRange.Item1;
            var windowMzEnd = windowMzRange.Item2;

            // The count of possible peak positions uses 100 slots per Th of window width.
            var stats = new IsotopeEnvelopeStatisticalInfo
            {
                LocalMzStart = windowMzStart,
                LocalMzEnd = windowMzEnd,
                NumberOfLocalPeaks = localRanks.Length,
                NumberOfPossiblePeaks = (int)Math.Ceiling(100 * (windowMzEnd - windowMzStart)),
                NumberOfIsotopePeaks = envelope.Size,
            };

            // Rank-sum test: accumulate the local ranks of the matched, active isotope peaks.
            var rankTotal = 0;
            var rankedPeakCount = 0;

            for (var isotopeIdx = 0; isotopeIdx < envelope.Size; isotopeIdx++)
            {
                var peak = envelope.Peaks[isotopeIdx];
                if (peak == null || !peak.Active)
                {
                    continue;
                }

                stats.NumberOfMatchedIsotopePeaks++;

                // Peaks outside the local window are counted as matched but carry no rank.
                var offsetInWindow = peak.IndexInSpectrum - windowStartIndex;
                if (offsetInWindow >= localRanks.Length || offsetInWindow < 0)
                {
                    continue;
                }

                rankTotal += localRanks[offsetInWindow];
                rankedPeakCount++;
            }

            var rankSumPvalue = FitScoreCalculator.GetRankSumPvalue(stats.NumberOfLocalPeaks, rankedPeakCount, rankTotal);
            if (rankSumPvalue > 0)
            {
                stats.RankSumScore = -Math.Log(rankSumPvalue, 2);
            }
            else
            {
                stats.RankSumScore = 50;
            }

            // Poisson test: expected matches = (detected peaks / possible peaks) * theoretical isotopes.
            var possiblePeaks = stats.NumberOfPossiblePeaks;
            var theoreticalPeaks = stats.NumberOfIsotopePeaks;
            var detectedPeaks = stats.NumberOfLocalPeaks;
            var matchedPeaks = stats.NumberOfMatchedIsotopePeaks;

            var expectedMatches = ((double)detectedPeaks / (double)possiblePeaks) * theoreticalPeaks;
            var poissonPvalue = 1 - Poisson.CDF(expectedMatches, matchedPeaks);

            if (poissonPvalue > 0)
            {
                stats.PoissonScore = -Math.Log(poissonPvalue, 2);
            }
            else
            {
                stats.PoissonScore = 50;
            }

            return stats;
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Recomputes this cluster's scores from the envelopes currently stored in <c>Envelopes</c>
        /// (rows = charge states from MinCharge to MaxCharge, columns = MS1 scans from MinScanNum to MaxScanNum).
        /// Scores are tracked separately for even and odd charge states.
        /// </summary>
        /// <param name="ms1Spectra">MS1 spectra, indexed by MS1 scan index (the values of the run's scan-number-to-index map).</param>
        /// <param name="pValueCheck">
        /// When true, an envelope only contributes to the "best" scores and to the primary representative
        /// envelope if both its Poisson and rank-sum p-values are below 0.01. When false, every envelope qualifies.
        /// </param>
        public void UpdateScore(List <Ms1Spectrum> ms1Spectra, bool pValueCheck = true)
        {
            var nRows             = MaxCharge - MinCharge + 1;
            var ms1ScanNumToIndex = _run.GetMs1ScanNumToIndex();
            var minCol            = ms1ScanNumToIndex[MinScanNum];
            var maxCol            = ms1ScanNumToIndex[MaxScanNum];
            var nCols             = maxCol - minCol + 1;
            var mostAbuIdx        = TheoreticalEnvelope.IndexOrderByRanking[0];

            ClearScore();

            // best summed-envelope distance seen so far for each charge parity; 10.0 acts as the "nothing yet" value
            var bestChargeDist = new double[] { 10.0d, 10.0d };
            // sum envelopes at each charge
            var summedIntensity = new double[TheoreticalEnvelope.Size];

            // XIC arrays carry 9 extra (zero) entries before and after the nCols scan columns
            // NOTE(review): the padding is presumably there for the smoother applied below - confirm
            var xicLen      = nCols + 18;
            var xicStartIdx = 9;

            /*
             * if (nCols < 13)
             * {
             *  xicLen = 13;
             *  xicStartIdx = (int) Math.Floor((xicLen - nCols)*0.5);
             * }*/

            // xic2: abundance profile summed per charge parity; chargeXic: abundance profile per individual charge
            var xic2 = new double[2][];

            xic2[0] = new double[xicLen];
            xic2[1] = new double[xicLen];
            var chargeXic = new double[nRows][];

            var tempBestBcDist    = 10.0d;
            // primary representative envelope: lowest distance among envelopes passing the level-one check
            var repEnvelopeBcDist = 10.0d;
            ObservedIsotopeEnvelope repEnvelope = null;

            // fallback representative envelope: lowest distance among all envelopes
            var repEnvelopeBcDist2 = 10.0d;
            ObservedIsotopeEnvelope repEnvelope2 = null;

            // "temp" scores are collected without the p-value requirement and used as a fallback after the main loop
            var tempBestDistanceScoreAcrossCharge = new double[2] {
                10, 10
            };
            var tempBestIntensityScoreAcrossCharge   = new double[2];
            var tempBestCorrelationScoreAcrossCharge = new double[2];

            for (var i = 0; i < nRows; i++)
            {
                var charge    = i + MinCharge;
                var mostAbuMz = TheoreticalEnvelope.GetIsotopeMz(charge, mostAbuIdx);
                // the summed envelope is rebuilt from scratch for every charge state
                Array.Clear(summedIntensity, 0, summedIntensity.Length);

                chargeXic[i] = new double[xicLen];

                var chargeIdx = (charge % 2 == 0) ? EvenCharge : OddCharge;
                var summedMostAbuIsotopeIntensity = 0d;
                var summedReferenceIntensity      = 0d;

                for (var j = 0; j < nCols; j++)
                {
                    var envelope = Envelopes[i][j];
                    var col      = minCol + j;

                    // the local window is looked up before the null check, i.e. also for empty cells
                    var localWin = ms1Spectra[col].GetLocalMzWindow(mostAbuMz);

                    if (envelope == null)
                    {
                        continue;
                    }

                    envelope.Peaks.SumEnvelopeTo(summedIntensity);
                    var mostAbuPeak = envelope.Peaks[mostAbuIdx];

                    if (mostAbuPeak != null && mostAbuPeak.Active)
                    {
                        summedMostAbuIsotopeIntensity += mostAbuPeak.Intensity;
                        summedReferenceIntensity      += localWin.HighestIntensity;
                    }
                    AbundanceDistributionAcrossCharge[chargeIdx] += envelope.Abundance;

                    var newBcDist = TheoreticalEnvelope.GetBhattacharyyaDistance(envelope.Peaks);
                    var newCorr   = TheoreticalEnvelope.GetPearsonCorrelation(envelope.Peaks);

                    // an envelope is "good" when either fit measure passes its threshold
                    var goodEnvelope = (newBcDist <0.07 || newCorr> 0.7);

                    // only good envelopes contribute to the XICs
                    if (goodEnvelope)
                    {
                        xic2[chargeIdx][xicStartIdx + j] += envelope.Abundance;
                        chargeXic[i][xicStartIdx + j]     = envelope.Abundance;
                    }

                    // levelTwoEnvelope is never changed below (its relaxed test is commented out), so it is always true
                    var levelOneEnvelope = true;
                    var levelTwoEnvelope = true;

                    if (pValueCheck)
                    {
                        var poissonPvalue = localWin.GetPoissonTestPvalue(envelope.Peaks, TheoreticalEnvelope.Size);
                        var rankSumPvalue = localWin.GetRankSumTestPvalue(envelope.Peaks, TheoreticalEnvelope.Size);
                        levelOneEnvelope = (rankSumPvalue < 0.01 && poissonPvalue < 0.01);
                        //levelTwoEnvelope = (rankSumPvalue < 0.05 || poissonPvalue < 0.05);
                    }

                    if (levelOneEnvelope)
                    {
                        // the intensity score is taken from the same envelope that gives the best distance
                        if (newBcDist < BestDistanceScoreAcrossCharge[chargeIdx])
                        {
                            BestDistanceScoreAcrossCharge[chargeIdx] = newBcDist;
                            // NOTE(review): the guard tests MedianIntensity but the division uses HighestIntensity - confirm this is intended
                            if (localWin.MedianIntensity > 0)
                            {
                                BestIntensityScoreAcrossCharge[chargeIdx] = envelope.HighestIntensity / localWin.HighestIntensity;
                            }
                            else
                            {
                                BestIntensityScoreAcrossCharge[chargeIdx] = 1.0d;
                            }
                        }

                        BestCorrelationScoreAcrossCharge[chargeIdx] = Math.Max(BestCorrelationScoreAcrossCharge[chargeIdx], newCorr);

                        if (newBcDist < repEnvelopeBcDist)
                        {
                            repEnvelopeBcDist = newBcDist;
                            repEnvelope       = envelope;
                        }

                        // in the initial scoring, classify major and minor envelopes
                        if (!_initScore && goodEnvelope)
                        {
                            envelope.GoodEnough = true;
                        }
                    }

                    // same bookkeeping as above, but into the fallback ("temp") scores
                    if (levelTwoEnvelope)
                    {
                        if (newBcDist < tempBestDistanceScoreAcrossCharge[chargeIdx])
                        {
                            tempBestDistanceScoreAcrossCharge[chargeIdx] = newBcDist;
                            if (localWin.MedianIntensity > 0)
                            {
                                tempBestIntensityScoreAcrossCharge[chargeIdx] = envelope.HighestIntensity / localWin.HighestIntensity;
                            }
                            else
                            {
                                tempBestIntensityScoreAcrossCharge[chargeIdx] = 1.0d;
                            }
                        }
                        tempBestCorrelationScoreAcrossCharge[chargeIdx] = Math.Max(tempBestCorrelationScoreAcrossCharge[chargeIdx], newCorr);

                        if (newBcDist < repEnvelopeBcDist2)
                        {
                            repEnvelopeBcDist2 = newBcDist;
                            repEnvelope2       = envelope;
                        }
                    }
                }

                // score the envelope summed over all scans of this charge state
                var bcDist = TheoreticalEnvelope.GetBhattacharyyaDistance(summedIntensity);
                EnvelopeDistanceScoreAcrossCharge[chargeIdx]    = Math.Min(bcDist, EnvelopeDistanceScoreAcrossCharge[chargeIdx]);
                EnvelopeCorrelationScoreAcrossCharge[chargeIdx] = Math.Max(TheoreticalEnvelope.GetPearsonCorrelation(summedIntensity), EnvelopeCorrelationScoreAcrossCharge[chargeIdx]);

                // the first charge seen for a parity (BestCharge < 1) or a better summed distance becomes the best charge
                if (BestCharge[chargeIdx] < 1 || bcDist < bestChargeDist[chargeIdx])
                {
                    BestCharge[chargeIdx]     = charge;
                    bestChargeDist[chargeIdx] = bcDist;
                    if (summedReferenceIntensity > 0)
                    {
                        EnvelopeIntensityScoreAcrossCharge[chargeIdx] = summedMostAbuIsotopeIntensity / summedReferenceIntensity;
                    }
                    //if (summedMedianIntensity > 0) EnvelopeIntensityScoreAcrossCharge[chargeIdx] = Math.Min(1.0, 0.1*(summedMostAbuIsotopeIntensity / summedMedianIntensity));
                }

                // keep the summed envelope of the best-fitting charge state (over both parities)
                if (bcDist < tempBestBcDist)
                {
                    tempBestBcDist = bcDist;
                    Array.Copy(summedIntensity, RepresentativeSummedEnvelop, RepresentativeSummedEnvelop.Length);
                }
            }

            // when a good envelope is observed at only one charge parity (index 0 here), use the fallback scores for index 1
            if (BestCorrelationScoreAcrossCharge[0] > 0.7 && BestCorrelationScoreAcrossCharge[1] < 0.5)
            {
                const int i = 1;
                BestCorrelationScoreAcrossCharge[i] = tempBestCorrelationScoreAcrossCharge[i];
                BestIntensityScoreAcrossCharge[i]   = tempBestIntensityScoreAcrossCharge[i];
                BestDistanceScoreAcrossCharge[i]    = tempBestDistanceScoreAcrossCharge[i];
            }

            // mirror case: index 1 is good, index 0 is weak
            if (BestCorrelationScoreAcrossCharge[1] > 0.7 && BestCorrelationScoreAcrossCharge[0] < 0.5)
            {
                const int i = 0;
                BestCorrelationScoreAcrossCharge[i] = tempBestCorrelationScoreAcrossCharge[i];
                BestIntensityScoreAcrossCharge[i]   = tempBestIntensityScoreAcrossCharge[i];
                BestDistanceScoreAcrossCharge[i]    = tempBestDistanceScoreAcrossCharge[i];
            }

            // normalize abundance across charges so that the two parities sum to 1
            var s = AbundanceDistributionAcrossCharge[0] + AbundanceDistributionAcrossCharge[1];

            if (s > 0)
            {
                for (var chargeIdx = 0; chargeIdx < 2; chargeIdx++)
                {
                    AbundanceDistributionAcrossCharge[chargeIdx] = AbundanceDistributionAcrossCharge[chargeIdx] / s;
                }
            }

            // XIC correlations are only computed when the cluster spans more than one scan
            if (nCols > 1)
            {
                // NOTE(review): if BestCharge for one parity was never set in the loop above (e.g. the charge range
                // contains only one parity), these indices may fall outside chargeXic - confirm what ClearScore resets BestCharge to
                var evenChargeIdx = BestCharge[EvenCharge] - MinCharge;
                var oddChargeIdx  = BestCharge[OddCharge] - MinCharge;
                XicCorrelationBetweenBestCharges[0] = FitScoreCalculator.GetPearsonCorrelation(Smoother.Smooth(chargeXic[evenChargeIdx]), Smoother.Smooth(chargeXic[oddChargeIdx]));
                XicCorrelationBetweenBestCharges[1] = FitScoreCalculator.GetPearsonCorrelation(Smoother.Smooth(xic2[EvenCharge]), Smoother.Smooth(xic2[OddCharge]));
            }

            // no envelope passed the level-one check: fall back to the best envelope overall
            if (repEnvelope == null && repEnvelope2 != null)
            {
                repEnvelope = repEnvelope2;
            }

            if (repEnvelope != null)
            {
                // set representative charge, mz and scanNum
                RepresentativeCharge  = repEnvelope.Charge;
                RepresentativeMz      = repEnvelope.RepresentativePeak.Mz;
                RepresentativeScanNum = repEnvelope.ScanNum;
            }

            // later calls no longer set GoodEnough on envelopes
            _initScore = true;
        }
Ejemplo n.º 15
0
 /// <summary>
 /// Creates a cluster from a single observed envelope by forwarding the envelope's theoretical
 /// envelope, monoisotopic mass, charge, representative peak m/z, scan number and abundance
 /// to the constructor overload that takes those values individually.
 /// </summary>
 /// <param name="run">LC-MS run passed through to the other constructor.</param>
 /// <param name="observedEnvelope">Observed envelope supplying the initial values; its RepresentativePeak is dereferenced here.</param>
 public LcMsPeakCluster(LcMsRun run, ObservedIsotopeEnvelope observedEnvelope)
     : this(run, observedEnvelope.TheoreticalEnvelope, observedEnvelope.MonoMass, observedEnvelope.Charge,
            observedEnvelope.RepresentativePeak.Mz, observedEnvelope.ScanNum, observedEnvelope.Abundance)
 {
 }
Ejemplo n.º 16
0
        /// <summary>
        /// Replaces the envelope grid with decoy envelopes assembled from randomly drawn peaks of
        /// each scan's local m/z window, then rescores the cluster with the p-value check disabled.
        /// </summary>
        /// <param name="ms1Spectra">MS1 spectra, indexed by MS1 scan index.</param>
        /// <param name="targetMinCharge">Lowest charge state to populate; assigned to MinCharge.</param>
        /// <param name="targetMaxCharge">Highest charge state to populate; assigned to MaxCharge.</param>
        public void UpdateWithDecoyScore(List<Ms1Spectrum> ms1Spectra, int targetMinCharge, int targetMaxCharge)
        {
            var scanNumToIndex = _run.GetMs1ScanNumToIndex();
            var scanNumbers = _run.GetMs1ScanVector();
            var firstCol = scanNumToIndex[MinScanNum];
            var lastCol = scanNumToIndex[MaxScanNum];

            MinCharge = targetMinCharge;
            MaxCharge = targetMaxCharge;

            var random = new Random();
            var mzComparer = new MzComparerWithBinning(28);
            var topIsotopeIndex = TheoreticalEnvelope.IndexOrderByRanking[0];

            var chargeCount = MaxCharge - MinCharge + 1;
            var scanCount = lastCol - firstCol + 1;

            // Start from an empty charge-by-scan grid.
            Envelopes = new ObservedIsotopeEnvelope[chargeCount][];
            for (var rowIdx = 0; rowIdx < chargeCount; rowIdx++)
            {
                Envelopes[rowIdx] = new ObservedIsotopeEnvelope[scanCount];
            }

            for (var charge = targetMinCharge; charge <= targetMaxCharge; charge++)
            {
                var topIsotopeMz = TheoreticalEnvelope.GetIsotopeMz(charge, topIsotopeIndex);

                // Charge states whose most abundant isotope lies outside the run's MS1 m/z range keep empty rows.
                var outsideMs1Range = _run.MaxMs1Mz < topIsotopeMz || topIsotopeMz < _run.MinMs1Mz;
                if (outsideMs1Range)
                {
                    continue;
                }

                for (var col = firstCol; col <= lastCol; col++)
                {
                    var spectrum = ms1Spectra[col];
                    var window = spectrum.GetLocalMzWindow(topIsotopeMz);

                    var upperBin = mzComparer.GetBinNumber(window.MaxMz);
                    var lowerBin = mzComparer.GetBinNumber(window.MinMz);
                    var binCount = upperBin - lowerBin + 1;

                    var decoyPeaks = new Ms1Peak[TheoreticalEnvelope.Size];
                    for (var slot = 0; slot < decoyPeaks.Length; slot++)
                    {
                        // Draw a random bin number; only draws below the window's peak count pick
                        // an actual peak, every other slot stays null.
                        var draw = random.Next(0, binCount);
                        if (draw < window.PeakCount)
                        {
                            decoyPeaks[slot] = (Ms1Peak) spectrum.Peaks[draw + window.PeakStartIndex];
                        }
                    }

                    var decoyEnvelope = new ObservedIsotopeEnvelope(Mass, charge, scanNumbers[col], decoyPeaks, TheoreticalEnvelope);
                    Envelopes[charge - MinCharge][col - firstCol] = decoyEnvelope;
                }
            }

            UpdateScore(ms1Spectra, false);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Runs a rank-sum test and a Poisson test for an observed isotope envelope against the
        /// peaks found in the local m/z window around its most abundant isotope.
        /// </summary>
        /// <param name="envelope">Observed isotope envelope to test.</param>
        /// <returns>
        /// The local window boundaries and peak counts together with both scores, each reported
        /// as -log2(p-value) (50 when the p-value is not positive).
        /// </returns>
        public IsotopeEnvelopeStatisticalInfo PreformStatisticalSignificanceTest(ObservedIsotopeEnvelope envelope)
        {
            var theoretical = envelope.TheoreticalEnvelope;
            var mostAbundantIdx = theoretical.IndexOrderByRanking[0];

            // Prefer the observed m/z of the most abundant isotope; use the theoretical one when
            // that isotope was not observed.
            double centerMz;
            if (envelope.Peaks[mostAbundantIdx] != null)
            {
                centerMz = envelope.Peaks[mostAbundantIdx].Mz;
            }
            else
            {
                centerMz = envelope.TheoreticalEnvelope.GetIsotopeMz(envelope.Charge, mostAbundantIdx);
            }

            int firstLocalPeakIndex;
            Tuple<double, double> localMzRange;
            var ranks = GetLocalRankings(centerMz, out firstLocalPeakIndex, out localMzRange);

            // Possible peak count: 100 positions per Th across the local window.
            var result = new IsotopeEnvelopeStatisticalInfo
            {
                LocalMzStart = localMzRange.Item1,
                LocalMzEnd = localMzRange.Item2,
                NumberOfLocalPeaks = ranks.Length,
                NumberOfPossiblePeaks = (int)Math.Ceiling(100 * (localMzRange.Item2 - localMzRange.Item1)),
                NumberOfIsotopePeaks = envelope.Size,
            };

            // Rank-sum test input: sum of local ranks over matched, active peaks inside the window.
            var sumOfRanks = 0;
            var peaksRanked = 0;
            for (var idx = 0; idx < envelope.Size; idx++)
            {
                var isotopePeak = envelope.Peaks[idx];
                if (isotopePeak == null || !isotopePeak.Active)
                {
                    continue;
                }

                result.NumberOfMatchedIsotopePeaks++;

                var rankIdx = isotopePeak.IndexInSpectrum - firstLocalPeakIndex;
                if (rankIdx < ranks.Length)
                {
                    if (rankIdx >= 0)
                    {
                        sumOfRanks += ranks[rankIdx];
                        peaksRanked++;
                    }
                }
            }

            var pRankSum = FitScoreCalculator.GetRankSumPvalue(result.NumberOfLocalPeaks, peaksRanked, sumOfRanks);
            result.RankSumScore = (pRankSum > 0) ? -Math.Log(pRankSum, 2) : 50;

            // Poisson test: rate = (detected local peaks / possible peaks) * theoretical isotope count,
            // evaluated at the number of matched isotope peaks.
            var possibleCount = result.NumberOfPossiblePeaks;
            var isotopeCount = result.NumberOfIsotopePeaks;
            var localCount = result.NumberOfLocalPeaks;
            var matchedCount = result.NumberOfMatchedIsotopePeaks;

            var rate = ((double)localCount / (double)possibleCount) * isotopeCount;
            var pPoisson = 1 - Poisson.CDF(rate, matchedCount);
            result.PoissonScore = (pPoisson > 0) ? -Math.Log(pPoisson, 2) : 50;

            return result;
        }