/// <summary>
/// Formats a feature as a single tab-separated record.
/// </summary>
/// <remarks>
/// Should be called after calling UpdateScore and UpdateAbundance on the feature.
/// Column order: scan range, charge range, representative mass / scan / charge / m/z,
/// abundance, apex scan, apex intensity, elution time range, elution length,
/// isotope envelope and score. When <paramref name="scoreReport"/> is true, the
/// per-charge-slot score columns are appended.
/// All values are formatted with the invariant culture so that the decimal
/// separator is always '.'; otherwise a comma-decimal culture would make the
/// "index,intensity" envelope pairs ambiguous and the file non-portable.
/// NOTE(review): readers of this record should parse with the invariant culture as well.
/// </remarks>
/// <param name="feature">Feature to serialize.</param>
/// <param name="scoreReport">True to append the detailed score columns.</param>
/// <returns>The tab-separated record, without a trailing newline.</returns>
public static string GetString(LcMsPeakCluster feature, bool scoreReport = false)
{
    var culture = System.Globalization.CultureInfo.InvariantCulture;
    var sb = new StringBuilder();

    sb.AppendFormat(
        culture,
        "{0}\t{1}\t{2}\t{3}\t{4:0.0000}\t{5}\t{6}\t{7:0.0000}\t{8:0.00}",
        feature.MinScanNum,
        feature.MaxScanNum,
        feature.MinCharge,
        feature.MaxCharge,
        feature.RepresentativeMass,
        feature.RepresentativeScanNum,
        feature.RepresentativeCharge,
        feature.RepresentativeMz,
        feature.Abundance);

    sb.AppendFormat(culture, "\t{0:0}", feature.ApexScanNum);
    sb.AppendFormat(culture, "\t{0:0.00}", feature.ApexIntensity);
    sb.AppendFormat(culture, "\t{0:0.000}", feature.MinElutionTime);
    sb.AppendFormat(culture, "\t{0:0.000}", feature.MaxElutionTime);
    sb.AppendFormat(culture, "\t{0:0.000}", feature.ElutionLength);

    // Envelope column: "isotopeIndex,relativeIntensity" pairs joined by ';',
    // each intensity expressed relative to the largest value in the envelope.
    sb.Append("\t");
    var intensity = feature.RepresentativeSummedEnvelop;
    var maxIntensity = intensity.Max();
    for (var i = 0; i < intensity.Length; i++)
    {
        if (i != 0)
        {
            sb.Append(";");
        }

        sb.AppendFormat(
            culture,
            "{0},{1:0.000}",
            feature.TheoreticalEnvelope.Isotopes[i].Index,
            intensity[i] / maxIntensity);
    }

    sb.AppendFormat(culture, "\t{0:0.0000}", feature.Score);

    if (scoreReport)
    {
        var even = LcMsPeakCluster.EvenCharge;
        var odd = LcMsPeakCluster.OddCharge;

        sb.AppendFormat(culture, "\t{0}", feature.BestCharge[even]);
        sb.AppendFormat(culture, "\t{0}", feature.BestCharge[odd]);

        sb.AppendFormat(culture, "\t{0:0.000}", feature.BestCorrelationScoreAcrossCharge[even]);
        sb.AppendFormat(culture, "\t{0:0.000}", feature.BestCorrelationScoreAcrossCharge[odd]);

        sb.AppendFormat(culture, "\t{0:0.000}", feature.BestIntensityScoreAcrossCharge[even]);
        sb.AppendFormat(culture, "\t{0:0.000}", feature.BestIntensityScoreAcrossCharge[odd]);

        sb.AppendFormat(culture, "\t{0:0.000}", feature.EnvelopeCorrelationScoreAcrossCharge[even]);
        sb.AppendFormat(culture, "\t{0:0.000}", feature.EnvelopeCorrelationScoreAcrossCharge[odd]);

        sb.AppendFormat(culture, "\t{0:0.000}", feature.EnvelopeIntensityScoreAcrossCharge[even]);
        sb.AppendFormat(culture, "\t{0:0.000}", feature.EnvelopeIntensityScoreAcrossCharge[odd]);

        sb.AppendFormat(culture, "\t{0:0.000}", feature.XicCorrelationBetweenBestCharges[even]);
        sb.AppendFormat(culture, "\t{0:0.000}", feature.XicCorrelationBetweenBestCharges[odd]);

        sb.AppendFormat(culture, "\t{0:0.000}", feature.AbundanceDistributionAcrossCharge[even]);
        sb.AppendFormat(culture, "\t{0:0.000}", feature.AbundanceDistributionAcrossCharge[odd]);
    }

    return sb.ToString();
}
/// <summary>
/// Writes two tab-separated rows of envelope statistics for a feature, one row
/// per slot (index 0 and index 1) of the feature's per-charge score arrays.
/// </summary>
/// <remarks>
/// Each row holds: id, mass, best charge, the summed-envelope distance /
/// correlation / intensity scores, the best distance / correlation / intensity
/// scores, the abundance distribution value, and finally both XIC correlation
/// values (slots 0 and 1 are written on every row).
/// NOTE(review): slots 0 and 1 presumably correspond to
/// LcMsPeakCluster.EvenCharge / OddCharge - confirm.
/// </remarks>
/// <param name="id">Identifier written in the first column of both rows.</param>
/// <param name="feature">Feature whose scores are written.</param>
/// <param name="writer">Destination writer; rows are terminated with "\n".</param>
private void OutputEnvelopPeakStat(int id, LcMsPeakCluster feature, StreamWriter writer)
{
    const string separator = "\t";

    for (var slot = 0; slot < 2; slot++)
    {
        writer.Write(id);
        writer.Write(separator);
        writer.Write(feature.Mass);
        writer.Write(separator);
        writer.Write(feature.BestCharge[slot]);
        writer.Write(separator);

        // Scores computed on the summed envelope.
        writer.Write(feature.EnvelopeDistanceScoreAcrossCharge[slot]);
        writer.Write(separator);
        writer.Write(feature.EnvelopeCorrelationScoreAcrossCharge[slot]);
        writer.Write(separator);
        writer.Write(feature.EnvelopeIntensityScoreAcrossCharge[slot]);
        writer.Write(separator);

        // Best individual scores.
        writer.Write(feature.BestDistanceScoreAcrossCharge[slot]);
        writer.Write(separator);
        writer.Write(feature.BestCorrelationScoreAcrossCharge[slot]);
        writer.Write(separator);
        writer.Write(feature.BestIntensityScoreAcrossCharge[slot]);
        writer.Write(separator);

        writer.Write(feature.AbundanceDistributionAcrossCharge[slot]);
        writer.Write(separator);

        // Both XIC correlations are repeated on each row, independent of the slot.
        writer.Write(feature.XicCorrelationBetweenBestCharges[0]);
        writer.Write(separator);
        writer.Write(feature.XicCorrelationBetweenBestCharges[1]);
        writer.Write("\n");
    }
}
/// <summary>
/// Reports whether both features pass the scorer's threshold and are flagged as good enough.
/// </summary>
/// <param name="f1">First feature.</param>
/// <param name="f2">Second feature.</param>
/// <returns>
/// True only when each feature has a score of at least <c>_scorer.ScoreThreshold</c>
/// and has <c>GoodEnougth</c> set; otherwise false.
/// </returns>
private bool SimilarScore(LcMsPeakCluster f1, LcMsPeakCluster f2)
{
    return f1.Score >= _scorer.ScoreThreshold
           && f1.GoodEnougth
           && f2.Score >= _scorer.ScoreThreshold
           && f2.GoodEnougth;
}
/// <summary>
/// Computes the score of a feature as the sum of score-table lookups.
/// </summary>
/// <remarks>
/// The feature mass selects a row (mass bin) in every table. For each of the
/// two per-charge slots, eight values are binned in steps of 0.001 and looked
/// up: abundance distribution, summed-envelope distance / correlation /
/// intensity, best distance / correlation / intensity, and XIC correlation
/// (slot 0 uses <c>_xicScoreTable1</c>, slot 1 uses <c>_xicScoreTable2</c>).
/// All values except the abundance are capped at 1.0 before binning.
/// </remarks>
/// <param name="feature">Feature to score.</param>
/// <returns>The accumulated table score.</returns>
public double GetScore(LcMsPeakCluster feature)
{
    // Locate the mass bin, assuming evenly spaced bins, and clamp it to the table range.
    var massBinWidth = _massBins[1] - _massBins[0];
    var massBin = (int)Math.Round((feature.Mass - _massBins[0]) / massBinWidth);
    massBin = (int)Math.Min(Math.Max(massBin, 0), _massBins.Length - 1);

    var abundance = feature.AbundanceDistributionAcrossCharge;
    var total = 0d;

    for (var slot = 0; slot < 2; slot++)
    {
        var abundanceBin = ToScoreBinIndex(abundance[slot]);
        total += _abuScoreTable[massBin][abundanceBin];

        var summedDistBin = ToScoreBinIndex(Math.Min(feature.EnvelopeDistanceScoreAcrossCharge[slot], 1.0d));
        total += _distScoreTableSummed[massBin][summedDistBin];

        var summedCorrBin = ToScoreBinIndex(Math.Min(feature.EnvelopeCorrelationScoreAcrossCharge[slot], 1.0d));
        total += _corrScoreTableSummed[massBin][summedCorrBin];

        var summedIntBin = ToScoreBinIndex(Math.Min(feature.EnvelopeIntensityScoreAcrossCharge[slot], 1.0d));
        total += _intScoreTableSummed[massBin][summedIntBin];

        var bestDistBin = ToScoreBinIndex(Math.Min(feature.BestDistanceScoreAcrossCharge[slot], 1.0d));
        total += _distScoreTable[massBin][bestDistBin];

        var bestCorrBin = ToScoreBinIndex(Math.Min(feature.BestCorrelationScoreAcrossCharge[slot], 1.0d));
        total += _corrScoreTable[massBin][bestCorrBin];

        var bestIntBin = ToScoreBinIndex(Math.Min(feature.BestIntensityScoreAcrossCharge[slot], 1.0d));
        total += _intScoreTable[massBin][bestIntBin];

        var xicBin = ToScoreBinIndex(Math.Min(feature.XicCorrelationBetweenBestCharges[slot], 1.0d));
        total += (slot == 0) ? _xicScoreTable1[massBin][xicBin] : _xicScoreTable2[massBin][xicBin];
    }

    return total;
}

/// <summary>
/// Maps a score value to a table column: rounds value / 0.001 and clamps the
/// result to the range [0, NumberOfBins - 1].
/// </summary>
private int ToScoreBinIndex(double scoreValue)
{
    var scaled = Math.Round(scoreValue / 0.001);
    return (int)Math.Min(Math.Max(scaled, 0), NumberOfBins - 1);
}
/// <summary>
/// Records that this object is a minor peak of the given feature.
/// </summary>
/// <remarks>
/// The features are kept in an array that is allocated on first use with
/// two slots and doubled in length whenever it is full;
/// <c>_countMinorTaggedFeatures</c> tracks how many slots are in use.
/// </remarks>
/// <param name="feature">Feature to append to the minor-tagged list.</param>
public void TagMinorPeakOf(LcMsPeakCluster feature)
{
    if (_minorTaggedFeatures == null)
    {
        // First tag: allocate the backing array.
        _minorTaggedFeatures = new LcMsPeakCluster[2];
    }
    else if (_countMinorTaggedFeatures >= _minorTaggedFeatures.Length)
    {
        // Backing array is full: double its capacity.
        Array.Resize(ref _minorTaggedFeatures, _minorTaggedFeatures.Length * 2);
    }

    _minorTaggedFeatures[_countMinorTaggedFeatures] = feature;
    _countMinorTaggedFeatures++;
}
/// <summary>
/// Adds a feature to the list unless it fails the quality checks or duplicates
/// an already stored feature.
/// </summary>
/// <remarks>
/// A feature is rejected when its score is below <c>_scorer.ScoreThreshold</c>
/// or when <c>GoodEnougth</c> is false. It is also rejected when a stored
/// feature has a representative mass within 1e-4 of it and the two co-elute
/// for more than 70% of either feature's elution length.
/// The stored features are scanned from the end of the list, and the scan stops
/// at the first one whose representative mass differs by more than 1.0.
/// NOTE(review): this early exit presumably relies on features being added in
/// mass order - confirm against callers.
/// </remarks>
/// <param name="newFeature">Candidate feature.</param>
/// <returns>True if the feature was appended; false if it was rejected.</returns>
public bool Add(LcMsPeakCluster newFeature)
{
    if (newFeature.Score < _scorer.ScoreThreshold || !newFeature.GoodEnougth)
    {
        return false;
    }

    for (var idx = _featureList.Count - 1; idx >= 0; idx--)
    {
        var existing = _featureList[idx];
        var massDiff = Math.Abs(existing.RepresentativeMass - newFeature.RepresentativeMass);

        if (massDiff > 1.0d)
        {
            break;
        }

        if (!(massDiff < 1e-4))
        {
            continue;
        }

        // Same mass: treat as a duplicate when the elution overlap dominates either feature.
        var overlap = existing.CoElutionLength(newFeature);
        if (overlap > existing.ElutionLength * 0.7 || overlap > newFeature.ElutionLength * 0.7)
        {
            return false;
        }
    }

    _featureList.Add(newFeature);
    return true;
}
/// <summary>
/// Converts a <see cref="LcMsPeakCluster"/> to a <see cref="Ms1FtEntryExtendedData"/>
/// </summary>
/// <remarks>
/// Every output property is copied from the matching per-charge array of the
/// feature, using <c>LcMsPeakCluster.EvenCharge</c> and
/// <c>LcMsPeakCluster.OddCharge</c> as the array indices.
/// </remarks>
/// <param name="feature">Feature whose per-charge scores are copied.</param>
/// <returns>A new extended-data object populated from <paramref name="feature"/>.</returns>
private static Ms1FtEntryExtendedData ToMs1FtEntryExtendedData(this LcMsPeakCluster feature)
{
    var even = LcMsPeakCluster.EvenCharge;
    var odd = LcMsPeakCluster.OddCharge;

    var result = new Ms1FtEntryExtendedData();

    result.BestEvenCharge = feature.BestCharge[even];
    result.BestOddCharge = feature.BestCharge[odd];

    result.CorrEvenCharge = feature.BestCorrelationScoreAcrossCharge[even];
    result.CorrOddCharge = feature.BestCorrelationScoreAcrossCharge[odd];

    result.IntensityEvenCharge = feature.BestIntensityScoreAcrossCharge[even];
    result.IntensityOddCharge = feature.BestIntensityScoreAcrossCharge[odd];

    result.SummedCorrEvenCharge = feature.EnvelopeCorrelationScoreAcrossCharge[even];
    result.SummedCorrOddCharge = feature.EnvelopeCorrelationScoreAcrossCharge[odd];

    result.SummedIntensityEvenCharge = feature.EnvelopeIntensityScoreAcrossCharge[even];
    result.SummedIntensityOddCharge = feature.EnvelopeIntensityScoreAcrossCharge[odd];

    result.XicCorrBetCharges1 = feature.XicCorrelationBetweenBestCharges[even];
    result.XicCorrBetCharges2 = feature.XicCorrelationBetweenBestCharges[odd];

    result.AbundanceRatioEvenCharge = feature.AbundanceDistributionAcrossCharge[even];
    result.AbundanceRatioOddCharge = feature.AbundanceDistributionAcrossCharge[odd];

    return result;
}
/// <summary>
/// Converts a <see cref="LcMsPeakCluster"/> to a <see cref="Ms1FtEntry"/>
/// </summary>
/// <remarks>
/// The <c>Envelope</c> string consists of "isotopeIndex,relativeIntensity"
/// pairs joined by ';', where each intensity is divided by the largest value
/// of the feature's representative summed envelope. The pairs are formatted
/// with the invariant culture: with a comma-decimal culture the decimal comma
/// would be indistinguishable from the pair separator.
/// </remarks>
/// <param name="feature">Feature to convert.</param>
/// <param name="featureId">Identifier stored in the entry's <c>FeatureId</c>.</param>
/// <returns>A new entry populated from <paramref name="feature"/>.</returns>
public static Ms1FtEntry ToMs1FtEntry(this LcMsPeakCluster feature, int featureId = 0)
{
    var culture = System.Globalization.CultureInfo.InvariantCulture;

    var intensity = feature.RepresentativeSummedEnvelop;
    var maxIntensity = intensity.Max();

    var envelope = new StringBuilder();
    for (var i = 0; i < intensity.Length; i++)
    {
        if (i != 0)
        {
            envelope.Append(";");
        }

        envelope.AppendFormat(
            culture,
            "{0},{1:0.000}",
            feature.TheoreticalEnvelope.Isotopes[i].Index,
            intensity[i] / maxIntensity);
    }

    return new Ms1FtEntry
    {
        FeatureId = featureId,
        MinScan = feature.MinScanNum,
        MaxScan = feature.MaxScanNum,
        MinCharge = feature.MinCharge,
        MaxCharge = feature.MaxCharge,
        MonoMass = feature.RepresentativeMass,
        RepresentativeScan = feature.RepresentativeScanNum,
        RepresentativeCharge = feature.RepresentativeCharge,
        RepresentativeMz = feature.RepresentativeMz,
        Abundance = feature.Abundance,
        ApexScanNum = feature.ApexScanNum,
        ApexIntensity = feature.ApexIntensity,
        MinElutionTime = feature.MinElutionTime,
        MaxElutionTime = feature.MaxElutionTime,
        ElutionLength = feature.ElutionLength,
        Envelope = envelope.ToString(),
        LikelihoodRatio = feature.Score,
        ExtendedData = feature.ToMs1FtEntryExtendedData()
    };
}
/// <summary>
/// For every training dataset, matches each row of its filtered identification
/// file to an LC-MS feature and writes the match together with target and
/// decoy envelope statistics.
/// </summary>
/// <remarks>
/// Output per dataset: a ".ms1ft" file next to the identification file (one row
/// per identification; the feature columns are all 0 when no feature matched),
/// and two statistics files ("name.tsv" and "name_decoy.tsv").
/// The test is ignored when the identification folder does not exist, and a
/// dataset is skipped when its data file or identification file is missing.
/// The output writers are wrapped in using statements so the files are closed
/// even when parsing or feature finding throws.
/// </remarks>
public void ExtractLcMsFeaturesForTrainingSet()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string idFileFolder = @"D:\MassSpecFiles\training\FilteredIdResult";
    if (!Directory.Exists(idFileFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, idFileFolder);
    }

    var tolerance = new Tolerance(10);
    var tolerance2 = new Tolerance(20);

    // Running id of matched features; shared across datasets and incremented only on a match.
    var id = 1;

    for (var d = 0; d < TrainSetFileLists.Length; d++)
    {
        var dataset = TrainSetFileLists[d];
        var dataname = Path.GetFileNameWithoutExtension(dataset);
        var filtedIdResultFile = string.Format(@"{0}\{1}.trainset.tsv", idFileFolder, dataname);
        var featureResult = string.Format(@"{0}\{1}.ms1ft", idFileFolder, dataname);

        if (!File.Exists(dataset))
        {
            Console.WriteLine(@"Warning: Skipping since file not found: {0}", dataset);
            continue;
        }

        if (!File.Exists(filtedIdResultFile))
        {
            Console.WriteLine(@"Warning: Skipping since file not found: {0}", filtedIdResultFile);
            continue;
        }

        // NOTE(review): the run object is not disposed here, matching the original code;
        // confirm whether PbfLcMsRun.GetLcMsRun hands out a disposable resource.
        var run = PbfLcMsRun.GetLcMsRun(dataset);

        using (var targetStatWriter = new StreamWriter(string.Format(@"D:\MassSpecFiles\training\statistics\{0}.tsv", dataname)))
        using (var decoyStatWriter = new StreamWriter(string.Format(@"D:\MassSpecFiles\training\statistics\{0}_decoy.tsv", dataname)))
        using (var writer = new StreamWriter(featureResult))
        {
            writer.Write("Ms2MinScan\tMs2MaxScan\tMs2MinCharge\tMs2MaxCharge\tMs2Mass\t");
            writer.Write("Mass\tMinScan\tMaxScan\tMinCharge\tMaxCharge\tMinTime\tMaxTime\tElution\tGood\n");

            var tsvParser = new TsvFileParser(filtedIdResultFile);
            var featureFinder = new LcMsPeakMatrix(run);

            for (var i = 0; i < tsvParser.NumData; i++)
            {
                var minScan = int.Parse(tsvParser.GetData("MinScan")[i]);
                var maxScan = int.Parse(tsvParser.GetData("MaxScan")[i]);
                var minCharge = int.Parse(tsvParser.GetData("MinCharge")[i]);
                var maxCharge = int.Parse(tsvParser.GetData("MaxCharge")[i]);
                var mass = double.Parse(tsvParser.GetData("Mass")[i]);

                // Identification columns.
                writer.Write(minScan);
                writer.Write("\t");
                writer.Write(maxScan);
                writer.Write("\t");
                writer.Write(minCharge);
                writer.Write("\t");
                writer.Write(maxCharge);
                writer.Write("\t");
                writer.Write(mass);
                writer.Write("\t");

                // Search the mass bin of the identification first, then the neighbouring
                // bin on the side the mass lies, then the other neighbour.
                var binNum = featureFinder.Comparer.GetBinNumber(mass);
                var binMass = featureFinder.Comparer.GetMzAverage(binNum);
                var binNumList = (mass < binMass)
                    ? new int[] { binNum, binNum - 1, binNum + 1 }
                    : new int[] { binNum, binNum + 1, binNum - 1 };

                // Masses below 2000 use the wider (20) tolerance, others the narrower (10) one.
                var massTh = (mass < 2000) ? tolerance2.GetToleranceAsTh(mass) : tolerance.GetToleranceAsTh(mass);
                var midScan = 0.5 * (minScan + maxScan);

                LcMsPeakCluster refinedFeature = null;
                foreach (var bi in binNumList)
                {
                    // Within a bin, keep the most abundant feature that is within the mass
                    // tolerance and whose scan range strictly contains the mid scan.
                    var highestAbu = 0d;
                    foreach (var feature in featureFinder.FindFeatures(bi))
                    {
                        if (!(Math.Abs(mass - feature.Mass) < massTh))
                        {
                            continue;
                        }

                        if (feature.MinScanNum < midScan && midScan < feature.MaxScanNum && feature.Abundance > highestAbu)
                        {
                            refinedFeature = feature;
                            highestAbu = feature.Abundance;
                        }
                    }

                    if (refinedFeature != null)
                    {
                        break;
                    }
                }

                if (refinedFeature != null)
                {
                    writer.Write(refinedFeature.Mass);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MinScanNum);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MaxScanNum);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MinCharge);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MaxCharge);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MinElutionTime);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MaxElutionTime);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MaxElutionTime - refinedFeature.MinElutionTime);
                    writer.Write("\t");

                    // "Good" means the feature covers the whole identified scan range.
                    var good = refinedFeature.MinScanNum <= minScan && refinedFeature.MaxScanNum >= maxScan;
                    writer.Write(good ? 1 : 0);
                    writer.Write("\n");

                    // Target statistics first; the feature is then re-scored as a decoy in place.
                    OutputEnvelopPeakStat(id, refinedFeature, targetStatWriter);

                    var chargeRange = featureFinder.GetDetectableMinMaxCharge(refinedFeature.RepresentativeMass, run.MinMs1Mz, run.MaxMs1Mz);
                    refinedFeature.UpdateWithDecoyScore(featureFinder.Ms1Spectra, chargeRange.Item1, chargeRange.Item2);
                    OutputEnvelopPeakStat(id, refinedFeature, decoyStatWriter);
                    id++;
                }
                else
                {
                    // No match: fill the nine feature columns with zeros.
                    const int featureColumnCount = 9;
                    for (var c = 0; c < featureColumnCount; c++)
                    {
                        writer.Write(0);
                        writer.Write(c < featureColumnCount - 1 ? "\t" : "\n");
                    }
                }
            }
        }

        Console.WriteLine(dataname);
    }
}
/// <summary>
/// Merges features that are connected under <c>_mergeComparer</c> into a single
/// feature per connected component.
/// </summary>
/// <remarks>
/// A component with one member is passed through unchanged. For a larger
/// component, a cluster is re-extracted over the union of the members' scan
/// and charge ranges, once for each member's representative mass and once for
/// the mean of the members' masses; the highest scoring of these is kept. If no
/// cluster could be extracted, the highest scoring original member is used
/// instead. The kept cluster's score is raised to the best original score when
/// it is lower. A component for which no cluster is available at all is
/// skipped, so the returned list never contains null.
/// </remarks>
/// <param name="featureFinder">Peak matrix used to re-extract merged clusters.</param>
/// <param name="features">Features to merge.</param>
/// <returns>One feature per connected component.</returns>
private List<LcMsPeakCluster> MergeFeatures(LcMsPeakMatrix featureFinder, List<LcMsPeakCluster> features)
{
    var featureSet = new NodeSet<LcMsPeakCluster>();
    featureSet.AddRange(features);

    var connectedFeatureSet = featureSet.ConnnectedComponents(_mergeComparer);
    var mergedFeatures = new List<LcMsPeakCluster>();

    foreach (var fSet in connectedFeatureSet)
    {
        if (fSet.Count == 1)
        {
            mergedFeatures.Add(fSet[0]);
            continue;
        }

        // Union of the scan and charge ranges of all members.
        var maxScan = fSet.Max(f => f.MaxScanNum);
        var minScan = fSet.Min(f => f.MinScanNum);
        var maxCharge = fSet.Max(f => f.MaxCharge);
        var minCharge = fSet.Min(f => f.MinCharge);

        var maxScore = double.MinValue;
        LcMsPeakCluster maxScoredClusterOriginal = null;
        LcMsPeakCluster maxScoredCluster = null;

        foreach (var f in fSet)
        {
            // Candidate: cluster re-extracted at this member's representative mass.
            var newFeature = featureFinder.GetLcMsPeakCluster(f.RepresentativeMass, minCharge, maxCharge, minScan, maxScan);
            if (newFeature != null && (maxScoredCluster == null || newFeature.Score > maxScoredCluster.Score))
            {
                maxScoredCluster = newFeature;
            }

            // Track the best original member as a fallback.
            if (f.Score > maxScore)
            {
                maxScoredClusterOriginal = f;
                maxScore = f.Score;
            }
        }

        // Candidate: cluster re-extracted at the mean mass of the members.
        var feature = featureFinder.GetLcMsPeakCluster(fSet.Select(f => f.Mass).Mean(), minCharge, maxCharge, minScan, maxScan);
        if (feature != null && (maxScoredCluster == null || feature.Score > maxScoredCluster.Score))
        {
            maxScoredCluster = feature;
        }

        if (maxScoredCluster == null)
        {
            maxScoredCluster = maxScoredClusterOriginal;
        }

        if (maxScoredCluster == null)
        {
            // Nothing could be extracted and no member had a comparable score:
            // skip the component rather than adding a null entry to the result.
            continue;
        }

        // The merged feature must not score lower than the best feature it replaces.
        if (maxScoredCluster.Score < maxScore)
        {
            maxScoredCluster.Score = maxScore;
        }

        mergedFeatures.Add(maxScoredCluster);
    }

    return mergedFeatures;
}