/// <summary>
/// Looks up an LC-MS feature for each protein-spectrum match and writes every feature found to a TSV file.
/// </summary>
/// <param name="prsms">Matches whose mass, charge and scan number drive each feature lookup.</param>
/// <param name="run">Run the features are extracted from; also supplies the scan range around each match.</param>
/// <param name="outTsvFilePath">Path of the TSV file to create.</param>
public void FeatureFind(List <ProteinSpectrumMatch> prsms, LcMsRun run, string outTsvFilePath)
        {
            var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());

            // The using block closes the output file even when a lookup throws part-way through.
            using (var tsvWriter = new StreamWriter(outTsvFilePath))
            {
                tsvWriter.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString(false));

                // Ids are assigned only to matches that produced a feature, starting at 1.
                var featureId = 1;

                foreach (var match in prsms)
                {
                    // Search between the scans returned for the match's own scan number
                    // (presumably the neighbouring MS1 scans; the second argument is passed through as 1).
                    var minScan = run.GetPrevScanNum(match.ScanNum, 1);
                    var maxScan = run.GetNextScanNum(match.ScanNum, 1);
                    var feature = featureFinder.GetLcMsPeakCluster(match.Mass, match.Charge, minScan, maxScan);

                    if (feature == null)
                    {
                        continue;
                    }

                    tsvWriter.WriteLine("{0}\t{1}", featureId, LcMsFeatureFinderLauncher.GetString(feature, false));
                    featureId++;
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Extracts one hard-coded feature from a fixed PBF file and dumps the peaks of all its
        /// envelopes to a tab-separated file in the temp folder. The test is ignored when the
        /// PBF file is not reachable.
        /// </summary>
        public void TestFeatureExampleForFigure()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFile = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_1\CPTAC_Intact_rep6_15Jan15_Bane_C2-14-08-02RZ.pbf";

            //const string rawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            if (!File.Exists(rawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
            }

            var run           = PbfLcMsRun.GetLcMsRun(rawFile);
            var scorer        = new LcMsFeatureLikelihood();
            var featureFinder = new LcMsPeakMatrix(run, scorer);
            var feature       = featureFinder.GetLcMsPeakCluster(28061.6177, 20, 34, 7624, 7736);

            var resultsFilePath = Path.Combine(Path.GetTempPath(), Path.GetFileNameWithoutExtension(rawFile) + "_peaks.txt");

            // The using block releases the file handle even if one of the feature accesses below throws.
            using (var writer = new StreamWriter(resultsFilePath))
            {
                writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n", "Scan", "Elution_Time", "Charge", "ID", "MZ", "Intensity", "Pearson_Correlation");

                var envelope = feature.TheoreticalEnvelope;

                // Echo the theoretical isotope ratios to the console.
                foreach (var e in envelope.Isotopes)
                {
                    Console.WriteLine(e.Ratio);
                }

                // One output row per non-null peak; the ID column is the peak's index within the envelope.
                foreach (var env in feature.EnumerateEnvelopes())
                {
                    var corr = env.PearsonCorrelation;
                    for (var i = 0; i < envelope.Size; i++)
                    {
                        var peak = env.Peaks[i];
                        if (peak == null)
                        {
                            continue;
                        }
                        writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n", env.ScanNum, run.GetElutionTime(env.ScanNum), env.Charge, i, peak.Mz, peak.Intensity, corr);
                    }
                }
            }

            Console.WriteLine("Results are in file " + resultsFilePath);
        }
Beispiel #3
0
        /// <summary>
        /// For each of 32 hard-coded datasets: loads the MSAlign identifications, finds an LC-MS
        /// feature for every PrSM, groups PrSMs that fall on the same feature, merges overlapping
        /// features that share a protein id, and writes the surviving features to an .ms1ft file.
        /// The test is ignored when the raw folder is missing or no dataset could be processed.
        /// </summary>
        public void TestQuantifyIdedProteoforms()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFolder           = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
            const string promexOutFolder     = @"D:\MassSpecFiles\UTEX\MSAlign";
            const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

            if (!Directory.Exists(rawFolder))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
            }

            var nDataset = 32;
            var dataset  = new string[nDataset];

            // Dataset names differ only in a two-digit, 1-based index.
            for (var i = 0; i < nDataset; i++)
            {
                dataset[i] = string.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
            }

            var prsmReader = new ProteinSpectrumMatchReader(0.01);

            var filesProcessed = 0;

            var tolerance = new Tolerance(10);

            for (var i = 0; i < dataset.Length; i++)
            {
                var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
                if (!File.Exists(rawFile))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
                    continue;
                }
                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                var path = string.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
                if (!File.Exists(path))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
                    continue;
                }

                var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);

                filesProcessed++;

                // The protein id is the 5 characters that follow ProteinNamePrefix in the protein name.
                for (var j = 0; j < prsmList.Count; j++)
                {
                    var match = prsmList[j];
                    match.ProteinId = match.ProteinName.Substring(match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
                }

                // PrSM To Feature (-1 means "not assigned to a feature yet")
                var prsmToFeatureIdMap = new int[prsmList.Count];
                for (var k = 0; k < prsmToFeatureIdMap.Length; k++)
                {
                    prsmToFeatureIdMap[k] = -1;
                }

                // Feature To PrSM (parallel to featureList: same index, same feature)
                var featureToPrsm = new List <ProteinSpectrumMatchSet>();

                var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());
                var featureList   = new List <LcMsPeakCluster>();
                var featureId     = 0;
                for (var j = 0; j < prsmList.Count; j++)
                {
                    // Already absorbed into the feature of an earlier PrSM.
                    if (prsmToFeatureIdMap[j] >= 0)
                    {
                        continue;
                    }

                    var match      = prsmList[j];
                    var minScanNum = match.ScanNum;
                    var maxScanNum = match.ScanNum;
                    var mass       = match.Mass;
                    var charge     = match.Charge;
                    var massTh     = tolerance.GetToleranceAsMz(mass);
                    var id1        = match.ProteinId;

                    var feature = featureFinder.GetLcMsPeakCluster(mass, charge, minScanNum, maxScanNum);
                    var prsmSet = new ProteinSpectrumMatchSet(i)
                    {
                        match
                    };

                    prsmToFeatureIdMap[j] = featureId;

                    if (feature == null)
                    {
                        // No cluster found: fall back to the noise-peak collection for this single charge state.
                        feature = featureFinder.GetLcMsPeaksFromNoisePeaks(mass, charge, minScanNum, maxScanNum, charge, charge);
                    }
                    else
                    {
                        // Elution-time slack: the larger of 0.5% of the elution time at the run's
                        // MaxLcScan and 20% of the feature's elution length.
                        var etTol = Math.Max(run.GetElutionTime(run.MaxLcScan) * 0.005, feature.ElutionLength * 0.2);

                        for (var k = j + 1; k < prsmList.Count; k++)
                        {
                            var otherMatch = prsmList[k];
                            var id2        = otherMatch.ProteinId;
                            var et2        = run.GetElutionTime(otherMatch.ScanNum);

                            // Same protein, elution time inside the feature's window widened by etTol
                            // on BOTH sides, and mass within tolerance. The upper bound previously
                            // subtracted etTol, which shrank the window instead of widening it.
                            if (id1.Equals(id2) &&
                                feature.MinElutionTime - etTol < et2 && et2 < feature.MaxElutionTime + etTol &&
                                Math.Abs(otherMatch.Mass - mass) < massTh)
                            {
                                prsmToFeatureIdMap[k] = featureId;
                                prsmSet.Add(otherMatch);
                            }
                        }
                    }
                    featureId++;

                    // Flag == 1 marks a feature as live; merged-away features are set to 0 below.
                    feature.Flag = 1;
                    featureList.Add(feature);
                    featureToPrsm.Add(prsmSet);
                }

                // Merge pairs of live features that are close in mass, co-elute and share a protein id.
                for (var j = 0; j < featureList.Count; j++)
                {
                    var f1 = featureList[j];
                    if (f1.Flag < 1)
                    {
                        continue;
                    }
                    var prsm1 = featureToPrsm[j];

                    for (var k = j + 1; k < featureList.Count; k++)
                    {
                        var f2 = featureList[k];
                        if (f2.Flag < 1)
                        {
                            continue;
                        }

                        var prsm2 = featureToPrsm[k];
                        if (Math.Abs(f1.Mass - f2.Mass) > tolerance.GetToleranceAsMz(f1.Mass))
                        {
                            continue;
                        }
                        if (!f1.CoElutedByNet(f2, 0.005))
                        {
                            continue;
                        }
                        if (!prsm1.ShareProteinId(prsm2))
                        {
                            continue;
                        }

                        // The feature with the longer scan length survives and takes over the other's PrSMs.
                        if (f1.ScanLength > f2.ScanLength)
                        {
                            prsm1.AddRange(prsm2);
                            prsm2.Clear();
                            f2.Flag = 0;
                        }
                        else
                        {
                            prsm2.AddRange(prsm1);
                            prsm1.Clear();
                            f1.Flag = 0;
                        }
                    }
                }

                // Write the live features; the using block closes the file even if a feature fails to serialize.
                var ms1ftFilePath = string.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
                using (var writer = new StreamWriter(ms1ftFilePath))
                {
                    writer.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString());

                    for (var j = 0; j < featureList.Count; j++)
                    {
                        var f1 = featureList[j];
                        if (f1.Flag < 1)
                        {
                            continue;
                        }
                        var prsm1 = featureToPrsm[j];

                        // Stretch the feature so that it covers the scans around all of its PrSMs.
                        var minScanNum = run.GetPrevScanNum(prsm1.MinScanNum, 1);
                        var maxScanNum = run.GetNextScanNum(prsm1.MaxScanNum, 1);
                        f1.ExpandScanRange(minScanNum, maxScanNum);

                        // The written id is the feature's 1-based position in featureList, so ids
                        // of merged-away features are skipped.
                        writer.Write("{0}\t", j + 1);
                        writer.WriteLine(LcMsFeatureFinderLauncher.GetString(f1));
                    }
                }

                Console.WriteLine(ms1ftFilePath);
            }

            if (filesProcessed == 0)
            {
                Assert.Ignore("Skipped since data files not found");
            }
        }
Beispiel #4
0
        /// <summary>
        /// Extracts one hard-coded feature from a fixed PBF file and prints three tab-separated
        /// matrices of its envelopes to the console: Pearson correlation, Bhattacharyya distance
        /// and abundance. Missing (null) envelopes are printed as 0. The test is ignored when the
        /// PBF file is not reachable.
        /// </summary>
        public void TestLcMsFeatureXic()
        {
            const string separator = "---------------------------------------------------------------";
            const string rawFile   = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_1\CPTAC_Intact_rep2_15Jan15_Bane_C2-14-08-02RZ.pbf";

            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            if (!File.Exists(rawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
            }

            var run           = PbfLcMsRun.GetLcMsRun(rawFile);
            var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());
            var feature       = featureFinder.GetLcMsPeakCluster(2388.278, 4, 3774, 3907);

            // The looked-up columns are not used further down; the lookups are retained so the
            // test still fails here if the feature's boundary scans cannot be resolved.
            var ms1ScanToIndex = run.GetMs1ScanNumToIndex();
            var minCol         = ms1ScanToIndex[feature.MinScanNum];
            var maxCol         = ms1ScanToIndex[feature.MaxScanNum];

            // Matrix 1: Pearson correlation of every envelope.
            Console.WriteLine(separator);
            for (var row = 0; row < feature.Envelopes.Length; row++)
            {
                for (var col = 0; col < feature.Envelopes[row].Length; col++)
                {
                    var cell = feature.Envelopes[row][col];
                    Console.Write(cell == null ? 0 : cell.PearsonCorrelation);
                    Console.Write("\t");
                }
                Console.Write("\n");
            }

            // Matrix 2: Bhattacharyya distance of every envelope.
            Console.WriteLine(separator);
            for (var row = 0; row < feature.Envelopes.Length; row++)
            {
                for (var col = 0; col < feature.Envelopes[row].Length; col++)
                {
                    var cell = feature.Envelopes[row][col];
                    Console.Write(cell == null ? 0 : cell.BhattacharyyaDistance);
                    Console.Write("\t");
                }
                Console.Write("\n");
            }

            // Matrix 3: abundance of every envelope.
            Console.WriteLine(separator);
            for (var row = 0; row < feature.Envelopes.Length; row++)
            {
                for (var col = 0; col < feature.Envelopes[row].Length; col++)
                {
                    var cell = feature.Envelopes[row][col];
                    Console.Write(cell == null ? 0 : cell.Abundance);
                    Console.Write("\t");
                }
                Console.Write("\n");
            }
        }
Beispiel #5
0
        /// <summary>
        /// Fills every aligned-feature slot that is still empty for the given dataset by
        /// re-extracting the feature from that dataset's run, falling back to noise peaks when
        /// no cluster is found. Does nothing when no aligned features exist.
        /// </summary>
        /// <param name="dataSetIndex">Index into the run list and into each aligned-feature row.</param>
        /// <param name="scoreThreshold">Not referenced by the current implementation.</param>
        /// <param name="progressReporter">Optional sink for progress updates.</param>
        public void FillMissingFeatures(int dataSetIndex, double scoreThreshold = -30, IProgress <ProgressData> progressReporter = null)
        {
            if (_alignedFeatures == null)
            {
                return;
            }

            var run         = _runList[dataSetIndex];
            var ms1ScanNums = run.GetMs1ScanVector();
            var finder      = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());
            var progress    = new ProgressData(progressReporter);

            for (var featureIndex = 0; featureIndex < CountAlignedFeatures; featureIndex++)
            {
                // Slot already populated for this dataset: nothing to fill (and no progress report).
                if (_alignedFeatures[featureIndex][dataSetIndex] != null)
                {
                    continue;
                }

                // Defaults: "lower bound not found yet" and the last MS1 scan as upper bound.
                var firstScan = -1;
                var lastScan  = ms1ScanNums.Last();

                var representative = GetRepFeatureInfo(_alignedFeatures[featureIndex]);
                var mass           = representative.Mass;
                var charge         = representative.Charge;
                var lowerNet       = representative.MinNet;
                var upperNet       = representative.MaxNet;

                // Translate the representative NET window into a scan range of this run.
                for (var scanIndex = 0; scanIndex < ms1ScanNums.Length; scanIndex++)
                {
                    var net = run.GetElutionTime(ms1ScanNums[scanIndex]) / run.GetElutionTime(run.MaxLcScan);

                    // First scan past the lower NET bound: start one MS1 scan earlier when there is one.
                    if (firstScan < 0 && net > lowerNet)
                    {
                        firstScan = scanIndex == 0 ? ms1ScanNums[scanIndex] : ms1ScanNums[scanIndex - 1];
                    }

                    // First scan past the upper NET bound closes the range.
                    if (net > upperNet)
                    {
                        lastScan = ms1ScanNums[scanIndex];
                        break;
                    }
                }

                if (firstScan < 0)
                {
                    firstScan = 0;
                }

                // Prefer a proper cluster; otherwise collect noise peaks over the representative's charge range.
                _alignedFeatures[featureIndex][dataSetIndex] =
                    finder.GetLcMsPeakCluster(mass, charge, firstScan, lastScan)
                    ?? finder.GetLcMsPeaksFromNoisePeaks(mass, charge, firstScan, lastScan, representative.MinCharge, representative.MaxCharge);

                progress.Report(featureIndex, this.CountAlignedFeatures);
            }
        }
Beispiel #6
0
        /// <summary>
        /// Groups the given features into connected components (using the merge comparer) and
        /// returns one feature per component. A single-member component is returned unchanged;
        /// for larger components the best-scoring re-extraction over the combined scan and charge
        /// range is returned, or the best-scoring original member when no re-extraction succeeds.
        /// </summary>
        /// <param name="featureFinder">Peak matrix used to re-extract features over the merged range.</param>
        /// <param name="features">Features to merge.</param>
        /// <returns>The merged feature list, one entry per connected component.</returns>
        private List <LcMsPeakCluster> MergeFeatures(LcMsPeakMatrix featureFinder, List <LcMsPeakCluster> features)
        {
            var nodes = new NodeSet <LcMsPeakCluster>();
            nodes.AddRange(features);

            var components = nodes.ConnnectedComponents(_mergeComparer);
            var result     = new List <LcMsPeakCluster>();

            foreach (var component in components)
            {
                if (component.Count == 1)
                {
                    result.Add(component[0]);
                    continue;
                }

                // Combined scan and charge extent of all members.
                var lastScan   = component.Max(member => member.MaxScanNum);
                var firstScan  = component.Min(member => member.MinScanNum);
                var topCharge  = component.Max(member => member.MaxCharge);
                var lowCharge  = component.Min(member => member.MinCharge);

                var bestOriginalScore = double.MinValue;
                LcMsPeakCluster bestOriginal  = null;
                LcMsPeakCluster bestExtracted = null;

                foreach (var member in component)
                {
                    // Re-extract over the merged extent, seeded with this member's representative mass.
                    var candidate = featureFinder.GetLcMsPeakCluster(member.RepresentativeMass, lowCharge, topCharge, firstScan, lastScan);
                    if (candidate != null)
                    {
                        if (bestExtracted == null || candidate.Score > bestExtracted.Score)
                        {
                            bestExtracted = candidate;
                        }
                    }

                    // Track the best-scoring original member as a fallback.
                    if (member.Score > bestOriginalScore)
                    {
                        bestOriginal      = member;
                        bestOriginalScore = member.Score;
                    }
                }

                // One more attempt, seeded with the mean mass of all members.
                var meanMass      = component.Select(member => member.Mass).Mean();
                var meanCandidate = featureFinder.GetLcMsPeakCluster(meanMass, lowCharge, topCharge, firstScan, lastScan);
                if (meanCandidate != null)
                {
                    if (bestExtracted == null || meanCandidate.Score > bestExtracted.Score)
                    {
                        bestExtracted = meanCandidate;
                    }
                }

                if (bestExtracted == null)
                {
                    bestExtracted = bestOriginal;
                }

                // The merged feature never scores lower than the best original member.
                if (bestExtracted != null)
                {
                    if (bestExtracted.Score < bestOriginalScore)
                    {
                        bestExtracted.Score = bestOriginalScore;
                    }
                }

                result.Add(bestExtracted);
            }

            return result;
        }