Exemplo n.º 1
0
        /// <summary>
        /// For each of the 32 hard-coded UTEX datasets: loads the MS-Align identification table, finds an
        /// LC-MS feature for every PrSM (grouping PrSMs that fall on the same feature), merges features that
        /// co-elute, have matching mass and share a protein ID, and writes the surviving features to a
        /// <c>.ms1ft</c> file in the ProMex output folder.
        /// </summary>
        /// <remarks>
        /// The test is ignored when the raw-data folder does not exist or when none of the datasets had both
        /// a <c>.pbf</c> file and an MS-Align result table. Individual datasets with a missing file are
        /// skipped with a console warning.
        /// </remarks>
        public void TestQuantifyIdedProteoforms()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFolder           = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
            const string promexOutFolder     = @"D:\MassSpecFiles\UTEX\MSAlign";
            const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

            if (!Directory.Exists(rawFolder))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
            }

            const int nDataset = 32;
            var dataset = new string[nDataset];

            // Dataset names are numbered 01..32.
            for (var i = 0; i < nDataset; i++)
            {
                dataset[i] = string.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
            }

            var prsmReader = new ProteinSpectrumMatchReader(0.01);

            var filesProcessed = 0;

            var tolerance = new Tolerance(10);

            for (var i = 0; i < dataset.Length; i++)
            {
                var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
                if (!File.Exists(rawFile))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
                    continue;
                }
                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                var path = string.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
                if (!File.Exists(path))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
                    continue;
                }

                var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);

                filesProcessed++;

                // Derive the protein ID: the 5 characters that follow ProteinNamePrefix in the protein name.
                // NOTE(review): assumes every protein name contains ProteinNamePrefix — confirm against the data.
                for (var j = 0; j < prsmList.Count; j++)
                {
                    var match = prsmList[j];
                    match.ProteinId = match.ProteinName.Substring(match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
                }

                // PrSM To Feature: -1 means the PrSM has not been assigned to a feature yet.
                var prsmToFeatureIdMap = new int[prsmList.Count];
                for (var k = 0; k < prsmToFeatureIdMap.Length; k++)
                {
                    prsmToFeatureIdMap[k] = -1;
                }

                // Feature To PrSM: featureToPrsm[n] holds the PrSMs grouped onto featureList[n].
                var featureToPrsm = new List <ProteinSpectrumMatchSet>();

                var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());
                var featureList   = new List <LcMsPeakCluster>();
                var featureId     = 0;
                for (var j = 0; j < prsmList.Count; j++)
                {
                    // Already absorbed into an earlier feature.
                    if (prsmToFeatureIdMap[j] >= 0)
                    {
                        continue;
                    }

                    var match      = prsmList[j];
                    var minScanNum = match.ScanNum;
                    var maxScanNum = match.ScanNum;
                    var mass       = match.Mass;
                    var charge     = match.Charge;
                    var massTh     = tolerance.GetToleranceAsMz(mass);
                    var id1        = match.ProteinId;

                    var feature = featureFinder.GetLcMsPeakCluster(mass, charge, minScanNum, maxScanNum);
                    var prsmSet = new ProteinSpectrumMatchSet(i)
                    {
                        match
                    };

                    prsmToFeatureIdMap[j] = featureId;

                    if (feature == null)
                    {
                        // No cluster found: fall back to collecting peaks for this single charge state.
                        feature = featureFinder.GetLcMsPeaksFromNoisePeaks(mass, charge, minScanNum, maxScanNum, charge, charge);
                    }
                    else
                    {
                        var etTol = Math.Max(run.GetElutionTime(run.MaxLcScan) * 0.005, feature.ElutionLength * 0.2);

                        // Attach later PrSMs of the same protein that elute within the feature's
                        // elution range (widened by etTol on both sides) and match its mass.
                        for (var k = j + 1; k < prsmList.Count; k++)
                        {
                            var otherMatch = prsmList[k];
                            var id2        = otherMatch.ProteinId;
                            var et2        = run.GetElutionTime(otherMatch.ScanNum);

                            // Fix: the upper bound previously subtracted etTol, shrinking the window
                            // on one side instead of extending it symmetrically.
                            if (id1.Equals(id2) &&
                                feature.MinElutionTime - etTol < et2 && et2 < feature.MaxElutionTime + etTol &&
                                Math.Abs(otherMatch.Mass - mass) < massTh)
                            {
                                prsmToFeatureIdMap[k] = featureId;
                                prsmSet.Add(otherMatch);
                            }
                        }
                    }
                    featureId++;

                    // Flag == 1 marks a live feature; the merge pass below resets it to 0 for absorbed ones.
                    feature.Flag = 1;
                    featureList.Add(feature);
                    featureToPrsm.Add(prsmSet);
                }

                // Merge pass: combine pairs of live features that agree in mass, co-elute and share a protein ID.
                for (var j = 0; j < featureList.Count; j++)
                {
                    var f1 = featureList[j];
                    if (f1.Flag < 1)
                    {
                        continue;
                    }
                    var prsm1 = featureToPrsm[j];

                    for (var k = j + 1; k < featureList.Count; k++)
                    {
                        var f2 = featureList[k];
                        if (f2.Flag < 1)
                        {
                            continue;
                        }

                        var prsm2 = featureToPrsm[k];
                        if (Math.Abs(f1.Mass - f2.Mass) > tolerance.GetToleranceAsMz(f1.Mass))
                        {
                            continue;
                        }
                        if (!f1.CoElutedByNet(f2, 0.005))
                        {
                            continue;
                        }
                        if (!prsm1.ShareProteinId(prsm2))
                        {
                            continue;
                        }

                        // Keep the feature with the longer scan range; the other hands over its PrSMs
                        // and is flagged as dead.
                        if (f1.ScanLength > f2.ScanLength)
                        {
                            prsm1.AddRange(prsm2);
                            prsm2.Clear();
                            f2.Flag = 0;
                        }
                        else
                        {
                            prsm2.AddRange(prsm1);
                            prsm1.Clear();
                            f1.Flag = 0;
                        }
                    }
                }

                // Write the surviving features. The using block guarantees the file is flushed and
                // closed even if expanding or formatting a feature throws.
                var ms1ftFilePath = string.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
                using (var writer = new StreamWriter(ms1ftFilePath))
                {
                    writer.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString());

                    for (var j = 0; j < featureList.Count; j++)
                    {
                        var f1 = featureList[j];
                        if (f1.Flag < 1)
                        {
                            continue;
                        }
                        var prsm1 = featureToPrsm[j];

                        // Extend the feature's scan range to one scan before/after its PrSMs.
                        var minScanNum = run.GetPrevScanNum(prsm1.MinScanNum, 1);
                        var maxScanNum = run.GetNextScanNum(prsm1.MaxScanNum, 1);
                        f1.ExpandScanRange(minScanNum, maxScanNum);

                        // The written ID is the 1-based index in featureList, so IDs of merged-away features are skipped.
                        writer.Write("{0}\t", j + 1);
                        writer.WriteLine(LcMsFeatureFinderLauncher.GetString(f1));
                    }
                }

                Console.WriteLine(ms1ftFilePath);
            }

            if (filesProcessed == 0)
            {
                Assert.Ignore("Skipped since data files not found");
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Fills every aligned-feature slot that is still <c>null</c> for the given dataset. For each such
        /// slot the representative feature of the aligned row supplies mass, charge and NET range; the NET
        /// range is converted to an MS1 scan range in this dataset's run, and a feature is extracted there
        /// (falling back to noise-peak collection when no cluster is found).
        /// </summary>
        /// <param name="dataSetIndex">Index of the dataset, used both for the run list and for the column of each aligned-feature row.</param>
        /// <param name="scoreThreshold">Not used by the current implementation; retained so existing callers keep compiling.</param>
        /// <param name="progressReporter">Optional progress sink; wrapped in a <c>ProgressData</c> that is reported to after each filled slot.</param>
        public void FillMissingFeatures(int dataSetIndex, double scoreThreshold = -30, IProgress <ProgressData> progressReporter = null)
        {
            if (_alignedFeatures == null)
            {
                return;
            }

            var run           = _runList[dataSetIndex];
            var ms1ScanNums   = run.GetMs1ScanVector();
            var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());

            var progressData = new ProgressData(progressReporter);

            // Elution time of the last LC scan, used to normalize elution times to NET.
            // Loop-invariant, so it is looked up once instead of once per scan per feature.
            var totalElutionTime = run.GetElutionTime(run.MaxLcScan);

            for (var j = 0; j < CountAlignedFeatures; j++)
            {
                // Slot already populated for this dataset.
                if (_alignedFeatures[j][dataSetIndex] != null)
                {
                    continue;
                }

                // Defaults: "not found" for the lower bound, last MS1 scan for the upper bound.
                var minScanNum = -1;
                var maxScanNum = ms1ScanNums.Last();

                var repFt = GetRepFeatureInfo(_alignedFeatures[j]);
                double mass = repFt.Mass;
                int charge  = repFt.Charge;
                var minNet  = repFt.MinNet;
                var maxNet  = repFt.MaxNet;

                for (var k = 0; k < ms1ScanNums.Length; k++)
                {
                    var net = run.GetElutionTime(ms1ScanNums[k]) / totalElutionTime;

                    // First scan past minNet: start one MS1 scan earlier (when there is one).
                    if (net > minNet && minScanNum < 0)
                    {
                        minScanNum = (k == 0) ? ms1ScanNums[k] : ms1ScanNums[k - 1];
                    }

                    // First scan past maxNet closes the range.
                    if (net > maxNet)
                    {
                        maxScanNum = ms1ScanNums[k];
                        break;
                    }
                }

                // No scan exceeded minNet: fall back to scan 0.
                if (minScanNum < 0)
                {
                    minScanNum = 0;
                }

                var newFt = featureFinder.GetLcMsPeakCluster(mass, charge, minScanNum, maxScanNum);
                _alignedFeatures[j][dataSetIndex] = newFt ?? featureFinder.GetLcMsPeaksFromNoisePeaks(mass, charge, minScanNum, maxScanNum, repFt.MinCharge, repFt.MaxCharge);

                progressData.Report(j, this.CountAlignedFeatures);
            }
        }