コード例 #1
0
        public ScanBasedTagSearchEngine(
            LcMsRun run,
            ISequenceTagFinder seqTagFinder,
            LcMsPeakMatrix featureFinder,
            FastaDatabase fastaDb,
            Tolerance tolerance,
            AminoAcidSet aaSet,
            CompositeScorerFactory ms2ScorerFactory = null,
            int minMatchedTagLength = DefaultMinMatchedTagLength,
            double maxSequenceMass  = 50000.0,
            int minProductIonCharge = 1,
            int maxProductIonCharge = 20)
        {
            _run           = run;
            _featureFinder = featureFinder;

            _searchableDb = new SearchableDatabase(fastaDb);

            _tolerance           = tolerance;
            _aaSet               = aaSet;
            _minMatchedTagLength = minMatchedTagLength;
            _maxSequenceMass     = maxSequenceMass;
            _minProductIonCharge = minProductIonCharge;
            _maxProductIonCharge = maxProductIonCharge;
            MinScan              = int.MinValue;
            MaxScan              = int.MaxValue;
            _ms2ScorerFactory    = ms2ScorerFactory;
            _seqTagFinder        = seqTagFinder;
        }
コード例 #2
0
        public void FeatureFind(List <ProteinSpectrumMatch> prsms, LcMsRun run, string outTsvFilePath)
        {
            var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());
            // write result files
            var tsvWriter = new StreamWriter(outTsvFilePath);

            tsvWriter.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString(false));

            var featureId = 1;

            foreach (var match in prsms)
            {
                var minScan = run.GetPrevScanNum(match.ScanNum, 1);
                var maxScan = run.GetNextScanNum(match.ScanNum, 1);
                var feature = featureFinder.GetLcMsPeakCluster(match.Mass, match.Charge, minScan, maxScan);

                if (feature == null)
                {
                    continue;
                }

                tsvWriter.WriteLine("{0}\t{1}", featureId, LcMsFeatureFinderLauncher.GetString(feature, false));
                featureId++;
            }

            tsvWriter.Close();
        }
コード例 #3
0
        private int FilterAndOutputFeatures(LcMsFeatureContainer container, LcMsPeakMatrix featureFinder, string outCsvFilePath, string ms1FeaturesFilePath)
        {
            var featureCounter = new int[1];

            Ms1FtEntry.WriteToFile(ms1FeaturesFilePath, FilterFeaturesWithOutput(container, featureFinder, outCsvFilePath, featureCounter), Parameters.ScoreReport);

            return(featureCounter[0]);
        }
コード例 #4
0
        private int FilterAndOutputFeaturesOld(LcMsFeatureContainer container, LcMsPeakMatrix featureFinder, string outCsvFilePath, string ms1FeaturesFilePath)
        {
            var featureId = 0;

            Stream csvStream = new MemoryStream();

            if (Parameters.CsvOutput)
            {
                csvStream = new FileStream(outCsvFilePath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite);
            }
            // write result files
            using (var tsvWriter = new StreamWriter(ms1FeaturesFilePath))
                using (var csvWriter = new StreamWriter(csvStream))
                {
                    tsvWriter.WriteLine(GetHeaderString(Parameters.ScoreReport));

                    if (Parameters.CsvOutput)
                    {
                        csvWriter.WriteLine("scan_num,charge,abundance,mz,fit,monoisotopic_mw,FeatureID");
                    }

                    var filteredFeatures = container.GetFilteredFeatures(featureFinder);
                    foreach (var feature in filteredFeatures)
                    {
                        featureId++;
                        tsvWriter.WriteLine("{0}\t{1}", featureId, GetString(feature, Parameters.ScoreReport));

                        var mostAbuIdx = feature.TheoreticalEnvelope.IndexOrderByRanking[0];

                        if (Parameters.CsvOutput)
                        {
                            foreach (var envelope in feature.EnumerateEnvelopes())
                            {
                                //var mostAbuIsotopeInternalIndex = cluster.IsotopeList.SortedIndexByIntensity[0];
                                var mostAbuPeak = envelope.Peaks[mostAbuIdx];
                                if (mostAbuPeak == null || !mostAbuPeak.Active)
                                {
                                    continue;
                                }

                                var fitscore = 1.0 - feature.BestCorrelationScore;
                                csvWriter.WriteLine("{0},{1},{2},{3},{4},{5},{6}", envelope.ScanNum, envelope.Charge, envelope.Abundance,
                                                    mostAbuPeak.Mz, fitscore, envelope.MonoMass, featureId);
                            }
                        }
                    }
                }

            return(featureId);
        }
コード例 #5
0
        public void TestFeatureExampleForFigure()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFile = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_1\CPTAC_Intact_rep6_15Jan15_Bane_C2-14-08-02RZ.pbf";

            //const string rawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            if (!File.Exists(rawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
            }

            var run           = PbfLcMsRun.GetLcMsRun(rawFile);
            var scorer        = new LcMsFeatureLikelihood();
            var featureFinder = new LcMsPeakMatrix(run, scorer);
            var feature       = featureFinder.GetLcMsPeakCluster(28061.6177, 20, 34, 7624, 7736);

            var resultsFilePath = Path.Combine(Path.GetTempPath(), Path.GetFileNameWithoutExtension(rawFile) + "_peaks.txt");
            var writer          = new StreamWriter(resultsFilePath);

            writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n", "Scan", "Elution_Time", "Charge", "ID", "MZ", "Intensity", "Pearson_Correlation");

            var envelope = feature.TheoreticalEnvelope;

            foreach (var e in envelope.Isotopes)
            {
                Console.WriteLine(e.Ratio);
            }

            foreach (var env in feature.EnumerateEnvelopes())
            {
                var corr = env.PearsonCorrelation;
                for (var i = 0; i < envelope.Size; i++)
                {
                    var peak = env.Peaks[i];
                    if (peak == null)
                    {
                        continue;
                    }
                    writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n", env.ScanNum, run.GetElutionTime(env.ScanNum), env.Charge, i, peak.Mz, peak.Intensity, corr);
                }
            }
            writer.Close();

            Console.WriteLine("Results are in file " + resultsFilePath);
        }
コード例 #6
0
 public TagMatchFinder(
     ProductSpectrum spec,
     IScorer ms2Scorer,
     LcMsPeakMatrix featureFinder,
     string proteinSequence,
     Tolerance tolerance,
     AminoAcidSet aaSet,
     double maxSequenceMass)
 {
     _spec            = spec;
     _ms2Scorer       = ms2Scorer;
     _featureFinder   = featureFinder;
     _proteinSequence = proteinSequence;
     _tolerance       = tolerance;
     _aaSet           = aaSet;
     _maxSequenceMass = maxSequenceMass;
 }
コード例 #7
0
        public IEnumerable <LcMsPeakCluster> GetFilteredFeatures(LcMsPeakMatrix featureFinder)
        {
            var mergedFeatures = MergeFeatures(featureFinder, _featureList);

            SetFeatureList(mergedFeatures);
            var connectedFeatures = GetAllConnectedFeatures();
            var filteredFeatures  = GetFilteredFeatures(connectedFeatures);

            return(filteredFeatures.OrderBy(f => f.RepresentativeMass));

            /*
             * var connectedFeatures = GetAllConnectedFeatures();
             * var filteredFeatures = GetFilteredFeatures(connectedFeatures);
             * var mergedFeatures = MergeFeatures(featureFinder, filteredFeatures);
             * return mergedFeatures.OrderBy(f => f.RepresentativeMass);
             */
        }
コード例 #8
0
        public void TestMs1EvidenceScore()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var testRawFile = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"TopDown\Lewy_ManyMods\Lewy_intact_01.pbf");

            if (!File.Exists(testRawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, testRawFile);
            }

            var testResultFile = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"TopDown\Lewy_ManyMods\TestOutput\Lewy_intact_01_IcTda.tsv");

            if (!File.Exists(testResultFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, testResultFile);
            }

            var run           = PbfLcMsRun.GetLcMsRun(testRawFile);
            var tsvParser     = new TsvFileParser(testResultFile);
            var featureFinder = new LcMsPeakMatrix(run);

            for (var i = 0; i < tsvParser.NumData; i++)
            {
                var scan   = int.Parse(tsvParser.GetData("Scan")[i]);
                var charge = int.Parse(tsvParser.GetData("Charge")[i]);
                var mass   = double.Parse(tsvParser.GetData("Mass")[i]);
                var qvalue = double.Parse(tsvParser.GetData("QValue")[i]);

                //var targetFeature = new TargetFeature(mass, charge, scan);
                var score = featureFinder.GetMs1EvidenceScore(scan, mass, charge);
                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", scan, mass, charge, qvalue, score);
            }
        }
コード例 #9
0
ファイル: TestProMex.cs プロジェクト: javamng/GitHUB
        public void TestMs1EvidenceScore()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string TestRawFile = @"\\protoapps\UserData\Jungkap\Lewy\Lewy_intact_01.pbf";

            if (!File.Exists(TestRawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestRawFile);
            }

            const string TestResultFile = @"\\protoapps\UserData\Jungkap\Lewy\Lewy_intact_01_IcTda.tsv";

            if (!File.Exists(TestResultFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestResultFile);
            }

            var run           = PbfLcMsRun.GetLcMsRun(TestRawFile);
            var tsvParser     = new TsvFileParser(TestResultFile);
            var featureFinder = new LcMsPeakMatrix(run);

            for (var i = 0; i < tsvParser.NumData; i++)
            {
                var scan   = int.Parse(tsvParser.GetData("Scan")[i]);
                var charge = int.Parse(tsvParser.GetData("Charge")[i]);
                var mass   = double.Parse(tsvParser.GetData("Mass")[i]);
                var qvalue = double.Parse(tsvParser.GetData("QValue")[i]);

                //var targetFeature = new TargetFeature(mass, charge, scan);
                var score = featureFinder.GetMs1EvidenceScore(scan, mass, charge);
                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", scan, mass, charge, qvalue, score);
            }
        }
コード例 #10
0
        private List <LcMsPeakCluster> MergeFeatures(LcMsPeakMatrix featureFinder, List <LcMsPeakCluster> features)
        {
            //foreach (var f in _featureList) f.ActivateAllPeaks();
            var featureSet = new NodeSet <LcMsPeakCluster>();

            featureSet.AddRange(features);

            var connectedFeatureSet = featureSet.ConnnectedComponents(_mergeComparer);
            var mergedFeatures      = new List <LcMsPeakCluster>();

            foreach (var fSet in connectedFeatureSet)
            {
                if (fSet.Count == 1)
                {
                    mergedFeatures.Add(fSet[0]);
                }
                else
                {
                    var             maxScan   = fSet.Max(f => f.MaxScanNum);
                    var             minScan   = fSet.Min(f => f.MinScanNum);
                    var             maxCharge = fSet.Max(f => f.MaxCharge);
                    var             minCharge = fSet.Min(f => f.MinCharge);
                    var             maxScore  = double.MinValue;//fSet.Max(f => f.Score);
                    LcMsPeakCluster maxScoredClusterOriginal = null;
                    LcMsPeakCluster maxScoredCluster         = null;
                    foreach (var f in fSet)
                    {
                        var newFeature = featureFinder.GetLcMsPeakCluster(f.RepresentativeMass, minCharge, maxCharge, minScan, maxScan);
                        if (newFeature != null && (maxScoredCluster == null || newFeature.Score > maxScoredCluster.Score))
                        {
                            maxScoredCluster = newFeature;
                        }

                        if (f.Score > maxScore)
                        {
                            maxScoredClusterOriginal = f;
                            maxScore = f.Score;
                        }
                    }
                    var feature = featureFinder.GetLcMsPeakCluster(fSet.Select(f => f.Mass).Mean(), minCharge, maxCharge, minScan, maxScan);
                    if (feature != null && (maxScoredCluster == null || feature.Score > maxScoredCluster.Score))
                    {
                        maxScoredCluster = feature;
                    }
                    //Console.WriteLine("------------- Merge -----------------");
                    //foreach (var f in fSet) Console.WriteLine("*\t{0}\t{1}\t{2}\t{3}", f.RepresentativeMass, f.MinScanNum, f.MaxScanNum, f.Score);
                    //Console.WriteLine("**\t{0}\t{1}\t{2}\t{3}", maxScoredCluster.RepresentativeMass, maxScoredCluster.MinScanNum, maxScoredCluster.MaxScanNum, maxScoredCluster.Score);
                    if (maxScoredCluster == null)
                    {
                        maxScoredCluster = maxScoredClusterOriginal;
                    }
                    if (maxScoredCluster != null && maxScoredCluster.Score < maxScore)
                    {
                        maxScoredCluster.Score = maxScore;
                    }
                    mergedFeatures.Add(maxScoredCluster);
                }
                //if (selectedFeature != null) postFilteredSet.Add(selectedFeature);
            }
            //return postFilteredSet.OrderBy(f => f.RepresentativeMass);

            return(mergedFeatures);
        }
コード例 #11
0
        public void TestLcMsFeatureXic()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFile = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_1\CPTAC_Intact_rep2_15Jan15_Bane_C2-14-08-02RZ.pbf";

            //const string rawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            if (!File.Exists(rawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
            }

            var run           = PbfLcMsRun.GetLcMsRun(rawFile);
            var scorer        = new LcMsFeatureLikelihood();
            var featureFinder = new LcMsPeakMatrix(run, scorer);
            var feature       = featureFinder.GetLcMsPeakCluster(2388.278, 4, 3774, 3907);

            //feature = featureFinder.GetLcMsPeakCluster(8151.3706, 7, 13, 4201, 4266);

            //feature = featureFinder.GetLcMsPeakCluster(8151.41789, 7, 13, 2861, 2941);

            var ms1ScanToIndex = run.GetMs1ScanNumToIndex();
            var minCol         = ms1ScanToIndex[feature.MinScanNum];
            var maxCol         = ms1ScanToIndex[feature.MaxScanNum];

            //var minRow = feature.MinCharge - LcMsPeakMatrix.MinScanCharge;
            //var maxRow = feature.MaxCharge - LcMsPeakMatrix.MinScanCharge;

            Console.WriteLine("---------------------------------------------------------------");
            for (var i = 0; i < feature.Envelopes.Length; i++)
            {
                for (var j = 0; j < feature.Envelopes[i].Length; j++)
                {
                    Console.Write(feature.Envelopes[i][j] != null ? feature.Envelopes[i][j].PearsonCorrelation : 0);
                    Console.Write("\t");
                }
                Console.Write("\n");
            }
            Console.WriteLine("---------------------------------------------------------------");

            for (var i = 0; i < feature.Envelopes.Length; i++)
            {
                for (var j = 0; j < feature.Envelopes[i].Length; j++)
                {
                    Console.Write(feature.Envelopes[i][j] != null ? feature.Envelopes[i][j].BhattacharyyaDistance : 0);
                    Console.Write("\t");
                }
                Console.Write("\n");
            }

            Console.WriteLine("---------------------------------------------------------------");

            for (var i = 0; i < feature.Envelopes.Length; i++)
            {
                for (var j = 0; j < feature.Envelopes[i].Length; j++)
                {
                    Console.Write(feature.Envelopes[i][j] != null ? feature.Envelopes[i][j].Abundance : 0);
                    Console.Write("\t");
                }
                Console.Write("\n");
            }
        }
コード例 #12
0
ファイル: TestProMex.cs プロジェクト: javamng/GitHUB
        public void ExtractLcMsFeaturesForTrainingSet()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string idFileFolder = @"D:\MassSpecFiles\training\FilteredIdResult";

            if (!Directory.Exists(idFileFolder))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, idFileFolder);
            }

            var tolerance  = new Tolerance(10);
            var tolerance2 = new Tolerance(20);
            var id         = 1;


            for (var d = 0; d < TrainSetFileLists.Length; d++)
            {
                var dataset            = TrainSetFileLists[d];
                var dataname           = Path.GetFileNameWithoutExtension(dataset);
                var filtedIdResultFile = string.Format(@"{0}\{1}.trainset.tsv", idFileFolder, Path.GetFileNameWithoutExtension(dataset));
                var featureResult      = string.Format(@"{0}\{1}.ms1ft", idFileFolder, Path.GetFileNameWithoutExtension(dataset));

                if (!File.Exists(dataset))
                {
                    Console.WriteLine(@"Warning: Skipping since file not found: {0}", dataset);
                    continue;
                }
                if (!File.Exists(filtedIdResultFile))
                {
                    Console.WriteLine(@"Warning: Skipping since file not found: {0}", filtedIdResultFile);
                    continue;
                }


                var run = PbfLcMsRun.GetLcMsRun(dataset);


                var targetStatWriter = new StreamWriter(string.Format(@"D:\MassSpecFiles\training\statistics\{0}.tsv", Path.GetFileNameWithoutExtension(dataset)));
                var decoyStatWriter  = new StreamWriter(string.Format(@"D:\MassSpecFiles\training\statistics\{0}_decoy.tsv", Path.GetFileNameWithoutExtension(dataset)));
                var writer           = new StreamWriter(featureResult);

                writer.Write("Ms2MinScan\tMs2MaxScan\tMs2MinCharge\tMs2MaxCharge\tMs2Mass\t");
                writer.Write("Mass\tMinScan\tMaxScan\tMinCharge\tMaxCharge\tMinTime\tMaxTime\tElution\tGood\n");
                var tsvParser = new TsvFileParser(filtedIdResultFile);

                var featureFinder = new LcMsPeakMatrix(run);


                for (var i = 0; i < tsvParser.NumData; i++)
                {
                    var minScan   = int.Parse(tsvParser.GetData("MinScan")[i]);
                    var maxScan   = int.Parse(tsvParser.GetData("MaxScan")[i]);
                    var minCharge = int.Parse(tsvParser.GetData("MinCharge")[i]);
                    var maxCharge = int.Parse(tsvParser.GetData("MaxCharge")[i]);
                    var mass      = double.Parse(tsvParser.GetData("Mass")[i]);

                    writer.Write(minScan);
                    writer.Write("\t");
                    writer.Write(maxScan);
                    writer.Write("\t");
                    writer.Write(minCharge);
                    writer.Write("\t");
                    writer.Write(maxCharge);
                    writer.Write("\t");
                    writer.Write(mass);
                    writer.Write("\t");

                    var binNum = featureFinder.Comparer.GetBinNumber(mass);

                    var binMass = featureFinder.Comparer.GetMzAverage(binNum);

                    var             binNumList     = (mass < binMass) ? new int[] { binNum, binNum - 1, binNum + 1 } : new int[] { binNum, binNum + 1, binNum - 1 };
                    LcMsPeakCluster refinedFeature = null;

                    foreach (var bi in binNumList)
                    {
                        var tempList = new List <LcMsPeakCluster>();
                        var features = featureFinder.FindFeatures(bi);
                        var massTh   = (mass < 2000) ? tolerance2.GetToleranceAsTh(mass) : tolerance.GetToleranceAsTh(mass);
                        foreach (var feature in features)
                        {
                            if (Math.Abs(mass - feature.Mass) < massTh)
                            {
                                tempList.Add(feature);
                            }
                        }

                        //var nHits = 0;
                        var highestAbu = 0d;
                        //var scans = Enumerable.Range(minScan, maxScan - minScan + 1);
                        foreach (var feature in tempList)
                        {
                            //var scans2 = Enumerable.Range(feature.MinScanNum, feature.MaxScanNum - feature.MinScanNum + 1);
                            //var hitScans = scans.Intersect(scans2).Count();
                            if (feature.MinScanNum < 0.5 * (minScan + maxScan) &&
                                0.5 * (minScan + maxScan) < feature.MaxScanNum)
                            {
                                if (feature.Abundance > highestAbu)
                                {
                                    refinedFeature = feature;
                                    highestAbu     = feature.Abundance;
                                }
                            }

                            /*if (hitScans > 0)
                             * {
                             *  refinedFeature = feature;
                             *  nHits = hitScans;
                             * }*/
                        }

                        if (refinedFeature != null)
                        {
                            break;
                        }
                    }

                    if (refinedFeature != null)
                    {
                        writer.Write(refinedFeature.Mass);
                        writer.Write("\t");
                        writer.Write(refinedFeature.MinScanNum);
                        writer.Write("\t");
                        writer.Write(refinedFeature.MaxScanNum);
                        writer.Write("\t");
                        writer.Write(refinedFeature.MinCharge);
                        writer.Write("\t");
                        writer.Write(refinedFeature.MaxCharge);
                        writer.Write("\t");
                        writer.Write(refinedFeature.MinElutionTime);
                        writer.Write("\t");
                        writer.Write(refinedFeature.MaxElutionTime);
                        writer.Write("\t");
                        writer.Write(refinedFeature.MaxElutionTime - refinedFeature.MinElutionTime);
                        writer.Write("\t");

                        var good = (refinedFeature.MinScanNum <= minScan && refinedFeature.MaxScanNum >= maxScan);
                        writer.Write(good ? 1 : 0);
                        writer.Write("\n");
                        //writer.Write(0); writer.Write("\t");
                        //writer.Write(0); writer.Write("\n");

                        OutputEnvelopPeakStat(id, refinedFeature, targetStatWriter);

                        var chargeRange = featureFinder.GetDetectableMinMaxCharge(refinedFeature.RepresentativeMass, run.MinMs1Mz, run.MaxMs1Mz);
                        refinedFeature.UpdateWithDecoyScore(featureFinder.Ms1Spectra, chargeRange.Item1, chargeRange.Item2);
                        OutputEnvelopPeakStat(id, refinedFeature, decoyStatWriter);
                        id++;
                    }
                    else
                    {
                        writer.Write(0);
                        writer.Write("\t");
                        writer.Write(0);
                        writer.Write("\t");
                        writer.Write(0);
                        writer.Write("\t");
                        writer.Write(0);
                        writer.Write("\t");
                        writer.Write(0);
                        writer.Write("\t");
                        writer.Write(0);
                        writer.Write("\t");
                        writer.Write(0);
                        writer.Write("\t");
                        writer.Write(0);
                        writer.Write("\t");
                        writer.Write(0);
                        writer.Write("\n");
                    }
                    //var feature = featureFinder.FindLcMsPeakCluster(mass, (int) scan, (int) charge);
                }
                writer.Close();
                targetStatWriter.Close();
                decoyStatWriter.Close();
                Console.WriteLine(dataname);
            }
        }
コード例 #13
0
        public void TestLcMsFeatureFinder()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var pbfFilePath = Utils.GetPbfTestFilePath(false);
            var pbfFile     = Utils.GetTestFile(methodName, pbfFilePath);

            // var outTsvFilePath = MassSpecDataReaderFactory.ChangeExtension(rawFile, "ms1ft");
            //var scoreDataPath = @"D:\MassSpecFiles\training";
            var scorer    = new LcMsFeatureLikelihood();
            var stopwatch = Stopwatch.StartNew();

            Console.WriteLine(@"Start loading MS1 data from {0}", pbfFile.FullName);

            var run           = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);
            var featureFinder = new LcMsPeakMatrix(run, scorer);

            Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec",
                              (stopwatch.ElapsedMilliseconds) / 1000.0d);

            var    container        = new LcMsFeatureContainer(featureFinder.Ms1Spectra, scorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
            var    minSearchMassBin = featureFinder.Comparer.GetBinNumber(11180.33677);
            var    maxSearchMassBin = featureFinder.Comparer.GetBinNumber(11180.33677);
            double totalMassBin     = maxSearchMassBin - minSearchMassBin + 1;

            Console.WriteLine(@"Start MS1 feature extraction.");

            stopwatch.Restart();
            for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
            {
                var clusters = featureFinder.FindFeatures(binNum);
                container.Add(clusters);

                if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
                {
                    var elapsed             = (stopwatch.ElapsedMilliseconds) / 1000.0d;
                    var processedBins       = binNum - minSearchMassBin;
                    var processedPercentage = ((double)processedBins / totalMassBin) * 100;
                    Console.WriteLine(
                        @"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                        processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed,
                        container.NumberOfFeatures);
                }
            }

            Console.WriteLine(@"Complete MS1 feature extraction.");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);

            // write result files
            Console.WriteLine(@"Start selecting mutually independent features from feature network graph");

            stopwatch.Stop();

            // Start to quantify accurate abundance
            stopwatch.Restart();
            //var quantAnalyzer = new TargetMs1FeatureMatrix(run);
            //var oriResult = new List<Ms1FeatureCluster>();
            //var quantResult = new List<Ms1Feature>();

            var featureId   = 0;
            var ms1ScanNums = run.GetMs1ScanVector();
            //tsvWriter.WriteLine(GetHeaderString() + "\tQMinScanNum\tQMaxScanNum\tQMinCharge\tQMaxCharge\tQAbundance");

            var filteredFeatures = container.GetFilteredFeatures(featureFinder);

            foreach (var feature in filteredFeatures)
            {
                Console.Write(featureId);
                Console.Write("\t");
                Console.Write(feature.Mass);
                Console.Write("\t");
                Console.Write(feature.MinScanNum);
                Console.Write("\t");
                Console.Write(feature.MaxScanNum);
                Console.Write("\t");
                Console.Write(feature.MinCharge);
                Console.Write("\t");
                Console.Write(feature.MaxCharge);
                Console.Write("\t");

                Console.Write(feature.RepresentativeScanNum);
                Console.Write("\t");
                Console.Write(feature.RepresentativeMz);
                Console.Write("\t");
                Console.Write(feature.RepresentativeCharge);
                Console.Write("\t");

                //Console.Write(feature.BestSummedEnvelopeDistance); Console.Write("\t");
                //Console.Write(feature.BestEnvelopeDistance); Console.Write("\t");
                Console.Write(feature.BestDistanceScoreAcrossCharge[0]);
                Console.Write("\t");
                Console.Write(feature.BestDistanceScoreAcrossCharge[1]);
                Console.Write("\t");

                Console.Write(feature.BestCorrelationScoreAcrossCharge[0]);
                Console.Write("\t");
                Console.Write(feature.BestCorrelationScoreAcrossCharge[1]);
                Console.Write("\t");

                Console.Write(feature.BestIntensityScoreAcrossCharge[0]);
                Console.Write("\t");
                Console.Write(feature.BestIntensityScoreAcrossCharge[1]);
                Console.Write("\t");

                Console.Write(feature.AbundanceDistributionAcrossCharge[0]);
                Console.Write("\t");
                Console.Write(feature.AbundanceDistributionAcrossCharge[1]);
                Console.Write("\t");

                Console.Write(feature.XicCorrelationBetweenBestCharges[0]);
                Console.Write("\t");
                Console.Write(feature.XicCorrelationBetweenBestCharges[1]);
                Console.Write("\t");

                Console.Write(feature.Score);
                Console.Write("\n");
                featureId++;
            }
        }
コード例 #14
0
        /// <summary>
        /// Find features in the data file
        /// </summary>
        /// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
        /// <returns>0 if success; negative number on error</returns>
        private int ProcessFile(string rawFile)
        {
            var outDirectory = GetOutputDirectory(rawFile);

            if (string.IsNullOrEmpty(outDirectory))
            {
                return(-1);
            }

            var baseName            = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
            var ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
            var outCsvFilePath      = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".csv");
            var pngFilePath         = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

            if (File.Exists(ms1FeaturesFilePath))
            {
                Console.WriteLine(@"ProMex output already exists: {0}", ms1FeaturesFilePath);
                return(-2);
            }

            if (!File.Exists(rawFile))
            {
                ShowErrorMessage(@"Cannot find input file: " + rawFile);
                return(-3);
            }

            var stopwatch = Stopwatch.StartNew();

            Console.WriteLine(@"Start loading MS1 data from {0}", rawFile);
            var run = PbfLcMsRun.GetLcMsRun(rawFile);

            var featureFinder = new LcMsPeakMatrix(run, _likelihoodScorer, 1, 60, Parameters.MaxThreads);

            Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            if (run.GetMs1ScanVector().Length == 0)
            {
                ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
                return(-4);
            }

            var    comparer         = featureFinder.Comparer;
            var    container        = new LcMsFeatureContainer(featureFinder.Ms1Spectra, _likelihoodScorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
            var    minSearchMassBin = comparer.GetBinNumber(Parameters.MinSearchMass);
            var    maxSearchMassBin = comparer.GetBinNumber(Parameters.MaxSearchMass);
            double totalMassBin     = maxSearchMassBin - minSearchMassBin + 1;

            Console.WriteLine(@"Start MS1 feature extraction.");
            stopwatch.Restart();
            for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
            {
                var clusters = featureFinder.FindFeatures(binNum);
                container.Add(clusters);

                if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
                {
                    var elapsed             = (stopwatch.ElapsedMilliseconds) / 1000.0d;
                    var processedBins       = binNum - minSearchMassBin;
                    var processedPercentage = ((double)processedBins / totalMassBin) * 100;
                    Console.WriteLine(@"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                                      processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed,
                                      container.NumberOfFeatures);
                }
            }

            Console.WriteLine(@"Complete MS1 feature extraction.");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);
            Console.WriteLine(@"Start selecting mutually independent features from feature network graph");
            stopwatch.Restart();


            // write result files
            var tsvWriter = new StreamWriter(ms1FeaturesFilePath);

            tsvWriter.WriteLine(GetHeaderString(Parameters.ScoreReport));

            StreamWriter csvWriter = null;

            if (Parameters.CsvOutput)
            {
                csvWriter = new StreamWriter(outCsvFilePath);
                csvWriter.WriteLine("scan_num,charge,abundance,mz,fit,monoisotopic_mw,FeatureID");
            }

            var filteredFeatures = container.GetFilteredFeatures(featureFinder);
            var featureId        = 0;

            foreach (var feature in filteredFeatures)
            {
                featureId++;
                tsvWriter.WriteLine("{0}\t{1}", featureId, GetString(feature, Parameters.ScoreReport));

                var mostAbuIdx = feature.TheoreticalEnvelope.IndexOrderByRanking[0];

                if (csvWriter != null)
                {
                    foreach (var envelope in feature.EnumerateEnvelopes())
                    {
                        //var mostAbuIsotopeInternalIndex = cluster.IsotopeList.SortedIndexByIntensity[0];
                        var mostAbuPeak = envelope.Peaks[mostAbuIdx];
                        if (mostAbuPeak == null || !mostAbuPeak.Active)
                        {
                            continue;
                        }

                        var fitscore = 1.0 - feature.BestCorrelationScore;
                        csvWriter.WriteLine(string.Format("{0},{1},{2},{3},{4},{5},{6}", envelope.ScanNum, envelope.Charge, envelope.Abundance, mostAbuPeak.Mz, fitscore, envelope.MonoMass, featureId));
                    }
                }
            }
            tsvWriter.Close();

            Console.WriteLine(@"Complete feature filtration");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(@" - Number of filtered features = {0}", featureId);
            Console.WriteLine(@" - ProMex output: {0}", ms1FeaturesFilePath);

            if (csvWriter != null)
            {
                csvWriter.Close();
                Console.WriteLine(@" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
            }

            if (Parameters.FeatureMapImage)
            {
                CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);
            }

            return(0);
        }
コード例 #15
0
        /// <summary>
        /// Create <see cref="Ms1FtEntry"/> objects for the features, and output to csv (if desired).
        /// </summary>
        /// <param name="container"></param>
        /// <param name="featureFinder"></param>
        /// <param name="outCsvFilePath"></param>
        /// <param name="featureCounter"></param>
        /// <returns></returns>
        private IEnumerable <Ms1FtEntry> FilterFeaturesWithOutput(LcMsFeatureContainer container, LcMsPeakMatrix featureFinder, string outCsvFilePath, int[] featureCounter)
        {
            // Using an array, since we can't use ref or out parameters
            featureCounter[0] = 0;

            Stream csvStream = new MemoryStream();

            if (Parameters.CsvOutput)
            {
                csvStream = new FileStream(outCsvFilePath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite);
            }
            using (var csvWriter = new StreamWriter(csvStream))
            {
                if (Parameters.CsvOutput)
                {
                    csvWriter.WriteLine("scan_num,charge,abundance,mz,fit,monoisotopic_mw,FeatureID");
                }

                var filteredFeatures = container.GetFilteredFeatures(featureFinder);
                foreach (var feature in filteredFeatures)
                {
                    featureCounter[0]++;

                    if (Parameters.CsvOutput)
                    {
                        var mostAbuIdx = feature.TheoreticalEnvelope.IndexOrderByRanking[0];

                        foreach (var envelope in feature.EnumerateEnvelopes())
                        {
                            //var mostAbuIsotopeInternalIndex = cluster.IsotopeList.SortedIndexByIntensity[0];
                            var mostAbuPeak = envelope.Peaks[mostAbuIdx];
                            if (mostAbuPeak == null || !mostAbuPeak.Active)
                            {
                                continue;
                            }

                            var fitscore = 1.0 - feature.BestCorrelationScore;
                            csvWriter.WriteLine("{0},{1},{2},{3},{4},{5},{6}", envelope.ScanNum, envelope.Charge, envelope.Abundance,
                                                mostAbuPeak.Mz, fitscore, envelope.MonoMass, featureCounter);
                        }
                    }

                    yield return(feature.ToMs1FtEntry(featureCounter[0]));
                }
            }
        }
コード例 #16
0
        public void FillMissingFeatures(int dataSetIndex, double scoreThreshold = -30, IProgress <ProgressData> progressReporter = null)
        {
            if (_alignedFeatures == null)
            {
                return;
            }

            var run           = _runList[dataSetIndex];
            var ms1ScanNums   = run.GetMs1ScanVector();
            var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());

            var progressData = new ProgressData(progressReporter);

            for (var j = 0; j < CountAlignedFeatures; j++)
            {
                if (_alignedFeatures[j][dataSetIndex] != null)
                {
                    continue;
                }

                var mass       = 0d;
                var charge     = 0;
                var minScanNum = -1;
                var maxScanNum = ms1ScanNums.Last();
                var repFt      = GetRepFeatureInfo(_alignedFeatures[j]);
                mass   = repFt.Mass;
                charge = repFt.Charge;
                var minNet = repFt.MinNet;
                var maxNet = repFt.MaxNet;

                for (var k = 0; k < ms1ScanNums.Length; k++)
                {
                    var net = run.GetElutionTime(ms1ScanNums[k]) / run.GetElutionTime(run.MaxLcScan);
                    if (net > minNet && minScanNum < 0)
                    {
                        minScanNum = (k == 0) ? ms1ScanNums[k] : ms1ScanNums[k - 1];
                    }

                    if (net > maxNet)
                    {
                        maxScanNum = ms1ScanNums[k];
                        break;
                    }
                }

                if (minScanNum < 0)
                {
                    minScanNum = 0;
                }

                var newFt = featureFinder.GetLcMsPeakCluster(mass, charge, minScanNum, maxScanNum);
                _alignedFeatures[j][dataSetIndex] = (newFt == null) ? featureFinder.GetLcMsPeaksFromNoisePeaks(mass, charge, minScanNum, maxScanNum, repFt.MinCharge, repFt.MaxCharge) : newFt;

                /*
                 * var ft = featureFinder.GetLcMsPeakCluster(mass, charge, minScanNum, maxScanNum);
                 *
                 * if (ft == null || ft.Score < scoreThreshold)
                 *  _alignedFeatures[j][dataSetIndex] = featureFinder.CollectLcMsPeaksWithNoise(mass, charge, minScanNum,
                 *      maxScanNum, repFt.MinCharge, repFt.MaxCharge);
                 * else
                 *  _alignedFeatures[j][dataSetIndex] = ft;*/

                progressData.Report(j, this.CountAlignedFeatures);
            }

            featureFinder = null;
        }
コード例 #17
0
        public void TestLcMsFeatureFinder()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string rawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            //const string rawFile = @"D:\MassSpecFiles\CompRef\CPTAC_Intact_CR_Pool_2_25Jun15_Bane_15-02-02RZ.pbf";
            //const string rawFile = @"D:\MassSpecFiles\IMER\Dey_IMERblast_01_08May14_Alder_14-01-33.pbf";
            //const string rawFile = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_3\MZ20150729FG_WT1.pbf";

            if (!File.Exists(rawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
            }

            // var outTsvFilePath = MassSpecDataReaderFactory.ChangeExtension(rawFile, "ms1ft");
            //var scoreDataPath = @"D:\MassSpecFiles\training";
            var scorer    = new LcMsFeatureLikelihood();
            var stopwatch = Stopwatch.StartNew();

            Console.WriteLine(@"Start loading MS1 data from {0}", rawFile);
            var run           = PbfLcMsRun.GetLcMsRun(rawFile);
            var featureFinder = new LcMsPeakMatrix(run, scorer);

            Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec",
                              (stopwatch.ElapsedMilliseconds) / 1000.0d);

            var    container        = new LcMsFeatureContainer(featureFinder.Ms1Spectra, scorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
            var    minSearchMassBin = featureFinder.Comparer.GetBinNumber(11180.33677);
            var    maxSearchMassBin = featureFinder.Comparer.GetBinNumber(11180.33677);
            double totalMassBin     = maxSearchMassBin - minSearchMassBin + 1;

            Console.WriteLine(@"Start MS1 feature extraction.");

            stopwatch.Restart();
            for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
            {
                var clusters = featureFinder.FindFeatures(binNum);
                container.Add(clusters);

                if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
                {
                    var elapsed             = (stopwatch.ElapsedMilliseconds) / 1000.0d;
                    var processedBins       = binNum - minSearchMassBin;
                    var processedPercentage = ((double)processedBins / totalMassBin) * 100;
                    Console.WriteLine(
                        @"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                        processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed,
                        container.NumberOfFeatures);
                }
            }

            Console.WriteLine(@"Complete MS1 feature extraction.");
            Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);

            // write result files
            Console.WriteLine(@"Start selecting mutually independent features from feature network graph");


            stopwatch.Stop();

            // Start to quantify accurate abundance
            stopwatch.Restart();
            //var quantAnalyzer = new TargetMs1FeatureMatrix(run);
            //var oriResult = new List<Ms1FeatureCluster>();
            //var quantResult = new List<Ms1Feature>();

            var featureId   = 0;
            var ms1ScanNums = run.GetMs1ScanVector();
            //tsvWriter.WriteLine(GetHeaderString() + "\tQMinScanNum\tQMaxScanNum\tQMinCharge\tQMaxCharge\tQAbundance");

            var filteredFeatures = container.GetFilteredFeatures(featureFinder);

            foreach (var feature in filteredFeatures)
            {
                Console.Write(featureId);
                Console.Write("\t");
                Console.Write(feature.Mass);
                Console.Write("\t");
                Console.Write(feature.MinScanNum);
                Console.Write("\t");
                Console.Write(feature.MaxScanNum);
                Console.Write("\t");
                Console.Write(feature.MinCharge);
                Console.Write("\t");
                Console.Write(feature.MaxCharge);
                Console.Write("\t");

                Console.Write(feature.RepresentativeScanNum);
                Console.Write("\t");
                Console.Write(feature.RepresentativeMz);
                Console.Write("\t");
                Console.Write(feature.RepresentativeCharge);
                Console.Write("\t");

                //Console.Write(feature.BestSummedEnvelopeDistance); Console.Write("\t");
                //Console.Write(feature.BestEnvelopeDistance); Console.Write("\t");
                Console.Write(feature.BestDistanceScoreAcrossCharge[0]);
                Console.Write("\t");
                Console.Write(feature.BestDistanceScoreAcrossCharge[1]);
                Console.Write("\t");

                Console.Write(feature.BestCorrelationScoreAcrossCharge[0]);
                Console.Write("\t");
                Console.Write(feature.BestCorrelationScoreAcrossCharge[1]);
                Console.Write("\t");

                Console.Write(feature.BestIntensityScoreAcrossCharge[0]);
                Console.Write("\t");
                Console.Write(feature.BestIntensityScoreAcrossCharge[1]);
                Console.Write("\t");

                Console.Write(feature.AbundanceDistributionAcrossCharge[0]);
                Console.Write("\t");
                Console.Write(feature.AbundanceDistributionAcrossCharge[1]);
                Console.Write("\t");

                Console.Write(feature.XicCorrelationBetweenBestCharges[0]);
                Console.Write("\t");
                Console.Write(feature.XicCorrelationBetweenBestCharges[1]);
                Console.Write("\t");

                Console.Write(feature.Score);
                Console.Write("\n");
                featureId++;
            }
        }
コード例 #18
0
        public void TestQuantifyIdedProteoforms()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFolder           = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
            const string promexOutFolder     = @"D:\MassSpecFiles\UTEX\MSAlign";
            const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

            if (!Directory.Exists(rawFolder))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
            }

            var nDataset = 32;
            var dataset  = new string[nDataset];

            for (var i = 0; i < nDataset; i++)
            {
                dataset[i] = string.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
                //var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
            }

            var prsmReader = new ProteinSpectrumMatchReader(0.01);

            var filesProcessed = 0;

            var tolerance = new Tolerance(10);

            for (var i = 0; i < dataset.Length; i++)
            {
                var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
                if (!File.Exists(rawFile))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
                    continue;
                }
                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                var path = string.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
                if (!File.Exists(path))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
                    continue;
                }

                var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);

                filesProcessed++;

                for (var j = 0; j < prsmList.Count; j++)
                {
                    var match = prsmList[j];
                    match.ProteinId = match.ProteinName.Substring(match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
                }

                // PrSM To Feature
                var prsmToFeatureIdMap = new int[prsmList.Count];
                for (var k = 0; k < prsmToFeatureIdMap.Length; k++)
                {
                    prsmToFeatureIdMap[k] = -1;
                }

                // Feature To PrSM
                var featureToPrsm = new List <ProteinSpectrumMatchSet>();

                var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());
                var featureList   = new List <LcMsPeakCluster>();
                var featureId     = 0;
                for (var j = 0; j < prsmList.Count; j++)
                {
                    if (prsmToFeatureIdMap[j] >= 0)
                    {
                        continue;
                    }

                    var match      = prsmList[j];
                    var minScanNum = match.ScanNum;
                    var maxScanNum = match.ScanNum;
                    var mass       = match.Mass;
                    var charge     = match.Charge;
                    var massTh     = tolerance.GetToleranceAsMz(mass);
                    var id1        = match.ProteinId;

                    var feature = featureFinder.GetLcMsPeakCluster(mass, charge, minScanNum, maxScanNum);
                    var prsmSet = new ProteinSpectrumMatchSet(i)
                    {
                        match
                    };
                    if (feature == null)
                    {
                        feature = featureFinder.GetLcMsPeaksFromNoisePeaks(mass, charge, minScanNum, maxScanNum, charge, charge);
                        prsmToFeatureIdMap[j] = featureId;
                    }
                    else
                    {
                        prsmToFeatureIdMap[j] = featureId;
                        var etTol = Math.Max(run.GetElutionTime(run.MaxLcScan) * 0.005, feature.ElutionLength * 0.2);

                        for (var k = j + 1; k < prsmList.Count; k++)
                        {
                            var otherMatch = prsmList[k];
                            var id2        = otherMatch.ProteinId;
                            var et2        = run.GetElutionTime(otherMatch.ScanNum);

                            if (id1.Equals(id2) &&
                                feature.MinElutionTime - etTol < et2 && et2 < feature.MaxElutionTime - etTol &&
                                Math.Abs(otherMatch.Mass - mass) < massTh)
                            {
                                prsmToFeatureIdMap[k] = featureId;
                                prsmSet.Add(otherMatch);
                            }
                        }
                    }
                    featureId++;

                    feature.Flag = 1;
                    featureList.Add(feature);
                    featureToPrsm.Add(prsmSet);
                }

                // Overlap between features???
                for (var j = 0; j < featureList.Count; j++)
                {
                    var f1 = featureList[j];
                    if (f1.Flag < 1)
                    {
                        continue;
                    }
                    var prsm1 = featureToPrsm[j];

                    for (var k = j + 1; k < featureList.Count; k++)
                    {
                        var f2 = featureList[k];
                        if (f2.Flag < 1)
                        {
                            continue;
                        }

                        var prsm2 = featureToPrsm[k];
                        if (Math.Abs(f1.Mass - f2.Mass) > tolerance.GetToleranceAsMz(f1.Mass))
                        {
                            continue;
                        }
                        if (!f1.CoElutedByNet(f2, 0.005))
                        {
                            continue;
                        }
                        if (!prsm1.ShareProteinId(prsm2))
                        {
                            continue;
                        }

                        // let us merge!!
                        if (f1.ScanLength > f2.ScanLength)
                        {
                            prsm1.AddRange(prsm2);
                            prsm2.Clear();
                            f2.Flag = 0;
                        }
                        else
                        {
                            prsm2.AddRange(prsm1);
                            prsm1.Clear();
                            f1.Flag = 0;
                        }
                    }
                }

                // now output results!!
                var ms1ftFilePath = string.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
                var writer        = new StreamWriter(ms1ftFilePath);
                writer.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString());

                for (var j = 0; j < featureList.Count; j++)
                {
                    var f1 = featureList[j];
                    if (f1.Flag < 1)
                    {
                        continue;
                    }
                    var prsm1 = featureToPrsm[j];

                    var minScanNum = run.GetPrevScanNum(prsm1.MinScanNum, 1);
                    var maxScanNum = run.GetNextScanNum(prsm1.MaxScanNum, 1);
                    f1.ExpandScanRange(minScanNum, maxScanNum);

                    writer.Write("{0}\t", j + 1);
                    writer.WriteLine(LcMsFeatureFinderLauncher.GetString(f1));
                }
                writer.Close();

                Console.WriteLine(ms1ftFilePath);
            }

            if (filesProcessed == 0)
            {
                Assert.Ignore("Skipped since data files not found");
            }
        }
コード例 #19
0
        /// <summary>
        /// Find features in the data file
        /// </summary>
        /// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
        /// <returns>0 if success; negative number on error</returns>
        private int ProcessFile(string rawFile)
        {
            var outDirectory = GetOutputDirectory(rawFile);

            if (string.IsNullOrEmpty(outDirectory))
            {
                return(-1);
            }

            var baseName            = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
            var ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
            var outCsvFilePath      = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".csv");
            var pngFilePath         = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

            if (File.Exists(ms1FeaturesFilePath))
            {
                ShowErrorMessage("ProMex output already exists: " + ms1FeaturesFilePath);
                return(-2);
            }

            if (!File.Exists(rawFile))
            {
                ShowErrorMessage("Cannot find input file: " + rawFile);
                return(-3);
            }

            var stopwatch = Stopwatch.StartNew();

            Console.WriteLine("Start loading MS1 data from {0}", rawFile);
            var run = PbfLcMsRun.GetLcMsRun(rawFile);

            var featureFinder = new LcMsPeakMatrix(run, _likelihoodScorer, 1, 60, Parameters.MaxThreads);

            Console.WriteLine("Complete loading MS1 data. Elapsed Time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

            if (run.GetMs1ScanVector().Length == 0)
            {
                ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
                return(-4);
            }

            if (featureFinder.Ms1PeakCount == 0)
            {
                ShowErrorMessage(@"Data file has no MS1 peaks: " + Path.GetFileName(rawFile));
                return(-5);
            }

            var    comparer         = featureFinder.Comparer;
            var    container        = new LcMsFeatureContainer(featureFinder.Ms1Spectra, _likelihoodScorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
            var    minSearchMassBin = comparer.GetBinNumber(Parameters.MinSearchMass);
            var    maxSearchMassBin = comparer.GetBinNumber(Parameters.MaxSearchMass);
            double totalMassBin     = maxSearchMassBin - minSearchMassBin + 1;

            Console.WriteLine("Start MS1 feature extraction.");
            stopwatch.Restart();
            for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
            {
                var clusters = featureFinder.FindFeatures(binNum);
                container.Add(clusters);

                if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
                {
                    var elapsed             = (stopwatch.ElapsedMilliseconds) / 1000.0d;
                    var processedBins       = binNum - minSearchMassBin;
                    var processedPercentage = processedBins / totalMassBin * 100;
                    Console.WriteLine("Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                                      processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed,
                                      container.NumberOfFeatures);
                }
            }

            Console.WriteLine("Complete MS1 feature extraction.");
            Console.WriteLine(" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(" - Number of extracted features = {0}", container.NumberOfFeatures);
            Console.WriteLine("Start selecting mutually independent features from feature network graph");
            stopwatch.Restart();

            var featureId = FilterAndOutputFeatures(container, featureFinder, outCsvFilePath, ms1FeaturesFilePath);

            Console.WriteLine("Complete feature filtration");
            Console.WriteLine(" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
            Console.WriteLine(" - Number of filtered features = {0}", featureId);
            Console.WriteLine(" - ProMex output: {0}", ms1FeaturesFilePath);

            if (Parameters.CsvOutput)
            {
                Console.WriteLine(" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
            }

            if (Parameters.FeatureMapImage)
            {
                CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);
            }

            return(0);
        }