예제 #1
0
 /// <summary>
 /// Builds a feature map for <paramref name="run"/> from a ProMex result file.
 /// </summary>
 /// <remarks>
 /// Pure delegation to the overload that takes already-loaded features: the features are read via
 /// <c>LcMsFeatureAlignment.LoadProMexResult</c> (called with 0 as its first argument), and the
 /// file name of <paramref name="ms1FtPath"/> without its extension is forwarded as the third argument.
 /// </remarks>
 /// <param name="run">Run handed both to the loader and to the delegated constructor.</param>
 /// <param name="ms1FtPath">Path of the feature file to load.</param>
 /// <param name="minMass">Lower mass bound, passed to the loader and to the delegated constructor.</param>
 /// <param name="maxMass">Upper mass bound, passed to the loader and to the delegated constructor.</param>
 public LcMsFeatureMap(LcMsRun run, string ms1FtPath, double minMass, double maxMass)
     : this(
         run,
         LcMsFeatureAlignment.LoadProMexResult(0, ms1FtPath, run, minMass, maxMass),
         Path.GetFileNameWithoutExtension(ms1FtPath),
         minMass,
         maxMass)
 {
 }
예제 #2
0
        /// <summary>
        /// Writes the aligned features as a tab-separated table: a header row followed by one row
        /// per aligned feature holding its abundance in every dataset.
        /// </summary>
        /// <param name="align">Alignment whose aligned features are written.</param>
        /// <param name="outFilePath">Path of the text file to write.</param>
        /// <param name="dataName">
        /// Column labels indexed by dataset; must contain at least <c>align.CountDatasets</c> entries.
        /// </param>
        private void OutputAlignmentResult(LcMsFeatureAlignment align, string outFilePath, string[] dataName)
        {
            var alignedFeatureList = align.GetAlignedFeatures();

            // "using" releases the file handle (and flushes buffered text) even when a write or an
            // index lookup below throws; a trailing Close() alone would be skipped in that case.
            using (var writer = new StreamWriter(outFilePath))
            {
                // Header row.
                writer.Write("MonoMass\tMinElutionTime\tMaxElutionTime");
                for (var i = 0; i < align.CountDatasets; i++)
                {
                    writer.Write("\t{0}", dataName[i]);
                }
                writer.Write("\n");

                // One row per aligned feature group.
                for (var i = 0; i < align.CountAlignedFeatures; i++)
                {
                    var features  = alignedFeatureList[i];
                    var minMaxNet = TestLcMsFeatureAlignment.GetMinMaxNet(features);

                    // Item1, Item3 and Item4 fill the three leading columns; Item2 is not written.
                    // Explicit \t escapes keep the separators identical to the header's.
                    writer.Write("{0}\t{1:0.00000}\t{2:0.00000}", minMaxNet.Item1, minMaxNet.Item3, minMaxNet.Item4);

                    for (var j = 0; j < align.CountDatasets; j++)
                    {
                        // A null slot means dataset j has no feature in this group; report 0.
                        var feature = features[j];
                        writer.Write("\t");
                        writer.Write(feature != null ? feature.Abundance : 0d);
                    }
                    writer.Write("\n");
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Aligns the ProMex features of the runs named in <c>runLabels</c>. Where an MSPathFinder
        /// result file exists, each feature is tagged with the PrSMs that lie strictly inside its
        /// scan range and within the mass tolerance. Missing features are then filled in and the
        /// cross-tab is written to a fixed network path.
        /// </summary>
        public void TestFeatureAlignment()
        {
            const string outFilePath = @"\\protoapps\UserData\Jungkap\CompRef\aligned\promex_crosstab_temp.tsv";

            var runLabels = new[] { "32A", "32B", "32C", "32D", "32E", "32F", "32G", "33A", "33B", "33C", "33D", "33E", "33F", "33G" };

            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance  = new Tolerance(10);
            var alignment  = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

            for (var runIndex = 0; runIndex < runLabels.Length; runIndex++)
            {
                var label = runLabels[runIndex];

                var rawFile   = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ.pbf", RawFolder, label);
                var mspFile   = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ_IcTda.tsv", MsPfFolder, label);
                var ms1FtFile = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ.ms1ft", Ms1FtFolder, label);

                var run      = PbfLcMsRun.GetLcMsRun(rawFile);
                var features = LcMsFeatureAlignment.LoadProMexResult(runIndex, ms1FtFile, run);

                // Identification results are optional; a run without them is aligned untagged.
                if (File.Exists(mspFile))
                {
                    var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

                    // The protein name is used as the protein id.
                    foreach (var prsm in prsmList)
                    {
                        prsm.ProteinId = prsm.ProteinName;
                    }

                    // Tag features by PrSMs.
                    foreach (var feature in features)
                    {
                        var massTol = tolerance.GetToleranceAsMz(feature.Mass);

                        foreach (var prsm in prsmList)
                        {
                            var scanInsideFeature = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                            if (!scanInsideFeature)
                            {
                                continue;
                            }

                            if (Math.Abs(feature.Mass - prsm.Mass) < massTol)
                            {
                                feature.ProteinSpectrumMatches.Add(prsm);
                            }
                        }
                    }
                }

                alignment.AddDataSet(runIndex, features, run);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            for (var runIndex = 0; runIndex < runLabels.Length; runIndex++)
            {
                alignment.FillMissingFeatures(runIndex);
                Console.WriteLine("{0} has been processed", runLabels[runIndex]);
            }

            OutputCrossTabWithId(outFilePath, alignment, runLabels);
        }
예제 #4
0
        /// <summary>
        /// Aligns the ProMex features of <c>NdataSet</c> datasets. For each dataset the PrSMs from
        /// two MSPathFinder result folders are combined and merged, and every feature is tagged with
        /// the PrSMs lying strictly inside its scan range and within the mass tolerance. Missing
        /// features are then filled in and the cross-tab is written to a fixed network path.
        /// </summary>
        public void TestFeatureAlignment()
        {
            const string outFilePath = @"\\protoapps\UserData\Jungkap\Lewy\aligned\promex_crosstab_temp.tsv";

            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance  = new Tolerance(10);
            var alignment  = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(tolerance));

            for (var dataIndex = 0; dataIndex < NdataSet; dataIndex++)
            {
                var rawFile   = string.Format(@"{0}\{1}.pbf", PbfPath, GetDataSetNames(dataIndex));
                var mspFile   = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, GetDataSetNames(dataIndex));
                var mspFile2  = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder2, GetDataSetNames(dataIndex));
                var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, GetDataSetNames(dataIndex));
                Console.WriteLine(rawFile);

                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                // Pool the identifications of both result folders before merging them.
                var pooledPrsms    = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);
                var secondaryPrsms = prsmReader.LoadIdentificationResult(mspFile2, ProteinSpectrumMatch.SearchTool.MsPathFinder);
                pooledPrsms.AddRange(secondaryPrsms);

                var prsmList = MergePrsm(pooledPrsms);
                var features = LcMsFeatureAlignment.LoadProMexResult(dataIndex, ms1FtFile, run);

                // The protein name is used as the protein id.
                foreach (var prsm in prsmList)
                {
                    prsm.ProteinId = prsm.ProteinName;
                }

                // Tag features by PrSMs.
                foreach (var feature in features)
                {
                    var massTol = tolerance.GetToleranceAsTh(feature.Mass);

                    foreach (var prsm in prsmList)
                    {
                        var scanInsideFeature = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                        if (!scanInsideFeature)
                        {
                            continue;
                        }

                        if (Math.Abs(feature.Mass - prsm.Mass) < massTol)
                        {
                            feature.ProteinSpectrumMatches.Add(prsm);
                        }
                    }
                }

                alignment.AddDataSet(dataIndex, features, run);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            for (var dataIndex = 0; dataIndex < NdataSet; dataIndex++)
            {
                alignment.FillMissingFeatures(dataIndex);
                Console.WriteLine("{0} has been processed", GetDataSetNames(dataIndex));
            }

            OutputCrossTabWithId(outFilePath, alignment);
        }
예제 #5
0
        /// <summary>
        /// Aligns the features of the given datasets and writes the resulting cross-tab.
        /// </summary>
        /// <remarks>
        /// Per dataset, the features of the ".ms1ft" and ".seqtag.ms1ft" files are combined. When the
        /// dataset's MSPathFinder result file exists, each feature is tagged with the PrSMs lying
        /// strictly inside its scan range and within the mass tolerance.
        /// </remarks>
        /// <param name="datasets">Dataset names used to build every input file name.</param>
        /// <param name="mspfFolder">Folder holding the "_IcTda.tsv" identification files.</param>
        /// <param name="ms1ftFolder">Folder holding both feature files of each dataset.</param>
        /// <param name="outFilePath">Destination passed to the cross-tab writer.</param>
        private void AlignFeatures(List<string> datasets, string mspfFolder, string ms1ftFolder, string outFilePath)
        {
            var datasetCount = datasets.Count;
            var prsmReader   = new ProteinSpectrumMatchReader();
            var tolerance    = new Tolerance(12);
            var alignment    = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(tolerance));

            for (var dataIndex = 0; dataIndex < datasetCount; dataIndex++)
            {
                var rawFile         = string.Format(@"{0}\{1}.pbf", PbfPath, datasets[dataIndex]);
                var mspFile         = string.Format(@"{0}\{1}_IcTda.tsv", mspfFolder, datasets[dataIndex]);
                var ms1FtFile       = string.Format(@"{0}\{1}.ms1ft", ms1ftFolder, datasets[dataIndex]);
                var seqTagMs1FtFile = string.Format(@"{0}\{1}.seqtag.ms1ft", ms1ftFolder, datasets[dataIndex]);

                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                // Combine the features of both feature files into one list.
                var features       = LcMsFeatureAlignment.LoadProMexResult(dataIndex, ms1FtFile, run);
                var seqTagFeatures = LcMsFeatureAlignment.LoadProMexResult(dataIndex, seqTagMs1FtFile, run);
                features.AddRange(seqTagFeatures);

                // Identification results are optional; a dataset without them is aligned untagged.
                if (File.Exists(mspFile))
                {
                    var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

                    // The protein name is used as the protein id.
                    foreach (var prsm in prsmList)
                    {
                        prsm.ProteinId = prsm.ProteinName;
                    }

                    // Tag features by PrSMs.
                    foreach (var feature in features)
                    {
                        var massTol = tolerance.GetToleranceAsMz(feature.Mass);

                        foreach (var prsm in prsmList)
                        {
                            var scanInsideFeature = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                            if (!scanInsideFeature)
                            {
                                continue;
                            }

                            if (Math.Abs(feature.Mass - prsm.Mass) < massTol)
                            {
                                feature.ProteinSpectrumMatches.Add(prsm);
                            }
                        }
                    }
                }

                alignment.AddDataSet(dataIndex, features, run);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            for (var dataIndex = 0; dataIndex < datasetCount; dataIndex++)
            {
                alignment.FillMissingFeatures(dataIndex);
                Console.WriteLine("{0} has been processed", datasets[dataIndex]);
            }

            AnalysisCompRef.OutputCrossTabWithId(outFilePath, alignment, datasets);
        }
예제 #6
0
        /// <summary>
        /// Runs feature alignment over <c>NdataSet</c> datasets. The PrSMs of two MSPathFinder result
        /// folders are concatenated and merged per dataset, then attached to every feature whose scan
        /// range strictly contains the PrSM scan and whose mass is within tolerance. Missing features
        /// are filled in afterwards and the cross-tab is written to a fixed network path.
        /// </summary>
        public void TestFeatureAlignment()
        {
            const string outFilePath = @"\\protoapps\UserData\Jungkap\Lewy\aligned\promex_crosstab_temp.tsv";

            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance  = new Tolerance(10);
            var alignment  = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(tolerance));

            for (var n = 0; n < NdataSet; n++)
            {
                var rawFile   = string.Format(@"{0}\{1}.pbf", PbfPath, GetDataSetNames(n));
                var mspFile   = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, GetDataSetNames(n));
                var mspFile2  = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder2, GetDataSetNames(n));
                var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, GetDataSetNames(n));
                Console.WriteLine(rawFile);

                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                // Results of the second folder are appended to those of the first before merging.
                var allPrsms = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);
                allPrsms.AddRange(prsmReader.LoadIdentificationResult(mspFile2, ProteinSpectrumMatch.SearchTool.MsPathFinder));

                var prsmList = MergePrsm(allPrsms);
                var features = LcMsFeatureAlignment.LoadProMexResult(n, ms1FtFile, run);

                // The protein name is used as the protein id.
                foreach (var identification in prsmList)
                {
                    identification.ProteinId = identification.ProteinName;
                }

                // Tag features by PrSMs.
                foreach (var feature in features)
                {
                    var massTol = tolerance.GetToleranceAsTh(feature.Mass);

                    foreach (var identification in prsmList)
                    {
                        var scanInsideFeature = feature.MinScanNum < identification.ScanNum
                                                && identification.ScanNum < feature.MaxScanNum;
                        if (!scanInsideFeature)
                        {
                            continue;
                        }

                        if (Math.Abs(feature.Mass - identification.Mass) < massTol)
                        {
                            feature.ProteinSpectrumMatches.Add(identification);
                        }
                    }
                }

                alignment.AddDataSet(n, features, run);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            for (var n = 0; n < NdataSet; n++)
            {
                alignment.FillMissingFeatures(n);
                Console.WriteLine("{0} has been processed", GetDataSetNames(n));
            }

            OutputCrossTabWithId(outFilePath, alignment);
        }
예제 #7
0
        /// <summary>
        /// Aligns the ProMex features of the spike-in runs, tagging every feature with the
        /// MSPathFinder PrSMs that lie strictly inside its scan range and within the mass tolerance,
        /// and writes the cross-tab labelled with <c>runLabels</c>. Missing features are not filled in.
        /// </summary>
        public void TestFeatureAlignment()
        {
            const string outFilePath = @"\\protoapps\UserData\Jungkap\Quant\aligned\promex_crosstab.tsv";
            //const string outFolder = @"\\protoapps\UserData\Jungkap\CompRef\aligned";
            var runLabels = new[] { "1x1", "1x2", "1x3", "1x4", "1x5", "5x1", "5x2", "5x3", "5x4", "5x5", "10x1", "10x2", "10x3", "10x4", "10x5" };
            var runCount  = runLabels.Length;

            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance  = new Tolerance(10);
            var alignment  = new LcMsFeatureAlignment(new SpikeInFeatureComparer(tolerance));

            // NOTE(review): the loop is bounded by runLabels.Length but indexes "datasets", which is
            // declared outside this method — confirm both collections have the same length and order.
            for (var runIndex = 0; runIndex < runCount; runIndex++)
            {
                var rawFile   = string.Format(@"{0}\{1}.pbf", RawFolder, datasets[runIndex]);
                var mspFile   = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, datasets[runIndex]);
                var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, datasets[runIndex]);

                var run      = PbfLcMsRun.GetLcMsRun(rawFile);
                var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);
                var features = LcMsFeatureAlignment.LoadProMexResult(runIndex, ms1FtFile, run);

                // The protein name is used as the protein id.
                foreach (var prsm in prsmList)
                {
                    prsm.ProteinId = prsm.ProteinName;
                }

                // Tag features by PrSMs.
                foreach (var feature in features)
                {
                    var massTol = tolerance.GetToleranceAsTh(feature.Mass);

                    foreach (var prsm in prsmList)
                    {
                        var scanInsideFeature = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                        if (!scanInsideFeature)
                        {
                            continue;
                        }

                        if (Math.Abs(feature.Mass - prsm.Mass) < massTol)
                        {
                            feature.ProteinSpectrumMatches.Add(prsm);
                        }
                    }
                }

                alignment.AddDataSet(runIndex, features, run);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            // Filling of missing features is currently disabled:
            // for (var i = 0; i < nDataset; i++)
            // {
            //     alignment.FillMissingFeatures(i);
            //     Console.WriteLine("{0} has been processed", runLabels[i]);
            // }

            OutputCrossTabWithId(outFilePath, alignment, runLabels);
        }
예제 #8
0
        /// <summary>
        /// For every spike-in dataset whose raw (.pbf) and feature (.ms1ft) files both exist, prints
        /// the dataset index and the summed peak intensity over the scans from
        /// <c>GetMs1ScanVector()</c> followed by those from <c>GetScanNumbers(2)</c>.
        /// The alignment itself is currently commented out.
        /// </summary>
        public void TestCptacSpikeIn()
        {
            const string featureFolder = @"D:\MassSpecFiles\CPTAC_spike_in\promex";
            const string rawFolder = @"D:\MassSpecFiles\CPTAC_spike_in\raw";

            // Only the disabled alignment steps below use these two locals.
            var outFilePath = string.Format(@"{0}\aligned_features.tsv", featureFolder);
            var align = new LcMsFeatureAlignment(new LcMsFeatureAlignComparer(new Tolerance(10)));

            for (var datasetIndex = 0; datasetIndex < spikeDatasets.Length; datasetIndex++)
            {
                var featureFilePath = string.Format(@"{0}\{1}.ms1ft", featureFolder, spikeDatasets[datasetIndex]);
                var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, spikeDatasets[datasetIndex]);

                // The raw file is checked first; the feature file is only probed when the raw file exists.
                var missingPath = !File.Exists(rawFile)
                    ? rawFile
                    : (!File.Exists(featureFilePath) ? featureFilePath : null);

                if (missingPath != null)
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", missingPath);
                    continue;
                }

                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                // A single running total, accumulated scan by scan over both scan lists.
                var totalIntensity = 0d;

                foreach (var scanNum in run.GetMs1ScanVector())
                {
                    totalIntensity += run.GetSpectrum(scanNum).Peaks.Sum(p => p.Intensity);
                }

                foreach (var scanNum in run.GetScanNumbers(2))
                {
                    totalIntensity += run.GetSpectrum(scanNum).Peaks.Sum(p => p.Intensity);
                }

                Console.WriteLine("{0}\t{1}", datasetIndex, totalIntensity);
                //var features = LcMsFeatureAlignment.LoadProMexResult(i, featureFilePath, run);

                //align.AddDataSet(i, features, run);
            }
            //align.AlignFeatures();
            //Console.WriteLine("# of aligned features = {0}", align.CountAlignedFeatures);
            //align.RefineAbundance();
            //OutputAlignmentResult(align, outFilePath, spikeDatasets);
        }
예제 #9
0
        /// <summary>
        /// Prints, per spike-in dataset, its index and the total peak intensity summed over the scans
        /// returned by <c>GetMs1ScanVector()</c> and then by <c>GetScanNumbers(2)</c>. Datasets with a
        /// missing .pbf or .ms1ft file are skipped with a warning. The alignment steps are commented out.
        /// </summary>
        public void TestCptacSpikeIn()
        {
            const string featureFolder = @"D:\MassSpecFiles\CPTAC_spike_in\promex";
            const string rawFolder     = @"D:\MassSpecFiles\CPTAC_spike_in\raw";

            // Only the disabled alignment steps below use these two locals.
            var outFilePath = string.Format(@"{0}\aligned_features.tsv", featureFolder);
            var align       = new LcMsFeatureAlignment(new LcMsFeatureAlignComparer(new Tolerance(10)));

            for (var idx = 0; idx < spikeDatasets.Length; idx++)
            {
                var featureFilePath = string.Format(@"{0}\{1}.ms1ft", featureFolder, spikeDatasets[idx]);
                var rawFile         = string.Format(@"{0}\{1}.pbf", rawFolder, spikeDatasets[idx]);

                // Find the first required file that is absent (raw file first, then feature file).
                string skippedFile = null;
                if (!File.Exists(rawFile))
                {
                    skippedFile = rawFile;
                }
                else if (!File.Exists(featureFilePath))
                {
                    skippedFile = featureFilePath;
                }

                if (skippedFile != null)
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", skippedFile);
                    continue;
                }

                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                // One accumulator shared by both scan lists, added to scan by scan.
                var intensitySum = 0d;

                foreach (var scan in run.GetMs1ScanVector())
                {
                    var spectrum = run.GetSpectrum(scan);
                    intensitySum += spectrum.Peaks.Sum(peak => peak.Intensity);
                }

                foreach (var scan in run.GetScanNumbers(2))
                {
                    var spectrum = run.GetSpectrum(scan);
                    intensitySum += spectrum.Peaks.Sum(peak => peak.Intensity);
                }

                Console.WriteLine("{0}\t{1}", idx, intensitySum);
                //var features = LcMsFeatureAlignment.LoadProMexResult(i, featureFilePath, run);

                //align.AddDataSet(i, features, run);
            }
            //align.AlignFeatures();
            //Console.WriteLine("# of aligned features = {0}", align.CountAlignedFeatures);
            //align.RefineAbundance();
            //OutputAlignmentResult(align, outFilePath, spikeDatasets);
        }
예제 #10
0
        /// <summary>
        /// Checks that clustering two feature files through the MultiAlign-to-ProMex adapters yields
        /// the same number of clusters, and the same number of multi-member clusters, as aligning
        /// the same files with <c>LcMsFeatureAlignment</c> directly.
        /// </summary>
        public void CompareClustering()
        {
            // Cluster using MultiAlign to Promex adapters
            var provider = new ScanSummaryProviderCache();
            var reader1  = provider.GetScanSummaryProvider(pbf1, 0) as InformedProteomicsReader;
            var reader2  = provider.GetScanSummaryProvider(pbf2, 1) as InformedProteomicsReader;

            // "as" yields null when the provider has a different type. Fail right here with a clear
            // message instead of handing a null reader to PromexFileReader.
            Assert.IsNotNull(reader1, "Scan summary provider for dataset 0 is not an InformedProteomicsReader.");
            Assert.IsNotNull(reader2, "Scan summary provider for dataset 1 is not an InformedProteomicsReader.");

            var promexFileReader1 = new PromexFileReader(reader1, 0);
            var features1         = promexFileReader1.ReadFile(ms1ft1);

            var promexFileReader2 = new PromexFileReader(reader2, 1);
            var features2         = promexFileReader2.ReadFile(ms1ft2);

            var features = new List<UMCLight>();
            features.AddRange(features1);
            features.AddRange(features2);

            var clusterer = new PromexClusterer
            {
                Readers = provider,
            };
            var clusters     = clusterer.Cluster(features);
            var clusterCount = clusters.Count(c => c.UmcList.Count > 1);

            // Cluster using only ProMex
            var lcmsRun1 = PbfLcMsRun.GetLcMsRun(pbf1);
            var lcmsRun2 = PbfLcMsRun.GetLcMsRun(pbf2);

            var aligner = new LcMsFeatureAlignment(new LcMsFeatureAlignComparer(new Tolerance(10, ToleranceUnit.Ppm)));

            var promexFeatures1 = LcMsFeatureAlignment.LoadProMexResult(0, ms1ft1, lcmsRun1);
            aligner.AddDataSet(0, promexFeatures1, lcmsRun1);

            var promexFeatures2 = LcMsFeatureAlignment.LoadProMexResult(1, ms1ft2, lcmsRun2);
            aligner.AddDataSet(1, promexFeatures2, lcmsRun2);

            aligner.AlignFeatures();
            var promexClusters = aligner.GetAlignedFeatures();

            // An aligned group counts as a real cluster when more than one of its slots is filled.
            var promexClusterCount = promexClusters.Count(c => c.Count(f => f != null) > 1);

            Assert.AreEqual(clusters.Count, promexClusters.Count);
            Assert.AreEqual(clusterCount, promexClusterCount);
        }
예제 #11
0
        /// <summary>
        /// Aligns the features of the given feature files and writes the result twice: once to
        /// "<paramref name="outFilePath"/>.tmp" straight after alignment, and once to
        /// <paramref name="outFilePath"/> after <c>RefineAbundance()</c> has run.
        /// </summary>
        /// <param name="ms1FtFiles">Feature files handed to the alignment.</param>
        /// <param name="rawFiles">Raw files; a <c>PbfLcMsRun</c> is opened for each, in order.</param>
        /// <param name="outFilePath">Path of the final output file.</param>
        private void RunFeatureAlignment(IList<string> ms1FtFiles, IReadOnlyList<string> rawFiles, string outFilePath)
        {
            // Open one run per raw file, preserving the order of rawFiles.
            var runs = new List<LcMsRun>(rawFiles.Count);
            for (var fileIndex = 0; fileIndex < rawFiles.Count; fileIndex++)
            {
                runs.Add(new PbfLcMsRun(rawFiles[fileIndex]));
            }

            var comparer = new LcMsFeatureAlignComparer(new Tolerance(10));
            var align    = new LcMsFeatureAlignment(ms1FtFiles, runs, comparer);

            align.AlignFeatures();
            Console.WriteLine("# of aligned features = {0}", align.CountAlignedFeatures);

            // Snapshot before abundance refinement.
            OutputAlignmentResult(align, outFilePath + ".tmp", rawFiles, true);

            // Final result after abundance refinement.
            align.RefineAbundance();
            OutputAlignmentResult(align, outFilePath, rawFiles, false);
        }
예제 #12
0
        /// <summary>
        /// Aligns the "_isos.tsv" feature files of ten CPTAC replicate datasets and writes the
        /// aligned features to "aligned_features.tsv" in the feature folder. Datasets whose raw or
        /// feature file is missing are skipped with a warning.
        /// </summary>
        public void TestCptac10Replicates()
        {
            const string featureFolder = @"D:\MassSpecFiles\CPTAC_rep10\icr2ls";
            const string rawFolder     = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_1";

            var outFilePath = string.Format(@"{0}\aligned_features.tsv", featureFolder);
            var align       = new LcMsFeatureAlignment(new LcMsFeatureAlignComparer(new Tolerance(10)));

            var dataNames = new string[10];

            for (var rep = 0; rep < dataNames.Length; rep++)
            {
                // Replicate numbers in the file names are 1-based. The name is recorded even when
                // the dataset ends up being skipped.
                var name = string.Format(@"CPTAC_Intact_rep{0}_15Jan15_Bane_C2-14-08-02RZ", rep + 1);
                dataNames[rep] = name;

                var featureFilePath = string.Format(@"{0}\{1}_isos.tsv", featureFolder, name);
                var rawFile         = string.Format(@"{0}\{1}.pbf", rawFolder, name);

                // The raw file is checked first; the feature file is only probed when the raw file exists.
                var missingPath = !File.Exists(rawFile)
                    ? rawFile
                    : (!File.Exists(featureFilePath) ? featureFilePath : null);

                if (missingPath != null)
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", missingPath);
                    continue;
                }

                var run      = PbfLcMsRun.GetLcMsRun(rawFile);
                var features = LcMsFeatureAlignment.LoadProMexResult(rep, featureFilePath, run);

                align.AddDataSet(rep, features, run);
            }

            align.AlignFeatures();
            Console.WriteLine("# of aligned features = {0}", align.CountAlignedFeatures);
            //var tempOutPath = outFilePath + ".tmp";
            //OutputAlignmentResult(align, tempOutPath, rawFiles, true);
            //align.RefineAbundance();
            OutputAlignmentResult(align, outFilePath, dataNames);
        }
예제 #13
0
        /// <summary>
        /// Checks that reading a feature file through the MultiAlign-to-ProMex adapter yields the
        /// same features, in the same order, as <c>LcMsFeatureAlignment.LoadProMexResult</c>:
        /// mass, NET, scan range and abundance are compared for every feature.
        /// </summary>
        public void CompareFileReading()
        {
            // Read using MultiAlign to Promex adapters
            var provider = new ScanSummaryProviderCache();
            var reader1  = provider.GetScanSummaryProvider(pbf1, 0) as InformedProteomicsReader;

            // "as" yields null when the provider has a different type. Fail right here with a clear
            // message instead of handing a null reader to PromexFileReader.
            Assert.IsNotNull(reader1, "Scan summary provider for dataset 0 is not an InformedProteomicsReader.");

            var promexFileReader = new PromexFileReader(reader1, 0);
            var features         = promexFileReader.ReadFile(ms1ft1).ToList();

            // Read the same file directly.
            var lcmsRun        = PbfLcMsRun.GetLcMsRun(pbf1);
            var promexFeatures = LcMsFeatureAlignment.LoadProMexResult(0, ms1ft1, lcmsRun).ToList();

            // The element-wise comparison below relies on both lists having equal length.
            Assert.AreEqual(features.Count, promexFeatures.Count);

            for (var i = 0; i < features.Count; i++)
            {
                var adapted = features[i];
                var direct  = promexFeatures[i];

                Assert.AreEqual(adapted.MassMonoisotopic, direct.Mass);
                ////Assert.AreEqual(features[i].Mz, promexFeatures[i].RepresentativeMz);
                Assert.AreEqual(adapted.Net, direct.Net);
                Assert.AreEqual(adapted.ScanStart, direct.MinScanNum);
                Assert.AreEqual(adapted.ScanEnd, direct.MaxScanNum);
                Assert.AreEqual(adapted.Abundance, direct.Abundance);
            }
        }
예제 #14
0
        /// <summary>
        /// For each dataset returned by <c>GetDataSetNamesStudy3</c>, collects the PrSMs that no
        /// loaded feature covers (scan strictly inside the feature's scan range and mass within
        /// tolerance) and passes them to <c>FeatureFind</c> with a ".seqtag.ms1ft" output path.
        /// Datasets whose output file already exists are skipped.
        /// </summary>
        public void FindMissingLcMsFeatures()
        {
            var mspfFolder  = @"D:\MassSpecFiles\CompRef_Kelleher\Study3";
            var ms1ftFolder = @"D:\MassSpecFiles\CompRef_Kelleher\Study3";

            const int maxFrac1 = 3;
            const int maxFrac2 = 5;

            for (var frac1 = 1; frac1 <= maxFrac1; frac1++)
            {
                for (var frac2 = 1; frac2 <= maxFrac2; frac2++)
                {
                    var datasets = GetDataSetNamesStudy3(frac1, frac2);
                    //var outFilePath = string.Format(@"D:\MassSpecFiles\CompRef_Kelleher\study3_GFrep{0}_Gfrac{1}.tsv", frac1.ToString("D2"), frac2.ToString("D2"));
                    var datasetCount = datasets.Count;
                    var prsmReader   = new ProteinSpectrumMatchReader();
                    var tolerance    = new Tolerance(12);

                    for (var dataIndex = 0; dataIndex < datasetCount; dataIndex++)
                    {
                        var rawFile   = string.Format(@"{0}\{1}.pbf", PbfPath, datasets[dataIndex]);
                        var mspFile   = string.Format(@"{0}\{1}_IcTda.tsv", mspfFolder, datasets[dataIndex]);
                        var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", ms1ftFolder, datasets[dataIndex]);
                        var outPath   = string.Format(@"{0}\{1}.seqtag.ms1ft", ms1ftFolder, datasets[dataIndex]);

                        // An existing output file means this dataset needs no further work.
                        if (File.Exists(outPath))
                        {
                            continue;
                        }

                        var run      = PbfLcMsRun.GetLcMsRun(rawFile);
                        var features = LcMsFeatureAlignment.LoadProMexResult(dataIndex, ms1FtFile, run);
                        var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

                        // covered[k] becomes true once at least one feature claims prsmList[k].
                        var covered = new bool[prsmList.Count];

                        foreach (var feature in features)
                        {
                            var massTol = tolerance.GetToleranceAsTh(feature.Mass);

                            for (var k = 0; k < prsmList.Count; k++)
                            {
                                var prsm = prsmList[k];

                                var scanInsideFeature = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                                if (!scanInsideFeature)
                                {
                                    continue;
                                }

                                if (Math.Abs(feature.Mass - prsm.Mass) < massTol)
                                {
                                    feature.ProteinSpectrumMatches.Add(prsm);
                                    covered[k] = true;
                                }
                            }
                        }

                        // Everything left uncovered is handed to feature finding.
                        var missingPrsm = new List<ProteinSpectrumMatch>();
                        for (var k = 0; k < prsmList.Count; k++)
                        {
                            if (covered[k])
                            {
                                continue;
                            }

                            missingPrsm.Add(prsmList[k]);
                        }

                        FeatureFind(missingPrsm, run, outPath);
                        Console.WriteLine(outPath);
                    }
                }
            }
        }
예제 #15
0
        /// <summary>
        /// Clusters features across datasets with <c>LcMsFeatureAlignment</c> and converts every
        /// aligned group that holds at least one feature into a <see cref="UMCClusterLight"/>.
        /// </summary>
        /// <param name="data">Features of all datasets; <c>GroupId</c> is used as the dataset key.</param>
        /// <param name="progress">
        /// Optional progress sink handed to the abundance refinement step; a fresh
        /// <see cref="Progress{T}"/> is used when null.
        /// </param>
        /// <returns>
        /// The clusters, with ids assigned sequentially from 0; an empty list when
        /// <paramref name="data"/> is empty.
        /// </returns>
        public List<UMCClusterLight> Cluster(List<UMCLight> data, IProgress<ProgressData> progress = null)
        {
            progress = progress ?? new Progress<ProgressData>();

            if (data.Count == 0)
            {
                return new List<UMCClusterLight>();
            }

            this.maxFeatureId = data.Max(d => d.Id);

            // Index every input feature by (dataset, feature id). Add throws on a duplicate key.
            this.featureMap = new Dictionary<Tuple<int, int>, UMCLight>();
            foreach (var feature in data)
            {
                var key = new Tuple<int, int>(feature.GroupId, feature.Id);
                this.featureMap.Add(key, feature);
            }

            var lcmsFeatureAligner =
                new LcMsFeatureAlignment(new LcMsFeatureAlignComparer(new Tolerance(10, ToleranceUnit.Ppm)));

            // Group features by dataset (single dictionary lookup per feature).
            var idToFeatures = new Dictionary<int, List<UMCLight>>();
            foreach (var umcLight in data)
            {
                List<UMCLight> datasetFeatures;
                if (!idToFeatures.TryGetValue(umcLight.GroupId, out datasetFeatures))
                {
                    datasetFeatures = new List<UMCLight>();
                    idToFeatures.Add(umcLight.GroupId, datasetFeatures);
                }

                datasetFeatures.Add(umcLight);
            }

            // Convert UMCLights to InformedProteomics LcMsFeatures
            foreach (var ds in idToFeatures)
            {
                var lcmsFeatures = new List<LcMsFeature>(ds.Value.Select(this.GetLcMsFeature));
                lcmsFeatureAligner.AddDataSet(ds.Key, lcmsFeatures, this.GetLcMsRun(ds.Key));
            }

            // Perform clustering
            lcmsFeatureAligner.AlignFeatures();

            // Fill in missing features using noise.
            lcmsFeatureAligner.RefineAbundance(-30, progress);

            var clusteredFeatures = lcmsFeatureAligner.GetAlignedFeatures();

            // Convert InformedProteomics clusters to UMCClusterLight
            var clustId  = 0;
            var clusters = new List<UMCClusterLight>();

            foreach (var cluster in clusteredFeatures)
            {
                // Groups without a single feature produce no cluster and consume no cluster id.
                if (cluster.All(f => f == null))
                {
                    continue;
                }

                var umcCluster = new UMCClusterLight
                {
                    Id = clustId++,
                };

                // Promex doesn't keep track of which dataset noise features belong to, so we need to.
                // The dataset id is the slot position inside the aligned group, so it must advance
                // for empty (null) slots as well; otherwise every feature after a gap would be
                // attributed to the wrong dataset.
                // NOTE(review): assumes slots are ordered by dataset — confirm against LcMsFeatureAlignment.
                var datasetId = -1;
                foreach (var feature in cluster)
                {
                    datasetId++;
                    if (feature == null)
                    {
                        continue;
                    }

                    feature.DataSetId = datasetId;
                    var umc = this.GetUMC(feature);
                    umcCluster.AddChildFeature(umc);
                    umc.SetParentFeature(umcCluster);
                }

                umcCluster.CalculateStatistics(ClusterCentroidRepresentation.Median);
                clusters.Add(umcCluster);
            }

            return clusters;
        }
예제 #16
0
        /// <summary>
        /// Writes the aligned features to a tab-delimited cross-tab file. Each row is one aligned
        /// feature group: median mass and elution times, per-dataset abundance and MS1 score, the
        /// identification columns of the first protein-spectrum match found in the group, and the
        /// per-dataset spectral counts.
        /// </summary>
        /// <param name="outputFilePath">Path of the file to write.</param>
        /// <param name="alignment">Alignment whose <c>GetAlignedFeatures()</c> result is written.</param>
        /// <param name="runLabels">
        /// One label per dataset, used as the prefix of the per-dataset column names. Every aligned
        /// group is indexed from 0 to <c>runLabels.Length - 1</c>.
        /// </param>
        public void OutputCrossTabWithId(string outputFilePath, LcMsFeatureAlignment alignment, string[] runLabels)
        {
            var nDataset = runLabels.Length;

            // Identification columns in output order; rows without a match get one blank per column.
            var idColumns = new[]
            {
                "Pre", "Sequence", "Post", "Modifications", "SequenceText",
                "ProteinName", "ProteinDesc", "ProteinLength", "Start", "End"
            };

            // The using block releases the file handle even when a write fails part-way through.
            using (var writer = new StreamWriter(outputFilePath))
            {
                // Header line
                writer.Write("MonoMass\tMinElutionTime\tMaxElutionTime");

                foreach (var dataName in runLabels)
                {
                    writer.Write("\t");
                    writer.Write(dataName + "_Abundance");
                }

                foreach (var dataName in runLabels)
                {
                    writer.Write("\t");
                    writer.Write(dataName + "_Ms1Score");
                }

                foreach (var column in idColumns)
                {
                    writer.Write("\t");
                    writer.Write(column);
                }

                foreach (var dataName in runLabels)
                {
                    writer.Write("\t");
                    writer.Write(dataName + "_SpectraCount");
                }
                writer.Write("\n");

                var alignedFeatureList = alignment.GetAlignedFeatures();

                for (var j = 0; j < alignedFeatureList.Count; j++)
                {
                    var features = alignedFeatureList[j];

                    // Group-level values are medians over the datasets that actually have the feature.
                    var present        = features.Where(f => f != null).ToList();
                    var mass           = present.Select(f => f.Mass).Median();
                    var minElutionTime = present.Select(f => f.MinElutionTime).Median();
                    var maxElutionTime = present.Select(f => f.MaxElutionTime).Median();
                    writer.Write(mass);
                    writer.Write("\t");
                    writer.Write(minElutionTime);
                    writer.Write("\t");
                    writer.Write(maxElutionTime);

                    for (var i = 0; i < nDataset; i++)
                    {
                        writer.Write("\t");
                        writer.Write(features[i] == null ? 0 : features[i].Abundance);
                    }

                    for (var i = 0; i < nDataset; i++)
                    {
                        writer.Write("\t");
                        writer.Write(features[i] == null ? 0 : features[i].Score);
                    }

                    // First match of the first feature (in dataset order) that has any matches.
                    var prsm = (from f in features
                                where f != null && f.ProteinSpectrumMatches != null && f.ProteinSpectrumMatches.Count > 0
                                select f.ProteinSpectrumMatches[0]).FirstOrDefault();

                    if (prsm == null)
                    {
                        for (var k = 0; k < idColumns.Length; k++)
                        {
                            writer.Write("\t");
                            writer.Write(" ");
                        }
                    }
                    else
                    {
                        writer.Write("\t");
                        writer.Write(prsm.Pre);
                        writer.Write("\t");
                        writer.Write(prsm.Sequence);
                        writer.Write("\t");
                        writer.Write(prsm.Post);
                        writer.Write("\t");
                        writer.Write(prsm.Modifications);
                        writer.Write("\t");
                        writer.Write(prsm.SequenceText);
                        writer.Write("\t");
                        writer.Write(prsm.ProteinName);
                        writer.Write("\t");
                        writer.Write(prsm.ProteinDesc);
                        writer.Write("\t");
                        writer.Write(prsm.ProteinLength);
                        writer.Write("\t");
                        writer.Write(prsm.FirstResidue);
                        writer.Write("\t");
                        writer.Write(prsm.LastResidue);
                    }

                    // Spectral count from ms2. A feature without a match set counts as zero,
                    // mirroring the null check used when selecting the match above.
                    for (var i = 0; i < nDataset; i++)
                    {
                        var feature = features[i];
                        var spectraCount = (feature == null || feature.ProteinSpectrumMatches == null)
                            ? 0
                            : feature.ProteinSpectrumMatches.Count;

                        writer.Write("\t");
                        writer.Write(spectraCount);
                    }
                    writer.Write("\n");
                }
            }
        }
예제 #17
0
        /// <summary>
        /// Loads the ProMex features stored in <paramref name="fileLocation"/> and converts each of them
        /// into a <see cref="UMCLight"/>. For every charge state from the feature's MinCharge to its
        /// MaxCharge two <see cref="MSFeatureLight"/> children are attached: one at the first scan / MinNet
        /// and one at the last scan / MaxNet.
        /// </summary>
        /// <param name="fileLocation">Path handed to <c>LcMsFeatureAlignment.LoadProMexResult</c>.</param>
        /// <returns>One <see cref="UMCLight"/> per loaded feature, in load order.</returns>
        public IEnumerable <UMCLight> ReadFile(string fileLocation)
        {
            var promexFeatures = LcMsFeatureAlignment.LoadProMexResult(
                this.datasetId,
                fileLocation,
                this.reader.LcMsRun);

            var converted = new List<UMCLight>(promexFeatures.Count);

            // Ids are running counters across the whole file, not per feature.
            var nextUmcId       = 0;
            var nextMsFeatureId = 0;

            foreach (var promexFeature in promexFeatures)
            {
                // Midpoint of the charge range; also used to derive the m/z stored on the parent.
                var midCharge = (promexFeature.MinCharge + promexFeature.MaxCharge) / 2;
                var parentMz  = (promexFeature.Mass + (midCharge * Constants.Proton)) / midCharge;

                var parent = new UMCLight
                {
                    Id                      = nextUmcId++,
                    GroupId                 = this.datasetId,
                    ScanStart               = promexFeature.MinScanNum,
                    ScanEnd                 = promexFeature.MaxScanNum,
                    Abundance               = promexFeature.Abundance,
                    AbundanceSum            = promexFeature.Abundance,
                    ChargeState             = midCharge,
                    MinCharge               = promexFeature.MinCharge,
                    MaxCharge               = promexFeature.MaxCharge,
                    Net                     = promexFeature.Net,
                    NetAligned              = promexFeature.Net,
                    NetStart                = promexFeature.MinNet,
                    NetEnd                  = promexFeature.MaxNet,
                    MassMonoisotopic        = promexFeature.Mass,
                    MassMonoisotopicAligned = promexFeature.Mass,
                    Mz                      = parentMz
                };

                for (int charge = promexFeature.MinCharge; charge <= promexFeature.MaxCharge; charge++)
                {
                    // First pass adds the start-of-elution point, second pass the end-of-elution point.
                    // NOTE(review): children carry the parent's m/z rather than one computed from
                    // their own charge - confirm this is intended.
                    foreach (var atElutionEnd in new[] { false, true })
                    {
                        var child = new MSFeatureLight
                        {
                            Id               = nextMsFeatureId++,
                            GroupId          = this.datasetId,
                            Scan             = atElutionEnd ? promexFeature.MaxScanNum : promexFeature.MinScanNum,
                            Abundance        = promexFeature.Abundance,
                            ChargeState      = charge,
                            Net              = atElutionEnd ? promexFeature.MaxNet : promexFeature.MinNet,
                            MassMonoisotopic = promexFeature.Mass,
                            Mz               = parentMz
                        };

                        parent.AddChildFeature(child);
                    }
                }

                // CalculateStatistics is not invoked on the new feature here.
                converted.Add(parent);
            }

            return converted;
        }
예제 #18
0
        /// <summary>
        /// Program entry point. Parses the dataset info file named by the first argument, loads the
        /// ProMex features of every dataset whose files exist (tagging them with MSPathFinder matches
        /// when an identification file is present), aligns the features across datasets and writes
        /// the cross-tab to "{input name}_crosstab.tsv" in the input file's directory.
        /// </summary>
        /// <param name="args">Command line arguments; the first one is the dataset info file path.</param>
        public static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                ShowSyntax();
                return;
            }

            // Parse file
            var inputFilePath = args[0];

            if (!File.Exists(inputFilePath))
            {
                ConsoleMsgUtils.ShowError("File not found: " + inputFilePath);
                return;
            }

            var datasets = DatasetInfo.ParseDatasetInfoFile(inputFilePath);

            if (datasets.Count == 0)
            {
                ConsoleMsgUtils.ShowError("No valid data found in the dataset info file");
                ShowSyntax();
                return;
            }

            var fileName  = Path.GetFileNameWithoutExtension(inputFilePath);
            var directory = Path.GetDirectoryName(inputFilePath);

            var crosstabFilename = string.Format("{0}_crosstab.tsv", fileName);

            // A bare file name has no directory part; write next to it in the working directory.
            var outputfilePath = string.IsNullOrWhiteSpace(directory)
                ? crosstabFilename
                : Path.Combine(directory, crosstabFilename);

            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance  = new Tolerance(100);
            var alignment  = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

            // Labels of the datasets actually added to the alignment, in dataset-id order, and of
            // the ones skipped because a file was missing. Keeping them separate keeps the output
            // columns and the progress messages in step with the alignment's dataset indices.
            var loadedLabels  = new System.Collections.Generic.List<string>();
            var skippedLabels = new System.Collections.Generic.List<string>();

            foreach (var dataset in datasets)
            {
                if (!File.Exists(dataset.RawFilePath))
                {
                    ConsoleMsgUtils.ShowError("Instrument file not found: " + dataset.RawFilePath);
                    skippedLabels.Add(dataset.Label);
                    continue;
                }

                if (!File.Exists(dataset.Ms1FtFilePath))
                {
                    ConsoleMsgUtils.ShowError("ProMex results file not found: " + dataset.Ms1FtFilePath);
                    skippedLabels.Add(dataset.Label);
                    continue;
                }

                // Zero-based id of this dataset; the same value is used to load the features and
                // to register them with the alignment.
                var dataId = loadedLabels.Count;

                Console.WriteLine("Opening " + dataset.RawFilePath);
                var run = PbfLcMsRun.GetLcMsRun(dataset.RawFilePath, 0, 0);

                Console.WriteLine("Opening " + dataset.Ms1FtFilePath);
                var features = LcMsFeatureAlignment.LoadProMexResult(dataId, dataset.Ms1FtFilePath, run);

                if (!string.IsNullOrWhiteSpace(dataset.MsPfIdFilePath) && File.Exists(dataset.MsPfIdFilePath))
                {
                    Console.WriteLine("Opening " + dataset.MsPfIdFilePath);
                    var prsmList = prsmReader.LoadIdentificationResult(dataset.MsPfIdFilePath, ProteinSpectrumMatch.SearchTool.MsPathFinder);

                    foreach (var match in prsmList)
                    {
                        match.ProteinId = match.ProteinName;
                    }

                    // tag features by PrSMs: a match belongs to a feature when its scan lies strictly
                    // inside the feature's scan range and its mass is within the tolerance
                    foreach (var feature in features)
                    {
                        var massTol = tolerance.GetToleranceAsMz(feature.Mass);
                        foreach (var match in prsmList)
                        {
                            if (feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum && Math.Abs(feature.Mass - match.Mass) < massTol)
                            {
                                feature.ProteinSpectrumMatches.Add(match);
                            }
                        }
                    }
                }

                alignment.AddDataSet(dataId, features, run);
                loadedLabels.Add(dataset.Label);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            foreach (var label in skippedLabels)
            {
                ConsoleMsgUtils.ShowWarning(string.Format("Could not align {0}; features not found", label));
            }

            for (var datasetIndex = 0; datasetIndex < loadedLabels.Count; datasetIndex++)
            {
                alignment.FillMissingFeatures(datasetIndex);
                Console.WriteLine("{0} has been processed", loadedLabels[datasetIndex]);
            }

            // Only write a cross-tab when at least one dataset made it into the alignment.
            if (loadedLabels.Count > 0)
            {
                OutputCrossTabWithId(outputfilePath, alignment, loadedLabels.ToArray());
            }
        }
예제 #19
0
        /// <summary>
        /// Program entry point. Parses the dataset info file named by the first argument, loads the
        /// ProMex features of every dataset (tagging them with MSPathFinder matches when the
        /// identification file exists), aligns the features across datasets and writes the cross-tab
        /// to "{input name}_crosstab.tsv" in the input file's directory.
        /// </summary>
        /// <param name="args">Command line arguments; the first one is the dataset info file path.</param>
        public static void Main(string[] args)
        {
            // Without an input file there is nothing to do; report it instead of failing on args[0].
            if (args == null || args.Length == 0)
            {
                Console.WriteLine("Error: the path to the dataset info file must be given as the first argument");
                return;
            }

            // Parse file
            var inputFilePath = args[0];
            var datasets      = DatasetInfo.ParseDatasetInfoFile(inputFilePath);

            var fileName       = Path.GetFileNameWithoutExtension(inputFilePath);
            var directory      = Path.GetDirectoryName(inputFilePath);
            var outputfilePath = Path.Combine(directory, string.Format("{0}_crosstab.tsv", fileName));

            int nDataset   = datasets.Count;
            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance  = new Tolerance(100);
            var alignment  = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

            // Zero-based id of the dataset being processed; the same value is used to load the
            // features and to register them with the alignment.
            int dataId = 0;

            foreach (var dataset in datasets)
            {
                var run      = PbfLcMsRun.GetLcMsRun(dataset.RawFilePath, 0, 0);
                var features = LcMsFeatureAlignment.LoadProMexResult(dataId, dataset.Ms1FtFilePath, run);

                if (File.Exists(dataset.MsPfIdFilePath))
                {
                    var prsmList = prsmReader.LoadIdentificationResult(dataset.MsPfIdFilePath, ProteinSpectrumMatch.SearchTool.MsPathFinder);

                    foreach (var match in prsmList)
                    {
                        match.ProteinId = match.ProteinName;
                    }

                    // tag features by PrSMs: a match belongs to a feature when its scan lies strictly
                    // inside the feature's scan range and its mass is within the tolerance
                    foreach (LcMsFeature feature in features)
                    {
                        var massTol = tolerance.GetToleranceAsMz(feature.Mass);
                        foreach (var match in prsmList)
                        {
                            if (feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum && Math.Abs(feature.Mass - match.Mass) < massTol)
                            {
                                feature.ProteinSpectrumMatches.Add(match);
                            }
                        }
                    }
                }

                alignment.AddDataSet(dataId, features, run);
                dataId++;
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            for (var i = 0; i < nDataset; i++)
            {
                alignment.FillMissingFeatures(i);
                Console.WriteLine("{0} has been processed", datasets[i].Label);
            }

            OutputCrossTabWithId(outputfilePath, alignment, datasets.Select(ds => ds.Label).ToArray());
        }
예제 #20
0
        /// <summary>
        /// Loads the ProMex features (regular and ".seqtag" files) of every named dataset, tags them
        /// with MSPathFinder matches when the "_IcTda.tsv" file exists, aligns the features across
        /// datasets, fills in missing features and writes the cross-tab to <paramref name="outFilePath"/>.
        /// </summary>
        /// <param name="datasets">Dataset names; each name is combined with the folders to build the file paths.</param>
        /// <param name="mspfFolder">Folder holding the "{dataset}_IcTda.tsv" identification files.</param>
        /// <param name="ms1ftFolder">Folder holding the "{dataset}.ms1ft" and "{dataset}.seqtag.ms1ft" files.</param>
        /// <param name="outFilePath">Path of the cross-tab file to write.</param>
        public void AlignFeatures(List<string> datasets, string mspfFolder, string ms1ftFolder, string outFilePath)
        {
            var datasetCount = datasets.Count;
            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance = new Tolerance(12);
            var alignment = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(tolerance));

            for (var datasetIndex = 0; datasetIndex < datasetCount; datasetIndex++)
            {
                var datasetName = datasets[datasetIndex];

                var pbfFilePath = string.Format(@"{0}\{1}.pbf", PbfPath, datasetName);
                var idFilePath = string.Format(@"{0}\{1}_IcTda.tsv", mspfFolder, datasetName);
                var promexFilePath = string.Format(@"{0}\{1}.ms1ft", ms1ftFolder, datasetName);
                var seqTagPromexFilePath = string.Format(@"{0}\{1}.seqtag.ms1ft", ms1ftFolder, datasetName);

                var run = PbfLcMsRun.GetLcMsRun(pbfFilePath);

                // Both feature files feed one combined list for this dataset.
                var features = LcMsFeatureAlignment.LoadProMexResult(datasetIndex, promexFilePath, run);
                var seqTagFeatures = LcMsFeatureAlignment.LoadProMexResult(datasetIndex, seqTagPromexFilePath, run);
                features.AddRange(seqTagFeatures);

                if (File.Exists(idFilePath))
                {
                    var prsmList = prsmReader.LoadIdentificationResult(idFilePath, ProteinSpectrumMatch.SearchTool.MsPathFinder);

                    foreach (var match in prsmList)
                    {
                        match.ProteinId = match.ProteinName;
                    }

                    // Attach every match whose scan lies strictly inside the feature's scan range
                    // and whose mass is within the tolerance of the feature mass.
                    foreach (var feature in features)
                    {
                        var massTol = tolerance.GetToleranceAsTh(feature.Mass);

                        foreach (var match in prsmList)
                        {
                            var scanInsideFeature = feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum;
                            if (!scanInsideFeature)
                            {
                                continue;
                            }

                            if (Math.Abs(feature.Mass - match.Mass) < massTol)
                            {
                                feature.ProteinSpectrumMatches.Add(match);
                            }
                        }
                    }
                }

                alignment.AddDataSet(datasetIndex, features, run);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            for (var datasetIndex = 0; datasetIndex < datasetCount; datasetIndex++)
            {
                alignment.FillMissingFeatures(datasetIndex);
                Console.WriteLine("{0} has been processed", datasets[datasetIndex]);
            }

            AnalysisCompRef.OutputCrossTabWithId(outFilePath, alignment, datasets.ToArray());
        }
예제 #21
0
        /// <summary>
        /// Aligns the features of the ten CPTAC intact replicate datasets and writes the result to
        /// "aligned_features.tsv" in the feature folder. Replicates whose .pbf or _isos.tsv file is
        /// missing are reported on the console and left out of the alignment.
        /// </summary>
        public void TestCptac10Replicates()
        {
            const int replicateCount = 10;
            const string featureFolder = @"D:\MassSpecFiles\CPTAC_rep10\icr2ls";
            const string rawFolder = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_1";

            var outFilePath = string.Format(@"{0}\aligned_features.tsv", featureFolder);
            var align = new LcMsFeatureAlignment(new LcMsFeatureAlignComparer(new Tolerance(10)));
            var dataNames = new string[replicateCount];

            for (var rep = 0; rep < replicateCount; rep++)
            {
                var name = string.Format(@"CPTAC_Intact_rep{0}_15Jan15_Bane_C2-14-08-02RZ", rep + 1);
                dataNames[rep] = name;

                var featureFilePath = string.Format(@"{0}\{1}_isos.tsv", featureFolder, name);
                var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, name);

                // The raw file is checked first; the first missing file is the one reported.
                var missingFile = Array.Find(new[] { rawFile, featureFilePath }, path => !File.Exists(path));
                if (missingFile != null)
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", missingFile);
                    continue;
                }

                var run = PbfLcMsRun.GetLcMsRun(rawFile);
                align.AddDataSet(rep, LcMsFeatureAlignment.LoadProMexResult(rep, featureFilePath, run), run);
            }

            align.AlignFeatures();
            Console.WriteLine("# of aligned features = {0}", align.CountAlignedFeatures);

            // Abundance refinement is not run here; the alignment is written as-is.
            OutputAlignmentResult(align, outFilePath, dataNames);
        }
예제 #22
0
        /// <summary>
        /// Writes the aligned features as a tab-delimited table: one row per aligned feature group
        /// with three values taken from <c>GetMinMaxNet</c> followed by the abundance of the group
        /// in each dataset (0 where the dataset has no feature in the group).
        /// </summary>
        /// <param name="align">Alignment to write.</param>
        /// <param name="outFilePath">Path of the file to write.</param>
        /// <param name="dataName">Column header for each dataset, indexed by dataset position.</param>
        private void OutputAlignmentResult(LcMsFeatureAlignment align, string outFilePath, string[] dataName)
        {
            var alignedFeatureList = align.GetAlignedFeatures();

            // The using block releases the file handle even when a write fails part-way through.
            using (var writer = new StreamWriter(outFilePath))
            {
                writer.Write("MonoMass\tMinElutionTime\tMaxElutionTime");
                for (var i = 0; i < align.CountDatasets; i++)
                {
                    writer.Write("\t{0}", dataName[i]);
                }
                writer.Write("\n");

                for (var i = 0; i < align.CountAlignedFeatures; i++)
                {
                    var features = alignedFeatureList[i];
                    var minMaxNet = TestLcMsFeatureAlignment.GetMinMaxNet(features);

                    // Explicit \t escapes keep the column separators visible in the source.
                    writer.Write("{0}\t{1:0.00000}\t{2:0.00000}", minMaxNet.Item1, minMaxNet.Item3, minMaxNet.Item4);

                    for (var j = 0; j < align.CountDatasets; j++)
                    {
                        var feature = features[j];
                        writer.Write("\t");
                        writer.Write(feature != null ? feature.Abundance : 0d);
                    }
                    writer.Write("\n");
                }
            }
        }
예제 #23
0
        /// <summary>
        /// Writes the aligned features to a tab-delimited cross-tab file. Each row is one aligned
        /// feature group: median mass and elution times, per-dataset abundance and MS1 score, the
        /// identification columns of the first protein-spectrum match found in the group, and the
        /// per-dataset spectral counts. Dataset columns are named via <c>GetDataSetNames</c> for
        /// indices 0 to <c>DATASET_COUNT - 1</c>.
        /// </summary>
        /// <param name="outputFilePath">Path of the file to write.</param>
        /// <param name="alignment">Alignment whose <c>GetAlignedFeatures()</c> result is written.</param>
        private void OutputCrossTabWithId(string outputFilePath, LcMsFeatureAlignment alignment)
        {
            // Identification columns in output order; rows without a match get one blank per column.
            var idColumns = new[]
            {
                "Pre", "Sequence", "Post", "Modifications",
                "ProteinName", "ProteinDesc", "ProteinLength", "Start", "End"
            };

            using (var writer = new StreamWriter(outputFilePath))
            {
                var headerLine = new List <string>
                {
                    "MonoMass",
                    "MinElutionTime",
                    "MaxElutionTime"
                };

                for (var i = 0; i < DATASET_COUNT; i++)
                {
                    headerLine.Add(GetDataSetNames(i) + "_Abundance");
                }

                for (var i = 0; i < DATASET_COUNT; i++)
                {
                    headerLine.Add(GetDataSetNames(i) + "_Ms1Score");
                }

                headerLine.AddRange(idColumns);

                for (var i = 0; i < DATASET_COUNT; i++)
                {
                    headerLine.Add(GetDataSetNames(i) + "_SpectraCount");
                }

                writer.WriteLine(string.Join("\t", headerLine));

                var alignedFeatureList = alignment.GetAlignedFeatures();
                foreach (var features in alignedFeatureList)
                {
                    // Group-level values are medians over the datasets that actually have the feature.
                    var mass           = features.Where(f => f != null).Select(f => f.Mass).Median();
                    var minElutionTime = features.Where(f => f != null).Select(f => f.MinElutionTime).Median();
                    var maxElutionTime = features.Where(f => f != null).Select(f => f.MaxElutionTime).Median();

                    var dataLine = new List <string>
                    {
                        PRISM.StringUtilities.DblToString(mass, 4),
                        PRISM.StringUtilities.DblToString(minElutionTime, 3),
                        PRISM.StringUtilities.DblToString(maxElutionTime, 3)
                    };

                    for (var i = 0; i < DATASET_COUNT; i++)
                    {
                        if (features[i] == null)
                        {
                            dataLine.Add("0");
                        }
                        else
                        {
                            dataLine.Add(PRISM.StringUtilities.DblToString(features[i].Abundance, 2));
                        }
                    }

                    for (var i = 0; i < DATASET_COUNT; i++)
                    {
                        if (features[i] == null)
                        {
                            dataLine.Add("0");
                        }
                        else if (features[i].Score <= float.MinValue)
                        {
                            // Scores at or below float.MinValue are written as float.MinValue in scientific notation.
                            dataLine.Add(PRISM.StringUtilities.DblToStringScientific(float.MinValue, 2));
                        }
                        else
                        {
                            dataLine.Add(PRISM.StringUtilities.DblToString(features[i].Score, 3));
                        }
                    }

                    // First match of the first feature (in dataset order) that has any matches.
                    var prsm = (from f in features
                                where f?.ProteinSpectrumMatches != null && f.ProteinSpectrumMatches.Count > 0
                                select f.ProteinSpectrumMatches[0]).FirstOrDefault();

                    if (prsm == null)
                    {
                        for (var k = 0; k < idColumns.Length; k++)
                        {
                            dataLine.Add(" ");
                        }
                    }
                    else
                    {
                        dataLine.Add(prsm.Pre);
                        dataLine.Add(prsm.Sequence);
                        dataLine.Add(prsm.Post);
                        dataLine.Add(prsm.Modifications);
                        dataLine.Add(prsm.ProteinName);
                        dataLine.Add(prsm.ProteinDesc);
                        dataLine.Add(prsm.ProteinLength.ToString());
                        dataLine.Add(prsm.FirstResidue.ToString());
                        dataLine.Add(prsm.LastResidue.ToString());
                    }

                    // Spectral count from ms2. A feature without a match set counts as zero,
                    // mirroring the null check used when selecting the match above.
                    for (var i = 0; i < DATASET_COUNT; i++)
                    {
                        var spectraCount = features[i]?.ProteinSpectrumMatches?.Count ?? 0;
                        dataLine.Add(spectraCount.ToString());
                    }

                    writer.WriteLine(string.Join("\t", dataLine));
                }
            }

            Console.WriteLine("Results written to " + outputFilePath);
        }
예제 #24
0
        /// <summary>
        /// Writes the aligned features as a tab-delimited table: one row per aligned feature group
        /// with three values taken from <c>GetMinMaxNet</c>, followed by the per-dataset abundances
        /// and the per-dataset scores (0 where the dataset has no feature in the group). Unless
        /// <paramref name="isTemp"/> is set, it also writes one "{dataset}.aligned.ms1ft" file per
        /// dataset into the directory of <paramref name="outFilePath"/>.
        /// </summary>
        /// <param name="align">Alignment to write.</param>
        /// <param name="outFilePath">Path of the table file to write.</param>
        /// <param name="rawFiles">Raw file path per dataset; the extension-less file name is the dataset name.</param>
        /// <param name="isTemp">When true, only the table is written and the per-dataset files are skipped.</param>
        private void OutputAlignmentResult(LcMsFeatureAlignment align, string outFilePath, List<string> rawFiles, bool isTemp = true)
        {
            var alignedFeatureList = align.GetAlignedFeatures();

            // The using block releases the file handle even when a write fails part-way through.
            using (var writer = new StreamWriter(outFilePath))
            {
                writer.Write("MonoMass\tMinElutionTime\tMaxElutionTime");
                for (var i = 0; i < align.CountDatasets; i++)
                {
                    var dataSetName = Path.GetFileNameWithoutExtension(rawFiles[i]);
                    writer.Write("\t{0}", dataSetName);
                }

                // Score columns are labelled by dataset index rather than by dataset name.
                for (var i = 0; i < align.CountDatasets; i++)
                {
                    writer.Write("\t{0}_Score", i);
                }

                writer.Write("\n");
                for (var i = 0; i < align.CountAlignedFeatures; i++)
                {
                    var features = alignedFeatureList[i];
                    var minMaxNet = GetMinMaxNet(features);

                    // Explicit \t escapes keep the column separators visible in the source.
                    writer.Write("{0}\t{1:0.00000}\t{2:0.00000}", minMaxNet.Item1, minMaxNet.Item3, minMaxNet.Item4);

                    for (var j = 0; j < align.CountDatasets; j++)
                    {
                        var feature = features[j];
                        writer.Write("\t");
                        writer.Write(feature != null ? feature.Abundance : 0d);
                    }

                    for (var j = 0; j < align.CountDatasets; j++)
                    {
                        var feature = features[j];
                        writer.Write("\t");
                        writer.Write(feature != null ? feature.Score : 0d);
                    }

                    writer.Write("\n");
                }
            }

            if (isTemp) return;

            // Final output: one ms1ft file per dataset, its rows numbered by aligned feature group.
            var outDirectory = Path.GetDirectoryName(Path.GetFullPath(outFilePath));
            for (var i = 0; i < align.CountDatasets; i++)
            {
                var dataSetName = Path.GetFileNameWithoutExtension(rawFiles[i]);
                var ms1ftFilePath = String.Format(@"{0}\{1}.aligned.ms1ft", outDirectory, dataSetName);

                using (var ms1ftWriter = new StreamWriter(ms1ftFilePath))
                {
                    ms1ftWriter.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString());

                    for (var j = 0; j < align.CountAlignedFeatures; j++)
                    {
                        var f1 = alignedFeatureList[j][i];
                        ms1ftWriter.Write("{0}\t", j + 1);
                        ms1ftWriter.WriteLine(LcMsFeatureFinderLauncher.GetString(f1));
                    }
                }
            }
        }
예제 #25
0
        /// <summary>
        /// Aligns the features of the given ms1ft files and writes the result twice: the alignment
        /// before abundance refinement goes to "<paramref name="outFilePath"/>.tmp" (table only),
        /// the refined alignment goes to <paramref name="outFilePath"/>.
        /// </summary>
        /// <param name="ms1FtFiles">Feature files handed to the <c>LcMsFeatureAlignment</c> constructor.</param>
        /// <param name="rawFiles">Raw files; one <c>PbfLcMsRun</c> is opened per entry, in order.</param>
        /// <param name="outFilePath">Path of the final output file.</param>
        private void RunFeatureAlignment(List<string> ms1FtFiles, List<string> rawFiles, string outFilePath)
        {
            // One run per raw file, in the same order as the input list.
            var runs = rawFiles.ConvertAll<LcMsRun>(rawPath => new PbfLcMsRun(rawPath));

            var comparer = new LcMsFeatureAlignComparer(new Tolerance(10));
            var align = new LcMsFeatureAlignment(ms1FtFiles, runs, comparer);

            align.AlignFeatures();
            Console.WriteLine("# of aligned features = {0}", align.CountAlignedFeatures);

            // Snapshot of the alignment before the abundances are refined.
            var unrefinedOutPath = outFilePath + ".tmp";
            OutputAlignmentResult(align, unrefinedOutPath, rawFiles, true);

            align.RefineAbundance();
            OutputAlignmentResult(align, outFilePath, rawFiles, false);
        }
예제 #26
0
        /// <summary>
        /// Loads the ProMex features of six IMER datasets, tags them with MS-GF+ identifications
        /// when a result file exists, aligns them, fills in missing features and writes a
        /// cross-tab with identifications.
        /// </summary>
        public void TestIMERFeatureAlignment()
        {
            const string outFilePath = @"D:\MassSpecFiles\IMER\promex_crosstab.tsv";
            const string rawFolder   = @"D:\MassSpecFiles\IMER";

            var runLabels = new[] { "1", "2", "3", "4", "5", "6" };
            var nDataset  = runLabels.Length;

            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance  = new Tolerance(10);
            var alignment  = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

            for (var datasetId = 0; datasetId < nDataset; datasetId++)
            {
                var label = runLabels[datasetId];

                // File names of datasets "2" and "3" contain "14May14", all others "13May14".
                var day = (label == "2" || label == "3") ? 14 : 13;

                var rawFile   = string.Format(@"{0}\Diabetes_iPSC_Beta_{1}_IMER_{2}May14_Alder_14-01-33.pbf", rawFolder, label, day);
                var mspFile   = string.Format(@"{0}\Diabetes_iPSC_Beta_{1}_IMER_{2}May14_Alder_14-01-33_msgfdb_syn.txt", rawFolder, label, day);
                var ms1FtFile = string.Format(@"{0}\Diabetes_iPSC_Beta_{1}_IMER_{2}May14_Alder_14-01-33.ms1ft", rawFolder, label, day);

                Console.WriteLine(rawFile);
                Console.WriteLine(File.Exists(rawFile));

                var run      = PbfLcMsRun.GetLcMsRun(rawFile);
                var features = LcMsFeatureAlignment.LoadProMexResult(datasetId, ms1FtFile, run, 500, 15000);

                if (File.Exists(mspFile))
                {
                    var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsGfPlus);

                    // The protein name doubles as the protein id for these results.
                    foreach (var prsm in prsmList)
                    {
                        prsm.ProteinId = prsm.ProteinName;
                    }

                    // Tag every feature with the PrSMs that lie strictly inside its scan range
                    // and within the mass tolerance.
                    for (var j = 0; j < features.Count; j++)
                    {
                        var feature = features[j];
                        var massTol = tolerance.GetToleranceAsTh(feature.Mass);

                        foreach (var match in prsmList)
                        {
                            if (!(feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum))
                            {
                                continue;
                            }

                            if (Math.Abs(feature.Mass - match.Mass) < massTol)
                            {
                                feature.ProteinSpectrumMatches.Add(match);
                            }
                        }
                    }
                }

                alignment.AddDataSet(datasetId, features, run);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            for (var datasetId = 0; datasetId < nDataset; datasetId++)
            {
                alignment.FillMissingFeatures(datasetId);
                Console.WriteLine("{0} has been processed", runLabels[datasetId]);
            }

            OutputCrossTabWithId(outFilePath, alignment, runLabels);
        }
예제 #27
0
        /// <summary>
        /// Writes the aligned features as a tab-separated cross-tab (abundance and score per
        /// dataset) and, unless <paramref name="isTemp"/> is set, additionally writes one
        /// "&lt;dataset&gt;.aligned.ms1ft" file per dataset into the directory of the cross-tab.
        /// </summary>
        /// <param name="align">Alignment whose aligned features are written.</param>
        /// <param name="outFilePath">Path of the cross-tab file to create.</param>
        /// <param name="rawFiles">Raw file paths; the extension-less file name of entry i labels dataset i.</param>
        /// <param name="isTemp">When true only the cross-tab is written; the per-dataset files are skipped.</param>
        private void OutputAlignmentResult(LcMsFeatureAlignment align, string outFilePath, IReadOnlyList<string> rawFiles, bool isTemp = true)
        {
            var alignedFeatureList = align.GetAlignedFeatures();
            var datasetCount       = align.CountDatasets;

            // 'using' releases the file handle even when a write throws.
            using (var writer = new StreamWriter(outFilePath))
            {
                // Header: fixed columns, one abundance column per dataset, one score column per dataset.
                writer.Write("MonoMass\tMinElutionTime\tMaxElutionTime");
                for (var i = 0; i < datasetCount; i++)
                {
                    var dataSetName = Path.GetFileNameWithoutExtension(rawFiles[i]);
                    writer.Write("\t{0}", dataSetName);
                }

                for (var i = 0; i < datasetCount; i++)
                {
                    writer.Write("\t{0}_Score", i);
                }

                writer.Write("\n");

                for (var i = 0; i < align.CountAlignedFeatures; i++)
                {
                    var features  = alignedFeatureList[i];
                    var minMaxNet = GetMinMaxNet(features);

                    // Must be a regular (non-verbatim) literal: inside @"..." the sequence \t is
                    // a backslash followed by 't', not a tab, which would corrupt the row.
                    writer.Write("{0}\t{1:0.00000}\t{2:0.00000}", minMaxNet.Item1, minMaxNet.Item3, minMaxNet.Item4);

                    // Datasets without a feature in this alignment are written as 0.
                    for (var j = 0; j < datasetCount; j++)
                    {
                        var feature = features[j];
                        writer.Write("\t");
                        writer.Write(feature?.Abundance ?? 0d);
                    }

                    for (var j = 0; j < datasetCount; j++)
                    {
                        var feature = features[j];
                        writer.Write("\t");
                        writer.Write(feature?.Score ?? 0d);
                    }

                    writer.Write("\n");
                }
            }

            if (isTemp)
            {
                return;
            }

            var outDirectory = Path.GetDirectoryName(Path.GetFullPath(outFilePath));

            // One feature file per dataset; each row is prefixed with the 1-based alignment index.
            for (var i = 0; i < datasetCount; i++)
            {
                var dataSetName   = Path.GetFileNameWithoutExtension(rawFiles[i]);
                var ms1ftFilePath = Path.Combine(outDirectory, dataSetName + ".aligned.ms1ft");

                using (var featureWriter = new StreamWriter(ms1ftFilePath))
                {
                    featureWriter.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString());

                    for (var j = 0; j < align.CountAlignedFeatures; j++)
                    {
                        var alignedFeature = alignedFeatureList[j][i];
                        featureWriter.Write("{0}\t", j + 1);
                        featureWriter.WriteLine(LcMsFeatureFinderLauncher.GetString(alignedFeature));
                    }
                }
            }
        }
예제 #28
0
        /// <summary>
        /// Writes a tab-separated cross-tab of the aligned features. Each row holds the median
        /// mass and elution times, the abundance and score per dataset, the first identification
        /// found among the row's features, and the number of identifications per dataset.
        /// </summary>
        /// <remarks>
        /// The dataset count and names come from <c>NdataSet</c> and <c>GetDataSetNames</c>,
        /// which are defined outside this method.
        /// </remarks>
        /// <param name="outputFilePath">Path of the file to create.</param>
        /// <param name="alignment">Alignment whose aligned features are written.</param>
        public void OutputCrossTabWithId(string outputFilePath, LcMsFeatureAlignment alignment)
        {
            // 'using' releases the file handle even when a write throws.
            using (var writer = new StreamWriter(outputFilePath))
            {
                // ---- header ----
                writer.Write("MonoMass\tMinElutionTime\tMaxElutionTime");

                for (var i = 0; i < NdataSet; i++)
                {
                    writer.Write("\t");
                    writer.Write(GetDataSetNames(i) + "_Abundance");
                }

                for (var i = 0; i < NdataSet; i++)
                {
                    writer.Write("\t");
                    writer.Write(GetDataSetNames(i) + "_Ms1Score");
                }

                writer.Write("\tPre\tSequence\tPost\tModifications\tProteinName\tProteinDesc\tProteinLength\tStart\tEnd");

                for (var i = 0; i < NdataSet; i++)
                {
                    writer.Write("\t");
                    writer.Write(GetDataSetNames(i) + "_SpectraCount");
                }
                writer.Write("\n");

                // ---- one row per aligned feature group ----
                var alignedFeatureList = alignment.GetAlignedFeatures();
                for (var j = 0; j < alignedFeatureList.Count; j++)
                {
                    var features = alignedFeatureList[j];

                    // Medians are taken over the datasets that actually contain the feature.
                    var present        = features.Where(f => f != null).ToList();
                    var mass           = present.Select(f => f.Mass).Median();
                    var minElutionTime = present.Select(f => f.MinElutionTime).Median();
                    var maxElutionTime = present.Select(f => f.MaxElutionTime).Median();

                    writer.Write(mass);
                    writer.Write("\t");
                    writer.Write(minElutionTime);
                    writer.Write("\t");
                    writer.Write(maxElutionTime);

                    for (var i = 0; i < NdataSet; i++)
                    {
                        var feature = features[i];
                        writer.Write("\t");
                        writer.Write(feature == null ? 0 : feature.Abundance);
                    }

                    for (var i = 0; i < NdataSet; i++)
                    {
                        var feature = features[i];
                        writer.Write("\t");
                        writer.Write(feature == null ? 0 : feature.Score);
                    }

                    // First identification attached to any feature of this row, if there is one.
                    var prsm = (from f in features
                                where f != null && f.ProteinSpectrumMatches != null && f.ProteinSpectrumMatches.Count > 0
                                select f.ProteinSpectrumMatches[0]).FirstOrDefault();

                    if (prsm == null)
                    {
                        // Keep the nine identification columns, each holding a single blank.
                        for (var k = 0; k < 9; k++)
                        {
                            writer.Write("\t");
                            writer.Write(" ");
                        }
                    }
                    else
                    {
                        writer.Write("\t");
                        writer.Write(prsm.Pre);
                        writer.Write("\t");
                        writer.Write(prsm.Sequence);
                        writer.Write("\t");
                        writer.Write(prsm.Post);
                        writer.Write("\t");
                        writer.Write(prsm.Modifications);
                        writer.Write("\t");
                        writer.Write(prsm.ProteinName);
                        writer.Write("\t");
                        writer.Write(prsm.ProteinDesc);
                        writer.Write("\t");
                        writer.Write(prsm.ProteinLength);
                        writer.Write("\t");
                        writer.Write(prsm.FirstResidue);
                        writer.Write("\t");
                        writer.Write(prsm.LastResidue);
                    }

                    // Spectral count from MS2. The match collection is null-checked here as well,
                    // consistent with the query above, so a feature without one counts as 0.
                    for (var i = 0; i < NdataSet; i++)
                    {
                        var feature = features[i];
                        writer.Write("\t");
                        writer.Write(feature == null || feature.ProteinSpectrumMatches == null
                            ? 0
                            : feature.ProteinSpectrumMatches.Count);
                    }
                    writer.Write("\n");
                }
            }
        }
예제 #29
0
        /// <summary>
        /// Aligns the ProMex features of up to 32 UTEX datasets, tags them with MSAlign
        /// identifications, refines abundances and writes one "*_aligned.ms1ft" file per dataset.
        /// The test is ignored when the raw folder is missing or no dataset has all three input
        /// files (pbf, MSAlign result table, ms1ft).
        /// </summary>
        public void TestAlignFeatures()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFolder           = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
            const string promexOutFolder     = @"D:\MassSpecFiles\UTEX\MSAlign";
            const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

            if (!Directory.Exists(rawFolder))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
            }

            var nDataset = 32;
            var dataset  = new string[nDataset];

            for (var i = 0; i < nDataset; i++)
            {
                dataset[i] = string.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
            }

            var tolerance      = new Tolerance(10);
            var ftComparer     = new UtexFeatureComparer(tolerance);
            var align          = new LcMsFeatureAlignment(ftComparer);
            var prsmReader     = new ProteinSpectrumMatchReader(0.01);
            var filesProcessed = 0;

            for (var i = 0; i < dataset.Length; i++)
            {
                // A dataset is only used when all three of its input files exist.
                var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
                if (!File.Exists(rawFile))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
                    continue;
                }
                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                var path = string.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
                if (!File.Exists(path))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
                    continue;
                }

                var ms1ftPath = string.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
                if (!File.Exists(ms1ftPath))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", ms1ftPath);
                    continue;
                }

                filesProcessed++;

                var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);

                // The protein id is the five characters that follow the prefix in the protein name.
                for (var j = 0; j < prsmList.Count; j++)
                {
                    var match = prsmList[j];
                    match.ProteinId =
                        match.ProteinName.Substring(
                            match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
                }

                var features = LcMsFeatureAlignment.LoadProMexResult(i, ms1ftPath, run);

                // Tag features with the PrSMs that lie strictly inside their scan range and
                // within the mass tolerance.
                for (var j = 0; j < features.Count; j++)
                {
                    var feature = features[j];
                    var massTol = tolerance.GetToleranceAsMz(feature.Mass);
                    foreach (var match in prsmList)
                    {
                        if (feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum && Math.Abs(feature.Mass - match.Mass) < massTol)
                        {
                            feature.ProteinSpectrumMatches.Add(match);
                        }
                    }
                }

                align.AddDataSet(i, features, run);
            }

            if (filesProcessed == 0)
            {
                Assert.Ignore("Skipped since input files not found");
            }

            align.AlignFeatures();
            Console.WriteLine("{0} alignments ", align.CountAlignedFeatures);
            align.RefineAbundance();

            var alignedFeatureList = align.GetAlignedFeatures();

            for (var i = 0; i < nDataset; i++)
            {
                var ms1ftPath = string.Format(@"{0}\{1}_aligned.ms1ft", promexOutFolder, dataset[i]);

                // 'using' releases the file handle even when a write throws.
                using (var writer = new StreamWriter(ms1ftPath))
                {
                    writer.Write(LcMsFeatureFinderLauncher.GetHeaderString());
                    writer.WriteLine("\tIdedMs2ScanNums");

                    for (var j = 0; j < alignedFeatureList.Count; j++)
                    {
                        // 1-based index of the aligned feature group.
                        writer.Write(j + 1);
                        writer.Write("\t");

                        var feature = alignedFeatureList[j][i];
                        if (feature == null)
                        {
                            // Placeholder row of 15 zeros for a dataset without this feature.
                            // NOTE(review): presumably matches the header's column count - confirm.
                            for (var k = 0; k < 14; k++)
                            {
                                writer.Write("0\t");
                            }
                            writer.Write("0\n");
                            continue;
                        }

                        writer.Write(LcMsFeatureFinderLauncher.GetString(feature));
                        writer.Write("\t");

                        // Last column: scan numbers of the attached identifications, ';'-separated
                        // (left empty when the feature has no match collection).
                        if (feature.ProteinSpectrumMatches != null)
                        {
                            var scanNums = string.Join(";", feature.ProteinSpectrumMatches.Select(prsm => prsm.ScanNum));
                            writer.Write(scanNums);
                        }

                        writer.Write("\n");
                    }
                }
            }
        }
예제 #30
0
 /// <summary>
 /// Array convenience overload: copies <paramref name="runLabels"/> into a list and forwards
 /// to the overload that accepts that list.
 /// </summary>
 private void OutputCrossTabWithId(string outputFilePath, LcMsFeatureAlignment alignment, string[] runLabels)
 {
     var labelList = runLabels.ToList();
     OutputCrossTabWithId(outputFilePath, alignment, labelList);
 }