Beispiel #1
0
        /// <summary>
        /// Aligns the features of several datasets and writes the alignment twice:
        /// once before abundance refinement (to <c>outFilePath + ".tmp"</c>) and once
        /// after it (to <paramref name="outFilePath"/>).
        /// </summary>
        /// <param name="ms1FtFiles">Feature files handed to the aligner.</param>
        /// <param name="rawFiles">Raw file paths; one <c>PbfLcMsRun</c> is opened per entry, in order.</param>
        /// <param name="outFilePath">Path of the final (refined) alignment output.</param>
        private void RunFeatureAlignment(IList <string> ms1FtFiles, IReadOnlyList <string> rawFiles, string outFilePath)
        {
            // Open one run per raw file, preserving the order of rawFiles.
            var runs = new List<LcMsRun>(rawFiles.Count);
            for (var fileIndex = 0; fileIndex < rawFiles.Count; fileIndex++)
            {
                runs.Add(new PbfLcMsRun(rawFiles[fileIndex]));
            }

            var comparer = new LcMsFeatureAlignComparer(new Tolerance(10));
            var aligner  = new LcMsFeatureAlignment(ms1FtFiles, runs, comparer);

            aligner.AlignFeatures();
            Console.WriteLine("# of aligned features = {0}", aligner.CountAlignedFeatures);

            // Snapshot of the alignment before refinement. The last argument is true here
            // and false for the final file; its meaning is defined by OutputAlignmentResult.
            OutputAlignmentResult(aligner, outFilePath + ".tmp", rawFiles, true);

            aligner.RefineAbundance();
            OutputAlignmentResult(aligner, outFilePath, rawFiles, false);
        }
Beispiel #2
0
        /// <summary>
        /// Aligns ProMex features across up to 32 UTEX datasets, tags each feature with the
        /// MSAlign PrSMs that fall inside its scan range and mass tolerance, and writes one
        /// <c>*_aligned.ms1ft</c> file per dataset.
        /// The test is ignored when the raw-data folder is missing or when no dataset has all
        /// three of its input files (.pbf, MSAlign result table, .ms1ft).
        /// </summary>
        public void TestAlignFeatures()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const string rawFolder           = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
            const string promexOutFolder     = @"D:\MassSpecFiles\UTEX\MSAlign";
            const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

            if (!Directory.Exists(rawFolder))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
            }

            const int nDataset = 32;
            var dataset = new string[nDataset];

            // Dataset names carry a two-digit, 1-based index.
            for (var i = 0; i < nDataset; i++)
            {
                dataset[i] = string.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
            }

            var tolerance      = new Tolerance(10);
            var ftComparer     = new UtexFeatureComparer(tolerance);
            var align          = new LcMsFeatureAlignment(ftComparer);
            var prsmReader     = new ProteinSpectrumMatchReader(0.01);
            var filesProcessed = 0;

            for (var i = 0; i < dataset.Length; i++)
            {
                var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
                if (!File.Exists(rawFile))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
                    continue;
                }

                var path = string.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
                if (!File.Exists(path))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
                    continue;
                }

                var ms1ftPath = string.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
                if (!File.Exists(ms1ftPath))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", ms1ftPath);
                    continue;
                }

                filesProcessed++;

                // Load the run only after all three inputs are known to exist, so that no
                // run is loaded for a dataset that is about to be skipped.
                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);

                // Derive the protein ID: the 5 characters following ProteinNamePrefix.
                // NOTE(review): if the prefix is absent IndexOf returns -1 and the substring
                // silently starts at ProteinNamePrefix.Length - 1; confirm every name has it.
                for (var j = 0; j < prsmList.Count; j++)
                {
                    var match = prsmList[j];
                    match.ProteinId =
                        match.ProteinName.Substring(
                            match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
                }

                var features = LcMsFeatureAlignment.LoadProMexResult(i, ms1ftPath, run);

                // Tag features by PrSMs: a PrSM belongs to a feature when its scan number lies
                // strictly inside the feature's scan range and the mass difference is below
                // the tolerance computed for the feature's mass.
                for (var j = 0; j < features.Count; j++)
                {
                    var feature = features[j];
                    var massTol = tolerance.GetToleranceAsMz(feature.Mass);
                    foreach (var match in prsmList)
                    {
                        if (feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum && Math.Abs(feature.Mass - match.Mass) < massTol)
                        {
                            feature.ProteinSpectrumMatches.Add(match);
                        }
                    }
                }

                align.AddDataSet(i, features, run);
            }

            if (filesProcessed == 0)
            {
                Assert.Ignore("Skipped since input files not found");
            }

            align.AlignFeatures();
            Console.WriteLine("{0} alignments ", align.CountAlignedFeatures);
            align.RefineAbundance();

            var alignedFeatureList = align.GetAlignedFeatures();

            // NOTE(review): this loop also writes a file for datasets that were skipped above;
            // confirm that every aligned row has an entry at those indices.
            for (var i = 0; i < nDataset; i++)
            {
                var alignedPath = string.Format(@"{0}\{1}_aligned.ms1ft", promexOutFolder, dataset[i]);

                // 'using' releases the file handle even when a write throws part-way through.
                using (var writer = new StreamWriter(alignedPath))
                {
                    writer.Write(LcMsFeatureFinderLauncher.GetHeaderString());
                    writer.WriteLine("\tIdedMs2ScanNums");

                    for (var j = 0; j < alignedFeatureList.Count; j++)
                    {
                        writer.Write(j + 1);
                        writer.Write("\t");

                        var alignedFeature = alignedFeatureList[j][i];
                        if (alignedFeature == null)
                        {
                            // No feature for this dataset in this row: emit 15 zero columns.
                            for (var k = 0; k < 14; k++)
                            {
                                writer.Write("0\t");
                            }
                            writer.Write("0\n");
                            continue;
                        }

                        writer.Write(LcMsFeatureFinderLauncher.GetString(alignedFeature));
                        writer.Write("\t");

                        // The last column stays empty when the feature has no PrSM collection.
                        if (alignedFeature.ProteinSpectrumMatches != null)
                        {
                            var scanNums = string.Join(";", alignedFeature.ProteinSpectrumMatches.Select(prsm => prsm.ScanNum));
                            writer.Write(scanNums);
                        }

                        writer.Write("\n");
                    }
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Clusters features across datasets by converting them to InformedProteomics
        /// <c>LcMsFeature</c>s, aligning them with <c>LcMsFeatureAlignment</c>, refining
        /// abundances, and converting each aligned group back into a <c>UMCClusterLight</c>.
        /// </summary>
        /// <param name="data">Features to cluster; they are grouped by <c>GroupId</c> before alignment.</param>
        /// <param name="progress">Optional progress reporter; a new <c>Progress</c> instance is used when null.</param>
        /// <returns>The clusters found; an empty list when <paramref name="data"/> is empty.</returns>
        public List <UMCClusterLight> Cluster(List <UMCLight> data, IProgress <ProgressData> progress = null)
        {
            progress = progress ?? new Progress <ProgressData>();

            if (data.Count == 0)
            {
                return new List <UMCClusterLight>();
            }

            this.maxFeatureId = data.Max(d => d.Id);

            // Index every feature by (GroupId, Id). Add throws on a duplicate key.
            this.featureMap = new Dictionary <Tuple <int, int>, UMCLight>();
            foreach (var feature in data)
            {
                var key = new Tuple <int, int>(feature.GroupId, feature.Id);
                this.featureMap.Add(key, feature);
            }

            var lcmsFeatureAligner =
                new LcMsFeatureAlignment(new LcMsFeatureAlignComparer(new Tolerance(10, ToleranceUnit.Ppm)));

            // Group features by dataset (GroupId), using a single lookup per feature.
            var idToFeatures = new Dictionary <int, List <UMCLight> >();

            foreach (var umcLight in data)
            {
                List <UMCLight> datasetFeatures;
                if (!idToFeatures.TryGetValue(umcLight.GroupId, out datasetFeatures))
                {
                    datasetFeatures = new List <UMCLight>();
                    idToFeatures.Add(umcLight.GroupId, datasetFeatures);
                }

                datasetFeatures.Add(umcLight);
            }

            // Convert UMCLights to InformedProteomics LcMsFeatures
            foreach (var ds in idToFeatures)
            {
                var lcmsFeatures = new List <LcMsFeature>(ds.Value.Select(this.GetLcMsFeature));
                lcmsFeatureAligner.AddDataSet(ds.Key, lcmsFeatures, this.GetLcMsRun(ds.Key));
            }

            // Perform clustering
            lcmsFeatureAligner.AlignFeatures();

            // Fill in missing features using noise.
            lcmsFeatureAligner.RefineAbundance(-30, progress);

            var clusteredFeatures = lcmsFeatureAligner.GetAlignedFeatures();

            // Convert InformedProteomics clusters to UMCClusterLight
            int clustId  = 0;
            var clusters = new List <UMCClusterLight>();

            foreach (var cluster in clusteredFeatures)
            {
                // Skip groups that contain no feature at all; they do not consume a cluster ID.
                var firstFeature = cluster.FirstOrDefault(f => f != null);
                if (firstFeature == null)
                {
                    continue;
                }

                var umcCluster = new UMCClusterLight
                {
                    Id = clustId++,
                };

                // Promex doesn't keep track of which dataset noise features belong to, so we need to.
                // NOTE(review): datasetId advances only for non-null entries, so a null entry
                // ahead of a non-null one makes the assigned DataSetId differ from the entry's
                // position in the cluster; confirm this is intended.
                int datasetId = 0;
                foreach (var feature in cluster)
                {
                    if (feature == null)
                    {
                        continue;
                    }

                    feature.DataSetId = datasetId++;
                    var umc = this.GetUMC(feature);
                    umcCluster.AddChildFeature(umc);
                    umc.SetParentFeature(umcCluster);
                }

                umcCluster.CalculateStatistics(ClusterCentroidRepresentation.Median);
                clusters.Add(umcCluster);
            }

            return clusters;
        }
        /// <summary>
        /// Aligns the features of several datasets and writes the alignment twice:
        /// once before abundance refinement (to <c>outFilePath + ".tmp"</c>) and once
        /// after it (to <paramref name="outFilePath"/>).
        /// </summary>
        /// <param name="ms1FtFiles">Feature files handed to the aligner.</param>
        /// <param name="rawFiles">Raw file paths; one <c>PbfLcMsRun</c> is opened per entry, in order.</param>
        /// <param name="outFilePath">Path of the final (refined) alignment output.</param>
        private void RunFeatureAlignment(List<string> ms1FtFiles, List<string> rawFiles, string outFilePath)
        {
            // Open one run per raw file, preserving the order of rawFiles.
            List<LcMsRun> runs = rawFiles.ConvertAll<LcMsRun>(rawFile => new PbfLcMsRun(rawFile));

            var comparer = new LcMsFeatureAlignComparer(new Tolerance(10));
            var aligner = new LcMsFeatureAlignment(ms1FtFiles, runs, comparer);

            aligner.AlignFeatures();
            Console.WriteLine("# of aligned features = {0}", aligner.CountAlignedFeatures);

            // Snapshot of the alignment before refinement. The last argument is true here
            // and false for the final file; its meaning is defined by OutputAlignmentResult.
            OutputAlignmentResult(aligner, outFilePath + ".tmp", rawFiles, true);

            aligner.RefineAbundance();
            OutputAlignmentResult(aligner, outFilePath, rawFiles, false);
        }