Ejemplo n.º 1
0
        /// <summary>
        ///     Reads MS features from a comma-delimited file and runs a
        ///     <see cref="UmcTreeFeatureFinder"/> over them, passing null as the finder's third argument.
        /// </summary>
        /// <param name="path">Path of the MS feature file to read.</param>
        /// <returns>The features returned by the feature finder.</returns>
        public IEnumerable <UMCLight> TestUmcFeatures(string path)
        {
            // Load the MS features; the reader is told the file is comma delimited.
            var featureReader = new MsFeatureLightFileReader();
            featureReader.Delimeter = ",";
            var msFeatures = featureReader.ReadFile(path);

            // Finder limits and matching tolerances used for this test.
            var treeFinder = new UmcTreeFeatureFinder();
            treeFinder.MaximumNet  = .005;
            treeFinder.MaximumScan = 50;

            var findingOptions = new LcmsFeatureFindingOptions(new FeatureTolerances
            {
                Mass = 8,
                Net  = .005
            });

            var umcFeatures = treeFinder.FindFeatures(msFeatures.ToList(), findingOptions, null);

            // The finder must produce at least one feature.
            Assert.Greater(umcFeatures.Count, 0);

            return umcFeatures;
        }
Ejemplo n.º 2
0
        /// <summary>
        ///     Loads MS features from a comma-delimited text file.
        /// </summary>
        /// <remarks>
        ///     Every extension other than .PEK (compared case-insensitively) is read with a
        ///     comma-delimited <see cref="MsFeatureLightFileReader"/>.
        /// </remarks>
        /// <param name="path">Path of the MS feature file to load.</param>
        /// <returns>
        ///     The MS features read from the file, or an empty list when
        ///     <see cref="Path.GetExtension(string)"/> returns null for <paramref name="path"/>.
        /// </returns>
        /// <exception cref="NotImplementedException">The file has a .PEK extension.</exception>
        public static List<MSFeatureLight> LoadMsFeatureData(string path)
        {
            var msFeatures = new List<MSFeatureLight>();
            var extension = Path.GetExtension(path);
            if (extension == null) return msFeatures;

            // File extensions are identifiers, not linguistic text, so compare them ordinally
            // instead of upper-casing with the current culture.
            if (string.Equals(extension, ".PEK", StringComparison.OrdinalIgnoreCase))
            {
                // The previous, disabled implementation read these files with a PEKFileReader.
                throw new NotImplementedException("Support for .PEK files is not available at this time");
            }

            var reader = new MsFeatureLightFileReader {Delimiter = ','};
            msFeatures.AddRange(reader.ReadFile(path));
            UpdateStatus("Loaded features from the CSV files.");

            return msFeatures;
        }
Ejemplo n.º 3
0
        /// <summary>
        ///     Loads MS features from a comma-delimited text file, handing the given isos filter
        ///     options to the file reader.
        /// </summary>
        /// <remarks>
        ///     Every extension other than .PEK (compared case-insensitively) is read with a
        ///     comma-delimited <see cref="MsFeatureLightFileReader"/>.
        /// </remarks>
        /// <param name="path">Path of the MS feature file to load.</param>
        /// <param name="isosFilterOptions">Filter options assigned to the reader before the file is read.</param>
        /// <returns>
        ///     The MS features read from the file, or an empty list when
        ///     <see cref="Path.GetExtension(string)"/> returns null for <paramref name="path"/>.
        /// </returns>
        /// <exception cref="NotImplementedException">The file has a .PEK extension.</exception>
        public static List <MSFeatureLight> LoadMsFeatureData(string path, DeconToolsIsosFilterOptions isosFilterOptions)
        {
            var msFeatures = new List <MSFeatureLight>();
            var extension  = Path.GetExtension(path);

            if (extension == null)
            {
                return(msFeatures);
            }

            // File extensions are identifiers, not linguistic text, so compare them ordinally
            // instead of upper-casing with the current culture.
            if (string.Equals(extension, ".PEK", StringComparison.OrdinalIgnoreCase))
            {
                // The previous, disabled implementation read these files with a PEKFileReader.
                throw new NotImplementedException("Support for .PEK files is not available at this time");
            }

            var reader = new MsFeatureLightFileReader {
                Delimiter = ','
            };
            reader.IsosFilteroptions = isosFilterOptions;
            msFeatures.AddRange(reader.ReadFile(path));
            UpdateStatus("Loaded features from the CSV files.");

            return(msFeatures);
        }
Ejemplo n.º 4
0
        /// <summary>
        ///     Reads MS features from a comma-delimited file, runs a <see cref="UmcTreeFeatureFinder"/>
        ///     with a scan summary provider for the matching raw file, and checks the feature count.
        /// </summary>
        /// <param name="relativePath">MS feature file, resolved to an absolute path through GetPath.</param>
        /// <param name="expectedFeatureCount">Exact number of features the finder must return.</param>
        /// <returns>The features returned by the feature finder.</returns>
        public IEnumerable <UMCLight> TestUmcFeatures(string relativePath, int expectedFeatureCount)
        {
            var absolutePath = GetPath(relativePath);

            // Load the MS features; the reader is told the file is comma delimited.
            var featureReader = new MsFeatureLightFileReader();
            featureReader.Delimiter = ',';
            var msFeatures = featureReader.ReadFile(absolutePath);

            var treeFinder = new UmcTreeFeatureFinder();
            treeFinder.MaximumNet  = .005;
            treeFinder.MaximumScan = 50;

            var findingOptions = new LcmsFeatureFindingOptions(new FeatureTolerances
            {
                Mass = 8,
                Net  = .005
            });

            // The raw file path is derived from the feature file path by swapping the suffix.
            var rawFilePath = absolutePath.Replace("_isos.csv", ".raw");

            UpdateStatus("Using raw data to create better features.");

            IScanSummaryProvider scanProvider =
                new ScanSummaryProviderCache().GetScanSummaryProvider(rawFilePath, 1);

            var umcFeatures = treeFinder.FindFeatures(msFeatures.ToList(), findingOptions, scanProvider);

            // At least one feature must be found, and the total must match the expectation.
            Assert.Greater(umcFeatures.Count, 0);
            Assert.AreEqual(expectedFeatureCount, umcFeatures.Count);

            return umcFeatures;
        }
Ejemplo n.º 5
0
        /// <summary>
        ///     Finds features with a <see cref="UmcTreeFeatureFinder"/>, writes the per-charge MS features of
        ///     each multi-charge feature to an "_xics.csv" file next to the input, and verifies that the
        ///     first scans of a feature's charge states lie within <paramref name="maxScanDiff"/> of each other.
        /// </summary>
        /// <param name="path">
        ///     Comma-delimited MS feature file. The output file name is derived by replacing
        ///     "_isos.csv" with "_xics.csv".
        /// </param>
        /// <param name="rawPath">Raw data file handed to the reader created by RawLoaderFactory.</param>
        /// <param name="maxScanDiff">
        ///     Largest allowed absolute difference between the minimum scans of any two charge states
        ///     of one feature.
        /// </param>
        /// <exception cref="NullReferenceException">The feature finder returned null.</exception>
        /// <exception cref="Exception">Two charge states of a feature are further apart than allowed.</exception>
        public void TestUmcFeaturesMultipleCharges(string path, string rawPath, int maxScanDiff)
        {
            var reader = new MsFeatureLightFileReader {Delimeter = ","};
            var newMsFeatures = reader.ReadFile(path);
            var finder = new UmcTreeFeatureFinder();
            var featureTolerances = new FeatureTolerances
            {
                Mass = 12,
                Net = .05
            };
            var options = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange = .002,
                MaximumScanRange = 50
            };

            var provider = RawLoaderFactory.CreateFileReader(rawPath);
            provider.AddDataFile(rawPath, 0);

            var start = DateTime.Now;
            IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
            var end = DateTime.Now;
            Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);

            if (features == null)
            {
                throw new NullReferenceException("The feature list came back empty.  This is a problem.");
            }

            var dirPath = Path.GetDirectoryName(path);
            if (dirPath != null)
            {
                var xicPath = Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv"));
                using (var writer = File.CreateText(xicPath))
                {
                    foreach (var feature in features)
                    {
                        writer.WriteLine();
                        writer.WriteLine("Feature {0}", feature.Id);
                        var chargeMap = feature.CreateChargeMap();

                        // Only features observed in more than one charge state are of interest here.
                        if (chargeMap.Keys.Count < 2)
                        {
                            continue;
                        }

                        foreach (var charge in chargeMap.Keys)
                        {
                            writer.WriteLine();
                            foreach (var msFeature in chargeMap[charge])
                            {
                                var count = msFeature.MSnSpectra.Count;
                                writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                                    msFeature.Abundance, count);
                            }
                        }

                        var charges = chargeMap.Keys.ToList();

                        // Compare every unordered pair of distinct charge states exactly once.
                        for (var i = 0; i < charges.Count; i++)
                        {
                            for (var j = i + 1; j < charges.Count; j++)
                            {
                                var x = chargeMap[charges[i]];
                                var y = chargeMap[charges[j]];

                                // Use the absolute difference: the key order is arbitrary, so a signed
                                // difference would miss the case where x starts much earlier than y.
                                var diff = Math.Abs(x.MinScan() - y.MinScan());
                                if (diff > maxScanDiff)
                                {
                                    throw new Exception(
                                        "There is a problem with the feature finder across charge states");
                                }
                            }
                        }
                    }
                }
            }

            // Work on total feature count here.
            Assert.Greater(features.Count(), 0);
        }
Ejemplo n.º 6
0
        /// <summary>
        ///     Reads MS features from a comma-delimited file and runs a
        ///     <see cref="UmcTreeFeatureFinder"/> over them, passing null as the finder's third argument.
        /// </summary>
        /// <param name="path">Path of the MS feature file to read.</param>
        /// <returns>The features returned by the feature finder.</returns>
        public IEnumerable<UMCLight> TestUmcFeatures(string path)
        {
            // Load the MS features; the reader is told the file is comma delimited.
            var featureReader = new MsFeatureLightFileReader();
            featureReader.Delimeter = ",";
            var msFeatures = featureReader.ReadFile(path);

            // Finder limits and matching tolerances used for this test.
            var treeFinder = new UmcTreeFeatureFinder();
            treeFinder.MaximumNet = .005;
            treeFinder.MaximumScan = 50;

            var findingOptions = new LcmsFeatureFindingOptions(new FeatureTolerances
            {
                Mass = 8,
                Net = .005
            });

            var umcFeatures = treeFinder.FindFeatures(msFeatures.ToList(), findingOptions, null);

            // The finder must produce at least one feature.
            Assert.Greater(umcFeatures.Count, 0);

            return umcFeatures;
        }
Ejemplo n.º 7
0
        /// <summary>
        ///     Reads MS features, filters them, clusters them into LC-MS features with an
        ///     MsFeatureTreeClusterer, filters the clusters by feature length, and writes one
        ///     delimited line per remaining feature.
        /// </summary>
        /// <param name="relativePath">Input MS feature file, resolved through GetPath.</param>
        /// <param name="outputPath">Output text file, resolved through GetPath.</param>
        public void CreateFeaturesTest(string relativePath, string outputPath)
        {
            var inputPath = GetPath(relativePath);

            // Settings for the three processing stages: clustering, MS feature filtering,
            // and LC-MS feature filtering.
            var clusterTolerances = new FeatureTolerances
            {
                Mass = 13,
                Net = .01,
                DriftTime = 30,
                FragmentationWindowSize = .5
            };
            var msFeatureFilter = new MsFeatureFilteringOptions
            {
                ChargeRange = new FilterRange(1,6),
                MinimumIntensity = 200000,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter = true
            };
            var lengthFilter = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRange = new FilterRange
                {
                    Maximum = 30,
                    Minimum = 10
                }
            };

            // Read the MS features and drop the ones rejected by the MS feature filter.
            var msFeatures = new MsFeatureLightFileReader().ReadFile(inputPath);
            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFeatureFilter);

            // Cluster into LC-MS features, echoing the clusterer's progress messages.
            var clusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>();
            clusterer.Tolerances = clusterTolerances;
            clusterer.Progress += (source, progress) => Console.WriteLine(progress.Message);

            var lcmsFeatures = clusterer.Cluster(msFeatures.ToList());
            lcmsFeatures = LcmsFeatureFilters.FilterFeatures(lcmsFeatures, lengthFilter);

            Console.WriteLine(@"Found - {0} features", lcmsFeatures.Count);
            using (var output = File.CreateText(GetPath(outputPath)))
            {
                var nextId = 0;
                foreach (var lcmsFeature in lcmsFeatures)
                {
                    // Features are renumbered sequentially in output order.
                    lcmsFeature.Id = nextId;
                    nextId++;

                    lcmsFeature.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                    output.WriteLine("{1}{0}{2}{0}{3}{0}{4}{0}{5}{0}{6}{0}{7}{0}{8}{0}{9}{0}{10}",
                        TextDelimiter,
                        lcmsFeature.Net,
                        lcmsFeature.ChargeState,
                        lcmsFeature.Mz,
                        lcmsFeature.Scan,
                        lcmsFeature.MassMonoisotopic,
                        lcmsFeature.MassMonoisotopicAligned,
                        lcmsFeature.Id,
                        lcmsFeature.ScanStart,
                        lcmsFeature.ScanEnd,
                        lcmsFeature.ScanAligned);
                }
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        ///     Finds LC-MS features in a baseline and an alignee dataset, clusters their MS/MS spectra
        ///     across both datasets, and writes the clusters to two time-stamped text files
        ///     ("..._scans.txt" and "....txt") in <paramref name="resultPath"/>.
        /// </summary>
        /// <param name="name">Prefix of the result file names.</param>
        /// <param name="resultPath">Directory the result files are written to.</param>
        /// <param name="sequencePath">Path assigned as the sequence input file of both datasets.</param>
        /// <param name="type">Sequence file type; not used by this method.</param>
        /// <param name="baseline">
        ///     Baseline MS feature file. The raw file path is derived by replacing "_isos.csv" with ".raw".
        /// </param>
        /// <param name="features">
        ///     Alignee MS feature file. The raw file path is derived by replacing "_isos.csv" with ".raw".
        /// </param>
        /// <param name="percent">Value assigned to TopPercent of the spectral comparer.</param>
        public void ClusterMsMs(string name,
            string resultPath,
            string sequencePath,
            SequenceFileType type,
            string baseline,
            string features,
            double percent)
        {
            var baselineRaw = baseline.Replace("_isos.csv", ".raw");
            var featuresRaw = features.Replace("_isos.csv", ".raw");

            Console.WriteLine("Create Baseline Information");

            var baselineInfo = new DatasetInformation
            {
                DatasetId = 0,
                Features = new InputFile {Path = baseline},
                Raw = new InputFile {Path = baselineRaw},
                Sequence = new InputFile {Path = sequencePath}
            };

            Console.WriteLine("Create Alignee Information");
            var aligneeInfo = new DatasetInformation
            {
                DatasetId = 1,
                Features = new InputFile {Path = features},
                Raw = new InputFile {Path = featuresRaw},
                Sequence = new InputFile {Path = sequencePath}
            };

            var reader = new MsFeatureLightFileReader();

            // Tag every MS feature with the id of the dataset it came from.
            Console.WriteLine("Reading Baseline Features");
            var baselineMsFeatures = reader.ReadFile(baseline).ToList();
            baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

            Console.WriteLine("Reading Alignee Features");
            var aligneeMsFeatures = reader.ReadFile(features).ToList();
            aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var tolerances = new FeatureTolerances
            {
                Mass = 8,
                Net = .005
            };
            var options = new LcmsFeatureFindingOptions(tolerances);

            Console.WriteLine("Detecting Baseline Features");
            var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

            Console.WriteLine("Detecting Alignee Features");
            var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

            Console.WriteLine("Managing baseline and alignee features");
            baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
            aligneeFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

            Console.WriteLine("Clustering MS/MS Spectra");
            var clusterer = new MSMSClusterer();
            clusterer.MzTolerance = .5;
            clusterer.MassTolerance = 6;
            clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
            {
                TopPercent = percent
            };
            clusterer.SimilarityTolerance = .5;
            clusterer.ScanRange = 905;
            clusterer.Progress += clusterer_Progress;

            var allFeatures = new List<UMCLight>();
            allFeatures.AddRange(baselineFeatures);
            allFeatures.AddRange(aligneeFeatures);

            List<MsmsCluster> clusters = null;
            using (var rawReader = new ThermoRawDataFileReader())
            {
                rawReader.AddDataFile(baselineInfo.Raw.Path, baselineInfo.DatasetId);
                rawReader.AddDataFile(aligneeInfo.Raw.Path, aligneeInfo.DatasetId);

                clusters = clusterer.Cluster(allFeatures, rawReader);
                Console.WriteLine("Found {0} Total Clusters", clusters.Count);
            }

            if (clusters == null)
            {
                return;
            }

            // Both result files share one time-stamped base name. Path.Combine supplies the
            // directory separator instead of a hard-coded backslash.
            var now = DateTime.Now;
            var resultBaseName = string.Format("{0}-results-{1}-{2}-{3}-{4}-{5}-{6}",
                name,
                now.Year,
                now.Month,
                now.Day,
                now.Hour,
                now.Minute,
                now.Second);

            // File 1: for clusters holding exactly two features, the two scans and the mean score.
            var scansResultPath = Path.Combine(resultPath, resultBaseName + "_scans.txt");
            using (TextWriter writer = File.CreateText(scansResultPath))
            {
                writer.WriteLine("[Data]");
                writer.WriteLine("{0}", baseline);
                writer.WriteLine("{0}", features);
                writer.WriteLine("[Scans]");
                writer.WriteLine();
                foreach (var cluster in clusters)
                {
                    if (cluster.Features.Count != 2)
                    {
                        continue;
                    }

                    var scanData = "";
                    foreach (var feature in cluster.Features)
                    {
                        scanData += string.Format("{0},", feature.Scan);
                    }
                    scanData += string.Format("{0}", cluster.MeanScore);

                    writer.WriteLine(scanData);
                }
            }

            // File 2: every cluster, first as one line per cluster, then as a detailed listing.
            var detailResultPath = Path.Combine(resultPath, resultBaseName + ".txt");
            using (TextWriter writer = File.CreateText(detailResultPath))
            {
                writer.WriteLine("[Data]");
                writer.WriteLine("{0}", baseline);
                writer.WriteLine("{0}", features);
                writer.WriteLine("[Scans]");
                foreach (var cluster in clusters)
                {
                    var scanData = "";
                    var records = new List<string>();
                    foreach (var feature in cluster.Features)
                    {
                        scanData += string.Format("{0},", feature.Scan);

                        var record = string.Format("{0},{1},{2},{3},{4},{5}",
                            feature.GroupId,
                            feature.Id,
                            feature.MassMonoisotopic,
                            feature.Mz,
                            feature.ChargeState,
                            feature.Scan);
                        foreach (var spectrum in feature.MSnSpectra)
                        {
                            foreach (var peptide in spectrum.Peptides)
                            {
                                record += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                            }
                        }
                        records.Add(record);
                    }

                    // Separate the per-feature records with commas. Appending them back to back
                    // fused the last field of one record with the first field of the next.
                    // The scan list keeps its trailing comma, so an empty column still separates
                    // the scans from the records.
                    writer.WriteLine(scanData + "," + string.Join(",", records.ToArray()));
                }
                writer.WriteLine("");
                writer.WriteLine("");
                writer.WriteLine("[Clusters]");

                foreach (var cluster in clusters)
                {
                    writer.WriteLine("cluster id, cluster score");
                    writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
                    writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

                    foreach (var feature in cluster.Features)
                    {
                        var data = string.Format("{0},{1},{2},{3},{4},{5}",
                            feature.GroupId,
                            feature.Id,
                            feature.MassMonoisotopic,
                            feature.Mz,
                            feature.ChargeState,
                            feature.Scan);
                        foreach (var spectrum in feature.MSnSpectra)
                        {
                            foreach (var peptide in spectrum.Peptides)
                            {
                                data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                            }
                        }
                        writer.WriteLine(data);
                    }
                }
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        ///     Finds features with a <see cref="UmcTreeFeatureFinder"/> using a raw file reader, and
        ///     writes the per-charge MS features of every feature to an "_xics.csv" file next to the input.
        /// </summary>
        /// <param name="relativePath">
        ///     Comma-delimited MS feature file, resolved through GetPath. The output file name is
        ///     derived by replacing "_isos.csv" with "_xics.csv".
        /// </param>
        /// <param name="relativeRawPath">Raw data file, resolved through GetPath.</param>
        public void TestUmcFeatures(string relativePath, string relativeRawPath)
        {
            // Resolve both inputs to absolute paths.
            var featurePath = GetPath(relativePath);
            var rawFilePath = GetPath(relativeRawPath);

            var featureReader = new MsFeatureLightFileReader();
            featureReader.Delimiter = ',';
            var msFeatures = featureReader.ReadFile(featurePath);

            var treeFinder = new UmcTreeFeatureFinder();
            var findingOptions = new LcmsFeatureFindingOptions(new FeatureTolerances
            {
                Mass = 12,
                Net  = .04
            })
            {
                MaximumNetRange  = .003,
                MaximumScanRange = 50
            };

            var rawProvider = RawLoaderFactory.CreateFileReader(rawFilePath, 0);

            // Time the feature finding step only.
            var started = DateTime.Now;
            IEnumerable <UMCLight> umcFeatures = treeFinder.FindFeatures(msFeatures.ToList(), findingOptions, rawProvider);
            var finished = DateTime.Now;

            Console.WriteLine(@"Test Took: " + (finished - started).TotalSeconds);

            if (umcFeatures == null)
            {
                throw new NullReferenceException("The feature list came back empty.  This is a problem.");
            }

            var outputDirectory = Path.GetDirectoryName(featurePath);

            if (outputDirectory != null)
            {
                var xicFileName = Path.GetFileName(featurePath).Replace("_isos.csv", "_xics.csv");

                using (var xicWriter = File.CreateText(Path.Combine(outputDirectory, xicFileName)))
                {
                    foreach (var umc in umcFeatures)
                    {
                        xicWriter.WriteLine();
                        xicWriter.WriteLine("Feature {0}", umc.Id);

                        // One blank-line-separated group of rows per charge state.
                        var featuresByCharge = umc.CreateChargeMap();
                        foreach (var chargeState in featuresByCharge.Keys)
                        {
                            xicWriter.WriteLine();

                            var chargeFeatures = featuresByCharge[chargeState];
                            foreach (var chargeFeature in chargeFeatures)
                            {
                                xicWriter.WriteLine("{0},{1},{2},{3},{4}",
                                                    chargeState,
                                                    chargeFeature.Mz,
                                                    chargeFeature.Scan,
                                                    chargeFeature.Abundance,
                                                    chargeFeature.MSnSpectra.Count);
                            }
                        }
                    }
                }
            }

            // The finder must produce at least one feature.
            Assert.Greater(umcFeatures.Count(), 0);
        }
Ejemplo n.º 10
0
        /// <summary>
        ///     Finds features with a <see cref="UmcTreeFeatureFinder"/>, writes the per-charge MS features of
        ///     each multi-charge feature to an "_xics.csv" file next to the input, and verifies that the
        ///     first scans of a feature's charge states lie within <paramref name="maxScanDiff"/> of each other.
        /// </summary>
        /// <param name="path">
        ///     Comma-delimited MS feature file. The output file name is derived by replacing
        ///     "_isos.csv" with "_xics.csv".
        /// </param>
        /// <param name="rawPath">Raw data file handed to the reader created by RawLoaderFactory.</param>
        /// <param name="maxScanDiff">
        ///     Largest allowed absolute difference between the minimum scans of any two charge states
        ///     of one feature.
        /// </param>
        /// <exception cref="NullReferenceException">The feature finder returned null.</exception>
        /// <exception cref="Exception">Two charge states of a feature are further apart than allowed.</exception>
        public void TestUmcFeaturesMultipleCharges(string path, string rawPath, int maxScanDiff)
        {
            var reader = new MsFeatureLightFileReader {
                Delimeter = ","
            };
            var newMsFeatures     = reader.ReadFile(path);
            var finder            = new UmcTreeFeatureFinder();
            var featureTolerances = new FeatureTolerances
            {
                Mass = 12,
                Net  = .05
            };
            var options = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange  = .002,
                MaximumScanRange = 50
            };

            var provider = RawLoaderFactory.CreateFileReader(rawPath);

            provider.AddDataFile(rawPath, 0);

            var start = DateTime.Now;
            IEnumerable <UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
            var end = DateTime.Now;

            Console.WriteLine(@"Test Took: " + end.Subtract(start).TotalSeconds);

            if (features == null)
            {
                throw new NullReferenceException("The feature list came back empty.  This is a problem.");
            }

            var dirPath = Path.GetDirectoryName(path);

            if (dirPath != null)
            {
                var xicPath = Path.Combine(dirPath, Path.GetFileName(path).Replace("_isos.csv", "_xics.csv"));

                using (var writer = File.CreateText(xicPath))
                {
                    foreach (var feature in features)
                    {
                        writer.WriteLine();
                        writer.WriteLine("Feature {0}", feature.Id);
                        var chargeMap = feature.CreateChargeMap();

                        // Only features observed in more than one charge state are of interest here.
                        if (chargeMap.Keys.Count < 2)
                        {
                            continue;
                        }

                        foreach (var charge in chargeMap.Keys)
                        {
                            writer.WriteLine();
                            foreach (var msFeature in chargeMap[charge])
                            {
                                var count = msFeature.MSnSpectra.Count;
                                writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                                                 msFeature.Abundance, count);
                            }
                        }

                        var charges = chargeMap.Keys.ToList();

                        // Compare every unordered pair of distinct charge states exactly once.
                        for (var i = 0; i < charges.Count; i++)
                        {
                            for (var j = i + 1; j < charges.Count; j++)
                            {
                                var x = chargeMap[charges[i]];
                                var y = chargeMap[charges[j]];

                                // Use the absolute difference: the key order is arbitrary, so a signed
                                // difference would miss the case where x starts much earlier than y.
                                var diff = Math.Abs(x.MinScan() - y.MinScan());
                                if (diff > maxScanDiff)
                                {
                                    throw new Exception(
                                              "There is a problem with the feature finder across charge states");
                                }
                            }
                        }
                    }
                }
            }

            // Work on total feature count here.
            Assert.Greater(features.Count(), 0);
        }
Ejemplo n.º 11
0
        /// <summary>
        ///     Compares, cluster by cluster, the identifications obtained by STAC AMT matching of
        ///     MS1 features against the identifications attached to the clusters' MS/MS scans, and
        ///     prints the differences and the number of shared identifications to the console.
        /// </summary>
        /// <param name="liquidResultsPath">
        ///     Liquid results file. It is read twice: once as the mass tag database and once
        ///     through GetIds for the scan-to-identification lookup.
        /// </param>
        /// <param name="isosFile">Isos feature file to read the MS features from.</param>
        /// <param name="rawFile">Raw data file used to obtain the spectra provider.</param>
        public void CompareMs2IdsToMs1Ids(string liquidResultsPath, string isosFile, string rawFile)
        {
            // Read mass tags.
            var massTagReader = new LiquidResultsFileLoader(liquidResultsPath);
            var massTags      = massTagReader.LoadDatabase();

            // Get identifications - this rereads the liquid results file, but I'm leaving it that way
            // for now because this is just a test.
            var scansToIds = this.GetIds(liquidResultsPath);

            // Read raw data file.
            var spectraProviderCache = new SpectraProviderCache();
            var spectraProvider      = spectraProviderCache.GetSpectraProvider(rawFile);

            // Read isos features
            var isosReader = new MsFeatureLightFileReader();

            isosReader.IsosFilteroptions = new DeconToolsIsosFilterOptions {
                MaximumIsotopicFit = 0.15
            };
            var msFeatures = isosReader.ReadFile(isosFile).ToList();

            // Get LCMS features
            var msFeatureClusterer = new MsToLcmsFeatures(spectraProvider);
            var lcmsFeatures       = msFeatureClusterer.Convert(msFeatures);

            // No alignment is performed here, so the aligned values are copied from the observed ones.
            lcmsFeatures.ForEach(feature => { feature.NetAligned = feature.Net; feature.MassMonoisotopicAligned = feature.MassMonoisotopic; });

            // Create clusters - Since this is only working on a single dataset, there should be a 1:1 mapping
            // between LCMS features and clusters.
            var clusters = new List <UMCClusterLight>(lcmsFeatures.Count);

            foreach (var lcmsFeature in lcmsFeatures)
            {
                var cluster = new UMCClusterLight(lcmsFeature);
                cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);
                clusters.Add(cluster);
            }

            // Do STAC AMT matching
            var stacAdapter = new STACAdapter <UMCClusterLight>
            {
                Options = new FeatureMatcherParameters
                {
                    ShouldCalculateShiftFDR = false,
                    UsePriors           = true,
                    UseEllipsoid        = true,
                    UseDriftTime        = false,
                    ShouldCalculateSTAC = true,
                }
            };
            var amtMatches = stacAdapter.PerformPeakMatching(clusters, massTags);

            // Group AMT matches by cluster, convert MassTags to Protein objects (represents lipid ID,
            // rather than Protein ID here) for simplicity in comparing them to the MS/MS IDs.
            var ms1Matches = clusters.ToDictionary(cluster => cluster, cluster => new List <Protein>());

            foreach (var amtMatch in amtMatches)
            {
                var cluster = amtMatch.Observed;
                var massTag = amtMatch.Target;
                ms1Matches[cluster].Add(new Protein
                {
                    Name            = massTag.ProteinName,
                    Sequence        = massTag.PeptideSequence,
                    ChemicalFormula = massTag.PeptideSequence
                });
            }

            // Now we need to backtrack MS/MS identifications -> clusters
            var ms2Matches = new Dictionary <UMCClusterLight, List <Protein> >();

            foreach (var cluster in clusters)
            {
                ms2Matches.Add(cluster, new List <Protein>());
                foreach (var lcmsFeature in cluster.UmcList)
                {
                    foreach (var msFeature in lcmsFeature.MsFeatures)
                    {
                        foreach (var msmsFeature in msFeature.MSnSpectra)
                        {
                            if (scansToIds.ContainsKey(msmsFeature.Scan))
                            {
                                ms2Matches[cluster].AddRange(scansToIds[msmsFeature.Scan]);
                            }
                        }
                    }
                }
            }

            // How many clusters have IDs from MS1 matching and from MS/MS?
            var clusterMs1IdCount = ms1Matches.Values.Count(value => value.Any());
            var clusterMs2IdCount = ms2Matches.Values.Count(value => value.Any());

            int overlapCount = 0; // Number of IDs that overlapped between MS1 and MS2 identifications.

            // Finally compare the MS1 IDs to the MS2 IDs.
            foreach (var cluster in clusters)
            {
                // For now only comparing by name. The name lists are materialized once so the
                // queries below are not re-evaluated on every Any()/foreach.
                var ms1Lipids = ms1Matches[cluster].Select(id => id.Name).ToList();
                var ms2Lipids = ms2Matches[cluster].Select(id => id.Name).ToList();

                // Compare MS1 IDs for the cluster vs MS2 IDs for the cluster.
                var ms1OnlyIds = ms1Lipids.Where(lipid => !ms2Lipids.Contains(lipid)).ToList();
                var ms2OnlyIds = ms2Lipids.Where(lipid => !ms1Lipids.Contains(lipid)).ToList();

                // The overlap is the set of names present in BOTH lists. Intersecting the two
                // "only" lists is empty by construction and always counted zero.
                overlapCount += ms1Lipids.Intersect(ms2Lipids).Count();

                // Write Results
                if (ms1OnlyIds.Count == 0 && ms2OnlyIds.Count == 0)
                {
                    continue;
                }

                Console.WriteLine("Cluster {0}:", cluster.Id);
                if (ms1OnlyIds.Count > 0)
                {
                    Console.WriteLine("\tMs1 Only IDs:");
                    foreach (var id in ms1OnlyIds)
                    {
                        Console.WriteLine("\t\t{0}", id);
                    }
                }

                if (ms2OnlyIds.Count > 0)
                {
                    Console.WriteLine("\tMs2 Only IDs:");
                    foreach (var id in ms2OnlyIds)
                    {
                        Console.WriteLine("\t\t{0}", id);
                    }
                }
            }

            Console.WriteLine("Clusters with MS1 IDs: {0}", clusterMs1IdCount);
            Console.WriteLine("Clusters with MS2 IDs: {0}", clusterMs2IdCount);
            Console.WriteLine("Overlap: {0}", overlapCount);
        }
Ejemplo n.º 12
0
        /// <summary>
        ///     Reads MS features from a comma-delimited file, runs <c>UmcTreeFeatureFinder</c> over them
        ///     (passing a reader for the raw file), and writes the MS features of every found feature,
        ///     grouped by charge, to an "_xics.csv" file in the directory of the input file.
        ///     Fails when no features are found.
        /// </summary>
        /// <param name="relativePath">Path of the MS feature file, resolved through <c>GetPath</c>.</param>
        /// <param name="relativeRawPath">Path of the raw data file, resolved through <c>GetPath</c>.</param>
        public void TestUmcFeatures(string relativePath, string relativeRawPath)
        {
            // Get absolute paths
            var path = GetPath(relativePath);
            var rawPath = GetPath(relativeRawPath);

            var reader = new MsFeatureLightFileReader { Delimiter = ',' };
            var newMsFeatures = reader.ReadFile(path);
            var finder = new UmcTreeFeatureFinder();
            var featureTolerances = new FeatureTolerances
            {
                Mass = 12,
                Net = .04
            };
            var options = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange = .003,
                MaximumScanRange = 50
            };

            var provider = RawLoaderFactory.CreateFileReader(rawPath);
            provider.AddDataFile(rawPath, 0);

            // Stopwatch measures elapsed time independently of wall-clock adjustments.
            var timer = System.Diagnostics.Stopwatch.StartNew();
            IEnumerable<UMCLight> features = finder.FindFeatures(newMsFeatures.ToList(), options, provider);
            timer.Stop();
            Console.WriteLine(@"Test Took: " + timer.Elapsed.TotalSeconds);

            Assert.IsNotNull(features, "The feature list came back empty.  This is a problem.");

            // Materialize once so the write loop and the final count see the same sequence.
            var featureList = features.ToList();

            var dirPath = Path.GetDirectoryName(path);
            if (dirPath != null)
            {
                var inputName = Path.GetFileName(path);
                var outputName = inputName.Replace("_isos.csv", "_xics.csv");
                if (outputName == inputName)
                {
                    // The input name has no "_isos.csv" part. Writing to the unchanged name would
                    // truncate the input file, so derive a distinct output name instead.
                    outputName = Path.GetFileNameWithoutExtension(inputName) + "_xics.csv";
                }

                using (var writer = File.CreateText(Path.Combine(dirPath, outputName)))
                {
                    foreach (var feature in featureList)
                    {
                        writer.WriteLine();
                        writer.WriteLine("Feature {0}", feature.Id);
                        var chargeMap = feature.CreateChargeMap();
                        foreach (var charge in chargeMap.Keys)
                        {
                            // Blank line between the charge groups of one feature.
                            writer.WriteLine();
                            foreach (var msFeature in chargeMap[charge])
                            {
                                var count = msFeature.MSnSpectra.Count;
                                writer.WriteLine("{0},{1},{2},{3},{4}", charge, msFeature.Mz, msFeature.Scan,
                                    msFeature.Abundance, count);
                            }
                        }
                    }
                }
            }

            // Work on total feature count here.
            Assert.Greater(featureList.Count, 0);
        }
Ejemplo n.º 13
0
        /// <summary>
        ///     Reads MS features from a file, filters them, clusters them into features with
        ///     <c>MsFeatureTreeClusterer</c>, filters the clusters by scan length, and writes one
        ///     delimited line per remaining feature to the output file.
        /// </summary>
        /// <param name="relativePath">Path of the MS feature file, resolved through <c>GetPath</c>.</param>
        /// <param name="outputPath">Path of the output file, resolved through <c>GetPath</c>.</param>
        public void CreateFeaturesTest(string relativePath, string outputPath)
        {
            var inputFile = GetPath(relativePath);

            var clusterTolerances = new FeatureTolerances();
            clusterTolerances.Mass                    = 13;
            clusterTolerances.Net                     = .01;
            clusterTolerances.DriftTime               = 30;
            clusterTolerances.FragmentationWindowSize = .5;

            var fileReader = new MsFeatureLightFileReader();
            var msFeatures = fileReader.ReadFile(inputFile);

            // Keep charge states 1-6 and apply the intensity and deisotoping filters.
            var msFeatureFilter = new MsFeatureFilteringOptions();
            msFeatureFilter.ChargeRange                = new FilterRange(1, 6);
            msFeatureFilter.MinimumIntensity           = 200000;
            msFeatureFilter.ShouldUseDeisotopingFilter = true;
            msFeatureFilter.ShouldUseIntensityFilter   = true;

            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFeatureFilter);

            var clusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>();
            clusterer.Tolerances = clusterTolerances;
            clusterer.Progress  += (s, e) => Console.WriteLine(e.Message);

            var lcmsFeatures = clusterer.Cluster(msFeatures.ToList());

            // Restrict the clustered features to a scan length between 10 and 30.
            var scanLengthRange = new FilterRange
            {
                Maximum = 30,
                Minimum = 10
            };
            var lcmsFeatureFilter = new LcmsFeatureFilteringOptions();
            lcmsFeatureFilter.FeatureLengthRangeScans = scanLengthRange;

            lcmsFeatures = LcmsFeatureFilters.FilterFeatures(lcmsFeatures, lcmsFeatureFilter);

            Console.WriteLine(@"Found - {0} features", lcmsFeatures.Count);

            var outputFile = GetPath(outputPath);
            using (var output = File.CreateText(outputFile))
            {
                var nextId = 0;
                foreach (var lcmsFeature in lcmsFeatures)
                {
                    // Features are renumbered sequentially in output order.
                    lcmsFeature.Id = nextId;
                    nextId++;

                    lcmsFeature.CalculateStatistics(ClusterCentroidRepresentation.Mean);

                    var line = string.Format("{1}{0}{2}{0}{3}{0}{4}{0}{5}{0}{6}{0}{7}{0}{8}{0}{9}{0}{10}",
                                             TextDelimiter,
                                             lcmsFeature.Net,
                                             lcmsFeature.ChargeState,
                                             lcmsFeature.Mz,
                                             lcmsFeature.Scan,
                                             lcmsFeature.MassMonoisotopic,
                                             lcmsFeature.MassMonoisotopicAligned,
                                             lcmsFeature.Id,
                                             lcmsFeature.ScanStart,
                                             lcmsFeature.ScanEnd,
                                             lcmsFeature.ScanAligned);
                    output.WriteLine(line);
                }
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        ///     Finds features in a baseline and an alignee dataset, clusters the combined features with
        ///     <c>MSMSClusterer</c>, and writes two timestamped text reports into
        ///     <paramref name="resultPath"/>: a "_scans" report listing the scans and mean score of every
        ///     two-feature cluster, and a full report listing every cluster with its features and peptides.
        /// </summary>
        /// <param name="name">Prefix of the report file names.</param>
        /// <param name="resultPath">Directory the report files are created in.</param>
        /// <param name="sequencePath">Sequence file registered as an input file of both datasets.</param>
        /// <param name="type">Sequence file type; not used by this method.</param>
        /// <param name="baseline">
        ///     Path of the baseline MS feature file. The raw file path is this path with
        ///     "_isos.csv" replaced by ".raw".
        /// </param>
        /// <param name="features">
        ///     Path of the alignee MS feature file. The raw file path is this path with
        ///     "_isos.csv" replaced by ".raw".
        /// </param>
        /// <param name="percent">Value assigned to <c>TopPercent</c> of the spectral comparer.</param>
        public void ClusterMsMs(string name,
                                string resultPath,
                                string sequencePath,
                                SequenceFileType type,
                                string baseline,
                                string features,
                                double percent)
        {
            var baselineRaw = baseline.Replace("_isos.csv", ".raw");
            var featuresRaw = features.Replace("_isos.csv", ".raw");

            Console.WriteLine("Create Baseline Information");

            var baselineInfo = new DatasetInformation
            {
                DatasetId = 0,
            };

            baselineInfo.InputFiles.Add(new InputFile {
                Path = baseline, FileType = InputFileType.Features
            });
            baselineInfo.InputFiles.Add(new InputFile {
                Path = baselineRaw, FileType = InputFileType.Raw
            });
            baselineInfo.InputFiles.Add(new InputFile {
                Path = sequencePath, FileType = InputFileType.Sequence
            });

            Console.WriteLine("Create Alignee Information");
            var aligneeInfo = new DatasetInformation
            {
                DatasetId = 1,
            };

            aligneeInfo.InputFiles.Add(new InputFile {
                Path = features, FileType = InputFileType.Features
            });
            aligneeInfo.InputFiles.Add(new InputFile {
                Path = featuresRaw, FileType = InputFileType.Raw
            });
            aligneeInfo.InputFiles.Add(new InputFile {
                Path = sequencePath, FileType = InputFileType.Sequence
            });

            var reader = new MsFeatureLightFileReader();

            Console.WriteLine("Reading Baseline Features");
            var baselineMsFeatures = reader.ReadFile(baseline).ToList();

            // Tag every MS feature with the id of the dataset it was read from.
            baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

            Console.WriteLine("Reading Alignee Features");
            var aligneeMsFeatures = reader.ReadFile(features).ToList();

            aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

            var finder     = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var tolerances = new FeatureTolerances
            {
                Mass = 8,
                Net  = .005
            };
            var options = new LcmsFeatureFindingOptions(tolerances);

            Console.WriteLine("Detecting Baseline Features");
            var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

            Console.WriteLine("Detecting Alignee Features");
            var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

            Console.WriteLine("Managing baseline and alignee features");
            baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
            aligneeFeatures.ForEach(x => x.GroupId  = aligneeInfo.DatasetId);

            Console.WriteLine("Clustering MS/MS Spectra");
            var clusterer = new MSMSClusterer();

            clusterer.MzTolerance      = .5;
            clusterer.MassTolerance    = 6;
            clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
            {
                TopPercent = percent
            };
            clusterer.SimilarityTolerance = .5;
            clusterer.ScanRange           = 905;
            clusterer.Progress           += clusterer_Progress;

            var allFeatures = new List<UMCLight>();

            allFeatures.AddRange(baselineFeatures);
            allFeatures.AddRange(aligneeFeatures);

            var spectraProviderCache = new SpectraProviderCache();

            spectraProviderCache.GetSpectraProvider(baselineInfo.RawFile.Path, baselineInfo.DatasetId);
            spectraProviderCache.GetSpectraProvider(aligneeInfo.RawFile.Path, aligneeInfo.DatasetId);

            List<MsmsCluster> clusters = clusterer.Cluster(allFeatures, spectraProviderCache);

            // Check for null before reading Count; with no cluster list there is nothing to report.
            if (clusters == null)
            {
                Console.WriteLine("Found {0} Total Clusters", 0);
                return;
            }

            Console.WriteLine("Found {0} Total Clusters", clusters.Count);

            // Both reports share the same name prefix and timestamp.
            var now          = DateTime.Now;
            var reportPrefix = string.Format("{0}-results-{1}-{2}-{3}-{4}-{5}-{6}",
                                             name,
                                             now.Year,
                                             now.Month,
                                             now.Day,
                                             now.Hour,
                                             now.Minute,
                                             now.Second);

            // Report 1: scans and mean score of every cluster that holds exactly two features.
            var testResultPath = Path.Combine(resultPath, reportPrefix + "_scans.txt");
            using (TextWriter writer = File.CreateText(testResultPath))
            {
                writer.WriteLine("[Data]");
                writer.WriteLine("{0}", baseline);
                writer.WriteLine("{0}", features);
                writer.WriteLine("[Scans]");
                writer.WriteLine();
                foreach (var cluster in clusters)
                {
                    var scanData = "";
                    if (cluster.Features.Count == 2)
                    {
                        foreach (var feature in cluster.Features)
                        {
                            scanData += string.Format("{0},", feature.Scan);
                        }
                        scanData += string.Format("{0}", cluster.MeanScore);

                        writer.WriteLine(scanData);
                    }
                }
            }

            // Report 2: one row per cluster, followed by a detailed section per cluster.
            testResultPath = Path.Combine(resultPath, reportPrefix + ".txt");
            using (TextWriter writer = File.CreateText(testResultPath))
            {
                writer.WriteLine("[Data]");
                writer.WriteLine("{0}", baseline);
                writer.WriteLine("{0}", features);
                writer.WriteLine("[Scans]");
                foreach (var cluster in clusters)
                {
                    var scanData = "";
                    var data     = "";
                    foreach (var feature in cluster.Features)
                    {
                        scanData += string.Format("{0},", feature.Scan);

                        // Separate consecutive feature records; without the comma the last field of one
                        // feature runs straight into the dataset id of the next.
                        if (data.Length > 0)
                        {
                            data += ",";
                        }

                        data += string.Format("{0},{1},{2},{3},{4},{5}",
                                              feature.GroupId,
                                              feature.Id,
                                              feature.MassMonoisotopic,
                                              feature.Mz,
                                              feature.ChargeState,
                                              feature.Scan);
                        foreach (var spectrum in feature.MSnSpectra)
                        {
                            foreach (var peptide in spectrum.Peptides)
                            {
                                data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                            }
                        }
                    }
                    writer.WriteLine(scanData + "," + data);
                }
                writer.WriteLine("");
                writer.WriteLine("");
                writer.WriteLine("[Clusters]");

                foreach (var cluster in clusters)
                {
                    writer.WriteLine("cluster id, cluster score");
                    writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
                    writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

                    foreach (var feature in cluster.Features)
                    {
                        var data = string.Format("{0},{1},{2},{3},{4},{5}",
                                                 feature.GroupId,
                                                 feature.Id,
                                                 feature.MassMonoisotopic,
                                                 feature.Mz,
                                                 feature.ChargeState,
                                                 feature.Scan);
                        foreach (var spectrum in feature.MSnSpectra)
                        {
                            foreach (var peptide in spectrum.Peptides)
                            {
                                data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                            }
                        }
                        writer.WriteLine(data);
                    }
                }
            }
        }