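/// <summary>
        ///     Finds features for a dataset by loading its MS feature file and running the tree-based feature finder.
        /// </summary>
        /// <param name="rawFile">Path to the raw data file; it is registered with a Thermo reader and also used to create the spectra provider handed to the feature finder.</param>
        /// <param name="featureFile">Path to the MS feature file loaded through <c>UmcLoaderFactory.LoadMsFeatureData</c>.</param>
        /// <returns>The features produced by the feature finder.</returns>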
        public List<UMCLight> FindFeatures(string rawFile, string featureFile)
        {
            using (ISpectraProvider raw = new ThermoRawDataFileReader())
            {
                // Register the raw file with the Thermo reader before doing any other work.
                // NOTE(review): this reader is not passed on to the finder; presumably the call
                // acts as an early check that the raw file can be opened - confirm before removing.
                raw.AddDataFile(rawFile, 0);

                var info = new DatasetInformation
                {
                    Features = new InputFile {Path = featureFile}
                };

                var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

                var tolerances = new FeatureTolerances
                {
                    Mass = 8,
                    Net = .005
                };
                var options = new LcmsFeatureFindingOptions(tolerances);

                // Load the MS features, then build the LC-MS features from them.
                var msFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);

                // The provider is created here, so it is released here as well. A using statement
                // tolerates a null resource, which matters if the factory does not recognize the file.
                // NOTE(review): assumes the returned features do not keep reading from the provider
                // after FindFeatures returns - confirm.
                using (var provider = RawLoaderFactory.CreateFileReader(rawFile))
                {
                    return finder.FindFeatures(msFeatures, options, provider);
                }
            }
        }
Example #2
0
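        /// <summary>
        ///     Constructs a raw data file reader for reading the instrument (or equivalent) mass spectra.
        /// </summary>
        /// <param name="name">Path or file name whose extension selects the reader type.</param>
        /// <returns>
        ///     A reader chosen by extension: the Thermo reader for <c>.raw</c>, the mzXML reader for <c>.mzxml</c>,
        ///     and the InformedProteomics reader for everything else; <c>null</c> when <paramref name="name" /> is
        ///     null or names a <c>.gz</c> file that is not <c>.mzml.gz</c>.
        /// </returns>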
        public static ISpectraProvider CreateFileReader(string name)
        {
            if (name == null)
            {
                return null;
            }

            // File extensions are identifiers rather than linguistic text, so they are
            // normalized with invariant casing instead of the current culture.
            var extension = Path.GetExtension(name).ToLowerInvariant();

            switch (extension)
            {
                case ".raw":
                    return new ThermoRawDataFileReader();
                case ".mzxml":
                    return new MzXMLReader();
                case ".mzml":
                    return new InformedProteomicsReader();
                case ".gz":
                    // Only gzip-compressed mzML is handled; any other .gz file yields no reader.
                    if (name.EndsWith(".mzml.gz", System.StringComparison.OrdinalIgnoreCase))
                    {
                        return new InformedProteomicsReader();
                    }
                    return null;
                default:
                    // Unrecognized (or missing) extensions fall back to the InformedProteomics reader.
                    return new InformedProteomicsReader();
            }
        }
Example #3
0
        /// <summary>
        ///     Finds features in a baseline and an alignee dataset, clusters their MS/MS spectra across the two
        ///     raw files, and writes two timestamped text reports: a scan-pair summary and a detailed listing.
        /// </summary>
        /// <param name="name">Label used as the leading part of both result file names.</param>
        /// <param name="resultPath">Directory the result files are written to.</param>
        /// <param name="sequencePath">Sequence file path recorded on both dataset descriptions.</param>
        /// <param name="type">Sequence file type; not used by this method.</param>
        /// <param name="baseline">Path to the baseline feature file; its raw file path is derived by replacing "_isos.csv" with ".raw".</param>
        /// <param name="features">Path to the alignee feature file; its raw file path is derived the same way.</param>
        /// <param name="percent">Value assigned to <c>TopPercent</c> of the spectral comparer.</param>
        public void ClusterMsMs(string name,
            string resultPath,
            string sequencePath,
            SequenceFileType type,
            string baseline,
            string features,
            double percent)
        {
            // Field layout shared by every feature record written to the detail file.
            const string featureRecordFormat = "{0},{1},{2},{3},{4},{5}";

            var baselineRaw = baseline.Replace("_isos.csv", ".raw");
            var featuresRaw = features.Replace("_isos.csv", ".raw");

            Console.WriteLine("Create Baseline Information");

            var baselineInfo = new DatasetInformation
            {
                DatasetId = 0,
                Features = new InputFile {Path = baseline},
                Raw = new InputFile {Path = baselineRaw},
                Sequence = new InputFile {Path = sequencePath}
            };

            Console.WriteLine("Create Alignee Information");
            var aligneeInfo = new DatasetInformation
            {
                DatasetId = 1,
                Features = new InputFile {Path = features},
                Raw = new InputFile {Path = featuresRaw},
                Sequence = new InputFile {Path = sequencePath}
            };

            var reader = new MsFeatureLightFileReader();

            Console.WriteLine("Reading Baseline Features");
            var baselineMsFeatures = reader.ReadFile(baseline).ToList();
            baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

            Console.WriteLine("Reading Alignee Features");
            var aligneeMsFeatures = reader.ReadFile(features).ToList();
            aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var tolerances = new FeatureTolerances
            {
                Mass = 8,
                Net = .005
            };
            var options = new LcmsFeatureFindingOptions(tolerances);

            Console.WriteLine("Detecting Baseline Features");
            var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

            Console.WriteLine("Detecting Alignee Features");
            var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

            Console.WriteLine("Managing baseline and alignee features");
            baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
            aligneeFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

            Console.WriteLine("Clustering MS/MS Spectra");
            var clusterer = new MSMSClusterer();
            clusterer.MzTolerance = .5;
            clusterer.MassTolerance = 6;
            clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
            {
                TopPercent = percent
            };
            clusterer.SimilarityTolerance = .5;
            clusterer.ScanRange = 905;
            clusterer.Progress += clusterer_Progress;

            var allFeatures = new List<UMCLight>();
            allFeatures.AddRange(baselineFeatures);
            allFeatures.AddRange(aligneeFeatures);

            List<MsmsCluster> clusters;
            using (var rawReader = new ThermoRawDataFileReader())
            {
                rawReader.AddDataFile(baselineInfo.Raw.Path, baselineInfo.DatasetId);
                rawReader.AddDataFile(aligneeInfo.Raw.Path, aligneeInfo.DatasetId);

                clusters = clusterer.Cluster(allFeatures, rawReader);
            }

            // Check for a missing result before touching it; nothing is reported or written in that case.
            if (clusters == null)
            {
                return;
            }
            Console.WriteLine("Found {0} Total Clusters", clusters.Count);

            // Both result files share one timestamped prefix and differ only in their suffix.
            var now = DateTime.Now;
            var resultPrefix = string.Format("{7}\\{0}-results-{1}-{2}-{3}-{4}-{5}-{6}",
                name,
                now.Year,
                now.Month,
                now.Day,
                now.Hour,
                now.Minute,
                now.Second,
                resultPath
                );

            // Summary file: one "scan,scan,score" line for each cluster holding exactly two features.
            using (TextWriter writer = File.CreateText(resultPrefix + "_scans.txt"))
            {
                writer.WriteLine("[Data]");
                writer.WriteLine("{0}", baseline);
                writer.WriteLine("{0}", features);
                writer.WriteLine("[Scans]");
                writer.WriteLine();
                foreach (var cluster in clusters)
                {
                    if (cluster.Features.Count != 2)
                    {
                        continue;
                    }

                    var line = new System.Text.StringBuilder();
                    foreach (var feature in cluster.Features)
                    {
                        line.AppendFormat("{0},", feature.Scan);
                    }
                    line.AppendFormat("{0}", cluster.MeanScore);

                    writer.WriteLine(line.ToString());
                }
            }

            // Detail file: a per-cluster scan listing followed by a per-cluster feature breakdown.
            using (TextWriter writer = File.CreateText(resultPrefix + ".txt"))
            {
                writer.WriteLine("[Data]");
                writer.WriteLine("{0}", baseline);
                writer.WriteLine("{0}", features);
                writer.WriteLine("[Scans]");
                foreach (var cluster in clusters)
                {
                    var scanData = new System.Text.StringBuilder();
                    var data = new System.Text.StringBuilder();
                    foreach (var feature in cluster.Features)
                    {
                        scanData.AppendFormat("{0},", feature.Scan);

                        // Delimit consecutive feature records; without this the scan of one
                        // feature runs straight into the group id of the next.
                        if (data.Length > 0)
                        {
                            data.Append(',');
                        }
                        data.AppendFormat(featureRecordFormat,
                            feature.GroupId,
                            feature.Id,
                            feature.MassMonoisotopic,
                            feature.Mz,
                            feature.ChargeState,
                            feature.Scan);
                        foreach (var spectrum in feature.MSnSpectra)
                        {
                            foreach (var peptide in spectrum.Peptides)
                            {
                                data.AppendFormat(",{0},{1}", peptide.Sequence, peptide.Score);
                            }
                        }
                    }

                    // The scan list already ends with a comma, so the extra one leaves an empty
                    // field between the scans and the records; that layout is kept as it was.
                    writer.WriteLine(scanData.ToString() + "," + data.ToString());
                }
                writer.WriteLine("");
                writer.WriteLine("");
                writer.WriteLine("[Clusters]");

                foreach (var cluster in clusters)
                {
                    // The column headers are repeated for every cluster.
                    writer.WriteLine("cluster id, cluster score");
                    writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
                    writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

                    foreach (var feature in cluster.Features)
                    {
                        var record = new System.Text.StringBuilder();
                        record.AppendFormat(featureRecordFormat,
                            feature.GroupId,
                            feature.Id,
                            feature.MassMonoisotopic,
                            feature.Mz,
                            feature.ChargeState,
                            feature.Scan);
                        foreach (var spectrum in feature.MSnSpectra)
                        {
                            foreach (var peptide in spectrum.Peptides)
                            {
                                record.AppendFormat(",{0},{1}", peptide.Sequence, peptide.Score);
                            }
                        }
                        writer.WriteLine(record.ToString());
                    }
                }
            }
        }
 /// <summary>
 ///     Reads a single spectrum from a Thermo raw file.
 /// </summary>
 /// <param name="path">Path to the raw file, registered with the reader under id 0.</param>
 /// <param name="scan">Scan value forwarded to the reader-based overload.</param>
 /// <returns>The spectrum returned by the reader-based <c>GetSpectrum</c> overload.</returns>
 private MSSpectra GetSpectrum(string path, int scan)
 {
     // The reader is created for this call only, so release it once the spectrum has been read.
     // NOTE(review): assumes the returned spectrum does not read from the reader lazily - confirm.
     using (ISpectraProvider reader = new ThermoRawDataFileReader())
     {
         reader.AddDataFile(path, 0);
         return GetSpectrum(reader, scan, 0);
     }
 }