/// <summary>
///     Initializes the command for the supplied dataset.
/// </summary>
/// <param name="information">
///     Dataset the command is bound to. Its <c>DatasetName</c> is appended to the
///     command name, so it must not be null.
/// </param>
public FeatureDisplayCommand(DatasetInformation information)
     : base(null, AlwaysPass)
 {
     // The window reference starts out unset.
     this.m_window = null;
     this.m_information = information;
     this.m_name = string.Concat("Features ", information.DatasetName);
 }
Exemple #2
0
 /// <summary>
 ///     Initializes a factor from its identifier, dataset, name and value.
 /// </summary>
 /// <param name="factorId">Identifier stored for this factor.</param>
 /// <param name="datasetId">Dataset stored with this factor (a dataset object, despite the parameter name).</param>
 /// <param name="factorName">Name stored for this factor.</param>
 /// <param name="factorValue">Value stored for this factor.</param>
 public Factor(int factorId, DatasetInformation datasetId, string factorName, string factorValue)
 {
     this.m_id = factorId;
     this.m_dataset = datasetId;
     this.m_factorName = factorName;
     this.m_factorValue = factorValue;
 }
Exemple #3
0
 /// <summary>
 ///     Initializes the command for the supplied dataset.
 /// </summary>
 /// <param name="information">
 ///     Dataset the command is bound to. Its <c>DatasetName</c> is appended to the
 ///     command name, so it must not be null.
 /// </param>
 public FeatureDisplayCommand(DatasetInformation information)
     : base(null, AlwaysPass)
 {
     // The window reference starts out unset.
     this.m_window = null;
     this.m_information = information;
     this.m_name = string.Concat("Features ", information.DatasetName);
 }
Exemple #4
0
        /// <summary>
        ///     Loads the MS features stored in a feature file and runs the tree-based
        ///     feature finder over them.
        /// </summary>
        /// <param name="rawFile">Path of the raw data file handed to the readers.</param>
        /// <param name="featureFile">Path of the feature file to load MS features from.</param>
        /// <returns>The features produced by the feature finder.</returns>
        public List<UMCLight> FindFeatures(string rawFile, string featureFile)
        {
            List<UMCLight> features;

            using (ISpectraProvider raw = new InformedProteomicsReader())
            {
                // Read the raw file summary data...
                raw.AddDataFile(rawFile, 0);

                var info = new DatasetInformation();
                info.InputFiles.Add(new InputFile
                {
                    Path = featureFile,
                    FileType = InputFileType.Features
                });

                var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

                var tolerances = new FeatureTolerances
                {
                    Mass = 8,
                    Net = .005
                };
                var options = new LcmsFeatureFindingOptions(tolerances);

                // Load and create features
                var msFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);
                var provider = RawLoaderFactory.CreateFileReader(rawFile);
                try
                {
                    provider.AddDataFile(rawFile, 0);
                    features = finder.FindFeatures(msFeatures, options, provider);
                }
                finally
                {
                    // The factory-created reader was previously never released. Dispose it
                    // once the features are built, even when feature finding throws.
                    var disposableProvider = provider as System.IDisposable;
                    if (disposableProvider != null)
                    {
                        disposableProvider.Dispose();
                    }
                }
            }

            return features;
        }
Exemple #5
0
        /// <summary>
        ///     Runs the tree-based feature finder over the supplied MS features and
        ///     filters the resulting features.
        /// </summary>
        /// <param name="information">Dataset passed to the scan-length validation step.</param>
        /// <param name="msFeatures">MS features to build features from; must contain at least one entry.</param>
        /// <param name="options">Feature finding options (may be adjusted by the validation step).</param>
        /// <param name="filterOptions">Options used to filter the found features.</param>
        /// <param name="provider">Scan summary provider handed to the finder and the filter.</param>
        /// <param name="progress">Optional progress reporter forwarded to the finder.</param>
        /// <returns>The features that remain after filtering.</returns>
        /// <exception cref="Exception">Thrown when <paramref name="msFeatures"/> is empty.</exception>
        public List<UMCLight> CreateLcmsFeatures(
            DatasetInformation information,
            List<MSFeatureLight> msFeatures,
            LcmsFeatureFindingOptions options,
            LcmsFeatureFilteringOptions filterOptions,
            IScanSummaryProvider provider,
            IProgress<ProgressData> progress = null)
        {
            // There is nothing to work on without at least one MS feature.
            var hasMsFeatures = msFeatures.Count >= 1;
            if (!hasMsFeatures)
            {
                throw new Exception("No features were found in the feature files provided.");
            }

            UpdateStatus("Finding features.");
            ValidateFeatureFinderMaxScanLength(information, options, filterOptions);

            // Forward the finder's progress messages as status updates.
            var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            featureFinder.Progress += (sender, args) => UpdateStatus(args.Message);

            var unfiltered = featureFinder.FindFeatures(msFeatures, options, provider, progress);

            UpdateStatus("Filtering features.");
            List<UMCLight> retained = LcmsFeatureFilters.FilterFeatures(unfiltered, filterOptions, provider);

            var summary = string.Format("Filtered features from: {0} to {1}.", unfiltered.Count, retained.Count);
            UpdateStatus(summary);

            return retained;
        }
        /// <summary>
        ///     Loads the MS features stored in a feature file and runs the tree-based
        ///     feature finder over them.
        /// </summary>
        /// <param name="rawFile">Path of the raw data file handed to the readers.</param>
        /// <param name="featureFile">Path of the feature file to load MS features from.</param>
        /// <returns>The features produced by the feature finder.</returns>
        public List<UMCLight> FindFeatures(string rawFile, string featureFile)
        {
            using (ISpectraProvider rawReader = new ThermoRawDataFileReader())
            {
                // Read the raw file summary data...
                rawReader.AddDataFile(rawFile, 0);

                // The dataset record only carries the feature file path.
                var datasetInfo = new DatasetInformation();
                datasetInfo.Features = new InputFile();
                datasetInfo.Features.Path = featureFile;

                var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

                var findingOptions = new LcmsFeatureFindingOptions(new FeatureTolerances
                {
                    Mass = 8,
                    Net = .005
                });

                // Load and create features
                var msFeatures = UmcLoaderFactory.LoadMsFeatureData(datasetInfo.Features.Path);
                var scanProvider = RawLoaderFactory.CreateFileReader(rawFile);

                return featureFinder.FindFeatures(msFeatures, findingOptions, scanProvider);
            }
        }
Exemple #7
0
        /// <summary>
        ///     Loads and filters the MS features of a dataset, runs the supplied feature
        ///     finder over them, stamps the results with the dataset ID and links peptides
        ///     to the resulting features.
        /// </summary>
        /// <param name="information">Dataset supplying the feature, raw and sequence file paths, scan times and ID.</param>
        /// <param name="featureFindingOptions">Options passed to the feature finder.</param>
        /// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
        /// <param name="lcmsFilterOptions">Options used to filter the found features.</param>
        /// <param name="peptideOptions">Supplies the FDR and ID score used when linking peptides.</param>
        /// <param name="featureFinder">Feature finder to run; its progress notifier is de-registered on success.</param>
        /// <returns>The filtered features, each keeping only MS features that have MSn spectra.</returns>
        private IList<UMCLight> FindFeatures(DatasetInformation information,
                                             LcmsFeatureFindingOptions featureFindingOptions,
                                             MsFeatureFilteringOptions msFilterOptions,
                                             LcmsFeatureFilteringOptions lcmsFilterOptions,
                                             SpectralOptions peptideOptions,
                                             MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(information.Features.Path);
            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawReader = RawLoaderFactory.CreateFileReader(information.RawFile.Path))
            {
                rawReader.AddDataFile(information.RawFile.Path, 0);

                UpdateStatus("Creating LCMS Features.");
                var features = featureFinder.FindFeatures(msFeatures, featureFindingOptions, rawReader);
                features = LcmsFeatureFilters.FilterFeatures(features, lcmsFilterOptions, information.ScanTimes);

                var groupId = information.DatasetId;
                foreach (var feature in features)
                {
                    feature.GroupId = groupId;

                    // Collects the MS features of this feature that carry MSn spectra.
                    var withMsnSpectra = new List<MSFeatureLight>();
                    foreach (var msFeature in feature.MsFeatures)
                    {
                        msFeature.GroupId = groupId;
                        foreach (var spectrum in msFeature.MSnSpectra)
                        {
                            spectrum.GroupId = groupId;
                            foreach (var peptide in spectrum.Peptides)
                            {
                                peptide.GroupId = groupId;
                            }
                        }

                        if (msFeature.MSnSpectra.Count <= 0)
                        {
                            continue;
                        }

                        withMsnSpectra.Add(msFeature);
                    }

                    // We are doing this so that we don't have a ton of MS features in the database
                    feature.MsFeatures.Clear();
                    feature.MsFeatures.AddRange(withMsnSpectra);
                }

                LinkPeptidesToFeatures(information.SequenceFile.Path,
                                       features,
                                       peptideOptions.Fdr,
                                       peptideOptions.IdScore);

                DeRegisterProgressNotifier(featureFinder);
                return features;
            }
        }
Exemple #8
0
        /// <summary>
        ///     Builds the view model for a dataset: wires the removal and modify commands
        ///     and, when the dataset has plot data, adds one plot entry per plot.
        /// </summary>
        /// <param name="information">Dataset backing this view model; must not be null.</param>
        public DatasetInformationViewModel(DatasetInformation information)
        {
            m_information = information;
            var plots = information.PlotData;

            PlotData = new ObservableCollection<PlotViewModel>();

            // Raising RemovalRequested is only allowed while no work is in progress.
            RequestRemovalCommand = new BaseCommand(
                () =>
                {
                    if (RemovalRequested == null)
                    {
                        return;
                    }

                    RemovalRequested(this, EventArgs.Empty);
                },
                s => !this.DoingWork);

            if (plots != null)
            {
                // The first two plots carry a display command; the rest are plain entries.
                var alignmentCommand = new PictureDisplayCommand(plots.Alignment, "Alignment" + information.DatasetName);
                PlotData.Add(new PlotViewModel(plots.Alignment, "Alignment", alignmentCommand));

                var featuresCommand = new FeatureDisplayCommand(information);
                PlotData.Add(new PlotViewModel(plots.Features, "Features", featuresCommand));

                PlotData.Add(new PlotViewModel(plots.MassErrorHistogram, "Mass Error Histogram"));
                PlotData.Add(new PlotViewModel(plots.NetErrorHistogram, "NET Error Histogram"));
                PlotData.Add(new PlotViewModel(plots.MassScanResidual, "Mass vs Scan Residuals"));
                PlotData.Add(new PlotViewModel(plots.MassMzResidual, "Mass vs m/z Residuals"));
                PlotData.Add(new PlotViewModel(plots.NetResiduals, "NET Residuals"));
            }

            ModifyDatasetCommand = new ShowDatasetDetailCommand();
        }
Exemple #9
0
        /// <summary>
        ///     Computes NET and mass differences for every anchor-point match, builds a
        ///     residual histogram of the NET differences and writes its bins to the console.
        /// </summary>
        /// <param name="data">Alignment data whose <c>Matches</c> are evaluated.</param>
        /// <param name="baselineDatasetInformation">Not used by this method.</param>
        /// <param name="alignDatasetInformation">Not used by this method.</param>
        /// <param name="baselineFeatures">Not used by this method.</param>
        /// <param name="aligneeFeatures">Not used by this method.</param>
        private void ExportAlignmentData(AlignmentData data,
                                         DatasetInformation baselineDatasetInformation,
                                         DatasetInformation alignDatasetInformation,
                                         IEnumerable<UMCLight> baselineFeatures,
                                         IEnumerable<UMCLight> aligneeFeatures)
        {
            var netResiduals = new List<double>();
            // The mass residuals are collected but not consumed further in this method.
            var massResiduals = new List<double>();

            foreach (var anchorMatch in data.Matches)
            {
                netResiduals.Add(anchorMatch.AnchorPointX.Net - anchorMatch.AnchorPointY.Net);
                massResiduals.Add(anchorMatch.AnchorPointX.Mass - anchorMatch.AnchorPointY.Mass);
            }

            var histogram = MatchCountHistogramBuilder.CreateResidualHistogram(-.05, .05, .01, netResiduals);

            // Bin -> count lookup; it is filled here but not read again in this method.
            var countsByBin = new Dictionary<double, int>();

            Console.WriteLine();
            var binIndex = 0;
            while (binIndex < histogram.Bins.Count)
            {
                var bin = histogram.Bins[binIndex];
                var count = histogram.Data[binIndex];

                countsByBin.Add(bin, Convert.ToInt32(count));
                Console.WriteLine("{0}\t{1}", bin, count);

                binIndex++;
            }
        }
Exemple #10
0
 /// <summary>
 ///     Registers a dataset under its <c>DatasetId</c> unless that ID is already registered.
 /// </summary>
 /// <param name="dataset">Dataset to register; must not be null.</param>
 public static void AddDataset(DatasetInformation dataset)
 {
     var datasetId = dataset.DatasetId;

     // An existing entry wins; the new dataset is ignored.
     if (m_datasets.ContainsKey(datasetId))
     {
         return;
     }

     m_datasets.Add(datasetId, dataset);
 }
Exemple #11
0
 /// <summary>
 ///     Initializes a factor from its identifier, dataset, name and value.
 /// </summary>
 /// <param name="factorId">Identifier stored for this factor.</param>
 /// <param name="datasetId">Dataset stored with this factor (a dataset object, despite the parameter name).</param>
 /// <param name="factorName">Name stored for this factor.</param>
 /// <param name="factorValue">Value stored for this factor.</param>
 public Factor(int factorId, DatasetInformation datasetId, string factorName, string factorValue)
 {
     this.m_id = factorId;
     this.m_dataset = datasetId;
     this.m_factorName = factorName;
     this.m_factorValue = factorValue;
 }
        /// <summary>
        ///     Builds the view model for a dataset: wires the removal and modify commands
        ///     and, when the dataset has plot data, adds one plot entry per plot.
        /// </summary>
        /// <param name="information">Dataset backing this view model; must not be null.</param>
        public DatasetInformationViewModel(DatasetInformation information)
        {
            m_information = information;
            var plots = information.PlotData;

            PlotData = new ObservableCollection<PlotViewModel>();

            // Raising RemovalRequested is only allowed while no work is in progress.
            RequestRemovalCommand = new BaseCommand(
                () =>
                {
                    if (RemovalRequested == null)
                    {
                        return;
                    }

                    RemovalRequested(this, EventArgs.Empty);
                },
                s => !this.DoingWork);

            if (plots != null)
            {
                // The first two plots carry a display command; the rest are plain entries.
                var alignmentCommand = new PictureDisplayCommand(plots.Alignment, "Alignment" + information.DatasetName);
                PlotData.Add(new PlotViewModel(plots.Alignment, "Alignment", alignmentCommand));

                var featuresCommand = new FeatureDisplayCommand(information);
                PlotData.Add(new PlotViewModel(plots.Features, "Features", featuresCommand));

                PlotData.Add(new PlotViewModel(plots.MassErrorHistogram, "Mass Error Histogram"));
                PlotData.Add(new PlotViewModel(plots.NetErrorHistogram, "NET Error Histogram"));
                PlotData.Add(new PlotViewModel(plots.MassScanResidual, "Mass vs Scan Residuals"));
                PlotData.Add(new PlotViewModel(plots.MassMzResidual, "Mass vs m/z Residuals"));
                PlotData.Add(new PlotViewModel(plots.NetResiduals, "NET Residuals"));
            }

            ModifyDatasetCommand = new ShowDatasetDetailCommand();
        }
Exemple #13
0
        /// <summary>
        ///     Aligns the features of a dataset either to baseline features or to a mass tag
        ///     database, stamps the alignment result with the dataset name and ID, and raises
        ///     <c>FeaturesAligned</c>.
        /// </summary>
        /// <param name="features">Features of the dataset being aligned.</param>
        /// <param name="baselineFeatures">Baseline features; used only when <paramref name="baselineInfo"/> is a baseline.</param>
        /// <param name="database">Mass tag database used when no usable baseline is supplied.</param>
        /// <param name="datasetInfo">Dataset being aligned; must not be null.</param>
        /// <param name="baselineInfo">Baseline dataset, or null to align against the database.</param>
        /// <returns>The same <paramref name="features"/> list that was passed in.</returns>
        /// <exception cref="NullReferenceException">
        ///     Thrown when both <paramref name="baselineInfo"/> and <paramref name="database"/> are null.
        /// </exception>
        private IList<UMCLight> AlignDataset(
            IList<UMCLight> features,
            IEnumerable<UMCLight> baselineFeatures,
            MassTagDatabase database,
            DatasetInformation datasetInfo,
            DatasetInformation baselineInfo)
        {
            AlignmentData alignmentData;

            if (baselineInfo == null && database == null)
            {
                throw new NullReferenceException("No reference was set for LC-MS alignment.");
            }

            // align the data.
            if (baselineFeatures != null && baselineInfo != null && baselineInfo.IsBaseline)
            {
                // Align pairwise and cache results intermediately.
                var aligner = m_algorithms.DatasetAligner;
                RegisterProgressNotifier(aligner);
                try
                {
                    UpdateStatus("Aligning " + datasetInfo.DatasetName + " to baseline.");
                    alignmentData = aligner.Align(baselineFeatures, features);
                }
                finally
                {
                    // Always undo the registration, even when alignment throws.
                    DeRegisterProgressNotifier(aligner);
                }
            }
            else
            {
                // NOTE(review): this branch is also reached with a null database when
                // baselineInfo is set but is not a baseline or has no features - confirm
                // the database aligner tolerates that.
                var aligner = m_algorithms.DatabaseAligner;
                RegisterProgressNotifier(aligner);
                try
                {
                    UpdateStatus("Aligning " + datasetInfo.DatasetName + " to mass tag database.");
                    alignmentData = aligner.Align(database, features);
                }
                finally
                {
                    // Always undo the registration, even when alignment throws.
                    DeRegisterProgressNotifier(aligner);
                }
            }

            if (alignmentData != null)
            {
                alignmentData.AligneeDataset = datasetInfo.DatasetName;
                alignmentData.DatasetID = datasetInfo.DatasetId;
            }

            var args = new FeaturesAlignedEventArgs(datasetInfo,
                                                    baselineFeatures,
                                                    features,
                                                    alignmentData);

            if (FeaturesAligned != null)
            {
                FeaturesAligned(this, args);
            }

            UpdateStatus("Updating cache with aligned features.");
            return features;
        }
 /// <summary>
 ///     Creates the event arguments describing one completed dataset alignment.
 /// </summary>
 /// <param name="datasetInfo">Dataset that was aligned.</param>
 /// <param name="baselineFeatures">Baseline features used for the alignment.</param>
 /// <param name="aligneeFeatures">Features of the aligned dataset.</param>
 /// <param name="alignmentData">Result of the alignment.</param>
 public FeaturesAlignedEventArgs(DatasetInformation datasetInfo,
     IEnumerable<UMCLight> baselineFeatures,
     IEnumerable<UMCLight> aligneeFeatures,
     classAlignmentData alignmentData)
 {
     this.m_datasetInformation = datasetInfo;
     this.AlignmentData = alignmentData;
     this.BaselineFeatures = baselineFeatures;
     this.AligneeFeatures = aligneeFeatures;
 }
Exemple #15
0
 /// <summary>
 ///     Creates the event arguments describing one completed dataset alignment.
 /// </summary>
 /// <param name="datasetInfo">Dataset that was aligned.</param>
 /// <param name="baselineFeatures">Baseline features used for the alignment.</param>
 /// <param name="aligneeFeatures">Features of the aligned dataset.</param>
 /// <param name="alignmentData">Result of the alignment.</param>
 public FeaturesAlignedEventArgs(DatasetInformation datasetInfo,
                                 IEnumerable<UMCLight> baselineFeatures,
                                 IEnumerable<UMCLight> aligneeFeatures,
                                 AlignmentData alignmentData)
 {
     this.m_datasetInformation = datasetInfo;
     this.AlignmentData = alignmentData;
     this.BaselineFeatures = baselineFeatures;
     this.AligneeFeatures = aligneeFeatures;
 }
Exemple #16
0
        /// <summary>
        ///     Groups the input files by the dataset name extracted from their file names
        ///     and creates one dataset per group.
        /// </summary>
        /// <param name="inputFiles">Input files to group; must not be null.</param>
        /// <returns>
        ///     One dataset per distinct dataset name, with IDs numbered from zero in the
        ///     order the names are enumerated.
        /// </returns>
        private List<DatasetInformation> ConvertInputFilesIntoDatasets(List<InputFile> inputFiles)
        {
            var filesByDataset = new Dictionary<string, List<InputFile>>();

            foreach (var file in inputFiles)
            {
                var datasetName = ExtractDatasetName(Path.GetFileName(file.Path));

                // Single lookup instead of ContainsKey followed by the indexer.
                List<InputFile> group;
                if (!filesByDataset.TryGetValue(datasetName, out group))
                {
                    group = new List<InputFile>();
                    filesByDataset.Add(datasetName, group);
                }

                group.Add(file);
            }

            // The previous implementation consulted a map of existing datasets that was never
            // populated, and then renumbered IDs to the values they already had. Both steps
            // were no-ops and have been removed.
            var addedSets = new List<DatasetInformation>(filesByDataset.Count);
            foreach (var entry in filesByDataset)
            {
                var datasetInformation = new DatasetInformation
                {
                    DatasetId = addedSets.Count,
                    DatasetName = entry.Key
                };

                datasetInformation.InputFiles.AddRange(entry.Value);
                addedSets.Add(datasetInformation);
            }

            return addedSets;
        }
Exemple #17
0
        /// <summary>
        ///     Computes summary information about missing values and stores it on the model.
        /// </summary>
        /// <param name="model">Model whose features are inspected and whose <c>DatasetInformation</c> is replaced.</param>
        /// <param name="instances">Instances checked for missing feature values.</param>
        public static void FillDatasetInformation(InstanceModel model, IEnumerable<Instance> instances)
        {
            // Number of instances that have a missing value for at least one model feature.
            var incompleteInstanceCount = instances.Count(
                instance => model.Features.Any(feature => FeatureValue.IsMissing(instance[feature])));

            var featureInformations = model.Features
                .Select(feature => feature.FeatureInformation)
                .ToArray();

            // Sum of the per-feature missing value counts.
            var totalMissingValues = model.Features.Sum(feature => feature.FeatureInformation.MissingValueCount);

            model.DatasetInformation = new DatasetInformation
            {
                FeatureInformations = featureInformations,
                ObjectsWithIncompleteData = incompleteInstanceCount,
                GlobalAbscenseInformation = totalMissingValues
            };
        }
Exemple #18
0
        /// <summary>
        ///     Maps a scan to a value relative to the dataset's first and last scan times:
        ///     (time(scan) - time(minScan)) / (time(maxScan) - time(minScan)).
        /// </summary>
        /// <param name="dataset">Dataset whose <c>ScanTimes</c> are used; must contain <paramref name="scan"/>.</param>
        /// <param name="scan">Scan number to convert.</param>
        /// <returns>The scaled elution position of the scan.</returns>
        private double GetNet(DatasetInformation dataset, int scan)
        {
            var scanTimes = dataset.ScanTimes;

            var firstScan = scanTimes.Keys.Min();
            var startTime = scanTimes[firstScan];

            var lastScan = scanTimes.Keys.Max();
            var endTime = scanTimes[lastScan];

            var elapsed = scanTimes[scan] - startTime;
            var totalSpan = endTime - startTime;

            return elapsed / totalSpan;
        }
Exemple #19
0
        /// <summary>
        ///     Loads the features of a dataset, choosing the reader from the extension of
        ///     the dataset's feature file.
        /// </summary>
        /// <param name="dataset">Dataset supplying the feature file path and dataset ID.</param>
        /// <param name="featureCache">Cache queried for ".db3" files and for unrecognized extensions.</param>
        /// <param name="provider">
        ///     Optional scan provider. It is passed to the "_LCMSFeatures.txt" reader, is required
        ///     (as an <c>InformedProteomicsReader</c>) for ".ms1ft" files, and is used to load
        ///     MS/MS data when it is an <c>ISpectraProvider</c>.
        /// </param>
        /// <returns>
        ///     The loaded features. The list is empty when the path has no extension, or when
        ///     a ".ms1ft" file is given without an <c>InformedProteomicsReader</c>.
        /// </returns>
        public static IList<UMCLight> LoadUmcFeatureData(DatasetInformation dataset, IUmcDAO featureCache, IScanSummaryProvider provider = null)
        {
            var features = new List<UMCLight>();
            var featurePath = dataset.Features.Path;
            var extension = Path.GetExtension(featurePath);

            if (extension == null)
            {
                return features;
            }

            // Extensions are matched case-insensitively and independent of the current culture.
            switch (extension.ToUpperInvariant())
            {
                case ".TXT":
                    // The suffix check is case-insensitive to agree with the extension match;
                    // previously "_LCMSFEATURES.TXT" fell through to the generic reader.
                    if (featurePath.EndsWith("_LCMSFeatures.txt", System.StringComparison.OrdinalIgnoreCase))
                    {
                        var reader = new LcImsFeatureFileReader(provider, dataset.DatasetId);
                        features = reader.ReadFile(featurePath).ToList();
                    }
                    else
                    {
                        var umcReader = new LCMSFeatureFileReader(featurePath);
                        features = umcReader.GetUmcList();
                    }
                    break;

                case ".DB3":
                    features = featureCache.FindByDatasetId(dataset.DatasetId);
                    break;

                case ".MS1FT":
                {
                    // Without an InformedProteomicsReader the empty list is returned.
                    var promexProvider = provider as InformedProteomicsReader;
                    if (promexProvider != null)
                    {
                        var promexReader = new PromexFileReader(promexProvider, dataset.DatasetId);
                        features = promexReader.ReadFile(featurePath).ToList();
                    }
                    break;
                }

                default:
                    // Any other extension (e.g. ".csv") is served from the cache rather than
                    // rebuilding features that were already cached.
                    features = featureCache.FindByDatasetId(dataset.DatasetId);
                    break;
            }

            var spectraProvider = provider as ISpectraProvider;
            if (features != null && spectraProvider != null)
            {
                LoadMsMs(features, spectraProvider);
            }

            return features;
        }
Exemple #20
0
        /// <summary>
        ///     Picks the plot files that belong to a dataset out of a list of file names and
        ///     stores the result on the dataset.
        /// </summary>
        /// <param name="files">
        ///     Candidate file names. A file belongs to the dataset when its name contains the
        ///     dataset name (ignoring case); its plot kind is chosen by a suffix marker such as
        ///     "_features" or "_heatmap".
        /// </param>
        /// <param name="info">Dataset whose name is matched and whose <c>PlotData</c> is replaced.</param>
        /// <returns>The plot information that was also assigned to <paramref name="info"/>.</returns>
        /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="files"/> is null.</exception>
        public DatasetPlotInformation LoadDatasetPlots(string[] files, DatasetInformation info)
        {
            if (files == null)
            {
                throw new System.ArgumentNullException("files");
            }

            var plotInfo = new DatasetPlotInformation();
            var name = info.DatasetName.ToLowerInvariant();

            foreach (var filename in files)
            {
                // Lower-case only for matching; the stored path keeps its original casing
                // so it still resolves on case-sensitive file systems.
                var file = filename.ToLowerInvariant();
                if (!file.Contains(name))
                {
                    continue;
                }

                if (file.Contains("_features"))
                {
                    plotInfo.Features = filename;
                }
                else if (file.Contains("_heatmap"))
                {
                    plotInfo.Alignment = filename;
                }
                else if (file.Contains("_masshistogram"))
                {
                    plotInfo.MassErrorHistogram = filename;
                }
                else if (file.Contains("_nethistogram"))
                {
                    plotInfo.NetErrorHistogram = filename;
                }
                else if (file.Contains("_massmzresidual"))
                {
                    plotInfo.MassMzResidual = filename;
                }
                else if (file.Contains("_massscanresidual"))
                {
                    plotInfo.MassScanResidual = filename;
                }
                else if (file.Contains("_netresidual"))
                {
                    plotInfo.NetResiduals = filename;
                }
            }

            info.PlotData = plotInfo;
            return plotInfo;
        }
Exemple #21
0
        /// <summary>
        ///     Picks the plot files that belong to a dataset out of a list of file names and
        ///     stores the result on the dataset.
        /// </summary>
        /// <param name="files">
        ///     Candidate file names. A file belongs to the dataset when its name contains the
        ///     dataset name (ignoring case); its plot kind is chosen by a suffix marker such as
        ///     "_features" or "_heatmap".
        /// </param>
        /// <param name="info">Dataset whose name is matched and whose <c>PlotData</c> is replaced.</param>
        /// <returns>The plot information that was also assigned to <paramref name="info"/>.</returns>
        /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="files"/> is null.</exception>
        public DatasetPlotInformation LoadDatasetPlots(string[] files, DatasetInformation info)
        {
            if (files == null)
            {
                throw new System.ArgumentNullException("files");
            }

            var plotInfo = new DatasetPlotInformation();
            var name = info.DatasetName.ToLowerInvariant();

            foreach (var filename in files)
            {
                // Lower-case only for matching; the stored path keeps its original casing
                // so it still resolves on case-sensitive file systems.
                var file = filename.ToLowerInvariant();
                if (!file.Contains(name))
                {
                    continue;
                }

                if (file.Contains("_features"))
                {
                    plotInfo.Features = filename;
                }
                else if (file.Contains("_heatmap"))
                {
                    plotInfo.Alignment = filename;
                }
                else if (file.Contains("_masshistogram"))
                {
                    plotInfo.MassErrorHistogram = filename;
                }
                else if (file.Contains("_nethistogram"))
                {
                    plotInfo.NetErrorHistogram = filename;
                }
                else if (file.Contains("_massmzresidual"))
                {
                    plotInfo.MassMzResidual = filename;
                }
                else if (file.Contains("_massscanresidual"))
                {
                    plotInfo.MassScanResidual = filename;
                }
                else if (file.Contains("_netresidual"))
                {
                    plotInfo.NetResiduals = filename;
                }
            }

            info.PlotData = plotInfo;
            return plotInfo;
        }
Exemple #22
0
        /// <summary>
        ///     Builds the view model for a dataset, wires the removal command and
        ///     initializes the dataset state.
        /// </summary>
        /// <param name="information">Dataset backing this view model; must not be null.</param>
        public DatasetInformationViewModel(DatasetInformation information)
        {
            m_information = information;

            // The plot data is read here but not used further in this constructor.
            var plotData = information.PlotData;

            // Raising RemovalRequested is only allowed while no work is in progress.
            RequestRemovalCommand = new RelayCommand(
                () =>
                {
                    if (RemovalRequested == null)
                    {
                        return;
                    }

                    RemovalRequested(this, EventArgs.Empty);
                },
                () => !this.DoingWork);

            this.SetDatasetState();
        }
Exemple #23
0
        /// <summary>
        ///     Groups the input files by the dataset name extracted from their file names
        ///     and creates one dataset per group, optionally adding further files found
        ///     for each dataset.
        /// </summary>
        /// <param name="inputFiles">Input files to group; must not be null.</param>
        /// <param name="findAdditionalFiles">
        ///     When true, additional files are looked up for each dataset, starting from its
        ///     feature file if it has one, otherwise from its first file.
        /// </param>
        /// <returns>One dataset per distinct dataset name, with IDs numbered from zero.</returns>
        private List<DatasetInformation> CreateDatasetsFromInputFile(List<InputFile> inputFiles, bool findAdditionalFiles = false)
        {
            var filesByDataset = new Dictionary<string, List<InputFile>>();

            foreach (var inputFile in inputFiles)
            {
                var datasetName = ExtractDatasetName(System.IO.Path.GetFileName(inputFile.Path));

                List<InputFile> group;
                if (!filesByDataset.TryGetValue(datasetName, out group))
                {
                    group = new List<InputFile>();
                    filesByDataset.Add(datasetName, group);
                }

                group.Add(inputFile);
            }

            var datasets = new List<DatasetInformation>();

            foreach (var entry in filesByDataset)
            {
                var files = entry.Value;
                var datasetInformation = new DatasetInformation
                {
                    // IDs follow the position of the dataset in the result list.
                    DatasetId = datasets.Count,
                    DatasetName = entry.Key
                };

                // Get additional files
                if (findAdditionalFiles)
                {
                    // Try to use the location of the feature file first, otherwise just use first available file.
                    var fileToUse = files.FirstOrDefault(candidate => candidate.FileType == InputFileType.Features)
                                    ?? files.FirstOrDefault();
                    if (fileToUse != null)
                    {
                        files.AddRange(this.FindAdditionalDatasetFiles(fileToUse));
                    }
                }

                datasetInformation.InputFiles.AddRange(files);
                datasets.Add(datasetInformation);
            }

            return datasets;
        }
Exemple #24
0
 /// <summary>
 /// Make sure the value for options.MaximumScanRange, which is used by the Feature Finder,
 /// is at least as large as the maximum of the feature length range that is used for
 /// filtering the features by length.
 /// </summary>
 /// <param name="information">Dataset the options apply to; not read by this method.</param>
 /// <param name="options">Feature finding options; <c>MaximumScanRange</c> may be raised.</param>
 /// <param name="filterOptions">
 /// Filtering options. <c>FilterOnMinutes</c> selects which length range is consulted:
 /// the minutes range when true, the scans range when false.
 /// </param>
 private static void ValidateFeatureFinderMaxScanLength(
     DatasetInformation information,
     LcmsFeatureFindingOptions options,
     LcmsFeatureFilteringOptions filterOptions)
 {
     // The condition was previously inverted, so the range that was NOT used for
     // filtering decided the scan range.
     if (filterOptions.FilterOnMinutes)
     {
         // NOTE(review): a minutes value is compared directly against a scan count here,
         // as in the original code - confirm whether a minutes-to-scans conversion is needed.
         if (options.MaximumScanRange < filterOptions.FeatureLengthRangeMinutes.Maximum)
         {
             // Bump up the scan range used by the LCMS Feature Finder to allow for longer features
             options.MaximumScanRange = (int)filterOptions.FeatureLengthRangeMinutes.Maximum;
         }
     }
     else
     {
         if (options.MaximumScanRange < filterOptions.FeatureLengthRangeScans.Maximum)
         {
             // Bump up the scan range used by the LCMS Feature Finder to allow for longer features
             options.MaximumScanRange = (int)filterOptions.FeatureLengthRangeScans.Maximum;
         }
     }
 }
Exemple #25
0
        /// <summary>
        ///     Aligns the features of a dataset to a mass tag database and stamps the
        ///     result with the dataset name and ID.
        /// </summary>
        /// <param name="features">Features of the dataset being aligned.</param>
        /// <param name="datasetInfo">Dataset being aligned; read only when alignment data is produced.</param>
        /// <param name="mtdb">Mass tag database to align against.</param>
        /// <param name="progress">Optional progress reporter; a default one is created when null.</param>
        /// <returns>The alignment data returned by the database aligner (may be null).</returns>
        public AlignmentData AlignToDatabase(
            ref IList<UMCLight> features,
            DatasetInformation datasetInfo,
            MassTagDatabase mtdb,
            IProgress<ProgressData> progress = null)
        {
            progress = progress ?? new Progress<ProgressData>();
            var aligner = this.m_algorithms.DatabaseAligner;

            // Subscribe before aligning: the handler used to be attached only after Align
            // had already returned, so it never saw any progress events.
            aligner.Progress += aligner_Progress;
            try
            {
                var alignmentData = aligner.Align(mtdb, features, progress);

                if (alignmentData != null)
                {
                    alignmentData.AligneeDataset = datasetInfo.DatasetName;
                    alignmentData.DatasetID = datasetInfo.DatasetId;
                }

                return alignmentData;
            }
            finally
            {
                // Detach even when alignment throws so the handler does not stay attached.
                aligner.Progress -= aligner_Progress;
            }
        }
Exemple #26
0
        /// <summary>
        ///     Aligns the features of a dataset to baseline features and stamps the
        ///     result with the dataset name and ID.
        /// </summary>
        /// <param name="features">Features of the dataset being aligned.</param>
        /// <param name="datasetInfo">Dataset being aligned; read only when alignment data is produced.</param>
        /// <param name="baselineFeatures">Baseline features to align against.</param>
        /// <param name="progress">Optional progress reporter; a default one is created when null.</param>
        /// <returns>The alignment data returned by the dataset aligner (may be null).</returns>
        public AlignmentData AlignToDataset(
            ref IList<UMCLight> features,
            DatasetInformation datasetInfo,
            IEnumerable<UMCLight> baselineFeatures,
            IProgress<ProgressData> progress = null)
        {
            progress = progress ?? new Progress<ProgressData>();

            // Align pairwise and cache results intermediately.
            var aligner = this.m_algorithms.DatasetAligner;

            aligner.Progress += aligner_Progress;
            try
            {
                var alignmentData = aligner.Align(baselineFeatures, features, progress);

                if (alignmentData != null)
                {
                    alignmentData.AligneeDataset = datasetInfo.DatasetName;
                    alignmentData.DatasetID = datasetInfo.DatasetId;
                }

                return alignmentData;
            }
            finally
            {
                // Detach even when alignment throws so the handler does not stay attached.
                aligner.Progress -= aligner_Progress;
            }
        }
Exemple #27
0
        /// <summary>
        /// Partition the current view into (numSectionsPerAxis)^2 sections and select the top
        /// "featuresPerSection" feature points, by abundance, in each section.
        /// </summary>
        /// <param name="dataset">Dataset whose quad tree is queried for feature points.</param>
        /// <param name="globalMax">Mass maximum used when the mass axis reports an actual maximum of 0.</param>
        /// <returns>The distinct feature points selected across all sections.</returns>
        private IEnumerable<FeaturePoint> GetPartitionedPoints(DatasetInformation dataset, double globalMax)
        {
            // Fall back to defaults while an axis still reports a maximum of 0.
            var netUpper = this.netAxis.ActualMaximum.Equals(0) ? 1.0 : this.netAxis.ActualMaximum;
            var massUpper = this.massAxis.ActualMaximum.Equals(0) ? globalMax : this.massAxis.ActualMaximum;

            var netCellSize = (netUpper - this.netAxis.ActualMinimum) / this.numSectionsPerAxis;
            var massCellSize = (massUpper - this.massAxis.ActualMinimum) / this.numSectionsPerAxis;

            var selectedPoints = new HashSet<FeaturePoint>();
            var tree = this.quadTrees[dataset];

            for (int netIndex = 0; netIndex < this.numSectionsPerAxis; netIndex++)
            {
                var netLow = this.netAxis.ActualMinimum + (netIndex * netCellSize);
                var netHigh = this.netAxis.ActualMinimum + ((netIndex + 1) * netCellSize);

                for (int massIndex = 0; massIndex < this.numSectionsPerAxis; massIndex++)
                {
                    var massLow = this.massAxis.ActualMinimum + (massIndex * massCellSize);
                    var massHigh = this.massAxis.ActualMinimum + ((massIndex + 1) * massCellSize);

                    // NET runs along X, mass along Y.
                    var cell = new RectangleF
                    {
                        X = (float)netLow,
                        Y = (float)massLow,
                        Width = (float)(netHigh - netLow),
                        Height = (float)(massHigh - massLow)
                    };

                    var topInCell = tree.Query(cell)
                                        .OrderByDescending(point => point.UMCLight.Abundance)
                                        .Take(this.featuresPerSection);

                    selectedPoints.UnionWith(topInCell);
                }
            }

            return selectedPoints;
        }
        /// <summary>
        ///     Aligns a set of features to the baseline features with the configured dataset
        ///     aligner and stamps the alignee dataset's name and ID on the result.
        /// </summary>
        /// <param name="features">Features of the dataset being aligned.</param>
        /// <param name="baselineFeatures">Features of the baseline dataset.</param>
        /// <param name="datasetInfo">Alignee dataset; supplies the name and ID stored on the result.</param>
        /// <param name="baselineInfo">Baseline dataset; only checked for null here.</param>
        /// <returns>The alignment data produced by the aligner, or null if the aligner returned null.</returns>
        /// <exception cref="NullReferenceException">Thrown when <paramref name="baselineInfo"/> is null.</exception>
        public classAlignmentData AlignToDataset(
            ref IList<UMCLight> features,
            IEnumerable<UMCLight> baselineFeatures,
            DatasetInformation datasetInfo,
            DatasetInformation baselineInfo)
        {
            if (baselineInfo == null)
            {
                throw new NullReferenceException("No reference was set for LC-MS alignment.");
            }

            // Pairwise alignment of the alignee features against the baseline.
            classAlignmentData result = this.m_algorithms.DatasetAligner.Align(baselineFeatures, features);
            if (result == null)
            {
                return result;
            }

            result.aligneeDataset = datasetInfo.DatasetName;
            result.DatasetID = datasetInfo.DatasetId;
            return result;
        }
        /// <summary>
        ///     Loads the baseline features used as the alignment reference, either from the
        ///     baseline dataset or by converting the mass tags of the mass tag database.
        /// </summary>
        /// <param name="baselineInfo">Baseline dataset; required when the mass tag database is not the baseline.</param>
        /// <param name="msFilterOptions">MS feature filter options passed to the feature loader.</param>
        /// <param name="lcmsFindingOptions">LC-MS feature finding options passed to the feature loader.</param>
        /// <param name="lcmsFilterOptions">LC-MS feature filter options passed to the feature loader.</param>
        /// <param name="dataProviders">Data access providers assigned to the feature loader.</param>
        /// <param name="database">Mass tag database; required when it is used as the baseline.</param>
        /// <param name="shouldUseMassTagDbAsBaseline">True to take the baseline from the mass tag database.</param>
        /// <returns>
        ///     The baseline features. For the mass tag database these are the converted mass tags,
        ///     whether or not anything subscribes to BaselineFeaturesLoaded.
        /// </returns>
        /// <exception cref="Exception">The baseline dataset is required but was null.</exception>
        /// <exception cref="NullReferenceException">The mass tag database is required but was null.</exception>
        private IList<UMCLight> LoadBaselineData(DatasetInformation baselineInfo,
            MsFeatureFilteringOptions msFilterOptions,
            LcmsFeatureFindingOptions lcmsFindingOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            FeatureDataAccessProviders dataProviders,
            MassTagDatabase database,
            bool shouldUseMassTagDbAsBaseline)
        {
            UpdateStatus("Loading baseline features.");

            if (!shouldUseMassTagDbAsBaseline)
            {
                if (baselineInfo == null)
                {
                    throw new Exception("The baseline dataset was never set.");
                }

                var cache = new FeatureLoader
                {
                    Providers = dataProviders
                };

                RegisterProgressNotifier(cache);
                try
                {
                    UpdateStatus("Loading baseline features from " + baselineInfo.DatasetName + " for alignment.");

                    IList<UMCLight> baselineFeatures = cache.LoadDataset(baselineInfo,
                        msFilterOptions,
                        lcmsFindingOptions,
                        lcmsFilterOptions);

                    cache.CacheFeatures(baselineFeatures);

                    // Copy the delegate so a concurrent unsubscribe cannot null it between check and call.
                    var loadedHandler = BaselineFeaturesLoaded;
                    if (loadedHandler != null)
                    {
                        loadedHandler(this,
                            new BaselineFeaturesLoadedEventArgs(baselineInfo, baselineFeatures.ToList()));
                    }

                    return baselineFeatures;
                }
                finally
                {
                    // Always detach the loader, even when loading or caching throws.
                    DeRegisterProgressNotifier(cache);
                }
            }

            if (database == null)
                throw new NullReferenceException(
                    "The mass tag database has to have data in it if it's being used for drift time alignment.");

            UpdateStatus("Setting baseline features for post drift time alignment from mass tag database.");
            var tags = FeatureDataConverters.ConvertToUMC(database.MassTags);

            var tagsHandler = BaselineFeaturesLoaded;
            if (tagsHandler != null && tags != null)
            {
                tagsHandler(this, new BaselineFeaturesLoadedEventArgs(null, tags.ToList(), database));
            }

            // Previously null was returned here whenever the event had a subscriber, so the
            // result depended on event wiring. The converted tags are now returned in both cases.
            return tags;
        }
Exemple #30
0
        /// <summary>
        /// Builds the scatter points for the MS features of an LCMS feature, together with a
        /// rectangle annotation that frames those points with some padding.
        /// </summary>
        /// <param name="feature">An LCMS feature.</param>
        /// <param name="dataset">The dataset that the LCMS feature comes from.</param>
        /// <returns>The tuple containing the LCMS feature annotation and the MS feature scatter points.</returns>
        private Tuple <RectangleAnnotation, IEnumerable <ScatterPoint> > GetMsFeaturesAndAnnotations(FeaturePoint feature, DatasetInformation dataset)
        {
            var msFeatures = feature.UMCLight.MsFeatures;
            var points     = new List <ScatterPoint>(msFeatures.Count);

            var netLow   = double.PositiveInfinity;
            var netHigh  = 0.0;
            var massLow  = double.PositiveInfinity;
            var massHigh = 0.0;

            // Track the NET/mass bounding box while creating one scatter point per MS feature.
            foreach (var msFeature in msFeatures)
            {
                var net  = this.GetNet(dataset, msFeature.Scan);
                var mass = msFeature.MassMonoisotopic;

                netLow   = Math.Min(netLow, net);
                netHigh  = Math.Max(netHigh, net);
                massLow  = Math.Min(massLow, mass);
                massHigh = Math.Max(massHigh, mass);

                points.Add(new ScatterPoint(net, mass, 0.8));
            }

            // A zero-width NET range gets a small width so the rectangle is still visible.
            var netSpan = netHigh - netLow;
            if (netSpan.Equals(0.0))
            {
                netSpan = 0.01;
            }

            // The mass range used for padding is at least 1.0.
            var massSpan = Math.Max(1.0, massHigh - massLow);

            // Pad by a quarter of the NET span and half of the mass span; mass never goes below zero.
            var annotation = new RectangleAnnotation
            {
                MinimumX        = netLow - (0.25 * netSpan),
                MaximumX        = netHigh + (0.25 * netSpan),
                MinimumY        = Math.Max(massLow - (massSpan * 0.5), 0),
                MaximumY        = massHigh + (massSpan * 0.5),
                Fill            = OxyColors.Transparent,
                StrokeThickness = 1.0,
            };

            return Tuple.Create <RectangleAnnotation, IEnumerable <ScatterPoint> >(annotation, points);
        }
        /// <summary>
        ///  Loads and filters the MS features of a dataset, builds and filters LC-MS features
        ///  from them, stamps the dataset ID on every level of each feature, and links
        ///  peptides to the features.
        /// </summary>
        /// <param name="information">Dataset whose feature, raw and sequence paths are read.</param>
        /// <param name="featureFindingOptions">Options passed to the feature finder.</param>
        /// <param name="msFilterOptions">Filter options applied to the loaded MS features.</param>
        /// <param name="lcmsFilterOptions">Filter options applied to the LC-MS features found.</param>
        /// <param name="peptideOptions">Supplies the FDR and ID score used when linking peptides.</param>
        /// <param name="featureFinder">Algorithm that builds LC-MS features from MS features.</param>
        /// <returns>The filtered LC-MS features.</returns>
        private IList<UMCLight> FindFeatures(
            DatasetInformation information,
            LcmsFeatureFindingOptions featureFindingOptions,
            MsFeatureFilteringOptions msFilterOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            SpectralOptions peptideOptions,
            IFeatureFinder featureFinder)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(information.Features.Path);
            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // The raw file reader is only needed while features are being built and linked.
            using (var rawReader = RawLoaderFactory.CreateFileReader(information.RawPath))
            {
                rawReader.AddDataFile(information.RawPath, 0);

                UpdateStatus("Creating LCMS Features.");
                var features = featureFinder.FindFeatures(msFeatures, featureFindingOptions, rawReader);
                features = LcmsFeatureFilters.FilterFeatures(features, lcmsFilterOptions);

                var groupId = information.DatasetId;
                foreach (var feature in features)
                {
                    feature.GroupId = groupId;

                    var withSpectra = new List<MSFeatureLight>();
                    foreach (var msFeature in feature.MsFeatures)
                    {
                        msFeature.GroupId = groupId;

                        foreach (var spectrum in msFeature.MSnSpectra)
                        {
                            spectrum.GroupId = groupId;
                            foreach (var peptide in spectrum.Peptides)
                            {
                                peptide.GroupId = groupId;
                            }
                        }

                        if (msFeature.MSnSpectra.Count <= 0)
                        {
                            continue;
                        }

                        withSpectra.Add(msFeature);
                    }

                    // Keep only MS features that carry MSn spectra so the database does not
                    // fill up with MS features.
                    feature.MsFeatures.Clear();
                    feature.MsFeatures.AddRange(withSpectra);
                }

                LinkPeptidesToFeatures(information.SequencePath, features, peptideOptions.Fdr, peptideOptions.IdScore);

                DeRegisterProgressNotifier(featureFinder);
                return features;
            }
        }
        /// <summary>
        /// Converts a scan number into a normalized elution time: the scan's time rescaled
        /// relative to the times of the lowest and highest scan numbers in the dataset.
        /// </summary>
        /// <param name="dataset">Dataset providing the scan-number-to-time table.</param>
        /// <param name="scan">Scan number to convert; must be a key of the scan time table.</param>
        /// <returns>(time of scan - time of lowest scan) / (time of highest scan - time of lowest scan).</returns>
        private double GetNet(DatasetInformation dataset, int scan)
        {
            var scanTimes = dataset.ScanTimes;

            // No guard here: an empty table or identical first/last times is not handled.
            var firstTime = scanTimes[scanTimes.Keys.Min()];
            var lastTime  = scanTimes[scanTimes.Keys.Max()];
            var scanTime  = scanTimes[scan];

            return (scanTime - firstTime) / (lastTime - firstTime);
        }
Exemple #33
0
        /// <summary>
        ///     Groups the given input files by dataset name and attaches them to the matching
        ///     datasets, creating a new dataset for every name that is not in the list yet.
        ///     All dataset IDs are renumbered sequentially afterwards.
        /// </summary>
        /// <param name="inputFiles">
        ///     Input files to attach. The dataset name is extracted from each file's name.
        /// </param>
        /// <returns>
        ///     The datasets that were newly created and added. Datasets that already existed
        ///     are updated in place and are not part of the result.
        /// </returns>
        public List<DatasetInformation> AddInputFiles(List<InputFile> inputFiles)
        {
            var addedSets = new List<DatasetInformation>();

            // Index the datasets that already exist by name.
            var datasetMap = new Dictionary<string, DatasetInformation>();
            foreach (var x in Datasets)
            {
                datasetMap.Add(x.DatasetName, x);
            }

            // Group the incoming files by the dataset name derived from their file name.
            var inputMap = new Dictionary<string, List<InputFile>>();
            foreach (var file in inputFiles)
            {
                var name = Path.GetFileName(file.Path);
                var datasetName = DatasetInformation.ExtractDatasetName(name);

                List<InputFile> group;
                if (!inputMap.TryGetValue(datasetName, out group))
                {
                    group = new List<InputFile>();
                    inputMap.Add(datasetName, group);
                }

                group.Add(file);
            }

            var i = 0;
            foreach (var entry in inputMap)
            {
                // The counter advances for every dataset name, as before, so the provisional
                // IDs of new datasets are unchanged.
                var provisionalId = i++;

                // Reuse the existing dataset when there is one; only build a new one when needed.
                DatasetInformation datasetInformation;
                var doesDatasetExist = datasetMap.TryGetValue(entry.Key, out datasetInformation);
                if (!doesDatasetExist)
                {
                    datasetInformation = new DatasetInformation();
                    datasetInformation.DatasetId = provisionalId;
                    datasetInformation.DatasetName = entry.Key;
                }

                foreach (var file in entry.Value)
                {
                    switch (file.FileType)
                    {
                        case InputFileType.Features:
                            datasetInformation.Features = file;
                            datasetInformation.Path = file.Path;
                            break;
                        case InputFileType.Scans:
                            datasetInformation.Scans = file;
                            break;
                        case InputFileType.Raw:
                            datasetInformation.Raw = file;
                            break;
                        case InputFileType.Sequence:
                            datasetInformation.Sequence = file;
                            break;
                    }
                }

                // Add the dataset
                if (!doesDatasetExist)
                {
                    addedSets.Add(datasetInformation);
                    Datasets.Add(datasetInformation);
                }
            }

            // Reformat their Id's
            var id = 0;

            foreach (var x in Datasets)
            {
                x.DatasetId = id++;
            }

            return addedSets;
        }
        /// <summary>
        ///     Runs the MultiAlign analysis: builds features for the baseline dataset, then for
        ///     every alignee dataset builds its features, clusters baseline and alignee features
        ///     together, aligns the alignee to the baseline, clusters again, reports the
        ///     same/different cluster counts before and after, and saves the alignment matches.
        /// </summary>
        /// <param name="baselineDataset">Dataset used as the alignment reference.</param>
        /// <param name="aligneeDatasets">Datasets aligned to the baseline, one at a time.</param>
        /// <param name="featureFindingOptions">Options passed to the feature finder.</param>
        /// <param name="msFilterOptions">Filter options applied to the loaded MS features.</param>
        /// <param name="lcmsFilterOptions">Not referenced in this method body.</param>
        /// <param name="peptideOptions">Supplies the FDR and ID score used when linking peptides.</param>
        /// <param name="featureFinder">Algorithm that builds LC-MS features from MS features.</param>
        /// <param name="aligner">Aligner; receives the baseline and alignee spectra providers before each alignment.</param>
        /// <param name="clusterer">Clusterer run on the combined features before and after alignment.</param>
        /// <param name="matchPath">Path passed to SaveMatches for every alignee dataset.</param>
        /// <param name="errorPath">Path passed to WriteErrors for every alignee dataset.</param>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
            IEnumerable<DatasetInformation> aligneeDatasets,
            LcmsFeatureFindingOptions featureFindingOptions,
            MsFeatureFilteringOptions msFilterOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            SpectralOptions peptideOptions,
            IFeatureFinder featureFinder,
            IFeatureAligner<IEnumerable<UMCLight>,
            IEnumerable<UMCLight>,
            classAlignmentData> aligner,
            IClusterer<UMCLight, UMCClusterLight> clusterer,
            string matchPath,
            string errorPath)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
            {
                rawProviderX.AddDataFile(baselineDataset.RawPath, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                    featureFindingOptions,
                    rawProviderX);
                LinkPeptidesToFeatures(baselineDataset.SequencePath, baselineFeatures, peptideOptions.Fdr,
                    peptideOptions.IdScore);

                // The baseline provider is created once and reused for every alignee dataset.
                var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                // Then load the alignee dataset
                foreach (var dataset in aligneeDatasets)
                {
                    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                    aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                    using (var rawProviderY = RawLoaderFactory.CreateFileReader(dataset.RawPath))
                    {
                        rawProviderY.AddDataFile(dataset.RawPath, 0);

                        UpdateStatus("Finding alignee features");
                        var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                            featureFindingOptions,
                            rawProviderY);
                        LinkPeptidesToFeatures(dataset.SequencePath, aligneeFeatures, peptideOptions.Fdr,
                            peptideOptions.IdScore);

                        var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                        // cluster before we do anything else....
                        var allFeatures = new List<UMCLight>();
                        allFeatures.AddRange(baselineFeatures);
                        allFeatures.AddRange(aligneeFeatures);
                        foreach (var feature in allFeatures)
                        {
                            // NOTE(review): this assigns Net to itself; a different target
                            // property may have been intended - confirm.
                            feature.Net = feature.Net;
                            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        }

                        // This tells us the differences before we align.
                        var clusters = clusterer.Cluster(allFeatures);
                        var preAlignment = AnalyzeClusters(clusters);

                        aligner.AligneeSpectraProvider = providerY;
                        aligner.BaselineSpectraProvider = providerX;

                        UpdateStatus("Aligning data");
                        // Aligner data
                        // NOTE(review): the result is dereferenced without a null check.
                        var data = aligner.Align(baselineFeatures, aligneeFeatures);
                        var matches = data.Matches;

                        WriteErrors(errorPath, matches);

                        // create anchor points for LCMSWarp alignment
                        // NOTE(review): massPoints and netPoints are filled below but never read
                        // afterwards in this method.
                        var massPoints = new List<RegressionPoint>();
                        var netPoints = new List<RegressionPoint>();
                        foreach (var match in matches)
                        {
                            var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz,
                                match.AnchorPointY.Mz);
                            var netError = match.AnchorPointX.Net - match.AnchorPointY.Net;
                            var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                            massPoints.Add(massPoint);

                            var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                            netPoints.Add(netPoint);
                        }

                        // Clear the cluster assignment made by the pre-alignment clustering.
                        foreach (var feature in allFeatures)
                        {
                            feature.UmcCluster = null;
                            feature.ClusterId = -1;
                        }
                        // Then cluster after alignment!
                        UpdateStatus("clustering data");
                        clusters = clusterer.Cluster(allFeatures);
                        var postAlignment = AnalyzeClusters(clusters);

                        // Tab-separated summary: one header row, then the pre- and post-alignment counts.
                        UpdateStatus("Note\tSame\tDifferent");
                        UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                            preAlignment.DifferentCluster));
                        UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                            postAlignment.DifferentCluster));

                        SaveMatches(matchPath, matches);
                    }
                }
            }

            // NOTE(review): no matching RegisterProgressNotifier calls appear in this method, and
            // these calls are skipped if anything above throws - confirm where registration happens.
            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(featureFinder);
            DeRegisterProgressNotifier(clusterer);
        }
 /// <summary>
 ///     Creates the event arguments carrying a dataset and the features loaded for it.
 /// </summary>
 /// <param name="info">Dataset information object</param>
 /// <param name="features">Features stored on the event arguments.</param>
 public FeaturesLoadedEventArgs(DatasetInformation info, IList<UMCLight> features)
 {
     this.Features = features;
     this.DatasetInformation = info;
 }
Exemple #36
0
        /// <summary>
        ///     Filters the list of MS Features that may be from MS/MS deisotoped data, keeping
        ///     only features whose scan the provider reports as MS level 1, and stores the scan
        ///     times read from the provider on the dataset.
        /// </summary>
        /// <param name="msFeatures">MS features to filter.</param>
        /// <param name="provider">Source of scan summaries; may be null when the raw file was not found.</param>
        /// <param name="dataset">Dataset whose raw file path is checked and whose ScanTimes are updated.</param>
        /// <returns>
        ///     The features that come from MS level 1 scans. The input list is returned unfiltered
        ///     when the dataset has no raw file path or when no provider is available, because
        ///     the MS level of the scans cannot be determined in those cases.
        /// </returns>
        public List <MSFeatureLight> Filter(List <MSFeatureLight> msFeatures, IScanSummaryProvider provider, ref DatasetInformation dataset)
        {
            string rawPath = dataset.RawFile.Path;

            // IsNullOrWhiteSpace already covers null.
            if (string.IsNullOrWhiteSpace(rawPath))
            {
                return msFeatures;
            }

            if (provider == null)
            {
                UpdateStatus(string.Format("Warning: Raw file not found ({0}); scan times are not available!", System.IO.Path.GetFileName(rawPath)));

                // Without scan summaries no scan can be classified. Previously this path
                // returned an empty list and silently dropped every feature; the features are
                // now passed through, matching the no-raw-path case above.
                return msFeatures;
            }

            UpdateStatus(string.Format("Reading scan info from {0}", System.IO.Path.GetFileName(rawPath)));

            // Each distinct scan is looked up once, in order of first appearance.
            var uniqueScans = msFeatures.Select(x => x.Scan).Distinct().ToList();
            var fullScans   = new HashSet <int>();
            var scanTimes   = dataset.ScanTimes;

            foreach (var scan in uniqueScans)
            {
                ScanSummary summary = provider.GetScanSummary(scan);

                if (summary == null)
                {
                    // Scans without a summary are neither timed nor kept.
                    continue;
                }
                if (summary.MsLevel == 1)
                {
                    fullScans.Add(scan);
                }

                // The indexer adds the entry or overwrites an existing one.
                scanTimes[scan] = summary.Time;
            }
            dataset.ScanTimes = scanTimes;

            return msFeatures.Where(x => fullScans.Contains(x.Scan)).ToList();
        }
Exemple #37
0
        /// <summary>
        /// Recreates the database at <paramref name="databasePath"/> and stores dummy datasets
        /// plus randomly generated features in it.
        /// </summary>
        /// <param name="databasePath">Database file; deleted before the connection is opened.</param>
        /// <param name="totalDatasets">Number of dummy datasets ("test0", "test1", ...) to create.</param>
        /// <param name="totalClusters">Number of feature groups to generate; each group shares a charge, base NET, mass and drift time.</param>
        public void TestCreateDummyDatabase(string databasePath, int totalDatasets, int totalClusters)
        {
            File.Delete(databasePath);
            NHibernateUtil.ConnectToDatabase(databasePath, true);

            IDatasetDAO    datasetCache = new DatasetDAOHibernate();
            IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
            IUmcDAO        featureCache = new UmcDAOHibernate();

            // Creating a dataset
            Console.WriteLine("Creating dummy datasets");
            var datasets = new List <DatasetInformation>();

            for (var datasetIndex = 0; datasetIndex < totalDatasets; datasetIndex++)
            {
                datasets.Add(new DatasetInformation
                {
                    DatasetId   = datasetIndex,
                    DatasetName = "test" + datasetIndex
                });
            }
            datasetCache.AddAll(datasets);
            datasets.Clear();
            datasets = datasetCache.FindAll();

            // Create features
            Console.WriteLine("Creating features");
            var features = new List <UMCLight>();
            var clusters = new List <UMCClusterLight>();
            var random   = new Random();

            var nextFeatureId = 0;

            for (var clusterIndex = 0; clusterIndex < totalClusters; clusterIndex++)
            {
                var memberCount    = random.Next(1, totalDatasets);
                var charge         = random.Next(1, 10);
                var usedDatasetIds = new HashSet <int>();

                var net  = random.NextDouble();
                var mass = 400 + (1600 * random.NextDouble());
                var dt   = 60 * random.NextDouble();

                for (var member = 0; member < memberCount; member++)
                {
                    // Draw dataset IDs until one comes up that this group has not used yet.
                    int did;
                    do
                    {
                        did = random.Next(0, totalDatasets);
                    } while (!usedDatasetIds.Add(did));

                    var feature = new UMCLight();
                    feature.GroupId                 = did;
                    feature.Id                      = nextFeatureId++;
                    feature.ChargeState             = charge;
                    feature.MassMonoisotopic        = FeatureLight.ComputeDaDifferenceFromPPM(mass, 3);
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                    feature.Net                     = net + .03 * random.NextDouble();
                    feature.NetAligned              = feature.Net;
                    // NOTE(review): self-assignment kept as in the original - confirm intent.
                    feature.Net                     = feature.Net;
                    feature.DriftTime               = dt;
                    feature.AbundanceSum            = random.Next(100, 200);
                    // NOTE(review): self-assignment kept as in the original - confirm intent.
                    feature.Abundance               = feature.Abundance;
                    feature.ClusterId               = -1;
                    features.Add(feature);
                }
            }
            featureCache.AddAll(features);
        }
Exemple #38
0
        /// <summary>
        /// Make sure the value for options.MaximumScanRange, which is used by the Feature Finder,
        /// is at least as large as the filterOptions.FeatureLengthRange.Maximum value,
        /// which is used for filtering the features by length
        /// </summary>
        /// <param name="information">Dataset whose scan times are used to estimate scans per minute.</param>
        /// <param name="options">Feature finding options; MaximumScanRange is raised when it is too small.</param>
        /// <param name="filterOptions">Filter options supplying FeatureLengthRange.Maximum and TreatAsTimeNotScan.</param>
        private static void ValidateFeatureFinderMaxScanLength(
            DatasetInformation information,
            LcmsFeatureFindingOptions options,
            LcmsFeatureFilteringOptions filterOptions)
        {
            if (!filterOptions.TreatAsTimeNotScan)
            {
                // The length filter is already expressed in scans; compare directly.
                if (options.MaximumScanRange < filterOptions.FeatureLengthRange.Maximum)
                {
                    // Bump up the scan range used by the LCMS Feature Finder to allow for longer features
                    options.MaximumScanRange = (int)filterOptions.FeatureLengthRange.Maximum;
                }
                return;
            }

            int maxScanLength;

            if (information.ScanTimes.Count == 0)
            {
                // FeatureLengthRange.Maximum is in minutes
                // Assume 3 scans/second (ballpark estimate)
                // The cast must wrap the whole product: "(int)Maximum * 60 * 3" truncated the
                // minutes first, so e.g. 0.5 minutes produced 0 scans instead of 90.
                maxScanLength = (int)(filterOptions.FeatureLengthRange.Maximum * 60 * 3);
            }
            else
            {
                // Find the average number of scans that spans FeatureLengthRange.Maximum minutes

                // Step through the dictionary to find the average number of scans per minute
                // NOTE(review): the scan that crosses a minute threshold is not counted, the
                // threshold advances by one minute per crossing, the last partial minute is never
                // added, and entries are assumed to enumerate in increasing time order - confirm
                // that this approximation is acceptable.
                var minuteThreshold = 1;
                var scanCountCurrent = 0;
                var scanCountsPerMinute = new List<int>();

                foreach (var entry in information.ScanTimes)
                {
                    if (entry.Value < minuteThreshold)
                    {
                        scanCountCurrent++;
                    }
                    else
                    {
                        if (scanCountCurrent > 0)
                        {
                            scanCountsPerMinute.Add(scanCountCurrent);
                        }
                        scanCountCurrent = 0;
                        minuteThreshold++;
                    }
                }

                int averageScansPerMinute;
                if (scanCountsPerMinute.Count > 0)
                {
                    averageScansPerMinute = (int)scanCountsPerMinute.Average();
                }
                else
                {
                    // Same 3 scans/second fallback as above.
                    averageScansPerMinute = 180;
                }

                // 25% head room on top of the estimated scan count.
                maxScanLength = (int)(filterOptions.FeatureLengthRange.Maximum * averageScansPerMinute * 1.25);
            }

            if (options.MaximumScanRange < maxScanLength)
            {
                // Bump up the scan range used by the LCMS Feature Finder to allow for longer features
                options.MaximumScanRange = maxScanLength;
            }
        }
Exemple #39
0
        /// <summary>
        ///     Recreates the database at <paramref name="databasePath"/>, fills it with ten dummy
        ///     datasets and 100 randomly generated clusters with their features, reads the
        ///     clusters for the given charge back and passes them to WriteClusters.
        /// </summary>
        /// <param name="databasePath">Database file; deleted before the connection is opened.</param>
        /// <param name="crossPath">Path passed on to WriteClusters.</param>
        /// <param name="charge">Charge state assigned to every cluster and feature, and used for the final lookup.</param>
        /// <param name="minimumClusterSize">Minimum cluster size passed on to WriteClusters.</param>
        public void TestClusterGeneration(string databasePath,
            string crossPath,
            int charge,
            int minimumClusterSize)
        {
            File.Delete(databasePath);
            NHibernateUtil.ConnectToDatabase(databasePath, true);

            IDatasetDAO datasetCache = new DatasetDAOHibernate();
            IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
            IUmcDAO featureCache = new UmcDAOHibernate();

            // Creating a dataset
            Console.WriteLine("Creating dummy datasets");
            const int datasetCount = 10;
            var datasets = new List<DatasetInformation>();
            for (var datasetIndex = 0; datasetIndex < datasetCount; datasetIndex++)
            {
                datasets.Add(new DatasetInformation
                {
                    DatasetId = datasetIndex,
                    DatasetName = "test" + datasetIndex
                });
            }
            datasetCache.AddAll(datasets);
            datasets.Clear();
            datasets = datasetCache.FindAll();

            // Create features
            Console.WriteLine("Creating features");
            var features = new List<UMCLight>();
            var clusters = new List<UMCClusterLight>();
            var random = new Random();
            var nextFeatureId = 0;
            for (var clusterIndex = 0; clusterIndex < 100; clusterIndex++)
            {
                var cluster = new UMCClusterLight();
                cluster.Id = clusterIndex;
                cluster.AmbiguityScore = clusterIndex;
                cluster.Tightness = clusterIndex;

                var memberCount = random.Next(1, datasetCount);
                // NOTE(review): Id is assigned a second time, kept as in the original.
                cluster.Id = clusterIndex;
                cluster.ChargeState = charge;
                var usedDatasetIds = new HashSet<int>();

                for (var member = 0; member < memberCount; member++)
                {
                    // Draw dataset IDs until one comes up that this cluster has not used yet.
                    int did;
                    do
                    {
                        did = random.Next(0, datasetCount);
                    } while (!usedDatasetIds.Add(did));

                    var feature = new UMCLight();
                    feature.GroupId = did;
                    feature.Id = nextFeatureId++;
                    feature.ChargeState = charge;
                    feature.MassMonoisotopic = random.NextDouble();
                    feature.Net = random.NextDouble();
                    feature.AbundanceSum = random.Next(100, 200);
                    // NOTE(review): self-assignment kept as in the original - confirm intent.
                    feature.Abundance = feature.Abundance;
                    feature.ClusterId = cluster.Id;

                    cluster.AddChildFeature(feature);
                    features.Add(feature);
                }
                cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                clusters.Add(cluster);
            }
            featureCache.AddAll(features);
            clusterCache.AddAll(clusters);
            // The full result is replaced by the per-charge lookup right below.
            clusters = clusterCache.FindAll();

            Console.WriteLine("Find all clusters");
            clusters = clusterCache.FindByCharge(charge);

            WriteClusters(datasets, clusters, minimumClusterSize, charge, crossPath, databasePath, 300000);
        }
        /// <summary>
        ///     Aligns a dataset's features either to the baseline dataset or to the mass tag
        ///     database, stamps the alignee dataset on the alignment data and raises
        ///     FeaturesAligned.
        /// </summary>
        /// <param name="features">Features of the dataset being aligned.</param>
        /// <param name="baselineFeatures">Baseline features; used when a baseline dataset is set and flagged as baseline.</param>
        /// <param name="database">Mass tag database; used as the reference otherwise.</param>
        /// <param name="datasetInfo">Alignee dataset; supplies the name and ID stored on the alignment data.</param>
        /// <param name="baselineInfo">Baseline dataset, or null when aligning to the database.</param>
        /// <returns>The <paramref name="features"/> list that was passed in.</returns>
        /// <exception cref="NullReferenceException">Both <paramref name="baselineInfo"/> and <paramref name="database"/> are null.</exception>
        private IList<UMCLight> AlignDataset(
            IList<UMCLight> features,
            IEnumerable<UMCLight> baselineFeatures,
            MassTagDatabase database,
            DatasetInformation datasetInfo,
            DatasetInformation baselineInfo)
        {
            classAlignmentData alignmentData;

            if (baselineInfo == null && database == null)
            {
                throw new NullReferenceException("No reference was set for LC-MS alignment.");
            }

            // align the data.
            if (baselineFeatures != null && baselineInfo != null && baselineInfo.IsBaseline)
            {
                // Align pairwise against the baseline dataset.
                var aligner = m_algorithms.DatasetAligner;
                RegisterProgressNotifier(aligner);
                try
                {
                    UpdateStatus("Aligning " + datasetInfo.DatasetName + " to baseline.");
                    alignmentData = aligner.Align(baselineFeatures, features);
                }
                finally
                {
                    // Detach the notifier even when the alignment throws.
                    DeRegisterProgressNotifier(aligner);
                }
            }
            else
            {
                // Align against the mass tag database.
                var aligner = m_algorithms.DatabaseAligner;
                RegisterProgressNotifier(aligner);
                try
                {
                    UpdateStatus("Aligning " + datasetInfo.DatasetName + " to mass tag database.");
                    alignmentData = aligner.Align(database, features);
                }
                finally
                {
                    // Detach the notifier even when the alignment throws.
                    DeRegisterProgressNotifier(aligner);
                }
            }

            if (alignmentData != null)
            {
                alignmentData.aligneeDataset = datasetInfo.DatasetName;
                alignmentData.DatasetID = datasetInfo.DatasetId;
            }

            var args = new FeaturesAlignedEventArgs(datasetInfo,
                baselineFeatures,
                features,
                alignmentData);

            // Copy the delegate so a concurrent unsubscribe cannot null it between check and call.
            var alignedHandler = FeaturesAligned;
            if (alignedHandler != null)
            {
                alignedHandler(this, args);
            }

            UpdateStatus("Updating cache with aligned features.");
            return features;
        }
        /// <summary>
        ///     Builds a histogram of the NET residuals (anchor point X minus anchor point Y) of the
        ///     alignment matches and writes it to the console as tab-separated "bin, count" lines.
        /// </summary>
        /// <param name="data">Alignment data whose <c>Matches</c> are summarized.</param>
        /// <param name="baselineDatasetInformation">Not read by this method.</param>
        /// <param name="alignDatasetInformation">Not read by this method.</param>
        /// <param name="baselineFeatures">Not read by this method.</param>
        /// <param name="aligneeFeatures">Not read by this method.</param>
        private void ExportAlignmentData(classAlignmentData data,
            DatasetInformation baselineDatasetInformation,
            DatasetInformation alignDatasetInformation,
            IEnumerable<UMCLight> baselineFeatures,
            IEnumerable<UMCLight> aligneeFeatures)
        {
            var netResiduals = new List<double>();

            foreach (var match in data.Matches)
            {
                netResiduals.Add(match.AnchorPointX.Net - match.AnchorPointY.Net);
            }

            // Histogram from -0.05 to 0.05 using 0.01 as the third argument.
            var netHist =
                MatchCountHistogramBuilder.CreateResidualHistogram(-.05, .05, .01, netResiduals);

            Console.WriteLine();
            for (var i = 0; i < netHist.Bins.Count; i++)
            {
                Console.WriteLine("{0}\t{1}", netHist.Bins[i], netHist.Data[i]);
            }
        }
Exemple #42
0
        /// <summary>
        ///     Load a single dataset from the provider.
        /// </summary>
        /// <remarks>
        ///     Existing LC-MS features are loaded first. When none exist, the MS features are
        ///     filtered and turned into new LC-MS features whose NET is derived from the dataset's
        ///     scan times. When features do exist, any loaded MS features are attached to their
        ///     parent feature. Finally, if a sequence file is set, identifications are linked to the
        ///     MS/MS spectra collected while creating features.
        /// </remarks>
        /// <param name="dataset">Dataset to load.</param>
        /// <param name="msFilteringOptions">Options passed to the MS feature filter.</param>
        /// <param name="lcmsFindingOptions">Options passed to LC-MS feature creation.</param>
        /// <param name="lcmsFilteringOptions">Filtering options passed to LC-MS feature creation.</param>
        /// <param name="dataLoadOptions">Supplies the isos filter options used when loading MS features.</param>
        /// <param name="providerCache">Supplies the scan summary provider for the raw file.</param>
        /// <param name="identificationProviders">Supplies the identification provider for the sequence file.</param>
        /// <param name="progress">Optional progress sink.</param>
        /// <returns>The LC-MS features of the dataset.</returns>
        public IList<UMCLight> LoadDataset(DatasetInformation dataset,
                                           MsFeatureFilteringOptions msFilteringOptions,
                                           LcmsFeatureFindingOptions lcmsFindingOptions,
                                           LcmsFeatureFilteringOptions lcmsFilteringOptions,
                                           DataLoadingOptions dataLoadOptions,
                                           ScanSummaryProviderCache providerCache,
                                           IdentificationProviderCache identificationProviders,
                                           IProgress<ProgressData> progress = null)
        {
            var progData = new ProgressData(progress);
            IScanSummaryProvider provider = null;

            // The raw file is optional; guard the reference the same way SequenceFile is guarded below.
            if (dataset.RawFile != null && !string.IsNullOrWhiteSpace(dataset.RawFile.Path))
            {
                UpdateStatus("Using raw data to create better features.");
                provider = providerCache.GetScanSummaryProvider(dataset.RawFile.Path, dataset.DatasetId);
            }

            progData.StepRange(1);
            progData.Status = "Looking for existing features in the database.";
            UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
            var datasetId     = dataset.DatasetId;
            var features      = UmcLoaderFactory.LoadUmcFeatureData(dataset, Providers.FeatureCache, provider);
            var hasMsFeatures = features.Any(f => f.MsFeatures.Any());

            var msFeatures = new List<MSFeatureLight>();

            if (!hasMsFeatures)
            {
                progData.StepRange(2);
                progData.Status = "Loading MS Feature Data.";
                UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId,
                                           dataset.DatasetName));

                var isosFilterOptions = dataLoadOptions.GetIsosFilterOptions();
                msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path, isosFilterOptions);
            }

            progData.StepRange(3);
            progData.Status = "Loading scan summaries.";

            progData.StepRange(100);

            var msnSpectra = new List<MSSpectra>();

            if (features.Count < 1)
            {
                // No LC-MS features exist yet, so create them from the MS features provided to us.
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
                msFeatures = Filter(msFeatures, provider, ref dataset);

                progData.Status = "Creating LCMS features.";
                features        = CreateLcmsFeatures(dataset,
                                                     msFeatures,
                                                     lcmsFindingOptions,
                                                     lcmsFilteringOptions,
                                                     provider,
                                                     new Progress<ProgressData>(pd => progData.Report(pd.Percent)));

                // Max/Min throw on an empty sequence, so only normalize when features were created.
                if (features.Count > 0)
                {
                    var maxScan   = features.Max(feature => feature.Scan);
                    var minScan   = features.Min(feature => feature.Scan);
                    var scanTimes = dataset.ScanTimes;

                    // Loop-invariant parts of the NET normalization.
                    var minTime   = scanTimes[minScan];
                    var timeRange = scanTimes[maxScan] - minTime;
                    var id        = 0;

                    foreach (var feature in features)
                    {
                        feature.Id = id++;

                        // NET = elution time scaled to [0, 1] over the observed scan range. When every
                        // feature shares one scan time the range is zero; use 0 instead of NaN/Infinity.
                        feature.Net = timeRange == 0
                            ? 0
                            : (Convert.ToDouble(scanTimes[feature.Scan]) - minTime) / timeRange;
                        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        feature.NetAligned    = feature.Net;
                        feature.GroupId       = datasetId;
                        feature.SpectralCount = feature.MsFeatures.Count;

                        foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                        {
                            msFeature.UmcId   = feature.Id;
                            msFeature.GroupId = datasetId;
                            msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                            msnSpectra.AddRange(msFeature.MSnSpectra);
                        }
                    }
                }
            }
            else
            {
                if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
                {
                    var i = 0;
                    foreach (var feature in features)
                    {
                        feature.GroupId = datasetId;
                        feature.Id      = i++;
                    }
                }

                // Otherwise, we need to map the MS features to the LCMS Features provided.
                // This would mean that we extracted data from an existing database.
                if (msFeatures.Count > 0)
                {
                    var map = FeatureDataConverters.MapFeature(features);
                    foreach (var msFeature in msFeatures)
                    {
                        // MS features without a known parent feature are skipped.
                        if (map.ContainsKey(msFeature.UmcId))
                        {
                            map[msFeature.UmcId].AddChildFeature(msFeature);
                        }
                    }
                }
            }

            // Process the MS/MS data with peptides
            if (dataset.SequenceFile != null && !string.IsNullOrEmpty(dataset.SequenceFile.Path))
            {
                UpdateStatus("Reading List of Peptides");
                var idProvider  = identificationProviders.GetProvider(dataset.SequenceFile.Path, dataset.DatasetId);
                var peptideList = idProvider.GetAllIdentifications();

                UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");

                var linker = new PeptideMsMsLinker();
                linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
            }

            progData.Report(100);

            return features;
        }
        /// <summary>
        ///     Writes the features of a dataset to "&lt;DatasetName&gt;_peptide_scans.csv" in the
        ///     analysis path, but only when the configuration requests peptide scan files.
        /// </summary>
        /// <param name="information">Dataset whose name is used for the output file name.</param>
        /// <param name="features">Features handed to the peptide scan writer.</param>
        private void ReportPeptideFeatures(DatasetInformation information, IEnumerable<UMCLight> features)
        {
            if (m_config.ShouldCreatePeptideScanFiles)
            {
                var outputDirectory = m_config.AnalysisPath;
                var fileName        = information.DatasetName + "_peptide_scans.csv";
                var outputPath      = Path.Combine(outputDirectory, fileName);

                new PeptideScanWriter().Write(outputPath, features);
            }
        }
 /// <summary>
 ///     Creates the event arguments for loaded baseline features together with the
 ///     mass tag database stored in <c>Database</c>.
 /// </summary>
 /// <param name="info">Dataset information forwarded to the base constructor.</param>
 /// <param name="features">Features forwarded to the base constructor.</param>
 /// <param name="database">Mass tag database stored on this instance.</param>
 public BaselineFeaturesLoadedEventArgs(
     DatasetInformation info,
     List<UMCLight> features,
     MassTagDatabase database)
     : base(info, features)
 {
     this.Database = database;
 }
        /// <summary>
        /// Splits the visible NET/mass view into a grid of numSectionsPerAxis by numSectionsPerAxis
        /// cells and keeps, per cell, the "featuresPerSection" most abundant features.
        /// </summary>
        /// <param name="dataset">Dataset to get features points for.</param>
        /// <param name="globalMax">
        /// The maximum mass in all datasets; used as the mass upper bound when the mass axis
        /// reports an actual maximum of zero.
        /// </param>
        /// <param name="showMsFeatures">A value indicating whether points with Ms features should be returned.</param>
        /// <returns>
        /// Collection of datapoints for features.
        /// Item 1: LCMS feature datapoints. Item2: MS Feature datapoints.
        /// </returns>
        private Tuple<IEnumerable<DataPoint>, IEnumerable<ScatterPoint>> GetPartitionedPoints(DatasetInformation dataset, double globalMax, bool showMsFeatures = false)
        {
            // An axis maximum of exactly zero is replaced by a default upper bound.
            var netCeiling = this.netAxis.ActualMaximum;
            if (netCeiling.Equals(0))
            {
                netCeiling = 1.0;
            }

            var massCeiling = this.massAxis.ActualMaximum;
            if (massCeiling.Equals(0))
            {
                massCeiling = globalMax;
            }

            var netFloor = this.netAxis.ActualMinimum;
            var massFloor = this.massAxis.ActualMinimum;
            var sections = this.numSectionsPerAxis;

            var netCellSize = (netCeiling - netFloor) / sections;
            var massCellSize = (massCeiling - massFloor) / sections;

            var selected = new HashSet<FeaturePoint>();
            var tree = this.quadTrees[dataset];

            for (var column = 0; column < sections; column++)
            {
                var netLow = netFloor + (column * netCellSize);
                var netHigh = netFloor + ((column + 1) * netCellSize);

                for (var row = 0; row < sections; row++)
                {
                    var massLow = massFloor + (row * massCellSize);
                    var massHigh = massFloor + ((row + 1) * massCellSize);

                    var cell = new RectangleF
                    {
                        X = (float)netLow,
                        Y = (float)massLow,
                        Height = (float)(massHigh - massLow),
                        Width = (float)(netHigh - netLow)
                    };

                    // Most abundant features inside this cell.
                    var topInCell = tree.Query(cell)
                                        .OrderByDescending(point => point.UMCLight.Abundance)
                                        .Take(this.featuresPerSection);

                    selected.UnionWith(topInCell);
                }
            }

            return this.GetPoints(selected, showMsFeatures);
        }
Exemple #46
0
        /// <summary>
        ///     Example of walking every cluster in a MultiAlign database: each cluster is
        ///     reconstructed with its features, MS features and MS/MS spectra, and (when raw data
        ///     retrieval is switched on) the raw MS/MS spectrum is read for each spectrum.
        /// </summary>
        /// <param name="databasePath">Path to the database to read.</param>
        /// <param name="indexDatabase">True to index clusters and features before the database is opened.</param>
        public void CreateUMCClusterLight(string databasePath, bool indexDatabase)
        {
            // If the database is not indexed then do so...but before the session to the db is opened.
            if (indexDatabase)
            {
                DatabaseIndexer.IndexClusters(databasePath);
                DatabaseIndexer.IndexFeatures(databasePath);
            }

            // This is a factory based method that creates a set of data access providers used throughout MultiAlign
            var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, false);

            // If you just wanted the clusters you could do this:
            // 1. Connect to the database
            //NHibernateUtil.ConnectToDatabase(databasePath, false);
            // 2. Then extract all of the clusters
            //IUmcClusterDAO clusterCache     = new UmcClusterDAOHibernate();
            //List<UMCClusterLight> clusters  = clusterCache.FindAll();

            var clusters              = providers.ClusterCache.FindAll();
            var shouldGetMsFeatures   = true;
            var shouldGetMsMsFeatures = true;
            var shouldGetRawData      = false;

            // This gets all of the dataset information and maps to a dictionary...if you want the raw data
            // otherwise comment this out.
            var datasets   = providers.DatasetCache.FindAll();
            var datasetMap = new Dictionary<int, DatasetInformation>();

            foreach (var dataset in datasets)
            {
                datasetMap.Add(dataset.DatasetId, dataset);
            }

            foreach (var cluster in clusters)
            {
                cluster.ReconstructUMCCluster(providers,
                                              true,
                                              false,
                                              shouldGetMsFeatures,
                                              shouldGetMsMsFeatures);

                foreach (var feature in cluster.Features)
                {
                    foreach (var msFeature in feature.Features)
                    {
                        foreach (var spectrumMetaData in msFeature.MSnSpectra)
                        {
                            // then you can do stuff with the ms/ms spectra
                            // If you had the path to the raw file, you could create a reader for you to extract the MS/MS spectra
                            // This supports mzXML and .RAW Thermo files based on the file extension.
                            if (!shouldGetRawData)
                            {
                                continue;
                            }

                            DatasetInformation info;
                            if (!datasetMap.TryGetValue(spectrumMetaData.GroupId, out info) || info.RawFile == null)
                            {
                                continue;
                            }

                            // This might seem kind of klunky, but it's called a bridge, this way I can access
                            // MS/MS spectra from PNNLOmics without having to reference any of the Thermo DLL's
                            // Nor support file reading capability.  This is also nice because I don't have to load
                            // several MS/MS spectra when analyzing large datasets for my spectral clustering work.
                            // The reader is disposed after each spectrum so it is not leaked.
                            using (var rawReader = new InformedProteomicsReader(spectrumMetaData.GroupId, info.RawFile.Path))
                            {
                                // Then grab the actual spectrum...
                                ScanSummary summary;
                                var spectrum = rawReader.GetRawSpectra(spectrumMetaData.Scan, 2, out summary);

                                // Then do what you want with spectrum and summary...
                            }
                        }
                    }
                }
            }
        }
 /// <summary>
 ///     Creates the event arguments for loaded baseline features when no mass tag
 ///     database is involved; <c>Database</c> is set to null.
 /// </summary>
 /// <param name="info">Dataset information forwarded to the base constructor.</param>
 /// <param name="features">Features forwarded to the base constructor.</param>
 public BaselineFeaturesLoadedEventArgs(
     DatasetInformation info,
     List<UMCLight> features)
     : base(info, features)
 {
     this.Database = null;
 }
Exemple #48
0
        /// <summary>
        ///     Handles converting the rows to factor objects.
        /// </summary>
        /// <remarks>
        ///     Rows that cannot be used are skipped silently: rows with no fields, fewer than four
        ///     fields, a missing "Dataset", "Dataset_ID", "Factor" or "Value" column, a dataset ID
        ///     that is not an integer, or a dataset name that is not in the known datasets.
        ///     For a usable row the dataset's DMS ID is updated, the factor/value pair is registered
        ///     once, and a dataset-to-factor assignment is recorded.
        /// </remarks>
        /// <param name="sender">Not read by this method.</param>
        /// <param name="args">Row data; its fields are read through the column mapping.</param>
        /// <exception cref="NullReferenceException">Thrown when <paramref name="args"/> is null.</exception>
        public void HandleDataRow(object sender, MageDataEventArgs args)
        {
            // The exception type is kept as-is because callers may already catch it.
            if (args == null)
            {
                throw new NullReferenceException("The factors are invalid.");
            }

            if (args.Fields == null || args.Fields.Length < 4)
            {
                return;
            }

            string datasetName;
            string datasetIdText;
            string factor;
            string value;

            if (!TryReadColumn(args, "Dataset", out datasetName) ||
                !TryReadColumn(args, "Dataset_ID", out datasetIdText) ||
                !TryReadColumn(args, "Factor", out factor) ||
                !TryReadColumn(args, "Value", out value))
            {
                return;
            }

            // Dataset names are matched in lower case.
            datasetName = datasetName.ToLower();

            // A malformed ID makes the row unusable; skip it like every other invalid row.
            int datasetId;
            if (!int.TryParse(datasetIdText, out datasetId))
            {
                return;
            }

            if (!m_datasets.ContainsKey(datasetName))
            {
                return;
            }

            // Update the dataset ID.
            DatasetInformation info = m_datasets[datasetName];
            info.DMSDatasetID = datasetId;

            if (!m_factorMaps.ContainsKey(factor))
            {
                m_factorMaps.Add(factor, new Dictionary<string, int>());
            }

            var knownValues = m_factorMaps[factor];
            int factorID;

            if (knownValues.ContainsKey(value))
            {
                // We have seen this factor/value pair before; reuse its ID.
                factorID = knownValues[value];
            }
            else
            {
                // Add it to the list and map of factors to dump into the database.
                var factorMap = new ExperimentalFactor();
                factorMap.Value    = value;
                factorMap.Name     = factor;
                factorMap.FactorID = m_factorCount++;

                knownValues.Add(value, factorMap.FactorID);
                m_factors.Add(factorMap);
                factorID = factorMap.FactorID;
            }

            var datasetFactorMap = new DatasetToExperimentalFactorMap();

            datasetFactorMap.DatasetID = info.DatasetId;
            datasetFactorMap.FactorID  = factorID;
            m_factorAssignments.Add(datasetFactorMap);
        }

        /// <summary>
        ///     Reads the field of a mapped column as text with all double quotes removed.
        /// </summary>
        /// <param name="args">Row whose fields are read.</param>
        /// <param name="columnName">Name of the column in the column mapping.</param>
        /// <param name="text">The unquoted text; empty when the field is null.</param>
        /// <returns>False when the column is not mapped or its index lies outside the row.</returns>
        private bool TryReadColumn(MageDataEventArgs args, string columnName, out string text)
        {
            text = null;

            if (!m_columnMapping.ContainsKey(columnName))
            {
                return false;
            }

            var index = m_columnMapping[columnName];
            if (index < 0 || index >= args.Fields.Length)
            {
                return false;
            }

            // Convert.ToString can yield null for a null string field, hence the fallback.
            text = (Convert.ToString(args.Fields[index]) ?? string.Empty).Replace("\"", "");
            return true;
        }
Exemple #49
0
        /// <summary>
        ///     Finds LC-MS features in a baseline and an alignee dataset, clusters their MS/MS
        ///     spectra, and writes two timestamped text reports into <paramref name="resultPath"/>:
        ///     "..._scans.txt" (scan pairs of two-member clusters with the mean score) and
        ///     "....txt" (all clusters with feature and peptide details).
        /// </summary>
        /// <param name="name">Prefix of the report file names.</param>
        /// <param name="resultPath">Directory that receives the report files.</param>
        /// <param name="sequencePath">Sequence file path assigned to both datasets.</param>
        /// <param name="type">Not read by this method.</param>
        /// <param name="baseline">Baseline feature file; "_isos.csv" is replaced by ".raw" to get the raw file path.</param>
        /// <param name="features">Alignee feature file; "_isos.csv" is replaced by ".raw" to get the raw file path.</param>
        /// <param name="percent">Value assigned to the spectral comparer's TopPercent.</param>
        public void ClusterMsMs(string name,
            string resultPath,
            string sequencePath,
            SequenceFileType type,
            string baseline,
            string features,
            double percent)
        {
            var baselineRaw = baseline.Replace("_isos.csv", ".raw");
            var featuresRaw = features.Replace("_isos.csv", ".raw");

            Console.WriteLine("Create Baseline Information");

            var baselineInfo = new DatasetInformation
            {
                DatasetId = 0,
                Features = new InputFile {Path = baseline},
                Raw = new InputFile {Path = baselineRaw},
                Sequence = new InputFile {Path = sequencePath}
            };

            Console.WriteLine("Create Alignee Information");
            var aligneeInfo = new DatasetInformation
            {
                DatasetId = 1,
                Features = new InputFile {Path = features},
                Raw = new InputFile {Path = featuresRaw},
                Sequence = new InputFile {Path = sequencePath}
            };

            var reader = new MsFeatureLightFileReader();

            Console.WriteLine("Reading Baseline Features");
            var baselineMsFeatures = reader.ReadFile(baseline).ToList();
            baselineMsFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);

            Console.WriteLine("Reading Alignee Features");
            var aligneeMsFeatures = reader.ReadFile(features).ToList();
            aligneeMsFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var tolerances = new FeatureTolerances
            {
                Mass = 8,
                Net = .005
            };
            var options = new LcmsFeatureFindingOptions(tolerances);

            Console.WriteLine("Detecting Baseline Features");
            var baselineFeatures = finder.FindFeatures(baselineMsFeatures, options, null);

            Console.WriteLine("Detecting Alignee Features");
            var aligneeFeatures = finder.FindFeatures(aligneeMsFeatures, options, null);

            Console.WriteLine("Managing baseline and alignee features");
            baselineFeatures.ForEach(x => x.GroupId = baselineInfo.DatasetId);
            aligneeFeatures.ForEach(x => x.GroupId = aligneeInfo.DatasetId);

            Console.WriteLine("Clustering MS/MS Spectra");
            var clusterer = new MSMSClusterer();
            clusterer.MzTolerance = .5;
            clusterer.MassTolerance = 6;
            clusterer.SpectralComparer = new SpectralNormalizedDotProductComparer
            {
                TopPercent = percent
            };
            clusterer.SimilarityTolerance = .5;
            clusterer.ScanRange = 905;
            clusterer.Progress += clusterer_Progress;

            var allFeatures = new List<UMCLight>();
            allFeatures.AddRange(baselineFeatures);
            allFeatures.AddRange(aligneeFeatures);

            List<MsmsCluster> clusters = null;
            using (var rawReader = new ThermoRawDataFileReader())
            {
                rawReader.AddDataFile(baselineInfo.Raw.Path, baselineInfo.DatasetId);
                rawReader.AddDataFile(aligneeInfo.Raw.Path, aligneeInfo.DatasetId);

                clusters = clusterer.Cluster(allFeatures, rawReader);
                Console.WriteLine("Found {0} Total Clusters", clusters.Count);
            }

            if (clusters == null)
            {
                return;
            }

            // Both reports share one timestamped prefix so they can be paired later.
            var now = DateTime.Now;
            var filePrefix = string.Format("{0}-results-{1}-{2}-{3}-{4}-{5}-{6}",
                name,
                now.Year,
                now.Month,
                now.Day,
                now.Hour,
                now.Minute,
                now.Second);

            // Report 1: scans of clusters with exactly two features, followed by the mean score.
            var scanReportPath = Path.Combine(resultPath, filePrefix + "_scans.txt");
            using (TextWriter writer = File.CreateText(scanReportPath))
            {
                writer.WriteLine("[Data]");
                writer.WriteLine("{0}", baseline);
                writer.WriteLine("{0}", features);
                writer.WriteLine("[Scans]");
                writer.WriteLine();
                foreach (var cluster in clusters)
                {
                    if (cluster.Features.Count != 2)
                    {
                        continue;
                    }

                    var scanData = "";
                    foreach (var feature in cluster.Features)
                    {
                        scanData += string.Format("{0},", feature.Scan);
                    }
                    scanData += string.Format("{0}", cluster.MeanScore);

                    writer.WriteLine(scanData);
                }
            }

            // Report 2: every cluster with feature details and linked peptides.
            var detailReportPath = Path.Combine(resultPath, filePrefix + ".txt");
            using (TextWriter writer = File.CreateText(detailReportPath))
            {
                writer.WriteLine("[Data]");
                writer.WriteLine("{0}", baseline);
                writer.WriteLine("{0}", features);
                writer.WriteLine("[Scans]");
                foreach (var cluster in clusters)
                {
                    var scanData = "";
                    var records = new List<string>();
                    foreach (var feature in cluster.Features)
                    {
                        scanData += string.Format("{0},", feature.Scan);

                        var record = string.Format("{0},{1},{2},{3},{4},{5}",
                            feature.GroupId,
                            feature.Id,
                            feature.MassMonoisotopic,
                            feature.Mz,
                            feature.ChargeState,
                            feature.Scan);
                        foreach (var spectrum in feature.MSnSpectra)
                        {
                            foreach (var peptide in spectrum.Peptides)
                            {
                                record += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                            }
                        }
                        records.Add(record);
                    }

                    // Join the per-feature records with a comma; without it the last field of one
                    // feature ran straight into the dataset id of the next.
                    writer.WriteLine(scanData + "," + string.Join(",", records));
                }
                writer.WriteLine("");
                writer.WriteLine("");
                writer.WriteLine("[Clusters]");

                foreach (var cluster in clusters)
                {
                    writer.WriteLine("cluster id, cluster score");
                    writer.WriteLine("{0}, {1}", cluster.Id, cluster.MeanScore);
                    writer.WriteLine("feature dataset id, id, monoisotopic mass, mz, charge, scan, peptides");

                    foreach (var feature in cluster.Features)
                    {
                        var data = string.Format("{0},{1},{2},{3},{4},{5}",
                            feature.GroupId,
                            feature.Id,
                            feature.MassMonoisotopic,
                            feature.Mz,
                            feature.ChargeState,
                            feature.Scan);
                        foreach (var spectrum in feature.MSnSpectra)
                        {
                            foreach (var peptide in spectrum.Peptides)
                            {
                                data += string.Format(",{0},{1}", peptide.Sequence, peptide.Score);
                            }
                        }
                        writer.WriteLine(data);
                    }
                }
            }
        }
Exemple #50
0
 /// <summary>
 ///     Event arguments carrying a dataset and the features that were loaded for it.
 /// </summary>
 /// <param name="info">Dataset information object</param>
 /// <param name="features">Features stored in <c>Features</c>.</param>
 public FeaturesLoadedEventArgs(DatasetInformation info, IList<UMCLight> features)
 {
     this.Features           = features;
     this.DatasetInformation = info;
 }
Exemple #51
0
        /// <summary>
        ///     Recreates the database at <paramref name="databasePath"/> and fills it with dummy
        ///     datasets and randomly generated features. For each requested cluster, a random
        ///     number of features is generated, each in a distinct dataset, sharing charge and
        ///     drift time and with a NET close to a common value. Only datasets and features are
        ///     stored; no cluster records are written and every feature gets ClusterId -1.
        /// </summary>
        /// <param name="databasePath">Database file; deleted and recreated.</param>
        /// <param name="totalDatasets">Number of dummy datasets to create.</param>
        /// <param name="totalClusters">Number of feature groups to generate.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     Thrown when clusters are requested but <paramref name="totalDatasets"/> is less than one.
        /// </exception>
        public void TestCreateDummyDatabase(string databasePath, int totalDatasets, int totalClusters)
        {
            // Fail before touching the database; Random.Next(1, total) below would otherwise throw
            // the same exception type with a less helpful message after the file was deleted.
            if (totalClusters > 0 && totalDatasets < 1)
            {
                throw new ArgumentOutOfRangeException("totalDatasets",
                    "At least one dataset is required to generate cluster features.");
            }

            File.Delete(databasePath);
            NHibernateUtil.ConnectToDatabase(databasePath, true);

            IDatasetDAO datasetCache = new DatasetDAOHibernate();
            // NOTE(review): clusterCache is never used below; kept only because it is unclear from
            // here whether constructing it has required side effects - remove once confirmed.
            IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
            IUmcDAO featureCache = new UmcDAOHibernate();

            // Creating a dataset
            Console.WriteLine("Creating dummy datasets");
            var datasets = new List<DatasetInformation>();
            var total = totalDatasets;
            for (var i = 0; i < total; i++)
            {
                var dataset = new DatasetInformation();
                dataset.DatasetId = i;
                dataset.DatasetName = "test" + i;
                datasets.Add(dataset);
            }
            datasetCache.AddAll(datasets);
            datasets.Clear();
            datasets = datasetCache.FindAll();

            // Create features
            Console.WriteLine("Creating features");
            var features = new List<UMCLight>();
            var x = new Random();

            var featureId = 0;
            for (var i = 0; i < totalClusters; i++)
            {
                // Upper bound is exclusive, so N never exceeds the number of datasets and the
                // distinct-dataset search below always terminates.
                var N = x.Next(1, total);
                var charge = x.Next(1, 10);
                var usedDatasets = new HashSet<int>();

                var net = x.NextDouble();
                var mass = 400 + (1600*x.NextDouble());
                var dt = 60*x.NextDouble();

                for (var j = 0; j < N; j++)
                {
                    // Draw until we hit a dataset not yet used in this group (Add returns false on repeats).
                    int did;
                    do
                    {
                        did = x.Next(0, total);
                    } while (!usedDatasets.Add(did));

                    var feature = new UMCLight
                    {
                        GroupId = did,
                        Id = featureId++,
                        ChargeState = charge,
                        MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(mass, 3)
                    };
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                    feature.Net = net + 0.03 * x.NextDouble();
                    feature.NetAligned = feature.Net;
                    feature.DriftTime = dt;
                    feature.AbundanceSum = x.Next(100, 200);
                    // NOTE(review): the original assigned Abundance to itself (a no-op), so Abundance
                    // keeps its default here; it may have been meant to mirror AbundanceSum - confirm.
                    feature.ClusterId = -1;
                    features.Add(feature);
                }
            }
            featureCache.AddAll(features);
        }
Exemple #52
0
        /// <summary>
        ///     Runs the MultiAlign analysis: finds LCMS features for the baseline dataset and for each
        ///     alignee dataset, clusters the combined features, aligns the alignee against the baseline,
        ///     clusters again, and reports the cluster statistics of both clustering passes.
        /// </summary>
        /// <param name="baselineDataset">Dataset whose features are used as the alignment reference.</param>
        /// <param name="aligneeDatasets">Datasets that are aligned against the baseline, one at a time.</param>
        /// <param name="featureFindingOptions">Options handed to <paramref name="featureFinder"/>.</param>
        /// <param name="msFilterOptions">Options used to filter the MS features of every dataset.</param>
        /// <param name="lcmsFilterOptions">Not referenced by this method.</param>
        /// <param name="peptideOptions">Supplies the Fdr and IdScore values passed to LinkPeptidesToFeatures.</param>
        /// <param name="featureFinder">Creates LCMS features from the filtered MS features.</param>
        /// <param name="aligner">Aligns the alignee features to the baseline features.</param>
        /// <param name="clusterer">Clusters the combined baseline and alignee features.</param>
        /// <param name="matchPath">Path passed to SaveMatches for every alignee dataset.</param>
        /// <param name="errorPath">Path passed to WriteErrors for every alignee dataset.</param>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
                                              IEnumerable <DatasetInformation> aligneeDatasets,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
                                              IFeatureAligner <IEnumerable <UMCLight>,
                                                               IEnumerable <UMCLight>,
                                                               AlignmentData> aligner,
                                              IClusterer <UMCLight, UMCClusterLight> clusterer,
                                              string matchPath,
                                              string errorPath)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);

            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set. The baseline reader stays open for the whole
            // analysis because providerX (built from it) is handed to the aligner for every alignee.
            using (var rawProviderX = new InformedProteomicsReader())
            {
                rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                                                                  featureFindingOptions,
                                                                  rawProviderX);
                LinkPeptidesToFeatures(baselineDataset.Sequence.Path, baselineFeatures, peptideOptions.Fdr,
                                       peptideOptions.IdScore);

                var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                // Then load each alignee dataset and align it against the baseline.
                foreach (var dataset in aligneeDatasets)
                {
                    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                    aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                    using (var rawProviderY = new InformedProteomicsReader())
                    {
                        rawProviderY.AddDataFile(dataset.RawFile.Path, 0);

                        UpdateStatus("Finding alignee features");
                        var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                                                                         featureFindingOptions,
                                                                         rawProviderY);
                        LinkPeptidesToFeatures(dataset.Sequence.Path, aligneeFeatures, peptideOptions.Fdr,
                                               peptideOptions.IdScore);

                        var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                        // Cluster before we do anything else, so there is a pre-alignment reference.
                        var allFeatures = new List <UMCLight>();
                        allFeatures.AddRange(baselineFeatures);
                        allFeatures.AddRange(aligneeFeatures);
                        foreach (var feature in allFeatures)
                        {
                            // NOTE(review): self-assignment; has no effect unless the Net setter has
                            // side effects. Possibly NetAligned was the intended target - confirm.
                            feature.Net = feature.Net;
                            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        }

                        // This tells us the differences before we align.
                        var clusters     = clusterer.Cluster(allFeatures);
                        var preAlignment = AnalyzeClusters(clusters);

                        aligner.AligneeSpectraProvider  = providerY;
                        aligner.BaselineSpectraProvider = providerX;


                        UpdateStatus("Aligning data");
                        // Align the alignee features to the baseline and keep the resulting matches.
                        var data    = aligner.Align(baselineFeatures, aligneeFeatures);
                        var matches = data.Matches;


                        // NOTE(review): errorPath (and matchPath below) is the same for every alignee
                        // dataset; whether later datasets overwrite earlier output depends on
                        // WriteErrors/SaveMatches, which are not visible here.
                        WriteErrors(errorPath, matches);

                        // create anchor points for LCMSWarp alignment
                        // NOTE(review): massPoints and netPoints are filled here but never read
                        // afterwards in this method.
                        var massPoints = new List <RegressionPoint>();
                        var netPoints  = new List <RegressionPoint>();
                        foreach (var match in matches)
                        {
                            var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz,
                                                                                  match.AnchorPointY.Mz);
                            var netError  = match.AnchorPointX.Net - match.AnchorPointY.Net;
                            var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                            massPoints.Add(massPoint);

                            var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                            netPoints.Add(netPoint);
                        }


                        // Detach every feature from the clusters of the first pass so that the
                        // second clustering starts from unassigned features.
                        foreach (var feature in allFeatures)
                        {
                            feature.UmcCluster = null;
                            feature.ClusterId  = -1;
                        }
                        // Then cluster after alignment!
                        // NOTE(review): presumably Align updated the aligned values on the features
                        // it was given; that is not visible here - confirm.
                        UpdateStatus("clustering data");
                        clusters = clusterer.Cluster(allFeatures);
                        var postAlignment = AnalyzeClusters(clusters);

                        // Report the cluster statistics of both passes as a tab-separated table.
                        UpdateStatus("Note\tSame\tDifferent");
                        UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                                                   preAlignment.DifferentCluster));
                        UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                                                   postAlignment.DifferentCluster));

                        SaveMatches(matchPath, matches);
                    }
                }
            }

            // NOTE(review): the matching registration is not visible in this method, and these calls
            // are skipped when anything above throws because they are not inside a finally block.
            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(featureFinder);
            DeRegisterProgressNotifier(clusterer);
        }
Exemple #53
0
        /// <summary>
        ///     Filters the list of MS Features that may be from MS/MS deisotoped data, keeping only
        ///     the features whose scan is reported as MS level 1 by the raw file reader.
        /// </summary>
        /// <param name="msFeatures">MS features to filter.</param>
        /// <param name="dataset">
        ///     Dataset supplying the raw file path. Its ScanTimes are updated with the time of every
        ///     feature scan for which the reader returned a summary.
        /// </param>
        /// <returns>
        ///     The features located in MS level 1 scans, or <paramref name="msFeatures"/> unchanged
        ///     when the dataset has no raw file path or no reader could be created for it.
        /// </returns>
        public List<MSFeatureLight> Filter(List<MSFeatureLight> msFeatures, ref DatasetInformation dataset)
        {
            var rawPath = dataset.RawPath;
            if (string.IsNullOrWhiteSpace(rawPath))
            {
                return msFeatures;
            }

            using (var provider = RawLoaderFactory.CreateFileReader(rawPath))
            {
                if (provider == null)
                {
                    UpdateStatus(string.Format("Warning: Raw file not found ({0}); scan times are not available!", System.IO.Path.GetFileName(rawPath)));

                    // Without a reader no scan can be classified, so behave like the
                    // "no raw path" case above instead of silently discarding every feature.
                    return msFeatures;
                }

                UpdateStatus(string.Format("Reading scan info from {0}", System.IO.Path.GetFileName(rawPath)));
                provider.AddDataFile(rawPath, 0);

                // Each scan only has to be looked up once, however many features it holds.
                var uniqueScans = new HashSet<int>();
                foreach (var feature in msFeatures)
                {
                    uniqueScans.Add(feature.Scan);
                }

                var fullScans = new HashSet<int>();
                var scanTimes = dataset.ScanTimes;
                foreach (var scan in uniqueScans)
                {
                    ScanSummary summary = provider.GetScanSummary(scan, 0);
                    if (summary == null)
                    {
                        continue;
                    }

                    if (summary.MsLevel == 1)
                    {
                        fullScans.Add(scan);
                    }

                    // Add or overwrite: the time is recorded for every scan level, not just MS1.
                    scanTimes[scan] = summary.Time;
                }
                dataset.ScanTimes = scanTimes;

                return msFeatures.Where(x => fullScans.Contains(x.Scan)).ToList();
            }
        }
Exemple #54
0
        /// <summary>
        ///     Load a single dataset from the provider.
        /// </summary>
        /// <remarks>
        ///     When no LCMS features are stored for the dataset they are created from its MS features
        ///     and given ids, NET values and the dataset id. Otherwise the MS features are attached
        ///     to the stored LCMS features they reference. Finally, when a peptide reader exists for
        ///     the dataset's sequence path, the peptides are linked to the MS/MS spectra collected
        ///     while creating features.
        /// </remarks>
        /// <param name="dataset">Dataset to load; its scan times are rebuilt as part of loading.</param>
        /// <param name="msFilteringOptions">Options used to filter MS features before feature creation.</param>
        /// <param name="lcmsFindingOptions">Options passed on to CreateLcmsFeatures.</param>
        /// <param name="lcmsFilteringOptions">Options passed on to CreateLcmsFeatures.</param>
        /// <returns>
        ///     The LCMS features of the dataset. The list may be empty when every created feature
        ///     was removed by filtering.
        /// </returns>
        public IList<UMCLight> LoadDataset(DatasetInformation dataset,
            MsFeatureFilteringOptions msFilteringOptions,
            LcmsFeatureFindingOptions lcmsFindingOptions,
            LcmsFeatureFilteringOptions lcmsFilteringOptions)
        {
            UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
            var datasetId = dataset.DatasetId;
            var features = UmcLoaderFactory.LoadUmcFeatureData(dataset.Features.Path, dataset.DatasetId,
                Providers.FeatureCache);

            UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId,
                dataset.DatasetName));
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
            var scansInfo = UmcLoaderFactory.LoadScanSummaries(dataset.Scans.Path);
            dataset.BuildScanTimes(scansInfo);

            // Only filled when features are created below; stays empty for pre-existing features.
            var msnSpectra = new List<MSSpectra>();

            // If we don't have any features, then we have to create some from the MS features
            // provided to us.
            if (features.Count < 1)
            {
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
                msFeatures = Filter(msFeatures, ref dataset);

                features = CreateLcmsFeatures(dataset,
                    msFeatures,
                    lcmsFindingOptions,
                    lcmsFilteringOptions);

                // Filtering may have removed every feature; Max/Min throw on an empty sequence,
                // so only normalize when there is something to normalize.
                if (features.Count > 0)
                {
                    var maxScan = features.Max(feature => feature.Scan);
                    var minScan = features.Min(feature => feature.Scan);
                    var scanTimes = dataset.ScanTimes;

                    // NET is computed from scan times (not scan numbers), relative to the
                    // time span between the first and last feature scan.
                    double minTime = scanTimes[minScan];
                    double timeRange = scanTimes[maxScan] - scanTimes[minScan];
                    var hasTimeRange = timeRange != 0;

                    var id = 0;
                    foreach (var feature in features)
                    {
                        feature.Id = id++;

                        // A zero range would divide 0 by 0 and produce NaN; use 0 instead.
                        feature.Net = hasTimeRange
                            ? (Convert.ToDouble(scanTimes[feature.Scan]) - minTime) / timeRange
                            : 0;
                        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        feature.NetAligned = feature.Net;
                        feature.GroupId = datasetId;
                        feature.SpectralCount = feature.MsFeatures.Count;

                        foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                        {
                            msFeature.UmcId = feature.Id;
                            msFeature.GroupId = datasetId;
                            msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                            msnSpectra.AddRange(msFeature.MSnSpectra);
                        }
                    }
                }
            }
            else
            {
                if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
                {
                    var i = 0;
                    foreach (var feature in features)
                    {
                        feature.GroupId = datasetId;
                        feature.Id = i++;
                    }
                }

                // Otherwise, we need to map the MS features to the LCMS Features provided.
                // This would mean that we extracted data from an existing database.
                if (msFeatures.Count > 0)
                {
                    var map = FeatureDataConverters.MapFeature(features);
                    foreach (var msFeature in msFeatures)
                    {
                        // MS features that reference an unknown LCMS feature are skipped.
                        if (map.ContainsKey(msFeature.UmcId))
                        {
                            map[msFeature.UmcId].AddChildFeature(msFeature);
                        }
                    }
                }
            }

            // Process the MS/MS data with peptides
            UpdateStatus("Reading List of Peptides");
            var sequenceProvider = PeptideReaderFactory.CreateReader(dataset.SequencePath);
            if (sequenceProvider != null)
            {
                var peptideList = sequenceProvider.Read(dataset.SequencePath).ToList();

                // Number the peptides in the order they were read.
                var count = 0;
                peptideList.ForEach(x => x.Id = count++);

                UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");

                var linker = new PeptideMsMsLinker();
                linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
            }
            return features;
        }
Exemple #55
0
        /// <summary>
        ///     Fills a freshly created database with dummy datasets plus randomly generated
        ///     features and clusters, then writes out the clusters stored for the given charge.
        /// </summary>
        /// <param name="databasePath">Database file; deleted and re-created by this method.</param>
        /// <param name="crossPath">Path passed on to WriteClusters.</param>
        /// <param name="charge">Charge state given to every generated cluster and feature.</param>
        /// <param name="minimumClusterSize">Minimum cluster size passed on to WriteClusters.</param>
        public void TestClusterGeneration(string databasePath,
                                          string crossPath,
                                          int charge,
                                          int minimumClusterSize)
        {
            const int datasetCount = 10;
            const int clusterCount = 100;

            File.Delete(databasePath);
            NHibernateUtil.ConnectToDatabase(databasePath, true);

            IDatasetDAO datasetCache = new DatasetDAOHibernate();
            IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
            IUmcDAO featureCache = new UmcDAOHibernate();

            // Store the dummy datasets, then read them back from the database.
            Console.WriteLine("Creating dummy datasets");
            var datasets = new List<DatasetInformation>();
            for (var datasetIndex = 0; datasetIndex < datasetCount; datasetIndex++)
            {
                datasets.Add(new DatasetInformation
                {
                    DatasetId = datasetIndex,
                    DatasetName = "test" + datasetIndex
                });
            }
            datasetCache.AddAll(datasets);
            datasets.Clear();
            datasets = datasetCache.FindAll();

            // Build clusters whose member features each come from a different dataset.
            Console.WriteLine("Creating features");
            var features = new List<UMCLight>();
            var clusters = new List<UMCClusterLight>();
            var random = new Random();
            var nextFeatureId = 0;

            for (var clusterIndex = 0; clusterIndex < clusterCount; clusterIndex++)
            {
                var cluster = new UMCClusterLight
                {
                    Id = clusterIndex,
                    AmbiguityScore = clusterIndex,
                    Tightness = clusterIndex
                };

                var memberCount = random.Next(1, datasetCount);

                // Id is assigned a second time here; redundant unless the setter has side effects.
                cluster.Id = clusterIndex;
                cluster.ChargeState = charge;

                var usedDatasets = new HashSet<int>();
                for (var member = 0; member < memberCount; member++)
                {
                    // Draw dataset ids until one turns up that this cluster has not used yet.
                    int datasetId;
                    do
                    {
                        datasetId = random.Next(0, datasetCount);
                    } while (!usedDatasets.Add(datasetId));

                    var feature = new UMCLight
                    {
                        GroupId = datasetId,
                        Id = nextFeatureId++,
                        ChargeState = charge,
                        MassMonoisotopic = random.NextDouble(),
                        Net = random.NextDouble(),
                        AbundanceSum = random.Next(100, 200)
                    };

                    // NOTE(review): self-assignment, kept as-is; AbundanceSum may have been intended.
                    feature.Abundance = feature.Abundance;
                    feature.ClusterId = cluster.Id;

                    cluster.AddChildFeature(feature);
                    features.Add(feature);
                }

                cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                clusters.Add(cluster);
            }

            featureCache.AddAll(features);
            clusterCache.AddAll(clusters);
            clusters = clusterCache.FindAll();

            Console.WriteLine("Find all clusters");
            clusters = clusterCache.FindByCharge(charge);

            WriteClusters(datasets, clusters, minimumClusterSize, charge, crossPath, databasePath, 300000);
        }
Exemple #56
0
        /// <summary>
        ///     Creates LCMS Features from MS features, then filters them by feature length.
        /// </summary>
        /// <param name="information">
        ///     Dataset the features belong to; supplies the raw file path and, for time-based
        ///     filtering, the scan times.
        /// </param>
        /// <param name="msFeatures">MS features to build LCMS features from; must not be empty.</param>
        /// <param name="options">Feature finding options passed to the feature finder.</param>
        /// <param name="filterOptions">Options selecting and configuring the feature filter.</param>
        /// <returns>The LCMS features that passed filtering.</returns>
        /// <exception cref="Exception">Thrown when <paramref name="msFeatures"/> is empty.</exception>
        public List<UMCLight> CreateLcmsFeatures(
            DatasetInformation information,
            List<MSFeatureLight> msFeatures,
            LcmsFeatureFindingOptions options,
            LcmsFeatureFilteringOptions filterOptions)
        {
            // Make features
            if (msFeatures.Count < 1)
            {
                throw new Exception("No features were found in the feature files provided.");
            }

            UpdateStatus("Finding features.");
            ISpectraProvider provider = null;
            try
            {
                if (!string.IsNullOrWhiteSpace(information.RawPath))
                {
                    UpdateStatus("Using raw data to create better features.");
                    provider = RawLoaderFactory.CreateFileReader(information.RawPath);
                    if (provider != null)
                    {
                        provider.AddDataFile(information.RawPath, 0);
                    }
                    else
                    {
                        // The factory can return null (for example when the raw file is missing);
                        // fall back to finding features without raw data instead of crashing.
                        UpdateStatus(string.Format("Warning: Raw file not found ({0}); creating features without raw data.",
                            System.IO.Path.GetFileName(information.RawPath)));
                    }
                }

                ValidateFeatureFinderMaxScanLength(information, options, filterOptions);

                var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
                finder.Progress += (sender, args) => UpdateStatus(args.Message);
                var features = finder.FindFeatures(msFeatures, options, provider);

                UpdateStatus("Filtering features.");
                List<UMCLight> filteredFeatures;
                if (filterOptions.TreatAsTimeNotScan) //Feature length determined based on time (mins)
                {
                    filteredFeatures = LcmsFeatureFilters.FilterFeatures(features,
                        filterOptions, information.ScanTimes);
                }
                else //Feature length determined based on scans
                {
                    filteredFeatures = LcmsFeatureFilters.FilterFeatures(features, filterOptions);
                }

                UpdateStatus(string.Format("Filtered features from: {0} to {1}.", features.Count, filteredFeatures.Count));
                return filteredFeatures;
            }
            finally
            {
                // The reader was opened here, so it is released here even when finding or
                // filtering throws.
                if (provider != null)
                {
                    provider.Dispose();
                }
            }
        }