Exemple #1
0
 /// <summary>
 ///     Builds a provider set from already-constructed data access objects.
 ///     The parameterless constructor runs first; afterwards each argument is
 ///     stored in its corresponding property.
 /// </summary>
 /// <param name="featureCache">LCMS features; stored in FeatureCache.</param>
 /// <param name="clusterCache">LCMS feature clusters; stored in ClusterCache.</param>
 /// <param name="msFeatureCache">MS features; stored in MSFeatureCache.</param>
 /// <param name="msnFeatureCache">MS/MS features; stored in MSnFeatureCache.</param>
 /// <param name="msnFeatureMap">MS to MSn feature map; stored in MSFeatureToMSnFeatureCache.</param>
 /// <param name="datasetCache">Stored in DatasetCache.</param>
 /// <param name="massTagMatches">Stored in MassTagMatches.</param>
 /// <param name="massTags">Stored in MassTags.</param>
 /// <param name="factorCache">Stored in FactorCache.</param>
 /// <param name="factorAssignmentCache">Stored in FactorAssignmentCache.</param>
 /// <param name="msmsClusterCache">Stored in MSMSClusterCache.</param>
 /// <param name="sequenceCache">Stored in DatabaseSequenceCache.</param>
 /// <param name="sequenceMapCache">Stored in SequenceMsnMapCache.</param>
 public FeatureDataAccessProviders(
     IUmcDAO featureCache,
     IUmcClusterDAO clusterCache,
     IMSFeatureDAO msFeatureCache,
     IMSnFeatureDAO msnFeatureCache,
     IMsnFeatureToMSFeatureDAO msnFeatureMap,
     IDatasetDAO datasetCache,
     IMassTagMatchDAO massTagMatches,
     IMassTagDAO massTags,
     IFactorDao factorCache,
     IDatasetToFactorMapDAO factorAssignmentCache,
     IMSMSClusterMapDAO msmsClusterCache,
     IDatabaseSearchSequenceDAO sequenceCache,
     ISequenceToMsnFeatureDAO sequenceMapCache)
     : this()
 {
     // LCMS features, their clusters and the underlying MS / MSn features.
     this.ClusterCache = clusterCache;
     this.FeatureCache = featureCache;
     this.MSFeatureCache = msFeatureCache;
     this.MSnFeatureCache = msnFeatureCache;
     this.MSFeatureToMSnFeatureCache = msnFeatureMap;

     // Datasets and mass tags.
     this.DatasetCache = datasetCache;
     this.MassTagMatches = massTagMatches;
     this.MassTags = massTags;

     // Factors, MS/MS cluster map and database search sequences.
     this.FactorAssignmentCache = factorAssignmentCache;
     this.FactorCache = factorCache;
     this.MSMSClusterCache = msmsClusterCache;
     this.DatabaseSequenceCache = sequenceCache;
     this.SequenceMsnMapCache = sequenceMapCache;
 }
 /// <summary>
 ///     Constructor that wires up every data access object in one call.
 ///     Chains to the parameterless constructor and then assigns each
 ///     argument to the property that holds it.
 /// </summary>
 /// <param name="featureCache">LCMS features (assigned to FeatureCache).</param>
 /// <param name="clusterCache">LCMS feature clusters (assigned to ClusterCache).</param>
 /// <param name="msFeatureCache">MS features (assigned to MSFeatureCache).</param>
 /// <param name="msnFeatureCache">MS/MS features (assigned to MSnFeatureCache).</param>
 /// <param name="msnFeatureMap">MS to MSn feature map (assigned to MSFeatureToMSnFeatureCache).</param>
 /// <param name="datasetCache">Assigned to DatasetCache.</param>
 /// <param name="massTagMatches">Assigned to MassTagMatches.</param>
 /// <param name="massTags">Assigned to MassTags.</param>
 /// <param name="factorCache">Assigned to FactorCache.</param>
 /// <param name="factorAssignmentCache">Assigned to FactorAssignmentCache.</param>
 /// <param name="msmsClusterCache">Assigned to MSMSClusterCache.</param>
 /// <param name="sequenceCache">Assigned to DatabaseSequenceCache.</param>
 /// <param name="sequenceMapCache">Assigned to SequenceMsnMapCache.</param>
 public FeatureDataAccessProviders(IUmcDAO                    featureCache,
                                   IUmcClusterDAO             clusterCache,
                                   IMSFeatureDAO              msFeatureCache,
                                   IMSnFeatureDAO             msnFeatureCache,
                                   IMsnFeatureToMSFeatureDAO  msnFeatureMap,
                                   IDatasetDAO                datasetCache,
                                   IMassTagMatchDAO           massTagMatches,
                                   IMassTagDAO                massTags,
                                   IFactorDao                 factorCache,
                                   IDatasetToFactorMapDAO     factorAssignmentCache,
                                   IMSMSClusterMapDAO         msmsClusterCache,
                                   IDatabaseSearchSequenceDAO sequenceCache,
                                   ISequenceToMsnFeatureDAO   sequenceMapCache)
     : this()
 {
     this.ClusterCache               = clusterCache;
     this.FeatureCache               = featureCache;
     this.MSFeatureCache             = msFeatureCache;
     this.MSnFeatureCache            = msnFeatureCache;
     this.MSFeatureToMSnFeatureCache = msnFeatureMap;
     this.DatasetCache               = datasetCache;
     this.MassTagMatches             = massTagMatches;
     this.MassTags                   = massTags;
     this.FactorAssignmentCache      = factorAssignmentCache;
     this.FactorCache                = factorCache;
     this.MSMSClusterCache           = msmsClusterCache;
     this.DatabaseSequenceCache      = sequenceCache;
     this.SequenceMsnMapCache        = sequenceMapCache;
 }
Exemple #3
0
        /// <summary>
        /// The main entry point for the application. Reads the features of one charge state from the
        /// database at <c>databasePath</c>, optionally restricts them to a list of datasets, clusters them
        /// with an average-linkage clusterer and hands the clusters to a <c>UmcClusterWriter</c> opened
        /// on <c>crossTabPath</c>.
        /// </summary>
        /// <param name="args">
        /// Command line: <c>databasePath chargeState crossTabPath [datasetList]</c>. The first three values
        /// are required. When exactly four arguments are given, the fourth is a text file whose lines are
        /// dataset names (matched case-insensitively); only features belonging to those datasets are clustered.
        /// </param>
        /// <returns>0 when the analysis succeeds; 1 for bad arguments or any failure.</returns>
        static int Main(string [] args)
        {
            // NOTE(review): SetConsoleMode is documented to take a console input / screen-buffer handle,
            // whereas MainWindowHandle is a window handle - confirm this call has the intended effect.
            var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;

            SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

            try
            {
                // args[0..2] are all read below, so three arguments are the minimum.
                if (args.Length < 3)
                {
                    Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
                    // Regular (non-verbatim) literal so that \t is emitted as a tab character.
                    Console.WriteLine("\tThe cross-tab file will be placed in the same directory as the database path");
                    return 1;
                }

                // Setup the analysis processing
                var databasePath = args[0];
                var databaseName = Path.GetFileNameWithoutExtension(databasePath);
                var path         = Path.GetDirectoryName(databasePath);
                var crossPath    = args[2];
                var chargeState  = Convert.ToInt32(args[1]);

                // Optional fourth argument: file with one dataset name per line.
                List<string> datasetList = null;
                if (args.Length == 4)
                {
                    datasetList = File.ReadAllLines(args[3]).ToList();
                }

                if (path == null)
                {
                    Console.WriteLine(@"The directory path is invalid");
                    return 1;
                }

                NHibernateUtil.ConnectToDatabase(databasePath, false);

                IDatasetDAO datasetCache = new DatasetDAOHibernate();
                var         dateSuffix   = AnalysisPathUtils.BuildDateSuffix();

                // Log file lives next to the database: <databaseName>_charge_<chargeState>_<dateSuffix>.txt
                Logger.LogPath = Path.Combine(path, string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

                Logger.PrintMessage("Find all datasets", true);
                var datasets = datasetCache.FindAll();
                Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

                // Create the clustering algorithm - average linkage
                IClusterer<UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

                // Create the DAO object to extract the features
                var database = new UmcAdoDAO {
                    DatabasePath = databasePath
                };
                IUmcDAO featureDao = database;

                Logger.PrintMessage("Extracting Features", true);
                var tempFeatures = featureDao.FindByCharge(chargeState);
                Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

                var features = new List<UMCLight>();
                if (datasetList != null)
                {
                    // Index the known datasets by lower-cased name so the requested names match case-insensitively.
                    var featuremap = datasets.ToDictionary(info => info.DatasetName.ToLower());

                    var focusedDatasetList = new Dictionary<int, DatasetInformation>();
                    foreach (var name in datasetList)
                    {
                        var key = name.ToLower();
                        if (!featuremap.ContainsKey(key))
                        {
                            // Caught by the outer handler below, which logs it and returns 1.
                            throw new Exception("Didn't find the dataset required..." + name);
                        }

                        Logger.PrintMessage("Using dataset: " + name);
                        focusedDatasetList.Add(featuremap[key].DatasetId, featuremap[key]);
                    }

                    // Keep only the features whose GroupId is the id of one of the requested datasets.
                    features.AddRange(tempFeatures.Where(feature => focusedDatasetList.ContainsKey(feature.GroupId)));

                    Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
                }
                else
                {
                    features = tempFeatures;
                }

                // Handle logging progress.
                clusterer.Progress += clusterer_Progress;
                clusterer.Parameters.Tolerances.DriftTime        = .3;
                clusterer.Parameters.Tolerances.Mass             = 16;
                clusterer.Parameters.Tolerances.Net              = .014;
                clusterer.Parameters.OnlyClusterSameChargeStates = true;
                clusterer.Parameters.CentroidRepresentation      = ClusterCentroidRepresentation.Mean;
                clusterer.Parameters.DistanceFunction            = DistanceFactory<UMCLight>.CreateDistanceFunction(DistanceMetric.WeightedEuclidean);

                // Then cluster
                var clusterWriter = new UmcClusterWriter();
                IClusterWriter<UMCClusterLight> writer = clusterWriter; //new UMCClusterDummyWriter();
                try
                {
                    clusterWriter.Open(crossPath);
                    clusterWriter.WriteHeader(datasets);

                    clusterer.ClusterAndProcess(features, writer);
                    Logger.PrintMessage("", true);
                    Logger.PrintMessage("ANALYSIS SUCCESS", true);
                    return 0;
                }
                catch (Exception ex)
                {
                    Logger.PrintMessage("Unhandled Error: " + ex.Message);
                    var innerEx = ex.InnerException;
                    while (innerEx != null)
                    {
                        Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                        innerEx = innerEx.InnerException;
                    }
                    Logger.PrintMessage("Stack: " + ex.StackTrace);
                    Logger.PrintMessage("");
                    Logger.PrintMessage("ANALYSIS FAILED");
                    return 1;
                }
                finally
                {
                    // Runs on both the success and the failure path.
                    clusterWriter.Close();
                }
            }
            catch (Exception ex)
            {
                // Anything thrown during setup (argument parsing, database access, dataset filtering).
                Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
                var innerEx = ex.InnerException;
                while (innerEx != null)
                {
                    Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                    innerEx = innerEx.InnerException;
                }
                Logger.PrintMessage("Stack: " + ex.StackTrace, true);
                Logger.PrintMessage("");
                Logger.PrintMessage("ANALYSIS FAILED");
                return 1;
            }
        }
Exemple #4
0
        /// <summary>
        ///     Loads the LCMS features of one dataset, choosing the source from the extension of
        ///     <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Feature file path. A ".txt" file (any casing) is parsed directly.</param>
        /// <param name="datasetId">Dataset id used to query the cache for every other extension.</param>
        /// <param name="featureCache">Feature cache queried when the file is not a ".txt" file.</param>
        /// <returns>
        ///     An empty list when no extension can be determined; otherwise the features read from the
        ///     text file or returned by the cache.
        /// </returns>
        public static IList<UMCLight> LoadUmcFeatureData(string path,
            int datasetId,
            IUmcDAO featureCache)
        {
            var emptyResult = new List<UMCLight>();
            var fileExtension = Path.GetExtension(path);
            if (fileExtension == null)
            {
                return emptyResult;
            }

            if (fileExtension.ToUpper() == ".TXT")
            {
                // Plain text feature file: parse it directly.
                var textReader = new LCMSFeatureFileReader(path);
                return textReader.GetUmcList();
            }

            // ".db3" and every other extension are served from the cache. Previously the features were
            // reconstructed from scratch even when already cached, because the extension was ".csv"
            // rather than ".db3".
            return featureCache.FindByDatasetId(datasetId);
        }
Exemple #5
0
        /// <summary>
        ///     Loads the LCMS features of <paramref name="dataset"/>, choosing the reader from the
        ///     extension of its feature file, and then attaches MS/MS data when the provider supports it.
        /// </summary>
        /// <param name="dataset">Dataset whose <c>Features.Path</c> and <c>DatasetId</c> are used.</param>
        /// <param name="featureCache">Feature cache queried for ".db3" and unrecognized extensions.</param>
        /// <param name="provider">
        ///     Optional scan summary provider. It is passed to the LC-IMS reader, is required to be an
        ///     <c>InformedProteomicsReader</c> for ".ms1ft" files, and triggers <c>LoadMsMs</c> when it
        ///     also implements <c>ISpectraProvider</c>.
        /// </param>
        /// <returns>
        ///     The loaded features. The list is empty when the path has no determinable extension or when
        ///     a ".ms1ft" file is given without an <c>InformedProteomicsReader</c> provider.
        /// </returns>
        public static IList<UMCLight> LoadUmcFeatureData(DatasetInformation dataset, IUmcDAO featureCache, IScanSummaryProvider provider = null)
        {
            var loaded      = new List<UMCLight>();
            var featurePath = dataset.Features.Path;
            var fileType    = Path.GetExtension(featurePath);
            if (fileType == null)
            {
                return loaded;
            }

            fileType = fileType.ToUpper();
            if (fileType == ".TXT")
            {
                // Two text formats share the extension; the file name suffix tells them apart.
                if (featurePath.EndsWith("_LCMSFeatures.txt"))
                {
                    var lcImsReader = new LcImsFeatureFileReader(provider, dataset.DatasetId);
                    loaded = lcImsReader.ReadFile(featurePath).ToList();
                }
                else
                {
                    var textReader = new LCMSFeatureFileReader(featurePath);
                    loaded = textReader.GetUmcList();
                }
            }
            else if (fileType == ".MS1FT")
            {
                // Only readable through an InformedProteomicsReader; otherwise the list stays empty.
                var informedReader = provider as InformedProteomicsReader;
                if (informedReader != null)
                {
                    var ms1FtReader = new PromexFileReader(informedReader, dataset.DatasetId);
                    loaded = ms1FtReader.ReadFile(featurePath).ToList();
                }
            }
            else
            {
                // ".db3" and every other extension come from the cache. Previously the features were
                // reconstructed from scratch even when already cached, because the extension was
                // ".csv" rather than ".db3".
                loaded = featureCache.FindByDatasetId(dataset.DatasetId);
            }

            var msMsSource = provider as ISpectraProvider;
            if (loaded != null && msMsSource != null)
            {
                LoadMsMs(loaded, msMsSource);
            }

            return loaded;
        }
        /// <summary>
        ///     Clusters the cached LCMS features and stores the resulting clusters.
        /// </summary>
        /// <remarks>
        ///     When charge separation is disabled, all features are clustered in one pass. Otherwise each
        ///     charge state from 1 up to the maximum charge reported by the feature cache is clustered on
        ///     its own; charge states without features are skipped. Cluster ids come from one running
        ///     counter, so they do not repeat across charge states.
        /// </remarks>
        public void ClusterFeatures()
        {
            this.algorithms = this.builder.GetAlgorithmProvider(this.options);
            var clusterer = this.algorithms.Clusterer;
            clusterer.Parameters = LcmsClusteringOptions.ConvertToOmics(this.options.LcmsClusteringOptions);
            this.featureCache = this.analysis.DataProviders.FeatureCache;

            // Next cluster id to hand out; shared by every clustering pass below.
            var clusterCount = 0;
            var providers = this.analysis.DataProviders;

            // Here we see if we need to separate the charge...
            // IMS is said to require charge separation
            if (!this.analysis.Options.LcmsClusteringOptions.ShouldSeparateCharge)
            {
                var features = this.featureCache.FindAll();
                var clusters = new List<UMCClusterLight>();
                clusters = clusterer.Cluster(features, clusters);
                foreach (var cluster in clusters)
                {
                    cluster.Id = clusterCount++;
                    cluster.UmcList.ForEach(x => x.ClusterId = cluster.Id);

                    // Updates the cluster with statistics
                    foreach (var feature in cluster.UmcList)
                    {
                        cluster.MsMsCount += feature.MsMsCount;
                        cluster.IdentifiedSpectraCount += feature.IdentifiedSpectraCount;
                    }
                }
                providers.ClusterCache.AddAll(clusters);
                providers.FeatureCache.UpdateAll(features);
                this.analysis.Clusters = clusters;
            }
            else
            {
                var maxChargeState = this.featureCache.FindMaxCharge();

                // Here we cluster all charge states separately.  Probably IMS Data.
                for (var chargeState = 1; chargeState <= maxChargeState; chargeState++)
                {
                    var features = this.featureCache.FindByCharge(chargeState);
                    if (features.Count < 1)
                    {
                        // An empty charge state must not stop the loop: higher charge states may
                        // still hold features that need clustering.
                        continue;
                    }

                    var clusters = clusterer.Cluster(features);
                    foreach (var cluster in clusters)
                    {
                        cluster.Id = clusterCount++;
                        cluster.UmcList.ForEach(x => x.ClusterId = cluster.Id);

                        // Updates the cluster with statistics
                        foreach (var feature in cluster.Features)
                        {
                            cluster.MsMsCount += feature.MsMsCount;
                            cluster.IdentifiedSpectraCount += feature.IdentifiedSpectraCount;
                        }
                    }

                    this.analysis.DataProviders.ClusterCache.AddAll(clusters);
                    this.analysis.DataProviders.FeatureCache.UpdateAll(features);
                }

                // Clusters were persisted per charge state; read the complete set back from the cache.
                this.analysis.Clusters = this.analysis.DataProviders.ClusterCache.FindAll();
            }

            // NOTE(review): looks like a leftover debugging aid - confirm whether this dialog is still wanted.
            MessageBox.Show("Working Command");
        }
Exemple #7
0
        /// <summary>
        ///     Clusters the features of the loaded datasets, reports progress while doing so, rebuilds the
        ///     cluster index and writes the clusters to a cross-tab text file.
        /// </summary>
        /// <param name="workflowProgress">
        ///     Optional progress sink; every progress update produced here is forwarded to it.
        ///     A no-op <see cref="Progress{T}"/> is used when it is null.
        /// </param>
        /// <remarks>
        ///     Each acquisition of the database lock is released in a <c>finally</c> block, so an exception
        ///     thrown by the indexer, the cluster cache or the feature cache cannot leave the lock held.
        /// </remarks>
        internal void ClusterFeatures(IProgress<ProgressData> workflowProgress = null)
        {
            var taskBarProgress = TaskBarProgress.GetInstance();

            taskBarProgress.ShowProgress(this, true);
            workflowProgress = workflowProgress ?? new Progress<ProgressData>();

            // Fans one progress update out to the view model, the task bar and the caller.
            IProgress<ProgressData> internalProgress = new Progress<ProgressData>(pd =>
            {
                this.progress.Report((int)pd.Percent);
                this.ProgressPercent = pd.Percent;
                taskBarProgress.SetProgress(this, pd.Percent);
                workflowProgress.Report(pd);
            });

            this.algorithms = this.builder.GetAlgorithmProvider(this.options);
            var clusterer = this.algorithms.Clusterer;

            clusterer.Parameters = LcmsClusteringOptions.ConvertToOmics(this.options.LcmsClusteringOptions);
            this.featureCache    = this.analysis.DataProviders.FeatureCache;

            // The Promex clusterer additionally needs the scan summary providers.
            var promexClusterer = clusterer as PromexClusterer;
            if (promexClusterer != null)
            {
                promexClusterer.Readers = this.analysis.DataProviders.ScanSummaryProviderCache;
            }

            foreach (var dataset in this.Datasets)
            {
                if (dataset.FeaturesFound)
                {
                    dataset.DatasetState = DatasetInformationViewModel.DatasetStates.Clustering;
                }
            }

            ThreadSafeDispatcher.Invoke(this.ClusterFeaturesCommand.RaiseCanExecuteChanged);
            ThreadSafeDispatcher.Invoke(this.DisplayClustersCommand.RaiseCanExecuteChanged);

            this.ShouldShowProgress = true;
            var progData        = new ProgressData(internalProgress);
            var clusterProgress = new Progress<ProgressData>(pd => progData.Report(pd.Percent));

            // Use one reference for every Enter/Exit pair so each exit releases the lock that was entered.
            var databaseLock = this.analysis.DataProviders.DatabaseLock;

            // Drop the cluster index and remove previously stored clusters before re-clustering.
            databaseLock.EnterWriteLock();
            try
            {
                DatabaseIndexer.IndexClustersDrop(NHibernateUtil.Path);
                this.analysis.DataProviders.ClusterCache.ClearAllClusters();
            }
            finally
            {
                databaseLock.ExitWriteLock();
            }

            // The id for a cluster - keep track here to avoid duplicates when separating by charge.
            var clusterCount = 0;

            // Here we see if we need to separate the charge...
            // IMS is said to require charge separation
            if (!this.analysis.Options.LcmsClusteringOptions.ShouldSeparateCharge)
            {
                // First 45% of the progress range: collecting features dataset by dataset.
                progData.StepRange(45);
                var features = new List<UMCLight>();
                var i        = 0;
                var datasets = this.Datasets.Where(ds => ds.FeaturesFound).ToList();
                foreach (var dataset in datasets)
                {
                    databaseLock.EnterReadLock();
                    try
                    {
                        features.AddRange(this.featureCache.FindByDatasetId(dataset.DatasetId));
                    }
                    finally
                    {
                        databaseLock.ExitReadLock();
                    }

                    progData.Report(++i, datasets.Count);
                }

                // Remainder of the progress range: the clustering itself.
                progData.StepRange(100);
                ClusterGroupOfFeatures(clusterer, features, ref clusterCount, clusterProgress);
            }
            else
            {
                var maxChargeState = this.featureCache.FindMaxCharge();

                // Here we cluster all charge states separately.  Probably IMS Data.
                for (var chargeState = 1; chargeState <= maxChargeState; chargeState++)
                {
                    var maxPercent = ((100.0 * chargeState) / maxChargeState);
                    // TODO: Add restriction by selected dataset ids?
                    var features = this.featureCache.FindByCharge(chargeState);
                    if (features.Count < 1)
                    {
                        // Nothing at this charge state; higher charge states may still have features.
                        continue;
                    }

                    progData.StepRange(maxPercent);
                    ClusterGroupOfFeatures(clusterer, features, ref clusterCount, clusterProgress);
                }

                this.analysis.Clusters = this.analysis.DataProviders.ClusterCache.FindAll();
            }

            // Rebuild the cluster index that was dropped above.
            databaseLock.EnterWriteLock();
            try
            {
                DatabaseIndexer.IndexClusters(NHibernateUtil.Path);
            }
            finally
            {
                databaseLock.ExitWriteLock();
            }

            // NOTE(review): PersistingClusters is not assigned in this method; presumably
            // ClusterGroupOfFeatures sets it - confirm.
            foreach (var dataset in this.Datasets)
            {
                if (dataset.DatasetState == DatasetInformationViewModel.DatasetStates.PersistingClusters)
                {
                    dataset.DatasetState = DatasetInformationViewModel.DatasetStates.Clustered;
                }
            }

            try
            {
                // Write to file
                this.WriteClusterData(string.Format("{0}_crosstab.tsv", this.analysis.AnalysisName), this.analysis.Clusters);
            }
            catch (Exception ex)
            {
                // A failed export is reported but does not abort the method.
                var errMsg = "Error writing results to text file: " + ex.Message;
                Logger.PrintMessage(errMsg);

                // Todo: Add this: if (!GlobalSettings.AutomatedAnalysisMode)
                MessageBox.Show(errMsg);
            }

            ThreadSafeDispatcher.Invoke(this.ClusterFeaturesCommand.RaiseCanExecuteChanged);
            ThreadSafeDispatcher.Invoke(this.DisplayClustersCommand.RaiseCanExecuteChanged);

            taskBarProgress.ShowProgress(this, false);
            this.ShouldShowProgress = false;
        }