Esempio n. 1
0
        /// <summary>
        /// Connects to the database at <paramref name="databasePath"/>, loads every dataset and
        /// the clusters matching <paramref name="charge"/>, and hands both to WriteClusters.
        /// </summary>
        /// <param name="databasePath">Path of the database to connect to.</param>
        /// <param name="crossPath">Output path forwarded to WriteClusters.</param>
        /// <param name="charge">Charge used to select clusters; also forwarded to WriteClusters.</param>
        /// <param name="minimumClusterSize">Minimum cluster size forwarded to WriteClusters.</param>
        public void TestClusterWriting(string databasePath, string crossPath, int charge, int minimumClusterSize)
        {
            NHibernateUtil.ConnectToDatabase(databasePath, false);

            // The feature DAO is instantiated here but is not used again in this method.
            IDatasetDAO datasetDao = new DatasetDAOHibernate();
            IUmcClusterDAO clusterDao = new UmcClusterDAOHibernate();
            IUmcDAO featureDao = new UmcDAOHibernate();

            Console.WriteLine("Find all datasets");
            var allDatasets = datasetDao.FindAll();

            Console.WriteLine("Find all clusters");
            var chargeClusters = clusterDao.FindByCharge(charge);

            // The meaning of the trailing numeric argument is defined by WriteClusters,
            // which is not visible from here.
            WriteClusters(
                allDatasets,
                chargeClusters,
                minimumClusterSize,
                charge,
                crossPath,
                databasePath,
                300000);
        }
Esempio n. 2
0
        /// <summary>
        /// Connects to the database, fetches all datasets and the clusters of the requested
        /// charge, and delegates the actual output to WriteClusters.
        /// </summary>
        /// <param name="databasePath">Path of the database to connect to.</param>
        /// <param name="crossPath">Output path passed through to WriteClusters.</param>
        /// <param name="charge">Charge used for the cluster query and passed to WriteClusters.</param>
        /// <param name="minimumClusterSize">Minimum cluster size passed through to WriteClusters.</param>
        public void CreateCrossTab(string databasePath, string crossPath, int charge, int minimumClusterSize)
        {
            // Last argument handed to WriteClusters; its semantics live in that method.
            const int writeClustersLastArgument = 50000;

            NHibernateUtil.ConnectToDatabase(databasePath, false);

            IDatasetDAO datasetSource = new DatasetDAOHibernate();
            IUmcClusterDAO clusterSource = new UmcClusterDAOHibernate();
            // Constructed but not referenced again in this method.
            IUmcDAO featureSource = new UmcDAOHibernate();

            Console.WriteLine("Find all datasets");
            var loadedDatasets = datasetSource.FindAll();

            Console.WriteLine("Find all clusters");
            var loadedClusters = clusterSource.FindByCharge(charge);

            WriteClusters(loadedDatasets, loadedClusters, minimumClusterSize, charge, crossPath, databasePath, writeClustersLastArgument);
        }
Esempio n. 3
0
        /// <summary>
        /// Writes a two-column CSV (dataset name, dataset id) for every dataset found in the
        /// database. The file is created at <paramref name="crossPath"/> with ".csv" appended.
        /// </summary>
        /// <param name="databasePath">Path of the database to connect to.</param>
        /// <param name="crossPath">Base output path; ".csv" is appended to it.</param>
        /// <param name="charge">Not used by this method.</param>
        /// <param name="minimumClusterSize">Not used by this method.</param>
        public void CreateDatasetMap(string databasePath, string crossPath, int charge, int minimumClusterSize)
        {
            NHibernateUtil.ConnectToDatabase(databasePath, false);

            // The cluster and feature DAOs are instantiated but not used afterwards here.
            IDatasetDAO datasetDao = new DatasetDAOHibernate();
            IUmcClusterDAO clusterDao = new UmcClusterDAOHibernate();
            IUmcDAO featureDao = new UmcDAOHibernate();

            Console.WriteLine("Find all datasets");
            var allDatasets = datasetDao.FindAll();

            var csvPath = crossPath + ".csv";
            TextWriter csv = File.CreateText(csvPath);
            try
            {
                csv.WriteLine("Dataset, Dataset Id");
                foreach (var dataset in allDatasets)
                {
                    csv.WriteLine("{0},{1}", dataset.DatasetName, dataset.DatasetId);
                }
            }
            finally
            {
                // Explicit form of the disposal a using-statement performs.
                if (csv != null)
                {
                    csv.Dispose();
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Dumps the name and id of each dataset stored in the database into a CSV file
        /// located at <paramref name="crossPath"/> + ".csv".
        /// </summary>
        /// <param name="databasePath">Path of the database to connect to.</param>
        /// <param name="crossPath">Base output path; ".csv" is appended to it.</param>
        /// <param name="charge">Not used by this method.</param>
        /// <param name="minimumClusterSize">Not used by this method.</param>
        public void CreateDatasetMap(string databasePath, string crossPath, int charge, int minimumClusterSize)
        {
            const string headerLine = "Dataset, Dataset Id";
            const string rowFormat = "{0},{1}";

            NHibernateUtil.ConnectToDatabase(databasePath, false);

            IDatasetDAO datasetStore = new DatasetDAOHibernate();
            // The next two DAOs are created but never referenced again in this method.
            IUmcClusterDAO clusterStore = new UmcClusterDAOHibernate();
            IUmcDAO featureStore = new UmcDAOHibernate();

            Console.WriteLine("Find all datasets");
            var storedDatasets = datasetStore.FindAll();

            using (TextWriter output = File.CreateText(crossPath + ".csv"))
            {
                output.WriteLine(headerLine);

                foreach (var entry in storedDatasets)
                {
                    output.WriteLine(rowFormat, entry.DatasetName, entry.DatasetId);
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// The main entry point for the application.
        /// Loads the features of one charge state from the database, optionally restricts them
        /// to a list of datasets, clusters them and writes the clusters to the cross-tab file.
        /// </summary>
        /// <param name="args">
        /// Command line arguments: [0] database path, [1] charge state, [2] cross-tab output path,
        /// and optionally [3] a text file with one dataset name per line (only honoured when
        /// exactly four arguments are supplied).
        /// </param>
        /// <returns>0 when the analysis succeeds; 1 on a usage error or any failure.</returns>
        static int Main(string [] args)
        {
            var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;

            SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

            try
            {
                // Three positional arguments are mandatory because args[2] is read below.
                if (args.Length < 3)
                {
                    Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
                    // Regular (non-verbatim) string so that \t is emitted as a tab character.
                    Console.WriteLine("\tThe cross-tab file will be placed in the same directory as the database path");
                    return 1;
                }

                // Setup the analysis processing
                var databasePath = args[0];
                var databaseName = Path.GetFileNameWithoutExtension(databasePath);
                var path         = Path.GetDirectoryName(databasePath);
                var crossPath    = args[2];
                var chargeState  = Convert.ToInt32(args[1]);

                List<string> datasetList = null;
                if (args.Length == 4)
                {
                    datasetList = File.ReadAllLines(args[3]).ToList();
                }

                if (path == null)
                {
                    Console.WriteLine(@"The directory path is invalid");
                    return 1;
                }

                NHibernateUtil.ConnectToDatabase(databasePath, false);

                IDatasetDAO datasetCache = new DatasetDAOHibernate();
                var dateSuffix = AnalysisPathUtils.BuildDateSuffix();

                // The log file is written next to the database.
                Logger.LogPath = Path.Combine(path, string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

                Logger.PrintMessage("Find all datasets", true);
                var datasets = datasetCache.FindAll();
                Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

                // Create the clustering algorithm - average linkage
                IClusterer<UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

                // Create the DAO object to extract the features
                var database = new UmcAdoDAO
                {
                    DatabasePath = databasePath
                };
                IUmcDAO featureDao = database;

                Logger.PrintMessage("Extracting Features", true);
                var tempFeatures = featureDao.FindByCharge(chargeState);
                Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

                List<UMCLight> features;
                if (datasetList != null)
                {
                    // Dataset names are matched case-insensitively via lower-cased keys.
                    var featuremap = datasets.ToDictionary(info => info.DatasetName.ToLower());

                    var focusedDatasetList = new Dictionary<int, DatasetInformation>();
                    foreach (var name in datasetList)
                    {
                        // Blank lines (e.g. a trailing newline in the list file) are not dataset names.
                        if (name.Trim().Length == 0)
                        {
                            continue;
                        }

                        DatasetInformation match;
                        if (!featuremap.TryGetValue(name.ToLower(), out match))
                        {
                            throw new Exception("Didn't find the dataset required..." + name);
                        }

                        Logger.PrintMessage("Using dataset: " + name);

                        // Indexer assignment so a name repeated in the list does not abort the run.
                        focusedDatasetList[match.DatasetId] = match;
                    }

                    // A feature's GroupId is compared against the ids of the selected datasets.
                    features = tempFeatures.Where(feature => focusedDatasetList.ContainsKey(feature.GroupId)).ToList();

                    Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
                }
                else
                {
                    features = tempFeatures;
                }

                // Handle logging progress.
                clusterer.Progress += clusterer_Progress;
                clusterer.Parameters.Tolerances.DriftTime        = .3;
                clusterer.Parameters.Tolerances.Mass             = 16;
                clusterer.Parameters.Tolerances.Net              = .014;
                clusterer.Parameters.OnlyClusterSameChargeStates = true;
                clusterer.Parameters.CentroidRepresentation      = ClusterCentroidRepresentation.Mean;
                clusterer.Parameters.DistanceFunction            = DistanceFactory<UMCLight>.CreateDistanceFunction(DistanceMetric.WeightedEuclidean);

                // Then cluster
                var clusterWriter = new UmcClusterWriter();
                IClusterWriter<UMCClusterLight> writer = clusterWriter;
                try
                {
                    clusterWriter.Open(crossPath);
                    clusterWriter.WriteHeader(datasets);

                    clusterer.ClusterAndProcess(features, writer);
                    Logger.PrintMessage("", true);
                    Logger.PrintMessage("ANALYSIS SUCCESS", true);
                    return 0;
                }
                catch (Exception ex)
                {
                    Logger.PrintMessage("Unhandled Error: " + ex.Message);
                    var innerEx = ex.InnerException;
                    while (innerEx != null)
                    {
                        Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                        innerEx = innerEx.InnerException;
                    }
                    Logger.PrintMessage("Stack: " + ex.StackTrace);
                    Logger.PrintMessage("");
                    Logger.PrintMessage("ANALYSIS FAILED");
                    return 1;
                }
                finally
                {
                    // Runs on both the success and the failure path.
                    clusterWriter.Close();
                }
            }
            catch (Exception ex)
            {
                // Failures before clustering starts (argument parsing, database access, filtering).
                Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
                var innerEx = ex.InnerException;
                while (innerEx != null)
                {
                    Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                    innerEx = innerEx.InnerException;
                }
                Logger.PrintMessage("Stack: " + ex.StackTrace, true);
                Logger.PrintMessage("");
                Logger.PrintMessage("ANALYSIS FAILED");
                return 1;
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Recreates the database at <paramref name="databasePath"/>, stores 10 dummy datasets and
        /// 100 randomly generated clusters of the given charge, reads the clusters back by charge
        /// and passes them to WriteClusters.
        /// </summary>
        /// <param name="databasePath">Database file; any existing file at this path is deleted first.</param>
        /// <param name="crossPath">Output path forwarded to WriteClusters.</param>
        /// <param name="charge">Charge state assigned to every generated cluster and feature.</param>
        /// <param name="minimumClusterSize">Minimum cluster size forwarded to WriteClusters.</param>
        public void TestClusterGeneration(string databasePath,
            string crossPath,
            int charge,
            int minimumClusterSize)
        {
            File.Delete(databasePath);
            NHibernateUtil.ConnectToDatabase(databasePath, true);

            IDatasetDAO datasetCache = new DatasetDAOHibernate();
            IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
            IUmcDAO featureCache = new UmcDAOHibernate();

            // Creating a dataset
            Console.WriteLine("Creating dummy datasets");
            var datasets = new List<DatasetInformation>();
            const int total = 10;
            for (var i = 0; i < total; i++)
            {
                datasets.Add(new DatasetInformation
                {
                    DatasetId = i,
                    DatasetName = "test" + i
                });
            }
            datasetCache.AddAll(datasets);

            // Re-read the datasets from the database instead of reusing the in-memory list.
            datasets.Clear();
            datasets = datasetCache.FindAll();

            // Create features
            Console.WriteLine("Creating features");
            var features = new List<UMCLight>();
            var clusters = new List<UMCClusterLight>();
            var x = new Random();
            var featureId = 0;
            for (var i = 0; i < 100; i++)
            {
                var cluster = new UMCClusterLight();
                cluster.Id = i;
                cluster.AmbiguityScore = i;
                cluster.Tightness = i;
                cluster.ChargeState = charge;

                // Between 1 and total - 1 features per cluster, each from a different dataset.
                var N = x.Next(1, total);
                var hash = new HashSet<int>();

                for (var j = 0; j < N; j++)
                {
                    // Draw dataset ids until one not yet used by this cluster comes up;
                    // HashSet.Add returns false for an id that is already present.
                    int did;
                    do
                    {
                        did = x.Next(0, total);
                    } while (!hash.Add(did));

                    // NOTE(review): the previous code contained the no-op
                    // "feature.Abundance = feature.Abundance"; it may have been meant to copy
                    // AbundanceSum into Abundance - confirm before adding such an assignment.
                    var feature = new UMCLight
                    {
                        GroupId = did,
                        Id = featureId++,
                        ChargeState = charge,
                        MassMonoisotopic = x.NextDouble(),
                        Net = x.NextDouble(),
                        AbundanceSum = x.Next(100, 200),
                        ClusterId = cluster.Id
                    };

                    cluster.AddChildFeature(feature);
                    features.Add(feature);
                }
                cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                clusters.Add(cluster);
            }
            featureCache.AddAll(features);
            clusterCache.AddAll(clusters);

            // NOTE(review): this result is replaced by the FindByCharge call below; the call is
            // kept because whether FindAll has side effects is not visible from here.
            clusters = clusterCache.FindAll();

            Console.WriteLine("Find all clusters");
            clusters = clusterCache.FindByCharge(charge);

            WriteClusters(datasets,
                clusters,
                minimumClusterSize,
                charge,
                crossPath,
                databasePath,
                300000);
        }
Esempio n. 7
0
        /// <summary>
        /// Recreates the database at <paramref name="databasePath"/> and fills it with dummy
        /// datasets and randomly generated, unclustered features (ClusterId = -1).
        /// </summary>
        /// <param name="databasePath">Database file; any existing file at this path is deleted first.</param>
        /// <param name="totalDatasets">
        /// Number of dummy datasets to create. Must be at least 1 whenever
        /// <paramref name="totalClusters"/> is positive, because Random.Next(1, totalDatasets)
        /// rejects an upper bound below its lower bound.
        /// </param>
        /// <param name="totalClusters">Number of feature groups to generate.</param>
        public void TestCreateDummyDatabase(string databasePath, int totalDatasets, int totalClusters)
        {
            File.Delete(databasePath);
            NHibernateUtil.ConnectToDatabase(databasePath, true);

            IDatasetDAO datasetCache = new DatasetDAOHibernate();
            // Instantiated but not used further in this method.
            IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
            IUmcDAO featureCache = new UmcDAOHibernate();

            // Creating a dataset
            Console.WriteLine("Creating dummy datasets");
            var datasets = new List<DatasetInformation>();
            var total = totalDatasets;
            for (var i = 0; i < total; i++)
            {
                datasets.Add(new DatasetInformation
                {
                    DatasetId = i,
                    DatasetName = "test" + i
                });
            }
            datasetCache.AddAll(datasets);
            datasets.Clear();
            datasets = datasetCache.FindAll();

            // Create features
            Console.WriteLine("Creating features");
            var features = new List<UMCLight>();
            var x = new Random();

            var featureId = 0;
            for (var i = 0; i < totalClusters; i++)
            {
                var N = x.Next(1, total);
                var charge = x.Next(1, 10);
                var hash = new HashSet<int>();

                // Values shared by all features of this group.
                var net = x.NextDouble();
                var mass = 400 + (1600*x.NextDouble());
                var dt = 60*x.NextDouble();

                for (var j = 0; j < N; j++)
                {
                    // Draw dataset ids until one not yet used in this group comes up;
                    // HashSet.Add returns false for an id that is already present.
                    int did;
                    do
                    {
                        did = x.Next(0, total);
                    } while (!hash.Add(did));

                    var feature = new UMCLight
                    {
                        GroupId = did,
                        Id = featureId++,
                        ChargeState = charge,
                        MassMonoisotopic = FeatureLight.ComputeDaDifferenceFromPPM(mass, 3)
                    };
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                    // Each feature's NET is the group value plus a random offset below 0.03.
                    feature.Net = net + 0.03 * x.NextDouble();
                    feature.NetAligned = feature.Net;
                    feature.DriftTime = dt;
                    feature.AbundanceSum = x.Next(100, 200);
                    // NOTE(review): the previous code contained the no-op
                    // "feature.Abundance = feature.Abundance" (and "feature.Net = feature.Net");
                    // the former may have been meant to copy AbundanceSum - confirm.
                    feature.ClusterId = -1;
                    features.Add(feature);
                }
            }
            featureCache.AddAll(features);
        }
Esempio n. 8
0
        /// <summary>
        /// Deletes and recreates the database at <paramref name="databasePath"/>, then stores
        /// dummy datasets and randomly generated features that belong to no cluster
        /// (ClusterId = -1).
        /// </summary>
        /// <param name="databasePath">Database file; any existing file at this path is deleted first.</param>
        /// <param name="totalDatasets">
        /// Number of dummy datasets to create. Must be at least 1 whenever
        /// <paramref name="totalClusters"/> is positive, because Random.Next(1, totalDatasets)
        /// rejects an upper bound below its lower bound.
        /// </param>
        /// <param name="totalClusters">Number of feature groups to generate.</param>
        public void TestCreateDummyDatabase(string databasePath, int totalDatasets, int totalClusters)
        {
            File.Delete(databasePath);
            NHibernateUtil.ConnectToDatabase(databasePath, true);

            IDatasetDAO    datasetCache = new DatasetDAOHibernate();
            // Instantiated but not used further in this method.
            IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
            IUmcDAO        featureCache = new UmcDAOHibernate();

            // Creating a dataset
            Console.WriteLine("Creating dummy datasets");
            var datasets = new List<DatasetInformation>();
            var total    = totalDatasets;

            for (var i = 0; i < total; i++)
            {
                var dataset = new DatasetInformation();
                dataset.DatasetId   = i;
                dataset.DatasetName = "test" + i;
                datasets.Add(dataset);
            }
            datasetCache.AddAll(datasets);
            datasets.Clear();
            datasets = datasetCache.FindAll();

            // Create features
            Console.WriteLine("Creating features");
            var features  = new List<UMCLight>();
            var x         = new Random();
            var featureId = 0;

            for (var i = 0; i < totalClusters; i++)
            {
                var N      = x.Next(1, total);
                var charge = x.Next(1, 10);
                var hash   = new HashSet<int>();

                // Values shared by all features of this group.
                var net  = x.NextDouble();
                var mass = 400 + (1600 * x.NextDouble());
                var dt   = 60 * x.NextDouble();

                for (var j = 0; j < N; j++)
                {
                    // Keep drawing until the dataset id is new for this group;
                    // HashSet.Add reports false when the id was already taken.
                    var did = x.Next(0, total);
                    while (!hash.Add(did))
                    {
                        did = x.Next(0, total);
                    }

                    var feature = new UMCLight();
                    feature.GroupId                 = did;
                    feature.Id                      = featureId++;
                    feature.ChargeState             = charge;
                    feature.MassMonoisotopic        = FeatureLight.ComputeDaDifferenceFromPPM(mass, 3);
                    feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                    // Each feature's NET is the group value plus a random offset below .03.
                    feature.Net                     = net + .03 * x.NextDouble();
                    feature.NetAligned              = feature.Net;
                    feature.DriftTime               = dt;
                    feature.AbundanceSum            = x.Next(100, 200);
                    // NOTE(review): the previous code contained the no-op
                    // "feature.Abundance = feature.Abundance" (and "feature.Net = feature.Net");
                    // the former may have been meant to copy AbundanceSum - confirm.
                    feature.ClusterId               = -1;
                    features.Add(feature);
                }
            }
            featureCache.AddAll(features);
        }
Esempio n. 9
0
        /// <summary>
        /// Deletes and recreates the database at <paramref name="databasePath"/>, stores 10 dummy
        /// datasets plus 100 randomly generated clusters of the given charge, reloads the clusters
        /// by charge and passes them to WriteClusters.
        /// </summary>
        /// <param name="databasePath">Database file; any existing file at this path is deleted first.</param>
        /// <param name="crossPath">Output path forwarded to WriteClusters.</param>
        /// <param name="charge">Charge state assigned to every generated cluster and feature.</param>
        /// <param name="minimumClusterSize">Minimum cluster size forwarded to WriteClusters.</param>
        public void TestClusterGeneration(string databasePath,
                                          string crossPath,
                                          int charge,
                                          int minimumClusterSize)
        {
            File.Delete(databasePath);
            NHibernateUtil.ConnectToDatabase(databasePath, true);

            IDatasetDAO    datasetCache = new DatasetDAOHibernate();
            IUmcClusterDAO clusterCache = new UmcClusterDAOHibernate();
            IUmcDAO        featureCache = new UmcDAOHibernate();

            // Creating a dataset
            Console.WriteLine("Creating dummy datasets");
            var datasets = new List<DatasetInformation>();
            var total    = 10;

            for (var i = 0; i < total; i++)
            {
                var dataset = new DatasetInformation();
                dataset.DatasetId   = i;
                dataset.DatasetName = "test" + i;
                datasets.Add(dataset);
            }
            datasetCache.AddAll(datasets);

            // Re-read the datasets from the database instead of reusing the in-memory list.
            datasets.Clear();
            datasets = datasetCache.FindAll();

            // Create features
            Console.WriteLine("Creating features");
            var features  = new List<UMCLight>();
            var clusters  = new List<UMCClusterLight>();
            var x         = new Random();
            var featureId = 0;

            for (var i = 0; i < 100; i++)
            {
                var cluster = new UMCClusterLight();
                cluster.Id             = i;
                cluster.AmbiguityScore = i;
                cluster.Tightness      = i;
                cluster.ChargeState    = charge;

                // Between 1 and total - 1 features per cluster, each from a different dataset.
                var N    = x.Next(1, total);
                var hash = new HashSet<int>();

                for (var j = 0; j < N; j++)
                {
                    // Keep drawing until the dataset id is new for this cluster;
                    // HashSet.Add reports false when the id was already taken.
                    var did = x.Next(0, total);
                    while (!hash.Add(did))
                    {
                        did = x.Next(0, total);
                    }

                    var feature = new UMCLight();
                    feature.GroupId          = did;
                    feature.Id               = featureId++;
                    feature.ChargeState      = charge;
                    feature.MassMonoisotopic = x.NextDouble();
                    feature.Net              = x.NextDouble();
                    feature.AbundanceSum     = x.Next(100, 200);
                    // NOTE(review): the previous code contained the no-op
                    // "feature.Abundance = feature.Abundance"; it may have been meant to copy
                    // AbundanceSum into Abundance - confirm before adding such an assignment.
                    feature.ClusterId        = cluster.Id;

                    cluster.AddChildFeature(feature);
                    features.Add(feature);
                }
                cluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                clusters.Add(cluster);
            }
            featureCache.AddAll(features);
            clusterCache.AddAll(clusters);

            // NOTE(review): this result is replaced by the FindByCharge call below; the call is
            // kept because whether FindAll has side effects is not visible from here.
            clusters = clusterCache.FindAll();

            Console.WriteLine("Find all clusters");
            clusters = clusterCache.FindByCharge(charge);

            WriteClusters(datasets,
                          clusters,
                          minimumClusterSize,
                          charge,
                          crossPath,
                          databasePath,
                          300000);
        }
Esempio n. 10
0
        /// <summary>
        /// The main entry point for the application.
        /// Reads the features of a single charge state from the database, optionally keeps only
        /// those belonging to a listed set of datasets, clusters them and writes the result to
        /// the cross-tab file.
        /// </summary>
        /// <param name="args">
        /// Command line arguments: [0] database path, [1] charge state, [2] cross-tab output path,
        /// and optionally [3] a text file with one dataset name per line (only honoured when
        /// exactly four arguments are supplied).
        /// </param>
        /// <returns>0 when the analysis succeeds; 1 on a usage error or any failure.</returns>
        static int Main(string [] args)
        {
            var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;
            SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

            try
            {
                // args[2] is read unconditionally below, so three arguments are the minimum.
                if (args.Length < 3)
                {
                    Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
                    // Regular (non-verbatim) string so that \t is emitted as a tab character.
                    Console.WriteLine("\tThe cross-tab file will be placed in the same directory as the database path");
                    return 1;
                }

                // Setup the analysis processing
                var databasePath = args[0];
                var databaseName = Path.GetFileNameWithoutExtension(databasePath);
                var path = Path.GetDirectoryName(databasePath);
                var crossPath = args[2];
                var chargeState = Convert.ToInt32(args[1]);

                List<string> datasetList = null;
                if (args.Length == 4)
                {
                    datasetList = File.ReadAllLines(args[3]).ToList();
                }

                if (path == null)
                {
                    Console.WriteLine(@"The directory path is invalid");
                    return 1;
                }

                NHibernateUtil.ConnectToDatabase(databasePath, false);

                IDatasetDAO datasetCache = new DatasetDAOHibernate();
                var dateSuffix = AnalysisPathUtils.BuildDateSuffix();

                // The log file is written next to the database.
                Logger.LogPath = Path.Combine(path, string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

                Logger.PrintMessage("Find all datasets", true);
                var datasets = datasetCache.FindAll();
                Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

                // Create the clustering algorithm - average linkage
                IClusterer<UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

                // Create the DAO object to extract the features
                var database = new UmcAdoDAO {DatabasePath = databasePath};
                IUmcDAO featureDao = database;

                Logger.PrintMessage("Extracting Features", true);
                var tempFeatures = featureDao.FindByCharge(chargeState);
                Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

                List<UMCLight> features;
                if (datasetList == null)
                {
                    // No dataset list supplied: cluster every feature of this charge state.
                    features = tempFeatures;
                }
                else
                {
                    // Dataset names are matched case-insensitively via lower-cased keys.
                    var featuremap = datasets.ToDictionary(info => info.DatasetName.ToLower());

                    var focusedDatasetList = new Dictionary<int, DatasetInformation>();
                    foreach (var name in datasetList)
                    {
                        // Blank lines (e.g. a trailing newline in the list file) are not dataset names.
                        if (name.Trim().Length == 0)
                        {
                            continue;
                        }

                        DatasetInformation match;
                        if (!featuremap.TryGetValue(name.ToLower(), out match))
                        {
                            throw new Exception("Didn't find the dataset required..." + name);
                        }

                        Logger.PrintMessage("Using dataset: " + name);

                        // Indexer assignment so a name repeated in the list does not abort the run.
                        focusedDatasetList[match.DatasetId] = match;
                    }

                    // A feature's GroupId is compared against the ids of the selected datasets.
                    features = tempFeatures.Where(feature => focusedDatasetList.ContainsKey(feature.GroupId)).ToList();

                    Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
                }

                // Handle logging progress.
                clusterer.Progress += clusterer_Progress;
                clusterer.Parameters.Tolerances.DriftTime = .3;
                clusterer.Parameters.Tolerances.Mass = 16;
                clusterer.Parameters.Tolerances.Net = .014;
                clusterer.Parameters.OnlyClusterSameChargeStates = true;
                clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
                clusterer.Parameters.DistanceFunction = PNNLOmics.Algorithms.Distance.DistanceFactory<UMCLight>.CreateDistanceFunction(PNNLOmics.Algorithms.Distance.DistanceMetric.WeightedEuclidean);

                // Then cluster
                var clusterWriter = new UmcClusterWriter();
                IClusterWriter<UMCClusterLight> writer = clusterWriter;
                try
                {
                    clusterWriter.Open(crossPath);
                    clusterWriter.WriteHeader(datasets);

                    clusterer.ClusterAndProcess(features, writer);
                    Logger.PrintMessage("", true);
                    Logger.PrintMessage("ANALYSIS SUCCESS", true);
                    return 0;
                }
                catch (Exception ex)
                {
                    Logger.PrintMessage("Unhandled Error: " + ex.Message);
                    var innerEx = ex.InnerException;
                    while (innerEx != null)
                    {
                        Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                        innerEx = innerEx.InnerException;
                    }
                    Logger.PrintMessage("Stack: " + ex.StackTrace);
                    Logger.PrintMessage("");
                    Logger.PrintMessage("ANALYSIS FAILED");
                    return 1;
                }
                finally
                {
                    // Runs on both the success and the failure path.
                    clusterWriter.Close();
                }
            }
            catch (Exception ex)
            {
                // Failures before clustering starts (argument parsing, database access, filtering).
                Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
                var innerEx = ex.InnerException;
                while (innerEx != null)
                {
                    Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                    innerEx = innerEx.InnerException;
                }
                Logger.PrintMessage("Stack: " + ex.StackTrace, true);
                Logger.PrintMessage("");
                Logger.PrintMessage("ANALYSIS FAILED");
                return 1;
            }
        }