Example #1
0
        /// <summary>
        /// Clusters the features read from <paramref name="path"/> with an average-linkage
        /// clusterer, numbers the resulting clusters sequentially starting at zero and
        /// hands them to WriteClusters.
        /// </summary>
        /// <param name="path">Location passed to ReadFeatures.</param>
        /// <param name="dist">Metric used to create the clusterer's distance function.</param>
        /// <param name="useBoxMethod">Value assigned to ShouldTestClustersWithinTolerance.</param>
        public void TestRestrictiveBoxMethod(string path, DistanceMetric dist, bool useBoxMethod)
        {
            var features = ReadFeatures(path);

            var clusterer = new UMCAverageLinkageClusterer <UMCLight, UMCClusterLight>();
            clusterer.ShouldTestClustersWithinTolerance = useBoxMethod;

            // Configure the parameter object the clusterer already exposes.
            var parameters = clusterer.Parameters;
            parameters.CentroidRepresentation      = ClusterCentroidRepresentation.Mean;
            parameters.DistanceFunction            = DistanceFactory <UMCLight> .CreateDistanceFunction(dist);
            parameters.OnlyClusterSameChargeStates = true;

            var tolerances = parameters.Tolerances;
            tolerances.Mass      = 10;
            tolerances.DriftTime = .3;
            tolerances.Net       = .03;

            var clusters = clusterer.Cluster(features);

            // Assign consecutive ids in enumeration order.
            var nextId = 0;
            foreach (var cluster in clusters)
            {
                cluster.Id = nextId++;
            }

            WriteClusters(clusters);
        }
Example #2
0
        /// <summary>
        /// Creates a new clusterer for the requested generic clustering algorithm.
        /// </summary>
        /// <param name="clusterType">Algorithm whose clusterer should be instantiated.</param>
        /// <returns>A newly constructed clusterer for <paramref name="clusterType"/>.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="clusterType"/> is not one of the algorithm types handled here.
        /// </exception>
        public IClusterer <T, U> Create(GenericClusteringAlgorithmType clusterType)
        {
            switch (clusterType)
            {
            case GenericClusteringAlgorithmType.AverageLinkage:
                return new UMCAverageLinkageClusterer <T, U>();

            case GenericClusteringAlgorithmType.Centroid:
                return new UMCCentroidClusterer <T, U>();

            case GenericClusteringAlgorithmType.SingleLinkage:
                return new UMCSingleLinkageClusterer <T, U>();

            case GenericClusteringAlgorithmType.Prims:
                return new UMCPrimsClustering <T, U>();

            case GenericClusteringAlgorithmType.BinarySearchTree:
                return new Clustering.MsFeatureTreeClusterer <T, U>();

            default:
                // The single-string constructor interprets its argument as the parameter
                // name, so use the overload that carries name, offending value and message.
                throw new ArgumentOutOfRangeException(
                          "clusterType",
                          clusterType,
                          string.Format("Cannot create generic {0} clusterer.", clusterType));
            }
        }
Example #3
0
        /// <summary>
        /// Clusters the features read from <paramref name="path"/> and prints, in ascending
        /// order, the distance between every feature and a centroid built from the mass,
        /// NET and drift time of the cluster that contains it.
        /// </summary>
        /// <param name="path">Location passed to ReadFeatures.</param>
        /// <param name="dist">Metric used to create the clusterer's distance function.</param>
        public void TestDistanceDistributions(string path, DistanceMetric dist)
        {
            var features  = ReadFeatures(path);
            var clusterer = new UMCAverageLinkageClusterer <UMCLight, UMCClusterLight>
            {
                ShouldTestClustersWithinTolerance = false,
                Parameters =
                {
                    CentroidRepresentation      = ClusterCentroidRepresentation.Mean,
                    DistanceFunction            = DistanceFactory <UMCLight> .CreateDistanceFunction(dist),
                    OnlyClusterSameChargeStates = true,
                    Tolerances                  =
                    {
                        Mass      = 10,
                        DriftTime = .3,
                        Net       = .03
                    }
                }
            };

            var clusters = clusterer.Cluster(features);

            // The distance function does not change between clusters, so fetch it once.
            var func      = clusterer.Parameters.DistanceFunction;
            var distances = new List <double>();

            foreach (var cluster in clusters)
            {
                // Stand-in feature located at the cluster's mass / NET / drift time.
                var centroid = new UMCLight();
                centroid.MassMonoisotopicAligned = cluster.MassMonoisotopic;
                centroid.Net       = cluster.Net;
                centroid.DriftTime = cluster.DriftTime;

                foreach (var feature in cluster.Features)
                {
                    distances.Add(func(feature, centroid));
                }
            }

            // The list spans all clusters, so sort and print it once after the loop.
            // Doing this inside the loop re-printed every earlier distance for each
            // additional cluster.
            distances.Sort();

            var rank = 0;
            foreach (var distance in distances)
            {
                rank++;
                Console.WriteLine("{0},{1}", distance, rank);
            }
        }
Example #4
0
        /// <summary>
        /// Creates the LC-MS feature clusterer that corresponds to the requested algorithm.
        /// </summary>
        /// <param name="clusterType">Algorithm whose clusterer should be instantiated.</param>
        /// <returns>
        /// A new clusterer for <paramref name="clusterType"/>, or <c>null</c> when the value
        /// is not one of the algorithm types handled here.
        /// </returns>
        public static IClusterer<UMCLight, UMCClusterLight> Create(LcmsFeatureClusteringAlgorithmType clusterType)
        {
            switch (clusterType)
            {
                case LcmsFeatureClusteringAlgorithmType.AverageLinkage:
                    return new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

                case LcmsFeatureClusteringAlgorithmType.Centroid:
                    return new UMCCentroidClusterer<UMCLight, UMCClusterLight>();

                case LcmsFeatureClusteringAlgorithmType.SingleLinkage:
                    return new UMCSingleLinkageClusterer<UMCLight, UMCClusterLight>();

                case LcmsFeatureClusteringAlgorithmType.Prims:
                    return new UMCPrimsClustering<UMCLight, UMCClusterLight>();

                default:
                    // Unhandled algorithm types yield no clusterer rather than an exception.
                    return null;
            }
        }
Example #5
0
        //[TestCase(@"ClusterData\clusterData-merged-nodelin.txt")]
        /// <summary>
        /// Loads cluster data from a file under the test files directory, clusters it with an
        /// average-linkage clusterer using the weighted Euclidean distance and default
        /// tolerances, and prints every clustered feature as a comma-separated row.
        /// </summary>
        /// <param name="path">Path of the data file, relative to TestPaths.TestFilesDirectory.</param>
        public void TestWeightedAverageLinkage(string path)
        {
            Console.WriteLine("Test: " + path);
            var features = GetClusterData(Path.Combine(TestPaths.TestFilesDirectory, path));

            Assert.IsNotEmpty(features);

            // NOTE(review): this cluster is never read afterwards; it is kept because
            // AddChildFeature may update the features themselves - confirm before removing.
            var cluster = new UMCClusterLight();

            cluster.Id = features[0].Id;
            features.ForEach(x => cluster.AddChildFeature(x));

            var average = new UMCAverageLinkageClusterer <UMCLight, UMCClusterLight>();

            average.Parameters = new FeatureClusterParameters <UMCLight>();
            average.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
            average.Parameters.Tolerances             = new Algorithms.FeatureTolerances();

            var distance = new WeightedEuclideanDistance <UMCLight>();

            average.Parameters.DistanceFunction = distance.EuclideanDistance;
            var clusters = average.Cluster(features);

            // Header uses the same delimiter and column order as the rows written below
            // (it previously was tab-delimited and listed mass before net).
            Console.WriteLine("dataset,feature,net,mass,drift");
            foreach (var newCluster in clusters)
            {
                foreach (var feature in newCluster.Features)
                {
                    Console.WriteLine("{0},{1},{2},{3},{4}", feature.GroupId,
                                      feature.Id,
                                      feature.Net,
                                      feature.MassMonoisotopicAligned,
                                      feature.DriftTime);
                }
            }
        }
Example #6
0
        /// <summary>
        /// The main entry point for the application. Connects to the database, loads the
        /// features of one charge state (optionally restricted to a list of datasets),
        /// clusters them with average linkage and writes the clusters to the cross-tab file.
        /// </summary>
        /// <param name="args">
        /// databasePath, chargeState, crossTabPath and, optionally, the path of a text file
        /// that lists one dataset name per line.
        /// </param>
        /// <returns>0 when the analysis succeeds; 1 on invalid arguments or any failure.</returns>
        static int Main(string [] args)
        {
            // NOTE(review): the main window handle is what gets passed to SetConsoleMode -
            // confirm this is the handle that call expects.
            var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;

            SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

            try
            {
                // databasePath, chargeState and crossTabPath are all read below, so fewer
                // than three arguments must produce the usage text (the old "< 2" check let
                // two arguments through and failed on args[2]).
                if (args.Length < 3)
                {
                    Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
                    // Regular (non-verbatim) literal so that \t is emitted as a tab.
                    Console.WriteLine("\tThe cross-tab file will be placed in the same directory as the database path");
                    return(1);
                }

                // Setup the analysis processing
                var databasePath = args[0];
                var databaseName = Path.GetFileNameWithoutExtension(databasePath);
                var path         = Path.GetDirectoryName(databasePath);
                var crossPath    = args[2];
                var chargeState  = Convert.ToInt32(args[1]);

                // Optional fourth argument: file with the dataset names to restrict to.
                List <string> datasetList = null;
                if (args.Length == 4)
                {
                    datasetList = File.ReadAllLines(args[3]).ToList();
                }

                if (path == null)
                {
                    Console.WriteLine(@"The directory path is invalid");
                    return(1);
                }

                NHibernateUtil.ConnectToDatabase(databasePath, false);

                IDatasetDAO datasetCache = new DatasetDAOHibernate();
                var         dateSuffix   = AnalysisPathUtils.BuildDateSuffix();
                Logger.LogPath = Path.Combine(path, string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

                Logger.PrintMessage("Find all datasets", true);
                var datasets = datasetCache.FindAll();
                Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

                // Create the clustering algorithm - average linkage
                IClusterer <UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer <UMCLight, UMCClusterLight>();

                // Create the DAO object to extract the features
                var database = new UmcAdoDAO {
                    DatabasePath = databasePath
                };
                IUmcDAO featureDao = database;

                Logger.PrintMessage("Extracting Features", true);
                var tempFeatures = featureDao.FindByCharge(chargeState);
                Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

                var features = new List <UMCLight>();
                if (datasetList != null)
                {
                    // Dataset names are matched case-insensitively via lower-cased keys.
                    var featuremap = datasets.ToDictionary(info => info.DatasetName.ToLower());

                    var focusedDatasetList = new Dictionary <int, DatasetInformation>();
                    foreach (var name in datasetList)
                    {
                        DatasetInformation dataset;
                        if (!featuremap.TryGetValue(name.ToLower(), out dataset))
                        {
                            throw new Exception("Didn't find the dataset required..." + name);
                        }

                        Logger.PrintMessage("Using dataset: " + name);
                        focusedDatasetList.Add(dataset.DatasetId, dataset);
                    }

                    // Keep only the features whose group id is one of the requested datasets.
                    features.AddRange(tempFeatures.Where(feature => focusedDatasetList.ContainsKey(feature.GroupId)));

                    Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
                }
                else
                {
                    features = tempFeatures;
                }

                // Handle logging progress.
                clusterer.Progress += clusterer_Progress;
                clusterer.Parameters.Tolerances.DriftTime        = .3;
                clusterer.Parameters.Tolerances.Mass             = 16;
                clusterer.Parameters.Tolerances.Net              = .014;
                clusterer.Parameters.OnlyClusterSameChargeStates = true;
                clusterer.Parameters.CentroidRepresentation      = ClusterCentroidRepresentation.Mean;
                clusterer.Parameters.DistanceFunction            = DistanceFactory <UMCLight> .CreateDistanceFunction(DistanceMetric.WeightedEuclidean);

                // Then cluster
                var clusterWriter = new UmcClusterWriter();
                IClusterWriter <UMCClusterLight> writer = clusterWriter;
                try
                {
                    clusterWriter.Open(crossPath);
                    clusterWriter.WriteHeader(datasets);

                    clusterer.ClusterAndProcess(features, writer);
                    Logger.PrintMessage("", true);
                    Logger.PrintMessage("ANALYSIS SUCCESS", true);
                    return(0);
                }
                catch (Exception ex)
                {
                    // Failure while writing or clustering: log the exception chain and stack.
                    Logger.PrintMessage("Unhandled Error: " + ex.Message);
                    var innerEx = ex.InnerException;
                    while (innerEx != null)
                    {
                        Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                        innerEx = innerEx.InnerException;
                    }
                    Logger.PrintMessage("Stack: " + ex.StackTrace);
                    Logger.PrintMessage("");
                    Logger.PrintMessage("ANALYSIS FAILED");
                    return(1);
                }
                finally
                {
                    // Runs on success and on failure.
                    clusterWriter.Close();
                }
            }
            catch (Exception ex)
            {
                // Failure during setup (argument parsing, database access, feature loading).
                Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
                var innerEx = ex.InnerException;
                while (innerEx != null)
                {
                    Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                    innerEx = innerEx.InnerException;
                }
                Logger.PrintMessage("Stack: " + ex.StackTrace, true);
                Logger.PrintMessage("");
                Logger.PrintMessage("ANALYSIS FAILED");
                return(1);
            }
        }
Example #7
0
        //[TestCase(@"ClusterData\clusterData-single-1500.txt")]
        /// <summary>
        /// Loads cluster data from a file under the test files directory, clusters it with an
        /// average-linkage clusterer that uses the Euclidean distance metric, and prints the
        /// cluster count followed by a sample of features from every cluster.
        /// </summary>
        /// <param name="path">Path of the data file, relative to TestPaths.TestFilesDirectory.</param>
        public void TestAverageLinkage(string path)
        {
            Console.WriteLine("Average Linkage Test: " + path);
            var features = GetClusterData(Path.Combine(TestPaths.TestFilesDirectory, path));

            Assert.IsNotEmpty(features);

            // NOTE(review): this cluster is never read afterwards; it is kept because
            // AddChildFeature may update the features themselves - confirm before removing.
            var cluster = new UMCClusterLight();

            cluster.Id = features[0].Id;
            features.ForEach(x => cluster.AddChildFeature(x));

            var average = new UMCAverageLinkageClusterer <UMCLight, UMCClusterLight>();

            average.Parameters = new FeatureClusterParameters <UMCLight>();
            average.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Median;
            average.Parameters.Tolerances             = new Algorithms.FeatureTolerances();
            average.Parameters.Tolerances.Net         = .02;
            average.Parameters.Tolerances.Mass        = 6;
            average.Parameters.Tolerances.DriftTime   = .3;

            // The weighted metric is only used further down to measure feature-to-cluster
            // distances; clustering itself runs with the plain Euclidean metric. (The
            // weighted function used to be assigned first and was immediately overwritten.)
            var distance = new WeightedEuclideanDistance <UMCLight>();
            var euclid   = new EuclideanDistanceMetric <UMCLight>();

            average.Parameters.DistanceFunction = euclid.EuclideanDistance;
            var clusters = average.Cluster(features);

            Console.WriteLine("Clusters = {0}", clusters.Count);
            var id = 1;

            foreach (var testCluster in clusters)
            {
                testCluster.CalculateStatistics(ClusterCentroidRepresentation.Mean);
                var distances = new List <double>();

                // Show a sampling of roughly 5 results: print every threshold-th feature,
                // and every feature when the cluster holds fewer than five.
                var threshold = Math.Max(1, (int)(testCluster.Features.Count / (double)5));

                testCluster.Id = id++;
                var featureID = 0;

                foreach (var feature in testCluster.Features)
                {
                    featureID++;
                    if (featureID % threshold == 0)
                    {
                        Console.WriteLine("{0},{1},{2},{3}",
                                          feature.Net,
                                          feature.MassMonoisotopicAligned,
                                          feature.DriftTime,
                                          testCluster.Id);
                    }

                    // Still evaluated for every feature; the values are collected but not printed.
                    var newDistance = distance.EuclideanDistance(feature, testCluster);
                    distances.Add(newDistance);
                }
            }
        }
Example #8
0
        /// <summary>
        /// Runs average-linkage clustering over the features read from
        /// <paramref name="path"/>, gives each resulting cluster a zero-based sequential id
        /// and passes the clusters to WriteClusters.
        /// </summary>
        /// <param name="path">Location passed to ReadFeatures.</param>
        /// <param name="dist">Metric used to create the clusterer's distance function.</param>
        /// <param name="useBoxMethod">Value assigned to ShouldTestClustersWithinTolerance.</param>
        public void TestRestrictiveBoxMethod(string path, DistanceMetric dist, bool useBoxMethod)
        {
            var features = ReadFeatures(path);

            var clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();
            clusterer.ShouldTestClustersWithinTolerance = useBoxMethod;

            // Settings are applied to the parameter object the clusterer already holds.
            var settings = clusterer.Parameters;
            settings.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
            settings.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(dist);
            settings.OnlyClusterSameChargeStates = true;

            var limits = settings.Tolerances;
            limits.Mass = 10;
            limits.DriftTime = .3;
            limits.Net = .03;

            var clusters = clusterer.Cluster(features);

            // Number the clusters in enumeration order.
            var counter = 0;
            foreach (var cluster in clusters)
            {
                cluster.Id = counter;
                counter++;
            }

            WriteClusters(clusters);
        }
Example #9
0
        /// <summary>
        /// Clusters the features read from <paramref name="path"/> and prints, in ascending
        /// order, the distance between every feature and a centroid built from the mass,
        /// NET and drift time of the cluster that contains it.
        /// </summary>
        /// <param name="path">Location passed to ReadFeatures.</param>
        /// <param name="dist">Metric used to create the clusterer's distance function.</param>
        public void TestDistanceDistributions(string path, DistanceMetric dist)
        {
            var features = ReadFeatures(path);
            var clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>
            {
                ShouldTestClustersWithinTolerance = false,
                Parameters =
                {
                    CentroidRepresentation = ClusterCentroidRepresentation.Mean,
                    DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(dist),
                    OnlyClusterSameChargeStates = true,
                    Tolerances =
                    {
                        Mass = 10,
                        DriftTime = .3,
                        Net = .03
                    }
                }
            };

            var clusters = clusterer.Cluster(features);

            // The distance function does not change between clusters, so fetch it once.
            var func = clusterer.Parameters.DistanceFunction;
            var distances = new List<double>();

            foreach (var cluster in clusters)
            {
                // Stand-in feature located at the cluster's mass / NET / drift time.
                var centroid = new UMCLight();
                centroid.MassMonoisotopicAligned = cluster.MassMonoisotopic;
                centroid.Net = cluster.Net;
                centroid.DriftTime = cluster.DriftTime;

                foreach (var feature in cluster.Features)
                {
                    distances.Add(func(feature, centroid));
                }
            }

            // The list spans all clusters, so sort and print it once after the loop.
            // Doing this inside the loop re-printed every earlier distance for each
            // additional cluster.
            distances.Sort();

            var rank = 0;
            foreach (var distance in distances)
            {
                rank++;
                Console.WriteLine("{0},{1}", distance, rank);
            }
        }
Example #10
0
        /// <summary>
        /// The main entry point for the application. Connects to the database, loads the
        /// features of one charge state (optionally restricted to a list of datasets),
        /// clusters them with average linkage and writes the clusters to the cross-tab file.
        /// </summary>
        /// <param name="args">
        /// databasePath, chargeState, crossTabPath and, optionally, the path of a text file
        /// that lists one dataset name per line.
        /// </param>
        /// <returns>0 when the analysis succeeds; 1 on invalid arguments or any failure.</returns>
        static int Main(string [] args)
        {
            // NOTE(review): the main window handle is what gets passed to SetConsoleMode -
            // confirm this is the handle that call expects.
            var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;
            SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);

            try
            {
                // databasePath, chargeState and crossTabPath are all read below, so fewer
                // than three arguments must produce the usage text (the old "< 2" check let
                // two arguments through and failed on args[2]).
                if (args.Length < 3)
                {
                    Console.WriteLine(@"MultiAlignChargeStateProcessor databasePath chargeState crossTabPath [dataset List]");
                    // Regular (non-verbatim) literal so that \t is emitted as a tab.
                    Console.WriteLine("\tThe cross-tab file will be placed in the same directory as the database path");
                    return 1;
                }

                // Setup the analysis processing
                var databasePath = args[0];
                var databaseName = Path.GetFileNameWithoutExtension(databasePath);
                var path = Path.GetDirectoryName(databasePath);
                var crossPath = args[2];
                var chargeState = Convert.ToInt32(args[1]);

                // Optional fourth argument: file with the dataset names to restrict to.
                List<string> datasetList = null;
                if (args.Length == 4)
                {
                    datasetList = File.ReadAllLines(args[3]).ToList();
                }

                if (path == null)
                {
                    Console.WriteLine(@"The directory path is invalid");
                    return 1;
                }

                NHibernateUtil.ConnectToDatabase(databasePath, false);

                IDatasetDAO datasetCache = new DatasetDAOHibernate();
                var dateSuffix = AnalysisPathUtils.BuildDateSuffix();
                Logger.LogPath = Path.Combine(path, string.Format("{0}_charge_{2}_{1}.txt", databaseName, dateSuffix, chargeState));

                Logger.PrintMessage("Find all datasets", true);
                var datasets = datasetCache.FindAll();
                Logger.PrintMessage(string.Format("Found {0} datasets", datasets.Count), true);

                // Create the clustering algorithm - average linkage
                IClusterer<UMCLight, UMCClusterLight> clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();

                // Create the DAO object to extract the features
                var database = new UmcAdoDAO {DatabasePath = databasePath};
                IUmcDAO featureDao = database;

                Logger.PrintMessage("Extracting Features", true);
                var tempFeatures = featureDao.FindByCharge(chargeState);
                Logger.PrintMessage(string.Format("Found {0} features", tempFeatures.Count), true);

                var features = new List<UMCLight>();
                if (datasetList != null)
                {
                    // Dataset names are matched case-insensitively via lower-cased keys.
                    var featuremap = datasets.ToDictionary(info => info.DatasetName.ToLower());

                    var focusedDatasetList = new Dictionary<int, DatasetInformation>();
                    foreach (var name in datasetList)
                    {
                        DatasetInformation dataset;
                        if (!featuremap.TryGetValue(name.ToLower(), out dataset))
                        {
                            throw new Exception("Didn't find the dataset required..." + name);
                        }

                        Logger.PrintMessage("Using dataset: " + name);
                        focusedDatasetList.Add(dataset.DatasetId, dataset);
                    }

                    // Keep only the features whose group id is one of the requested datasets.
                    features.AddRange(tempFeatures.Where(feature => focusedDatasetList.ContainsKey(feature.GroupId)));

                    Logger.PrintMessage(string.Format("Found {0} filtered features for dataset list", features.Count), true);
                }
                else
                {
                    features = tempFeatures;
                }

                // Handle logging progress.
                clusterer.Progress += clusterer_Progress;
                clusterer.Parameters.Tolerances.DriftTime = .3;
                clusterer.Parameters.Tolerances.Mass = 16;
                clusterer.Parameters.Tolerances.Net = .014;
                clusterer.Parameters.OnlyClusterSameChargeStates = true;
                clusterer.Parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
                clusterer.Parameters.DistanceFunction = PNNLOmics.Algorithms.Distance.DistanceFactory<UMCLight>.CreateDistanceFunction(PNNLOmics.Algorithms.Distance.DistanceMetric.WeightedEuclidean);

                // Then cluster
                var clusterWriter = new UmcClusterWriter();
                IClusterWriter<UMCClusterLight> writer = clusterWriter;
                try
                {
                    clusterWriter.Open(crossPath);
                    clusterWriter.WriteHeader(datasets);

                    clusterer.ClusterAndProcess(features, writer);
                    Logger.PrintMessage("", true);
                    Logger.PrintMessage("ANALYSIS SUCCESS", true);
                    return 0;
                }
                catch (Exception ex)
                {
                    // Failure while writing or clustering: log the exception chain and stack.
                    Logger.PrintMessage("Unhandled Error: " + ex.Message);
                    var innerEx = ex.InnerException;
                    while (innerEx != null)
                    {
                        Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                        innerEx = innerEx.InnerException;
                    }
                    Logger.PrintMessage("Stack: " + ex.StackTrace);
                    Logger.PrintMessage("");
                    Logger.PrintMessage("ANALYSIS FAILED");
                    return 1;
                }
                finally
                {
                    // Runs on success and on failure.
                    clusterWriter.Close();
                }
            }
            catch (Exception ex)
            {
                // Failure during setup (argument parsing, database access, feature loading).
                Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
                var innerEx = ex.InnerException;
                while (innerEx != null)
                {
                    Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                    innerEx = innerEx.InnerException;
                }
                Logger.PrintMessage("Stack: " + ex.StackTrace, true);
                Logger.PrintMessage("");
                Logger.PrintMessage("ANALYSIS FAILED");
                return 1;
            }
        }