Beispiel #1
0
        /// <summary>
        /// Computes the distance from this vector to every vector in <paramref name="data"/>.
        /// </summary>
        /// <param name="data">Vectors to measure against</param>
        /// <param name="distance">Distance metric to apply</param>
        /// <returns>A vector with one distance per entry of <paramref name="data"/>, in the same order</returns>
        public IVector FindDistances(IReadOnlyList <IVector> data, DistanceMetric distance)
        {
            Debug.Assert(IsValid && data.All(v => v.IsValid));

            switch (distance)
            {
            case DistanceMetric.Euclidean:
            case DistanceMetric.Manhattan:
            {
                // these two metrics are handed to _cuda.CalculateDistances in one call
                var distanceMatrix = _cuda.CalculateDistances(new[] { this }, data, distance);
                return distanceMatrix.ReshapeAsVector();
            }
            }

            var count   = data.Count;
            var results = new float[count];

            if (distance == DistanceMetric.Cosine)
            {
                // squared norms (dot product of a vector with itself) of this vector and of each candidate
                var selfNorm   = DotProduct(this);
                var otherNorms = data.Select(d => d.DotProduct(d)).ToList();
                for (var idx = 0; idx < count; idx++)
                {
                    results[idx] = Convert.ToSingle(1d - DotProduct(data[idx]) / Math.Sqrt(selfNorm * otherNorms[idx]));
                }
            }
            else
            {
                // every other metric is evaluated one vector at a time
                var measure = _GetDistanceFunc(distance);
                for (var idx = 0; idx < count; idx++)
                {
                    results[idx] = measure(data[idx]);
                }
            }

            return _cuda.CreateVector(count, idx => results[idx]);
        }
 /// <summary>
 /// Initializes a new instance of the GeodesicDistance class.
 /// Passes the component's descriptive strings to the base constructor and
 /// sets <c>Metric</c> to <c>DistanceMetric.Euclidean</c>.
 /// </summary>
 public GeodesicDistance()
     : base("Geodesic Distance", "GeoDist",
            "Computes the geodesic distance field for a set of sources.",
            "SpatialSlur", "Field")
 {
     // Euclidean is the metric selected on construction
     Metric = DistanceMetric.Euclidean;
 }
Beispiel #3
0
        /// <summary>
        /// Clusters the features read from <paramref name="path"/> with the average linkage clusterer,
        /// numbers the resulting clusters sequentially from zero and writes them out.
        /// </summary>
        /// <param name="path">Location handed to ReadFeatures</param>
        /// <param name="dist">Metric used to build the clusterer's distance function</param>
        /// <param name="useBoxMethod">Value assigned to ShouldTestClustersWithinTolerance</param>
        public void TestRestrictiveBoxMethod(string path, DistanceMetric dist, bool useBoxMethod)
        {
            var features = ReadFeatures(path);

            var clusterer = new UMCAverageLinkageClusterer <UMCLight, UMCClusterLight>
            {
                ShouldTestClustersWithinTolerance = useBoxMethod,
                Parameters =
                {
                    CentroidRepresentation      = ClusterCentroidRepresentation.Mean,
                    DistanceFunction            = DistanceFactory <UMCLight> .CreateDistanceFunction(dist),
                    OnlyClusterSameChargeStates = true,
                    Tolerances                  = { Mass = 10, DriftTime = .3, Net = .03 }
                }
            };

            var clusters = clusterer.Cluster(features);

            // assign consecutive ids in iteration order
            var nextId = 0;
            foreach (var cluster in clusters)
            {
                cluster.Id = nextId;
                nextId++;
            }

            WriteClusters(clusters);
        }
Beispiel #4
0
        /// <summary>
        /// Prints the Euclidean distance between every pair of singleton clusters built from
        /// the second cluster read from <paramref name="path"/>, as comma-separated lines.
        /// </summary>
        /// <param name="path">Location handed to ReadClusters</param>
        /// <param name="dist">Not used by this method; the distance function is always created for DistanceMetric.Euclidean</param>
        public void TestDistancesEuclidean(string path, DistanceMetric dist)
        {
            var euclidean = DistanceFactory <UMCClusterLight> .CreateDistanceFunction(DistanceMetric.Euclidean);

            var loaded     = ReadClusters(path);
            var source     = new List <UMCClusterLight> { loaded[1] };
            var singletons = CreateSingletonClustersFromClusteredFeatures(source);

            Console.WriteLine("Distance, Mass, NET, DT, Mass, Net, DT");

            // visit each unordered pair exactly once
            for (var first = 0; first < singletons.Count; first++)
            {
                var a = singletons[first];
                for (var second = first + 1; second < singletons.Count; second++)
                {
                    var b = singletons[second];
                    Console.WriteLine("{0},{1},{2},{3},{4},{5},{6}",
                                      euclidean(a, b),
                                      a.MassMonoisotopic,
                                      a.Net,
                                      a.DriftTime,
                                      b.MassMonoisotopic,
                                      b.Net,
                                      b.DriftTime);
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Creates the clusterer and picks the initial centroids with a k-means++ style seeding:
        /// the first centroid is drawn uniformly, each further one is sampled from a categorical
        /// distribution weighted by every remaining vector's smallest distance to the centroids so far.
        /// </summary>
        /// <param name="k">Number of clusters requested</param>
        /// <param name="data">Vectors to cluster</param>
        /// <param name="distanceMetric">Metric used when comparing vectors against the centroids</param>
        /// <param name="randomSeed">Optional seed; when supplied, every random choice made here is drawn from the one seeded generator</param>
        public KMeans(int k, IReadOnlyList <IVector> data, DistanceMetric distanceMetric = DistanceMetric.Euclidean, int?randomSeed = null)
        {
            _k = k;
            _distanceMetric = distanceMetric;
            _cluster        = new ClusterData();
            _data           = data;

            // use kmeans++ to find best initial positions
            // https://normaldeviate.wordpress.com/2012/09/30/the-remarkable-k-means/
            var rand  = randomSeed.HasValue ? new Random(randomSeed.Value) : new Random();
            var data2 = data.ToList();

            // pick the first at random
            var firstIndex = rand.Next(0, data2.Count);

            _cluster.Add(data2[firstIndex]);
            data2.RemoveAt(firstIndex);

            // create a categorical distribution for each subsequent pick
            for (var i = 1; i < _k && data2.Count > 0; i++)
            {
                var probabilityList = new List <double>();
                foreach (var item in data2)
                {
                    // weight of a candidate = its distance to the closest centroid chosen so far
                    using (var distance = _cluster.CalculateDistance(item, _distanceMetric)) {
                        var minIndex = distance.MinimumIndex();
                        probabilityList.Add(distance.AsIndexable()[minIndex]);
                    }
                }

                // sample with the seeded generator as well; without it the distribution falls back
                // to its own random source and a fixed randomSeed would not give repeatable centroids
                var distribution = new Categorical(probabilityList.ToArray(), rand);
                var nextIndex    = distribution.Sample();
                _cluster.Add(data2[nextIndex]);
                data2.RemoveAt(nextIndex);
            }
        }
Beispiel #6
0
        /// <summary>
        /// Calculates the row-wise distance between two matrices.
        /// Only the Euclidean and SquaredEuclidean metrics are implemented.
        /// </summary>
        /// <param name="distance">Metric to apply</param>
        /// <param name="matrix1">First matrix</param>
        /// <param name="matrix2">Matrix subtracted from <paramref name="matrix1"/></param>
        /// <returns>Row sums of the squared element-wise difference, square-rooted for Euclidean</returns>
        /// <exception cref="NotImplementedException">Any metric other than Euclidean or SquaredEuclidean was requested</exception>
        public static IVector Calculate(this DistanceMetric distance, IMatrix matrix1, IMatrix matrix2)
        {
            var isEuclidean = distance == DistanceMetric.Euclidean;
            if (!isEuclidean && distance != DistanceMetric.SquaredEuclidean)
            {
                // Cosine, Manhattan, MeanSquared and anything else are not supported here
                throw new NotImplementedException();
            }

            // both supported metrics start from the squared element-wise difference
            using var difference = matrix1.Subtract(matrix2);
            using var squared    = difference.PointwiseMultiply(difference);

            if (!isEuclidean)
            {
                return squared.RowSums();
            }

            using var summed = squared.RowSums();
            return summed.Sqrt();
        }
Beispiel #7
0
 /// <summary>
 /// Constructor. Stores the supplied provider, data and metric on the instance;
 /// no distances are computed here.
 /// </summary>
 /// <param name="lap">Linear algebra provider</param>
 /// <param name="data">List of vectors to compare</param>
 /// <param name="distanceMetric">Distance metric for comparison (defaults to Euclidean)</param>
 public VectorDistanceHelper(ILinearAlgebraProvider lap, IReadOnlyList <IVector> data,
                             DistanceMetric distanceMetric = DistanceMetric.Euclidean)
 {
     _lap   = lap;
     Metric = distanceMetric;
     _data  = data;
 }
Beispiel #8
0
 /// <summary>
 /// Clusters the vectors with a <c>KMeans</c> instance: the clusterer is created, run through
 /// <c>ClusterUntilConverged</c> and disposed once its clusters have been read.
 /// </summary>
 /// <param name="data">The list of vectors to cluster</param>
 /// <param name="lap">Linear algebra provider</param>
 /// <param name="k">The number of clusters to find</param>
 /// <param name="maxIterations">The maximum number of iterations</param>
 /// <param name="distanceMetric">Distance metric handed to the clusterer</param>
 /// <returns>The clusterer's <c>Clusters</c> after <c>ClusterUntilConverged</c> has returned</returns>
 public static IReadOnlyList <IReadOnlyList <IVector> > KMeans(this IReadOnlyList <IVector> data,
                                                               ILinearAlgebraProvider lap, int k, int maxIterations = 1000,
                                                               DistanceMetric distanceMetric = DistanceMetric.Euclidean)
 {
     using (var kmeans = new KMeans(lap, k, data, distanceMetric))
     {
         kmeans.ClusterUntilConverged(maxIterations);

         // read the result before the using block disposes the clusterer
         var found = kmeans.Clusters;
         return found;
     }
 }
Beispiel #9
0
        /// <summary>
        /// Computes the distance from this vector to every vector in <paramref name="data"/>,
        /// evaluating the individual distances in parallel.
        /// </summary>
        /// <param name="data">Vectors to measure against</param>
        /// <param name="distance">Distance metric to apply</param>
        /// <returns>A vector with one distance per entry of <paramref name="data"/>, in the same order</returns>
        public IVector FindDistances(IReadOnlyList <IVector> data, DistanceMetric distance)
        {
            var measure = _GetDistanceFunc(distance);
            var results = new float[data.Count];

            // each iteration writes only its own slot, identified by the element's position
            Parallel.ForEach(data, (vector, loopState, position) =>
            {
                results[position] = measure(vector);
            });

            return new CpuVector(DenseVector.Create(data.Count, index => results[index]));
        }
Beispiel #10
0
 /// <summary>
 /// Writes a small report to the console: the metric, both points and their distance,
 /// framed by two separator lines.
 /// </summary>
 /// <param name="first">First point</param>
 /// <param name="second">Second point</param>
 /// <param name="distance">Distance between the two points</param>
 /// <param name="metric">Metric the distance was measured with</param>
 public static void Print(Position first, Position second, double distance, DistanceMetric metric)
 {
     const string separator = "--------------------------------";

     Console.WriteLine(separator);
     Console.WriteLine($"Distance Metric: {metric}");
     Console.WriteLine($"Point 1: ({first.X},{first.Y})");
     Console.WriteLine($"Point 2: ({second.X},{second.Y})");
     Console.WriteLine($"Distance: {distance}");
     Console.WriteLine(separator);
 }
        /// <summary>
        /// Computes the distance between two singleton clusters.
        /// </summary>
        /// <param name="cluster1">First cluster.</param>
        /// <param name="cluster2">Second cluster.</param>
        /// <param name="distanceMetric">The distance metric.</param>
        /// <returns>
        /// The distance between the clusters' only data points, or 0 when either cluster
        /// does not hold exactly one data point.
        /// </returns>
        public static double ComputeDistance(Cluster cluster1, Cluster cluster2, DistanceMetric distanceMetric)
        {
            var bothSingletons = cluster1.QuantityOfDataPoints == 1 && cluster2.QuantityOfDataPoints == 1;
            if (!bothSingletons)
            {
                // non-singleton input yields the zero default
                return 0;
            }

            return Distance.GetDistance(cluster1.DataPoints[0], cluster2.DataPoints[0], distanceMetric);
        }
Beispiel #12
0
        /// <summary>
        /// Creates the classifier and converts every stored model instance into a vector
        /// through the linear algebra provider.
        /// </summary>
        /// <param name="lap">Linear algebra provider used to create the instance vectors</param>
        /// <param name="model">Model whose <c>Instance</c> entries are loaded</param>
        /// <param name="k">Stored in <c>_k</c></param>
        /// <param name="distanceMetric">Stored in <c>_distanceMetric</c> (defaults to Euclidean)</param>
        public KNNClassifier(ILinearAlgebraProvider lap, KNearestNeighbours model, int k, DistanceMetric distanceMetric = DistanceMetric.Euclidean)
        {
            _k              = k;
            _lap            = lap;
            _model          = model;
            _distanceMetric = distanceMetric;

            // one vector per model instance, kept in model order
            foreach (var stored in model.Instance)
            {
                _instance.Add(lap.Create(stored.Data));
            }
        }
Beispiel #13
0
        /// <summary>
        ///     Calculates the distance between two data points under the chosen metric.
        /// </summary>
        /// <param name="x">First data point.</param>
        /// <param name="y">Second data point.</param>
        /// <param name="distanceMetric">The distance metric.</param>
        /// <returns>The distance; 0 when the metric matches none of the handled cases.</returns>
        /// <exception cref="ArgumentException">The two data points have different numbers of coordinates.</exception>
        public static double GetDistance(DataPoint x, DataPoint y, DistanceMetric distanceMetric)
        {
            // both points must have the same number of coordinates
            if (x.Count != y.Count)
            {
                throw new ArgumentException("Неравное колличество точек.");
            }

            var    size   = x.Count;
            double result = 0;

            switch (distanceMetric)
            {
            case DistanceMetric.EuclidianDistance:
            case DistanceMetric.SquareEuclidianDistance:
                // both metrics share the sum of squared coordinate differences
                for (var idx = 0; idx < size; idx++)
                {
                    double delta = x[idx] - y[idx];
                    result += delta * delta;
                }

                // only the plain Euclidean distance takes the square root
                if (distanceMetric == DistanceMetric.EuclidianDistance)
                {
                    result = Math.Sqrt(result);
                }

                break;

            case DistanceMetric.ManhattanDistance:
                // sum of absolute coordinate differences
                for (var idx = 0; idx < size; idx++)
                {
                    double delta = x[idx] - y[idx];
                    result += Math.Abs(delta);
                }

                break;

            case DistanceMetric.ChebyshevDistance:
                // largest absolute coordinate difference
                for (var idx = 0; idx < size; idx++)
                {
                    double gap = Math.Abs(x[idx] - y[idx]);
                    if (result > gap)
                    {
                        continue;
                    }

                    result = gap;
                }

                break;
            }

            return result;
        }
Beispiel #14
0
        /// <summary>
        /// Runs a fresh <c>AGNESDendrogram</c> over the places and returns what its
        /// <c>FitTrnaform</c> call produces.
        /// </summary>
        /// <param name="places">Places to process</param>
        /// <param name="metric">Metric passed through to the dendrogram</param>
        /// <returns>The list returned by <c>FitTrnaform</c></returns>
        public static IEnumerable <Place> CalculateDendrogram(List <Place> places, DistanceMetric metric)
        {
            var agnes = new AGNESDendrogram();

            // Disabled: visual representation of the dendrogram
            //Application.EnableVisualStyles();
            //Application.SetCompatibleTextRenderingDefault(false);
            //Application.Run(new DendrogramForm(agnesDendrogram.Dendrogram));

            return agnes.FitTrnaform(places, metric);
        }
Beispiel #15
0
        /// <summary>
        /// Sums the squared distances between the centroid and every data point of this cluster.
        /// </summary>
        /// <param name="distanceMetric">Metric used to measure each point's distance to the centroid.</param>
        /// <returns>Sum of squared error of the cluster</returns>
        public double GetSumOfSquaredError(DistanceMetric distanceMetric)
        {
            var total = 0.0;

            foreach (var point in DataPoints)
            {
                // square each centroid-to-point distance before accumulating
                total += Math.Pow(Distance.GetDistance(Centroid, point, distanceMetric), 2);
            }

            return total;
        }
Beispiel #16
0
 /// <summary>
 /// Constructor. Selects the distance routine for the tree: Hamming when requested,
 /// Levenshtein for every other value.
 /// </summary>
 /// <param name="dm">distance metric</param>
 public BKTree(DistanceMetric dm)
 {
     switch (dm)
     {
     case DistanceMetric.Hamming:
         metric         = DistanceMetric.Hamming;
         DistanceMethod = HammingDistance;
         break;

     default:
         // anything that is not Hamming is treated as Levenshtein
         metric         = DistanceMetric.Levenshtein;
         DistanceMethod = LevenshteinDistance;
         break;
     }
 }
Beispiel #17
0
        /// <summary>
        /// Initializes a new instance of the <see cref="Agnes"/> class: stores the cluster set,
        /// its initial size, the metric and the merge strategy, then builds the initial
        /// dissimilarity matrix.
        /// </summary>
        /// <param name="clusters">The clusters.</param>
        /// <param name="distanceMetric">The distance metric.</param>
        /// <param name="strategy">The strategy.</param>
        public Agnes(ClusterSet clusters, DistanceMetric distanceMetric, MergeStrategy strategy)
        {
            _clusters = clusters;
            _initialNumberOfClusters = clusters.Count;
            _distanceMetric          = distanceMetric;
            _strategy = strategy;

            // creating initial dissimilarity matrix from _clusters
            // NOTE(review): this runs before _chValue and _chIndex are assigned below — confirm
            // BuildDissimilarityMatrix does not touch either list
            BuildDissimilarityMatrix();

            _chValue = new List <double>();
            _chIndex = new List <int>();
        }
Beispiel #18
0
            /// <summary>
            /// Splits this node when it holds more children than the tree allows.
            /// The two replacement nodes are handed to the caller by throwing
            /// <c>SplitNodeReplacement</c>; nothing happens while the node is within capacity.
            /// </summary>
            private void CheckMaxCapacity()
            {
                var tree = this.Tree;
                if (this.ChildrenCount <= tree.MaxChildren)
                {
                    return;
                }

                var metric = tree.distanceMetric;

                // choose the two promoted entries, then divide the children between them
                var promoted = tree.splitPolicy.Promote(this.Children.Keys, metric);
                var halves   = tree.splitPolicy.Partition(promoted, this.Children.Keys, metric);

                Node first  = this.GetNewNode(promoted.Item1, halves.Item1);
                Node second = this.GetNewNode(promoted.Item2, halves.Item2);

                throw new SplitNodeReplacement(first, second);
            }
Beispiel #19
0
        /// <summary>
        /// Compares every pair of points and returns the pair with the smallest distance
        /// under the given metric.
        /// </summary>
        /// <param name="points">Points to search</param>
        /// <param name="metric">Metric used to measure each pair</param>
        /// <returns>
        /// The two closest points and their distance; <c>(null, null, double.MaxValue)</c>
        /// when no pair was compared or no distance was below <c>double.MaxValue</c>.
        /// </returns>
        /// <exception cref="NotSupportedException">A pair was measured with a metric that has no matching case</exception>
        public static (Position, Position, double) Calculate(List <Position> points, DistanceMetric metric)
        {
            // maps the requested metric onto the matching DistanceMetricFunctions routine
            double Measure(Position a, Position b)
            {
                switch (metric)
                {
                case DistanceMetric.EuclideanDistance:
                    return DistanceMetricFunctions.EuclideanDistance(a, b);

                case DistanceMetric.MeanAbsoluteError:
                    return DistanceMetricFunctions.MeanAbsoluteError(a, b);

                case DistanceMetric.MeanSquaredError:
                    return DistanceMetricFunctions.MeanSquaredError(a, b);

                case DistanceMetric.SumOfAbsoluteDistance:
                    return DistanceMetricFunctions.SumOfAbsoluteDistance(a, b);

                case DistanceMetric.SumOfSquaredDifference:
                    return DistanceMetricFunctions.SumOfSquaredDifference(a, b);

                default:
                    throw new NotSupportedException("Metric type not supported");
                }
            }

            Position bestFirst  = null;
            Position bestSecond = null;
            var      best       = double.MaxValue;
            var      total      = points.Count;

            for (var outer = 0; outer < total - 1; outer++)
            {
                var candidateA = points[outer];
                for (var inner = outer + 1; inner < total; inner++)
                {
                    var candidateB = points[inner];
                    var measured   = Measure(candidateA, candidateB);

                    // strict comparison keeps the first pair found on ties
                    if (measured < best)
                    {
                        bestFirst  = candidateA;
                        bestSecond = candidateB;
                        best       = measured;
                    }
                }
            }

            return (bestFirst, bestSecond, best);
        }
Beispiel #20
0
 /// <summary>
 /// Constructor (Add to subtree). Stores the lower-cased word, its distance to the parent
 /// and the metric, and selects the matching distance routine (Levenshtein unless Hamming is requested).
 /// </summary>
 /// <param name="w">word</param>
 /// <param name="dist">distance</param>
 /// <param name="dm">distance metric</param>
 public BKTree(string w, int dist, DistanceMetric dm)
 {
     var lowered = w.ToLower();

     metric = dm;
     switch (dm)
     {
     case DistanceMetric.Hamming:
         DistanceMethod = HammingDistance;
         break;

     default:
         DistanceMethod = LevenshteinDistance;
         break;
     }

     word       = lowered;
     distParent = dist;
 }
Beispiel #21
0
            /// <summary>
            /// Measures the distance from <paramref name="vector"/> to each current centroid.
            /// </summary>
            /// <param name="vector">Vector to compare against the centroids</param>
            /// <param name="distanceMetric">Metric to use; Cosine takes a dedicated path that is passed <c>_clusterNorm</c> by reference</param>
            /// <returns>The result of the vector's CosineDistance or FindDistances call</returns>
            public IVector CalculateDistance(IVector vector, DistanceMetric distanceMetric)
            {
                // build the array of current centroid values on first use
                if (_curr == null)
                {
                    _curr = _centroid.Select(c => c.Current).ToArray();
                }

                if (distanceMetric != DistanceMetric.Cosine)
                {
                    return vector.FindDistances(_curr, distanceMetric);
                }

                return vector.CosineDistance(_curr, ref _clusterNorm);
            }
Beispiel #22
0
        /// <summary>
        /// Handles the Click event of the calculateClusterCountButton control: reads the clustering
        /// options, collects the checked indicators, runs AGNES on the resulting cluster set and
        /// shows the recommended number of clusters.
        /// </summary>
        /// <param name="sender">The source of the event.</param>
        /// <param name="e">The <see cref="EventArgs"/> instance containing the event data.</param>
        /// <exception cref="Clusterizer.CustomException">No indicator was selected. - Error while selecting indicators</exception>
        private void calculateClusterCountButton_Click(object sender, EventArgs e)
        {
            // gets selected parameters of clustering
            distanceMetric  = (DistanceMetric)distanceSelectComboBox.SelectedIndex;
            strategy        = (MergeStrategy)strategySelectComboBox.SelectedIndex;
            normalizeMethod = (NormalizeMethod)normalizeMethodSelectComboBox.SelectedIndex;

            // gets selected datapoints: one flag per second-level tree node, in traversal order
            var  isChosen   = new bool[Tools.NumericDataHeadings.Length];
            bool isAllFalse = true;
            int  ind        = 0;

            for (int i = 0; i < pointsSelectTreeView.Nodes.Count; i++)
            {
                for (int j = 0; j < pointsSelectTreeView.Nodes[i].Nodes.Count; j++)
                {
                    isChosen[ind] = pointsSelectTreeView.Nodes[i].Nodes[j].Checked;
                    if (isAllFalse)
                    {
                        isAllFalse = !isChosen[ind];
                    }
                    ind++;
                }
            }

            // check if no datapoint is selected
            if (isAllFalse)
            {
                throw new CustomException("Не было выбрано ни одного показателя.", "Ощибка при выборе показателей");
            }

            // gets cluster set from data, using the selection gathered above;
            // Tools.isChosen is not updated by this handler and may hold an older selection
            var clusters = Tools.Data.GetClusterSet(isChosen);

            clusters.Normalize(normalizeMethod);

            // executes clustering for determining recommended count of clusters
            Agnes agnes = new Agnes(clusters,
                                    distanceMetric, strategy);

            agnes.ExecuteClustering(2, true);

            // gets recommended count of clusters
            countOfClusters          = agnes.GetRecommendedCountOfClusters();
            clusterCountTextBox.Text = $"{countOfClusters}";
        }
Beispiel #23
0
        /// <summary>
        /// Clusters the features read from <paramref name="path"/> and prints, for each cluster,
        /// the sorted feature-to-centroid distances together with a running count.
        /// </summary>
        /// <param name="path">Location handed to ReadFeatures</param>
        /// <param name="dist">Metric used to build the clusterer's distance function</param>
        public void TestDistanceDistributions(string path, DistanceMetric dist)
        {
            var features = ReadFeatures(path);

            var clusterer = new UMCAverageLinkageClusterer <UMCLight, UMCClusterLight>
            {
                ShouldTestClustersWithinTolerance = false,
                Parameters =
                {
                    CentroidRepresentation      = ClusterCentroidRepresentation.Mean,
                    DistanceFunction            = DistanceFactory <UMCLight> .CreateDistanceFunction(dist),
                    OnlyClusterSameChargeStates = true,
                    Tolerances                  = { Mass = 10, DriftTime = .3, Net = .03 }
                }
            };

            var clusters = clusterer.Cluster(features);

            // NOTE(review): this list is shared by all clusters and never cleared, so every pass
            // below re-sorts and re-prints the distances gathered so far — confirm this is intended
            var distances = new List <double>();

            foreach (var cluster in clusters)
            {
                // stand-in feature carrying the cluster's mass, NET and drift time
                var centroid = new UMCLight
                {
                    MassMonoisotopicAligned = cluster.MassMonoisotopic,
                    Net                     = cluster.Net,
                    DriftTime               = cluster.DriftTime
                };

                var measure = clusterer.Parameters.DistanceFunction;
                foreach (var feature in cluster.Features)
                {
                    distances.Add(measure(feature, centroid));
                }

                distances.Sort();

                // print each distance with its 1-based position in the sorted list
                var rank = 0;
                foreach (var value in distances)
                {
                    rank++;
                    Console.WriteLine("{0},{1}", value, rank);
                }
            }
        }
Beispiel #24
0
        /*
         * To use BKTree:
         * 1. Create a class derived from BKTreeNode
         * 2. Add a member variable of your data to be sorted / retrieved
         * 3. Override the calculateDistance method to calculate the distance metric
         *    between two nodes for the data to be sorted / retrieved.
         * 4. Instantiate a BKTree with the type name of the class created in (1).
         */

        static void Main(string[] args)
        {
            // NOTE: More comprehensive examples of BK-Tree methods in unit tests

            // Exercise static distance metric methods -- just because
            var hammingLeft  = new byte[] { 0xEF, 0x35, 0x20 };
            var hammingRight = new byte[] { 0xAD, 0x13, 0x87 };
            Console.WriteLine(DistanceMetric.calculateHammingDistance(hammingLeft, hammingRight));

            var leeLeft  = new int[] { 196, 105, 48 };
            var leeRight = new int[] { 201, 12, 51 };
            Console.WriteLine(DistanceMetric.calculateLeeDistance(leeLeft, leeRight));

            Console.WriteLine(DistanceMetric.calculateLevenshteinDistance("kitten", "sitting"));

            // Create BKTree with derived node class from top of file
            var tree = new BKTree <ExampleNodeRecord>();

            // Add five nodes: ids 1..5, each data triple 10 higher than the previous one
            for (var id = 1; id <= 5; id++)
            {
                var offset = 10 * (id - 1);
                tree.add(new ExampleNodeRecord(id, new int[] { 100 + offset, 200 + offset, 300 + offset }));
            }

            // Get best node from our tree with best distance
            Dictionary <ExampleNodeRecord, Int32> results =
                tree.findBestNodeWithDistance(new ExampleNodeRecord(new int[] { 103, 215, 303 }));

            // Get best nodes below threshold (10 is an arbitrary threshold)
            results = tree.query(new ExampleNodeRecord(new int[] { 103, 215, 303 }), 10);

            // Dictionaries don't print well; so invent your own handy print routine
        }
Beispiel #25
0
        /// <summary>
        /// Handles the Click event of the doClusteringButton control: reads the clustering options,
        /// validates the requested cluster count, records which indicators are checked and closes the form.
        /// </summary>
        /// <param name="sender">The source of the event.</param>
        /// <param name="e">The <see cref="EventArgs"/> instance containing the event data.</param>
        /// <exception cref="Clusterizer.CustomException">
        /// Enter a valid number of clusters. - Error while entering the number of clusters
        /// or
        /// No indicator was selected. - Error while selecting indicators
        /// </exception>
        private void doClusteringButton_Click(object sender, EventArgs e)
        {
            // read the clustering options from the three combo boxes
            distanceMetric  = (DistanceMetric)distanceSelectComboBox.SelectedIndex;
            strategy        = (MergeStrategy)strategySelectComboBox.SelectedIndex;
            normalizeMethod = (NormalizeMethod)normalizeMethodSelectComboBox.SelectedIndex;

            // the cluster count must parse as an integer strictly between 0 and the number of data rows
            if (!int.TryParse(clusterCountTextBox.Text, out var requested) || requested <= 0 || requested >= Tools.Data.Rows.Count)
            {
                throw new CustomException("Введите правилное количество кластеров.", "Ошибка при вводе числа кластеров");
            }

            countOfClusters = requested;

            // one flag per second-level tree node, filled in traversal order
            var selection       = new bool[Tools.NumericDataHeadings.Length];
            var nothingSelected = true;
            var slot            = 0;

            for (var parentIdx = 0; parentIdx < pointsSelectTreeView.Nodes.Count; parentIdx++)
            {
                for (var childIdx = 0; childIdx < pointsSelectTreeView.Nodes[parentIdx].Nodes.Count; childIdx++)
                {
                    var isChecked = pointsSelectTreeView.Nodes[parentIdx].Nodes[childIdx].Checked;
                    selection[slot] = isChecked;
                    slot++;

                    // stays true only while every node seen so far is unchecked
                    nothingSelected &= !isChecked;
                }
            }

            if (nothingSelected)
            {
                throw new CustomException("Не было выбрано не одного показателя.", "Ощибка при выборе показателей");
            }

            // publish the selection and signal that the parameters were confirmed
            Tools.isChosen       = selection;
            isParametersSelected = true;
            Close();
        }
Beispiel #26
0
        /// <summary>
        /// Builds a dendrogram bottom-up: starting from one leaf per place, the two closest
        /// dendrograms are merged repeatedly until a single one remains. Places are recorded
        /// when their leaf is merged, and the recorded list is reversed before it is returned.
        /// </summary>
        /// <param name="places">Places to cluster</param>
        /// <param name="metric">Metric passed to each <c>DistanceTo</c> call</param>
        /// <returns>The instance's <c>placesByRemoteness</c> list after reversal</returns>
        public List <Place> FitTrnaform(List <Place> places, DistanceMetric metric)
        {
            // every place starts out as its own leaf
            List <Dendrogram> active = places.Select(place => (Dendrogram) new LeafNode(place)).ToList();

            while (active.Count > 1)
            {
                Dendrogram closestLeft     = null;
                Dendrogram closestRight    = null;
                double     closestDistance = double.MaxValue;

                // examine every ordered pair of distinct dendrograms
                for (var outer = 0; outer < active.Count; outer++)
                {
                    Dendrogram left = active[outer];
                    for (var inner = 0; inner < active.Count; inner++)
                    {
                        Dendrogram right = active[inner];
                        if (left == right)
                        {
                            continue;
                        }

                        double candidate = left.DistanceTo(right, metric);
                        if (candidate < closestDistance)
                        {
                            closestDistance = candidate;
                            closestLeft     = left;
                            closestRight    = right;
                        }
                    }
                }

                // a leaf that gets merged contributes its place to the ordering
                foreach (Dendrogram merged in new[] { closestLeft, closestRight })
                {
                    if (merged is LeafNode)
                    {
                        this.placesByRemoteness.Add(merged.Places[0]);
                    }
                }

                // replace the pair with their parent node
                Dendrogram parent = new InternalNode(closestLeft, closestRight, closestDistance);
                active.Remove(closestLeft);
                active.Remove(closestRight);
                active.Add(parent);
            }

            this.Dendrogram = active[0];
            this.placesByRemoteness.Reverse();
            return this.placesByRemoteness;
        }
Beispiel #27
0
 /// <summary>
 /// Computes the distance from this vector to every vector in <paramref name="data"/>.
 /// Euclidean and Manhattan go through the <c>_cuda</c> multi-distance calls; Cosine and
 /// all remaining metrics are evaluated one vector at a time.
 /// </summary>
 /// <param name="data">Vectors to measure against; cast to <c>GpuVector</c> on the Euclidean and Manhattan paths</param>
 /// <param name="distance">Distance metric to apply</param>
 /// <returns>A vector with one distance per entry of <paramref name="data"/></returns>
 public IVector FindDistances(IReadOnlyList <IVector> data, DistanceMetric distance)
 {
     Debug.Assert(IsValid && data.All(v => v.IsValid));

     switch (distance)
     {
     case DistanceMetric.Cosine:
     {
         // squared norms (dot product of a vector with itself) of this vector and of each candidate
         var selfNorm   = DotProduct(this);
         var otherNorms = data.Select(d => d.DotProduct(d)).ToList();
         var cosine     = new float[data.Count];
         for (var idx = 0; idx < data.Count; idx++)
         {
             cosine[idx] = Convert.ToSingle(1d - DotProduct(data[idx]) / Math.Sqrt(selfNorm * otherNorms[idx]));
         }
         return new GpuVector(_cuda, data.Count, idx => cosine[idx]);
     }

     case DistanceMetric.Euclidean:
     {
         var pointers = data.Cast <GpuVector>().Select(d => d._data.DevicePointer).ToArray();
         var raw      = _cuda.MultiEuclideanDistance(_data, pointers, _size);

         // column sums of the _size x data.Count matrix, then the square root
         using (var matrix = new GpuMatrix(_cuda, _size, data.Count, raw))
         using (var sums = matrix.ColumnSums())
         {
             return sums.Sqrt();
         }
     }

     case DistanceMetric.Manhattan:
     {
         var pointers = data.Cast <GpuVector>().Select(d => d._data.DevicePointer).ToArray();
         var raw      = _cuda.MultiManhattanDistance(_data, pointers, _size);
         using (var matrix = new GpuMatrix(_cuda, _size, data.Count, raw))
         {
             return matrix.ColumnSums();
         }
     }

     default:
     {
         var measure = _GetDistanceFunc(distance);
         var values  = new float[data.Count];
         for (var idx = 0; idx < data.Count; idx++)
         {
             values[idx] = measure(data[idx]);
         }
         return new GpuVector(_cuda, data.Count, idx => values[idx]);
     }
     }
 }
Beispiel #28
0
        /// <summary>
        /// Calculates the distance between two vectors under the given metric.
        /// </summary>
        /// <param name="distance">Metric to apply</param>
        /// <param name="vector1">Vector the distance call is made on</param>
        /// <param name="vector2">Vector passed as the argument</param>
        /// <returns>
        /// The Cosine, Euclidean, Manhattan or SquaredEuclidean distance as requested;
        /// any other metric value yields the mean squared distance.
        /// </returns>
        public static float Calculate(this DistanceMetric distance, IVector vector1, IVector vector2)
        {
            if (distance == DistanceMetric.Cosine)
            {
                return vector1.CosineDistance(vector2);
            }

            if (distance == DistanceMetric.Euclidean)
            {
                return vector1.EuclideanDistance(vector2);
            }

            if (distance == DistanceMetric.Manhattan)
            {
                return vector1.ManhattanDistance(vector2);
            }

            if (distance == DistanceMetric.SquaredEuclidean)
            {
                return vector1.SquaredEuclidean(vector2);
            }

            // fallback for every remaining metric value
            return vector1.MeanSquaredDistance(vector2);
        }
Beispiel #29
0
 /// <summary>
 /// Creates an empty <see cref="MTree{T}"/> with the given node capacity limits,
 /// distance metric and split policy.
 /// </summary>
 /// <param name="minChildren">Minimum number of child nodes a node may have; must be at least 1.</param>
 /// <param name="maxChildren">Maximum number of child nodes a node may have; must be strictly greater than <paramref name="minChildren"/>.</param>
 /// <param name="distanceMetric">
 ///     The distance metric used to calculate distances between data points.
 ///
 ///     The distance metric must satisfy the following properties:
 ///     <list type="bullet">
 ///         <item>
 ///             <description><c>d(a,b) = d(b,a)</c> for every <c>a</c> and <c>b</c> points (symmetry)</description>
 ///         </item>
 ///         <item>
 ///             <description><c>d(a,a) = 0</c> and <c>d(a,b) &gt; 0</c> for every <c>a != b</c> points (non-negativity)</description>
 ///         </item>
 ///         <item>
 ///             <description><c>d(a,b) &lt;= d(a,c) + d(c,b)</c> for every <c>a</c>, <c>b</c> and <c>c</c> points (triangle inequality)</description>
 ///         </item>
 ///     </list>
 /// </param>
 /// <param name="splitPolicy">The split policy to use. It consists of a partition and a promotion policy. <see cref="ISplitPolicy{T}"/></param>
 /// <exception cref="ArgumentOutOfRangeException">
 /// <c>minChildren</c> is less than 1
 /// or
 /// <c>maxChildren</c> is less than or equal to <c>minChildren</c>
 /// </exception>
 /// <exception cref="ArgumentNullException">
 /// <c>distanceMetric</c> is missing
 /// or
 /// <c>splitPolicy</c> is missing
 /// </exception>
 public MTree(
     Int32 minChildren,
     Int32 maxChildren,
     DistanceMetric <T> distanceMetric,
     ISplitPolicy <T> splitPolicy)
 {
     // Range checks run first, in the same order callers already observe.
     if (minChildren < 1)
     {
         throw new ArgumentOutOfRangeException(nameof(minChildren), CoreMessages.MinimumNumberOfChildNodesIsLessThan1);
     }

     if (maxChildren <= minChildren)
     {
         throw new ArgumentOutOfRangeException(nameof(maxChildren), CoreMessages.MaximumNumberOfChildNodesIsEqualToMinimum);
     }

     MinChildren = minChildren;
     MaxChildren = maxChildren;

     // Null checks follow the capacity assignments; the metric is validated before the policy.
     if (distanceMetric is null)
     {
         throw new ArgumentNullException(nameof(distanceMetric));
     }

     this.distanceMetric = distanceMetric;

     if (splitPolicy is null)
     {
         throw new ArgumentNullException(nameof(splitPolicy));
     }

     this.splitPolicy = splitPolicy;

     NumberOfDataItems = 0;
 }
Beispiel #30
0
        /// <summary>
        /// Computes the distance from every vector in <paramref name="vectors"/> to every
        /// vector in <paramref name="compareTo"/> and returns the results as a matrix.
        /// </summary>
        /// <param name="vectors">Vectors that form the columns of the result</param>
        /// <param name="compareTo">Vectors that form the rows of the result</param>
        /// <param name="distanceMetric">Metric passed through to <c>FindDistance</c></param>
        /// <returns>
        /// A matrix with <c>compareTo.Count</c> rows and <c>vectors.Count</c> columns built
        /// from a flat buffer in which the distance between <c>vectors[i]</c> and
        /// <c>compareTo[j]</c> is stored at index <c>i * rows + j</c>.
        /// </returns>
        public IMatrix CalculateDistances(IReadOnlyList <IVector> vectors,
                                          IReadOnlyList <IVector> compareTo, DistanceMetric distanceMetric)
        {
            var rowCount    = compareTo.Count;
            var columnCount = vectors.Count;

            // Debug-only sanity check: the first vectors of both lists must have the same length.
            Debug.Assert(vectors[0].Count == compareTo[0].Count);

            var distances = new float[rowCount * columnCount];

            Parallel.For(0, columnCount, columnIndex =>
            {
                var source = vectors[columnIndex];

                // Each column owns a contiguous run of rowCount entries in the flat buffer.
                var columnOffset = columnIndex * rowCount;

                Parallel.For(0, rowCount, rowIndex =>
                {
                    distances[columnOffset + rowIndex] = source.FindDistance(compareTo[rowIndex], distanceMetric);
                });
            });

            var dense = DenseMatrix.Build.Dense(rowCount, columnCount, distances);
            return new CpuMatrix(dense);
        }
Beispiel #31
0
 /// <summary>
 /// Returns the distance delegate that corresponds to the given metric.
 /// </summary>
 /// <param name="distanceMetric">Metric to look up.</param>
 /// <returns>
 /// A delegate for <c>Euclidian</c>, <c>Manhattan</c> or <c>Chebyshev</c> when requested;
 /// <c>EuclidianSq</c> for <c>DistanceMetric.EuclidianSq</c> and for any other value.
 /// </returns>
 public static DistanceFunc GetDistanceFunction(DistanceMetric distanceMetric)
 {
     switch (distanceMetric)
     {
     case DistanceMetric.Euclidian:
         return Euclidian;

     case DistanceMetric.Manhattan:
         return Manhattan;

     case DistanceMetric.Chebyshev:
         return Chebyshev;

     // Squared Euclidian doubles as the fallback for unrecognised metrics.
     case DistanceMetric.EuclidianSq:
     default:
         return EuclidianSq;
     }
 }
Beispiel #32
0
        /// <summary>
        /// Clusters the features read from <paramref name="path"/> and writes the sorted
        /// feature-to-centroid distances, each paired with a running count, to the console.
        /// </summary>
        /// <param name="path">Location passed to <c>ReadFeatures</c></param>
        /// <param name="dist">Metric used to create the clusterer's distance function</param>
        public void TestDistanceDistributions(string path, DistanceMetric dist)
        {
            var features = ReadFeatures(path);

            var clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();
            clusterer.ShouldTestClustersWithinTolerance = false;

            var parameters = clusterer.Parameters;
            parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
            parameters.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(dist);
            parameters.OnlyClusterSameChargeStates = true;

            var tolerances = parameters.Tolerances;
            tolerances.Mass = 10;
            tolerances.DriftTime = .3;
            tolerances.Net = .03;

            var clusters = clusterer.Cluster(features);

            // NOTE(review): this list is shared by all clusters and never cleared, so each
            // pass below re-sorts and re-prints every distance gathered so far - confirm
            // whether that cumulative output is intended.
            var distances = new List<double>();
            foreach (var cluster in clusters)
            {
                // Stand-in feature that carries the cluster's centroid coordinates.
                var centroid = new UMCLight
                {
                    MassMonoisotopicAligned = cluster.MassMonoisotopic,
                    Net = cluster.Net,
                    DriftTime = cluster.DriftTime
                };

                var distanceTo = clusterer.Parameters.DistanceFunction;
                foreach (var feature in cluster.Features)
                {
                    distances.Add(distanceTo(feature, centroid));
                }

                distances.Sort();

                // The second column is the 1-based position within the sorted list.
                for (var position = 0; position < distances.Count; position++)
                {
                    Console.WriteLine("{0},{1}", distances[position], position + 1);
                }
            }
        }
Beispiel #33
0
        /// <summary>
        /// Reads clusters from <paramref name="path"/>, expands the second cluster into
        /// singleton clusters and prints the Euclidean distance for every unordered pair
        /// together with each cluster's mass, NET and drift time.
        /// </summary>
        /// <param name="path">Location passed to <c>ReadClusters</c></param>
        /// <param name="dist">Not used by this method; the Euclidean metric is always applied</param>
        public void TestDistancesEuclidean(string path, DistanceMetric dist)
        {
            var euclidean = DistanceFactory<UMCClusterLight>.CreateDistanceFunction(DistanceMetric.Euclidean);
            var loadedClusters = ReadClusters(path);

            // Only the cluster at index 1 is expanded into singletons.
            var seed = new List<UMCClusterLight> {loadedClusters[1]};
            var singletons = CreateSingletonClustersFromClusteredFeatures(seed);

            Console.WriteLine("Distance, Mass, NET, DT, Mass, Net, DT");

            for (var first = 0; first < singletons.Count; first++)
            {
                var left = singletons[first];

                // Start after the outer index so each pair is reported exactly once.
                for (var second = first + 1; second < singletons.Count; second++)
                {
                    var right = singletons[second];
                    var separation = euclidean(left, right);

                    Console.WriteLine("{0},{1},{2},{3},{4},{5},{6}",
                        separation,
                        left.MassMonoisotopic,
                        left.Net,
                        left.DriftTime,
                        right.MassMonoisotopic,
                        right.Net,
                        right.DriftTime);
                }
            }
        }
Beispiel #34
0
        /// <summary>
        /// Clusters the features read from <paramref name="path"/>, numbers the resulting
        /// clusters sequentially from zero and hands them to <c>WriteClusters</c>.
        /// </summary>
        /// <param name="path">Location passed to <c>ReadFeatures</c></param>
        /// <param name="dist">Metric used to create the clusterer's distance function</param>
        /// <param name="useBoxMethod">Value assigned to the clusterer's <c>ShouldTestClustersWithinTolerance</c> flag</param>
        public void TestRestrictiveBoxMethod(string path, DistanceMetric dist, bool useBoxMethod)
        {
            var features = ReadFeatures(path);

            var clusterer = new UMCAverageLinkageClusterer<UMCLight, UMCClusterLight>();
            clusterer.ShouldTestClustersWithinTolerance = useBoxMethod;

            var parameters = clusterer.Parameters;
            parameters.CentroidRepresentation = ClusterCentroidRepresentation.Mean;
            parameters.DistanceFunction = DistanceFactory<UMCLight>.CreateDistanceFunction(dist);
            parameters.OnlyClusterSameChargeStates = true;

            var tolerances = parameters.Tolerances;
            tolerances.Mass = 10;
            tolerances.DriftTime = .3;
            tolerances.Net = .03;

            var clusters = clusterer.Cluster(features);

            // Ids follow the order in which the clusterer returned the clusters.
            var nextId = 0;
            foreach (var cluster in clusters)
            {
                cluster.Id = nextId++;
            }

            WriteClusters(clusters);
        }