示例#1
0
        /// <summary>
        /// Builds the standardised k-means feature vector for a single user.
        /// The "Age" feature is centred on <c>AvgAge</c> and scaled by <c>AgeRange</c>; every interest
        /// listed on the user is flagged with 1; finally every key present on a default
        /// <c>KMeansData</c> instance is centred on <c>HobbiesAverages</c> and scaled by <c>HobbiesRange</c>.
        /// </summary>
        /// <param name="model">The user to convert.</param>
        /// <returns>The standardised feature vector carrying the user's id.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="model"/> is null.</exception>
        /// <exception cref="InvalidOperationException">No statistics could be obtained.</exception>
        async Task <KMeansData> StandardiseData(UserModel model)
        {
            if (model == null)
            {
                throw new ArgumentNullException(nameof(model));
            }

            KMeansStatistics statistics = await GetStatistics();
            if (statistics == null)
            {
                // GetStatistics can hand back null when nothing has been stored yet.
                throw new InvalidOperationException(
                          "K-means statistics are not available; cannot standardise user data.");
            }

            // A zero range would turn the division into NaN/Infinity and poison every
            // distance computed from this vector, so centre the feature on 0 instead.
            double standardisedAge = statistics.AgeRange == 0
                ? 0
                : (model.Age - statistics.AvgAge) / statistics.AgeRange;

            // NOTE(review): interests are 0/1 indicators, so a range of 1 is assumed when the
            // stored HobbiesRange is 0 (e.g. never assigned) — confirm against KMeansStatistics.
            double hobbiesRange = statistics.HobbiesRange == 0 ? 1 : statistics.HobbiesRange;

            KMeansData kuser = new KMeansData
            {
                Id      = model.Id,
                Hobbies = { ["Age"] = standardisedAge }
            };

            foreach (var userInterest in model.Interests)
            {
                kuser.Hobbies[userInterest] = 1;
            }

            // A default instance supplies the set of keys that have to be standardised.
            KMeansData sample = new KMeansData();

            foreach (var hobby in sample.Hobbies.Keys)
            {
                kuser.Hobbies[hobby] =
                    (kuser.Hobbies[hobby] - statistics.HobbiesAverages[hobby]) / hobbiesRange;
            }
            return kuser;
        }
示例#2
0
        /// <summary>
        /// Standardises the given user and returns the index (within the list loaded from the
        /// "Centroids" collection) of the centroid with the smallest squared distance to it.
        /// Returns 0 when no centroid beats the initial <c>Double.MaxValue</c> bound.
        /// </summary>
        /// <param name="model">The user to place.</param>
        /// <returns>Zero-based index of the nearest centroid.</returns>
        public async Task <int> FindClosestCentroidAsync(UserModel model)
        {
            var centroidCollection = _database.GetCollection <KMeansData>("Centroids");
            List <KMeansData> centroids = await centroidCollection.Find(_ => true).ToListAsync();

            KMeansData point = await StandardiseData(model);

            int    nearestIndex    = 0;
            double nearestDistance = Double.MaxValue;

            int index = 0;
            foreach (KMeansData centroid in centroids)
            {
                // Sum of squared per-feature differences between the point and this centroid.
                double squaredDistance = 0;
                foreach (var feature in point.Hobbies)
                {
                    double delta = feature.Value - centroid.Hobbies[feature.Key];
                    squaredDistance += Math.Pow(delta, 2);
                }

                // Strict comparison: the earliest centroid wins ties.
                if (squaredDistance < nearestDistance)
                {
                    nearestDistance = squaredDistance;
                    nearestIndex    = index;
                }
                index++;
            }
            return nearestIndex;
        }
示例#3
0
        /// <summary>
        /// Returns the statistics used to standardise user data.
        /// They are recomputed from the current users and written to the "KMeansStatistics"
        /// collection when the user count is a non-zero multiple of 300, or when nothing is
        /// stored yet and at least one user exists; otherwise the stored document is returned.
        /// </summary>
        /// <returns>
        /// The statistics, or null when nothing is stored and there are no users to compute from.
        /// </returns>
        async Task <KMeansStatistics> GetStatistics()
        {
            var users = await GetUsers();

            // testing purposes / update statistics each 300 users to get accurate centroids
            bool refreshDue = users.Count != 0 && users.Count % 300 == 0;
            if (!refreshDue)
            {
                KMeansStatistics stored = await _database.GetCollection <KMeansStatistics>("KMeansStatistics")
                                          .Find(_ => true).FirstOrDefaultAsync();
                if (stored != null || users.Count == 0)
                {
                    return stored;
                }
                // Nothing stored yet but users exist: fall through and compute, so callers
                // do not receive null merely because the count is not a multiple of 300.
            }

            KMeansStatistics statistics = new KMeansStatistics();

            // Age range = oldest minus youngest user in the "Users" collection.
            // Awaited rather than blocking the thread inside an async method.
            var oldest = await _database.GetCollection <UserModel>("Users").Find(_ => true)
                         .Sort(Builders <UserModel> .Sort.Descending("Age")).FirstOrDefaultAsync();
            var youngest = await _database.GetCollection <UserModel>("Users").Find(_ => true)
                           .Sort(Builders <UserModel> .Sort.Ascending("Age")).FirstOrDefaultAsync();
            statistics.AgeRange = oldest.Age - youngest.Age;

            double ageSum = 0;
            foreach (var userModel in users)
            {
                ageSum += userModel.Age;
            }
            statistics.AvgAge = ageSum / users.Count;

            // Count how many users list each interest.
            KMeansData sumOfInterests = new KMeansData();
            foreach (var user in users)
            {
                foreach (var userInterest in user.Interests)
                {
                    sumOfInterests.Hobbies[userInterest] += 1;
                }
            }

            // Average for each hobby = share of users listing it.
            foreach (var interest in sumOfInterests.Hobbies.Keys)
            {
                statistics.HobbiesAverages[interest] = sumOfInterests.Hobbies[interest] / users.Count;
            }

            // NOTE(review): HobbiesRange is not assigned here, so it keeps whatever value
            // new KMeansStatistics() gives it — confirm that default is the intended range.

            // Replace the previously stored statistics document.
            await _database.DropCollectionAsync("KMeansStatistics");
            await _database.GetCollection <KMeansStatistics>("KMeansStatistics").InsertOneAsync(statistics);

            return statistics;
        }
示例#4
0
        /// <summary>
        /// Loads every centroid from the "Centroids" collection and every user, standardises each
        /// user, and stores on <c>Cluster</c> the index of the centroid with the smallest squared
        /// distance (index 0 when there are no centroids).
        /// </summary>
        /// <returns>The same user list, with <c>Cluster</c> assigned on every entry.</returns>
        public async Task <List <UserModel> > FindClosestCentroidListAsync()
        {
            var centroidCollection = _database.GetCollection <KMeansData>("Centroids");
            List <KMeansData> centroids = await centroidCollection.Find(_ => true).ToListAsync();

            List <UserModel> users = await GetUsers();

            foreach (UserModel user in users)
            {
                KMeansData point = await StandardiseData(user);

                // Track the running minimum while the distances are being computed.
                int    nearestIndex    = 0;
                double nearestDistance = 0;
                for (int index = 0; index < centroids.Count; index++)
                {
                    double squaredDistance = 0;
                    foreach (var feature in point.Hobbies)
                    {
                        squaredDistance += Math.Pow((feature.Value - centroids[index].Hobbies[feature.Key]), 2);
                    }

                    // The first centroid seeds the minimum; later ones must be strictly closer.
                    if (index == 0 || nearestDistance > squaredDistance)
                    {
                        nearestIndex    = index;
                        nearestDistance = squaredDistance;
                    }
                }
                user.Cluster = nearestIndex;
            }
            return users;
        }
示例#5
0
 /// <summary>
 /// Inserts one document into the "Clusterized" collection.
 /// </summary>
 /// <param name="model">The document to insert.</param>
 public async Task ClusterisedInsert(KMeansData model)
 {
     var clusterized = _database.GetCollection <KMeansData>("Clusterized");
     await clusterized.InsertOneAsync(model);
 }
示例#6
0
 /// <summary>
 /// Inserts one document into the "KMeans" collection.
 /// </summary>
 /// <param name="model">The document to insert.</param>
 public async Task KMeansInsert(KMeansData model)
 {
     var kMeans = _database.GetCollection <KMeansData>("KMeans");
     await kMeans.InsertOneAsync(model);
 }
示例#7
0
        /// <summary>
        /// Runs the clustering pipeline: loads the k-means data, derives the initial centroids with
        /// the anomalous-pattern procedure, alternates assignment and centroid updates (at most
        /// 30 cycles, stopping early once no centroid changes), then inserts the points ordered by
        /// cluster index followed by the final centroids.
        /// </summary>
        public async Task Clusterize()
        {
            // An anomalous cluster is promoted to a centroid only with at least this many members.
            const int minAnomalousClusterSize = 4;
            const int maxCycles = 30;

            List <KMeansData> data = await GetKMeansData();

            List <KMeansData> centroids = await AnomalousPatterns();

            RunKMeans();

            data.Sort((s1, s2) => s1.Cluster.CompareTo(s2.Cluster));
            foreach (var kMeansData in data)
            {
                await ClusterisedInsert(kMeansData);
            }

            foreach (var centroid in centroids)
            {
                await InsertCentroids(centroid);
            }

            // Picks initial centroids: repeatedly take the point furthest from the reference
            // point, split the remaining points between the reference and that point, and keep
            // the point as a centroid when enough points were assigned to it.
            async Task <List <KMeansData> > AnomalousPatterns()
            {
                List <KMeansData> centroidsList = new List <KMeansData>();
                List <KMeansData> tempData      = await GetKMeansData();

                KMeansData center   = new KMeansData(); // default instance used as the reference point
                KMeansData furthest = new KMeansData();

                while (tempData.Count != 0)
                {
                    furthest = FindFurthestCluster(tempData, center, furthest);
                    if (!tempData.Remove(furthest))
                    {
                        // No remaining point was selected (e.g. every distance is NaN), so the
                        // list can no longer shrink; stop instead of looping forever.
                        break;
                    }

                    // Index 0 is the reference point, index 1 is the candidate.
                    AssignClusters(new List <KMeansData> {
                        center, furthest
                    }, tempData);

                    if (tempData.Count(x => x.Cluster == 1) >= minAnomalousClusterSize)
                    {
                        centroidsList.Add(furthest);
                        tempData.RemoveAll(x => x.Cluster == 1); // these points now belong to the candidate
                    }
                }
                return centroidsList;
            }

            // Alternates assignment and update steps until nothing changes or maxCycles is hit.
            void RunKMeans()
            {
                for (int cycle = 0; cycle < maxCycles; cycle++)
                {
                    AssignClusters(centroids, data);
                    if (!UpdateCentroids(centroids, data))
                    {
                        break;
                    }
                }
            }

            // True when every key of x maps to an equal value in y.
            bool CompareDictionaries(Dictionary <string, double> x, Dictionary <string, double> y)
            {
                foreach (var xKey in x.Keys)
                {
                    if (!x[xKey].Equals(y[xKey]))
                    {
                        return false;
                    }
                }
                return true;
            }

            // Sum of squared per-feature differences, taken over the features of 'point'.
            double SquaredDistance(KMeansData point, KMeansData reference)
            {
                double dist = 0;
                foreach (var pointHobby in point.Hobbies)
                {
                    dist += Math.Pow((pointHobby.Value - reference.Hobbies[pointHobby.Key]), 2);
                }
                return dist;
            }

            // Returns the point of tempData furthest from 'center', or 'fallback' when no
            // point compares greater than the initial bound.
            KMeansData FindFurthestCluster(List <KMeansData> tempData, KMeansData center, KMeansData fallback)
            {
                KMeansData furthestPoint    = fallback;
                double     furthestDistance = Double.MinValue;

                foreach (var point in tempData)
                {
                    double dist = SquaredDistance(point, center);
                    if (dist > furthestDistance)
                    {
                        furthestDistance = dist;
                        furthestPoint    = point;
                    }
                }
                return furthestPoint;
            }

            // Sets Cluster on every point to the index of its nearest centroid (earliest wins ties).
            void AssignClusters(List <KMeansData> centroidsList, List <KMeansData> clusters)
            {
                foreach (var cluster in clusters)
                {
                    int    centroidWithMin = 0;
                    double minDistance     = 0;
                    for (int i = 0; i < centroidsList.Count; i++)
                    {
                        double dist = SquaredDistance(cluster, centroidsList[i]);
                        if (i == 0 || minDistance > dist)
                        {
                            centroidWithMin = i;
                            minDistance     = dist;
                        }
                    }
                    cluster.Cluster = centroidWithMin;
                }
            }

            // Replaces each centroid with the mean of its members; returns true when at least
            // one centroid changed.
            bool UpdateCentroids(List <KMeansData> centroidsList, List <KMeansData> clusters)
            {
                bool updated = false;
                for (int i = 0; i < centroidsList.Count; i++)
                {
                    int clusterIndex = i;
                    List <KMeansData> membersOfCluster = clusters.FindAll(member => member.Cluster == clusterIndex);
                    if (membersOfCluster.Count == 0)
                    {
                        // Averaging zero members would be 0/0 = NaN and corrupt every later
                        // distance comparison; keep the previous centroid instead.
                        continue;
                    }

                    KMeansData sumOfData = new KMeansData();
                    foreach (var member in membersOfCluster)
                    {
                        foreach (var hobbiesKey in centroidsList[0].Hobbies.Keys)
                        {
                            sumOfData.Hobbies[hobbiesKey] += member.Hobbies[hobbiesKey];
                        }
                    }

                    KMeansData averageCluster = new KMeansData();
                    foreach (var hobbiesKey in clusters[0].Hobbies.Keys)
                    {
                        averageCluster.Hobbies[hobbiesKey] = sumOfData.Hobbies[hobbiesKey] / membersOfCluster.Count;
                    }

                    if (!updated && !CompareDictionaries(centroidsList[i].Hobbies, averageCluster.Hobbies))
                    {
                        updated = true;
                    }
                    centroidsList[i] = averageCluster;
                }
                return updated;
            }
        }
示例#8
0
        /// <summary>
        /// Standardises every user into a k-means feature vector and inserts each vector via
        /// <c>KMeansInsert</c>. Age becomes (age - average age) / (max age - min age); each listed
        /// interest is flagged 1 and then every hobby key is centred on its average across users.
        /// Does nothing when there are no users.
        /// </summary>
        public async Task StandardiseData()
        {
            var users = await GetUsers();
            if (users.Count == 0)
            {
                // Nothing to insert; also avoids a 0/0 average below.
                return;
            }

            // Interests are 0/1 indicators: max 1, min 0.
            const double interestRange = 1;

            // AGE: one pass for sum, minimum and maximum.
            double ageSum = 0;
            double maxAge = Double.MinValue, minAge = Double.MaxValue;
            foreach (var user in users)
            {
                if (maxAge < user.Age)
                {
                    maxAge = user.Age;
                }
                if (minAge > user.Age)
                {
                    minAge = user.Age;
                }
                ageSum += user.Age;
            }
            double avgAge = ageSum / users.Count;
            double range  = maxAge - minAge;

            List <KMeansData> data           = new List <KMeansData>();
            KMeansData        sumOfInterests = new KMeansData();
            KMeansData        avgOfInterests = new KMeansData();

            foreach (var user in users)
            {
                // With a single user or identical ages the range is 0; dividing would give NaN
                // for every user, so the age feature is centred on 0 instead.
                double standardisedAge = range == 0 ? 0 : (user.Age - avgAge) / range;

                KMeansData kuser = new KMeansData {
                    Id = user.Id, Hobbies = { ["Age"] = standardisedAge }
                };
                foreach (var userInterest in user.Interests)
                {
                    kuser.Hobbies[userInterest]           = 1;
                    sumOfInterests.Hobbies[userInterest] += 1;
                }
                data.Add(kuser);
            }

            // Average for each hobby = share of users listing it.
            foreach (var interest in sumOfInterests.Hobbies.Keys)
            {
                avgOfInterests.Hobbies[interest] = sumOfInterests.Hobbies[interest] / users.Count;
            }

            // Centre every hobby value on its average, then store the vector.
            foreach (var kMeansData in data)
            {
                foreach (var hobby in avgOfInterests.Hobbies.Keys)
                {
                    kMeansData.Hobbies[hobby] =
                        (kMeansData.Hobbies[hobby] - avgOfInterests.Hobbies[hobby]) / interestRange;
                }
                await KMeansInsert(kMeansData);
            }
        }
示例#9
0
 /// <summary>
 /// Inserts one document into the "Centroids" collection.
 /// </summary>
 /// <param name="centroids">The centroid document to insert.</param>
 public async Task InsertCentroids(KMeansData centroids)
 {
     var centroidCollection = _database.GetCollection <KMeansData>("Centroids");
     await centroidCollection.InsertOneAsync(centroids);
 }