/// <summary>
/// Builds the K-means feature vector for a single user: the user's age is
/// centred on the average age and scaled by the age range, every interest the
/// user has is flagged with 1, and each default hobby entry is then centred on
/// its stored average and scaled by the hobbies range.
/// </summary>
/// <param name="model">The user to convert into a <c>KMeansData</c> point.</param>
/// <returns>The standardised data point carrying the user's id.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="model"/> is null.</exception>
/// <exception cref="System.InvalidOperationException">
/// <c>GetStatistics</c> returned null, so there is nothing to standardise against.
/// </exception>
async Task<KMeansData> StandardiseData(UserModel model)
{
    if (model == null)
    {
        throw new System.ArgumentNullException(nameof(model));
    }

    //TODO: range age, avg age | max 1 min 0 avg of interests
    KMeansStatistics statistics = await GetStatistics();
    if (statistics == null)
    {
        // GetStatistics can hand back null when nothing has been stored yet;
        // fail with a clear message instead of a NullReferenceException below.
        throw new System.InvalidOperationException(
            "K-means statistics are not available; cannot standardise user data.");
    }

    // AgeRange is max age minus min age, so it is 0 when every user has the
    // same age. Dividing by it would yield NaN/Infinity; use the centred
    // value 0 in that case so the feature stays finite.
    double standardisedAge = statistics.AgeRange == 0
        ? 0
        : (model.Age - statistics.AvgAge) / statistics.AgeRange;

    KMeansData kuser = new KMeansData
    {
        Id = model.Id,
        Hobbies = { ["Age"] = standardisedAge }
    };

    // Flag every interest the user actually has.
    foreach (var userInterest in model.Interests)
    {
        kuser.Hobbies[userInterest] = 1;
    }

    //await _database.KMeansInsert(kuser);

    // finalizing the data
    // Walk the keys of a fresh KMeansData rather than kuser's own keys: this
    // restricts normalisation to the entries a new instance starts with and
    // avoids writing to the dictionary that is being enumerated.
    KMeansData sample = new KMeansData();
    foreach (var hobby in sample.Hobbies.Keys)
    {
        kuser.Hobbies[hobby] =
            (kuser.Hobbies[hobby] - statistics.HobbiesAverages[hobby]) / statistics.HobbiesRange;
    }

    return kuser;
}
/// <summary>
/// Returns the statistics used to standardise K-means input (age range,
/// average age and the per-hobby averages).
/// </summary>
/// <remarks>
/// The statistics are recomputed from the current users and written to the
/// "KMeansStatistics" collection when the user count is a non-zero multiple of
/// 300, or when no statistics document has been stored yet and at least one
/// user exists. Otherwise the stored document is returned unchanged.
/// <c>HobbiesRange</c> is not assigned here; a freshly computed result keeps
/// whatever value a new <c>KMeansStatistics</c> starts with.
/// </remarks>
/// <returns>
/// The stored or freshly computed statistics; null only when there are no
/// users and nothing has been stored.
/// </returns>
async Task<KMeansStatistics> GetStatistics()
{
    var users = await GetUsers();
    var statisticsCollection = _database.GetCollection<KMeansStatistics>("KMeansStatistics");

    // testing purposes / update statistics each 300 users to get accurate centroids
    bool refreshDue = users.Count != 0 && users.Count % 300 == 0;
    if (!refreshDue)
    {
        KMeansStatistics stored = await statisticsCollection.Find(_ => true).FirstOrDefaultAsync();
        if (stored != null || users.Count == 0)
        {
            return stored;
        }
        // Nothing stored yet but users exist: fall through and compute the
        // first set of statistics instead of handing null to the caller.
    }

    KMeansStatistics statistics = new KMeansStatistics();

    // Age range = highest age minus lowest age in the "Users" collection.
    // Awaited rather than blocking the thread inside an async method.
    var userCollection = _database.GetCollection<UserModel>("Users");
    var oldest = await userCollection.Find(_ => true)
        .Sort(Builders<UserModel>.Sort.Descending("Age"))
        .FirstOrDefaultAsync();
    var youngest = await userCollection.Find(_ => true)
        .Sort(Builders<UserModel>.Sort.Ascending("Age"))
        .FirstOrDefaultAsync();
    statistics.AgeRange = oldest.Age - youngest.Age;

    // Average age over the loaded users (users.Count is non-zero on this path).
    double ageSum = 0;
    foreach (var user in users)
    {
        ageSum += user.Age;
    }
    statistics.AvgAge = ageSum / users.Count;

    // Hobbies average: count how many users list each interest. A fresh
    // KMeansData supplies the default hobby keys; an interest that is not
    // among them starts from zero instead of throwing KeyNotFoundException.
    KMeansData interestCounts = new KMeansData();
    foreach (var user in users)
    {
        foreach (var userInterest in user.Interests)
        {
            interestCounts.Hobbies.TryGetValue(userInterest, out var seen);
            interestCounts.Hobbies[userInterest] = seen + 1;
        }
    }

    // average for each hobby
    foreach (var entry in interestCounts.Hobbies)
    {
        statistics.HobbiesAverages[entry.Key] = entry.Value / users.Count;
    }

    // Replace the previously stored document with the new one.
    await _database.DropCollectionAsync("KMeansStatistics");
    await statisticsCollection.InsertOneAsync(statistics);

    return statistics;
}