/// <summary>
/// Builds an engine over the supplied ratings database.
/// </summary>
/// <param name="db">Ratings database kept by the engine in <c>_db</c>.</param>
public RecommenderSystemEngine(RankingDB db)
{
    // Register one entry per supported user-to-user prediction method.
    var pearsonEntry = new SimilartyData(Pearson);
    this.usr2usr.Add(PredictionMethod.Pearson, pearsonEntry);

    var cosineEntry = new SimilartyData(Cosine);
    this.usr2usr.Add(PredictionMethod.Cosine, cosineEntry);

    this._db = db;

    // Initial similarity threshold.
    this.MinSimilarity = 0.7;
}
//Compute the hit ratio of all the methods in the list for a given train-test split (e.g. 0.95 train set size)
/*public Dictionary<string, double> ComputeHitRatio(List<PredictionMethod> lMethods, double dTrainSetSize)
 * {
 *     Dictionary<string, double> res = new Dictionary<string, double>();
 *     foreach (var m in lMethods)
 *     {
 *         res.Add(m.ToString(), CalcHitRatio(m, dTrainSetSize));
 *     }
 *     return res;
 * }*/

/*private int GetGrade(PredictionMethod m, RankingDB train, RankingDB test)
 * {
 *     var engineTrain = SetEngine(train);
 *
 *
 *     int grade = 0;
 *     var allTest = test.GetAllData();
 *     foreach (var rankData in allTest)
 *     {
 *         double pretictedRating = Math.Round(engineTrain.PredictRating(m, rankData.ranker, rankData.item));
 *         if (pretictedRating == rankData.rank) ++grade;
 *     }
 *     return grade;
 * }
 *
 * private RecommenderSystemEngine SetEngine(RankingDB train)
 * {
 *     var engineTrain = new RecommenderSystemEngine(train);
 *     if (_engine.Mode == FilterMode.MaxUsers)
 *         engineTrain.MaxUsers = _engine.MaxUsers;
 *     else
 *         engineTrain.MinSimilarity = _engine.MinSimilarity;
 *     return engineTrain;
 * }*/

/// <summary>
/// Splits <paramref name="allRatings"/> into a train set and a test set.
/// Users are drawn one at a time; a random subset of each drawn user's ranks
/// always goes to <paramref name="train"/>, and the user's remaining ranks go
/// to <paramref name="test"/> until the test set reaches its target size.
/// Users that were never drawn are copied to <paramref name="train"/> whole.
/// </summary>
/// <param name="allRatings">The complete set of ratings to split.</param>
/// <param name="dTrainSetSize">Fraction of all ranks intended for the train set (e.g. 0.95).</param>
/// <param name="train">Already-initialized database that receives the train ranks.</param>
/// <param name="test">Already-initialized database that receives the test ranks.</param>
private void DivideDB(RankingDB allRatings, double dTrainSetSize, RankingDB train, RankingDB test)
{
    var users = allRatings.GetRankers();

    // Hash set for quick add and contains.
    HashSet<string> chosenUsers = new HashSet<string>();
    int testSize = (int)Math.Round((1 - dTrainSetSize) * allRatings.NumOfRanks());

    // NOTE(review): if every user has been drawn before the test set is full
    // (possible for very small dTrainSetSize, since each drawn user contributes
    // at least one rank to train), the behavior depends on chooseNewRandomUser,
    // which is not visible here - confirm it cannot loop forever.
    while (test.NumOfRanks() < testSize)
    {
        var currentUser = chooseNewRandomUser(users, chosenUsers);
        chosenUsers.Add(currentUser);

        IEnumerable<RankingDB.RankData> randomRanks = chooseSomeRandomRanks(currentUser, allRatings);
        train.Add(randomRanks);

        // Materialize once: the sequence is lazy, and its real length is what
        // must be compared with the free space. Deriving the count from
        // NumOfRanks(user) - randomRanks.Count() is wrong whenever randomRanks
        // holds duplicates, and would let the test set overshoot its target.
        List<RankingDB.RankData> other = otherRanks(currentUser, randomRanks, allRatings).ToList();
        int freeSpace = testSize - test.NumOfRanks();
        int diff = other.Count - freeSpace;

        if (diff > 0)
        {
            // More leftover ranks than free space in test: the surplus goes to train.
            train.Add(other.Take(diff));
            test.Add(other.Skip(diff));
        }
        else
        {
            test.Add(other.AsEnumerable());
        }
    }

    // Users that were never drawn contribute all of their data to the train set.
    foreach (var usr in users)
    {
        if (!chosenUsers.Contains(usr))
        {
            train.AddUser(usr, allRatings.GetUserData(usr));
        }
    }
}
/// <summary>
/// Reads the ratings file, splits its content into the <c>_train</c> and
/// <c>_test</c> databases and creates the engine that works on the train part.
/// </summary>
/// <param name="sFileName">Path of the ratings file to read.</param>
/// <param name="dTrainSetSize">Train-set fraction forwarded to <c>DivideDB</c>.</param>
public void Load(string sFileName, double dTrainSetSize)
{
    // Read everything first; the fields are only replaced after a successful read.
    var allRatings = this.Load(sFileName);

    this._test = new RankingDB();
    this._train = new RankingDB();
    this.DivideDB(allRatings, dTrainSetSize, this._train, this._test);

    this._trainEngine = new RecommenderSystemEngine(this._train);
}
/// <summary>
/// Loads a dataset from a comma-separated text file.
/// </summary>
/// <remarks>
/// Every non-blank line must hold at least three comma-separated fields. The
/// first two are passed to <c>RankingDB.Add</c> as strings and the third is
/// parsed as an integer (presumably ranker, item and rank - confirm against
/// <c>RankingDB.Add</c>). Blank lines are skipped.
/// </remarks>
/// <param name="sFileName">Path of the file to read.</param>
/// <returns>A new database filled with the rows of the file.</returns>
/// <exception cref="FormatException">
/// A line has fewer than three fields, or its third field is not an integer.
/// </exception>
public RankingDB Load(string sFileName)
{
    RankingDB db = new RankingDB();
    int lineNumber = 0;

    foreach (var line in File.ReadLines(sFileName))
    {
        lineNumber++;

        // Tolerate empty lines instead of failing on parts[1] / parts[2].
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        var parts = line.Split(',');
        if (parts.Length < 3)
        {
            throw new FormatException(string.Format(
                "Line {0} of '{1}' has {2} field(s); at least 3 are required.",
                lineNumber, sFileName, parts.Length));
        }

        // The file is machine-readable data, so parsing must not depend on the current culture.
        int rank = int.Parse(parts[2], System.Globalization.CultureInfo.InvariantCulture);
        db.Add(parts[0], parts[1], rank);
    }

    return db;
}
// One generator shared by all calls. Creating a new Random per call inside a
// tight loop can yield identical sequences on runtimes that seed from the clock.
// Not thread-safe; NOTE(review): assumes the split runs on a single thread - confirm.
private static readonly Random _rankSampler = new Random();

/// <summary>
/// Picks a random, non-empty subset of the ranks that <paramref name="user"/>
/// has in <paramref name="allRatings"/>.
/// </summary>
/// <remarks>
/// The subset size is uniform in [1, number of items] and the items are drawn
/// without replacement, so every returned rank refers to a distinct position
/// in the user's item list.
/// </remarks>
/// <param name="user">The user whose ranks are sampled.</param>
/// <param name="allRatings">The database the ranks are read from.</param>
/// <returns>The sampled ranks; empty when the user has no items.</returns>
private IEnumerable<RankingDB.RankData> chooseSomeRandomRanks(string user, RankingDB allRatings)
{
    // Materialize once so counting and indexing do not re-enumerate the source.
    var pool = allRatings.getItems(user).ToList();
    if (pool.Count == 0)
    {
        return new RankingDB.RankData[0];
    }

    int k = _rankSampler.Next(1, pool.Count + 1);
    RankingDB.RankData[] res = new RankingDB.RankData[k];

    // Partial Fisher-Yates shuffle: after step i the first i + 1 slots of the
    // pool hold the items chosen so far, so no item can be chosen twice.
    for (int i = 0; i < k; i++)
    {
        int j = _rankSampler.Next(i, pool.Count);
        var item = pool[j];
        pool[j] = pool[i];
        pool[i] = item;

        res[i] = new RankingDB.RankData(user, item, allRatings.GetRank(user, item).Value);
    }

    return res;
}
/// <summary>
/// Trains the SVD model used by the train engine, stopping as soon as the
/// RMSE on a held-out validation split stops improving.
/// </summary>
/// <param name="cFeatures">Feature count passed to the <c>SVD</c> constructor.</param>
public void TrainBaseModel(int cFeatures)
{
    // Average over all loaded ranks (train and test). The cast forces a
    // floating-point division in case SumRanks returns an integer type.
    double avg = (double)(_train.SumRanks() + _test.SumRanks())
                 / (_train.NumOfRanks() + _test.NumOfRanks());
    var svd = new SVD(avg, cFeatures);

    // Hold out 5% of the train set for validation.
    RankingDB train = new RankingDB();
    RankingDB validation = new RankingDB();
    DivideDB(_train, 0.95, train, validation);
    var ranks = train.GetAllData();

    double rmse = double.MaxValue;
    double lastRmse;
    do
    {
        lastRmse = rmse;
        svd.Train(ranks);
        _trainEngine.setSVD(svd);
        rmse = ComputeRMSE(PredictionMethod.SVD, _trainEngine, validation.GetAllData());
    }
    // Strict comparison: with "<=" a validation RMSE that plateaus at an
    // exactly equal value would keep the loop running forever.
    while (rmse < lastRmse);
}
/// <summary>
/// Convenience overload: builds a fresh engine over <paramref name="db"/> and
/// evaluates method <paramref name="m"/> on all data of that same database.
/// </summary>
/// <param name="m">Prediction method to evaluate.</param>
/// <param name="db">Database used both to build the engine and as evaluation data.</param>
/// <returns>The value returned by the three-argument <c>ComputeRMSE</c> overload.</returns>
private double ComputeRMSE(PredictionMethod m, RankingDB db)
{
    var engine = new RecommenderSystemEngine(db);
    var allData = db.GetAllData();
    return ComputeRMSE(m, engine, allData);
}
/// <summary>
/// Returns the ranks of <paramref name="currentUser"/> in
/// <paramref name="allRatings"/> that do not appear in <paramref name="ranks"/>.
/// </summary>
/// <param name="currentUser">The user whose ranks are examined.</param>
/// <param name="ranks">Ranks to exclude from the result.</param>
/// <param name="allRatings">Database the user's ranks are read from.</param>
/// <returns>A lazily evaluated sequence of the remaining ranks.</returns>
private IEnumerable<RankingDB.RankData> otherRanks(string currentUser, IEnumerable<RankingDB.RankData> ranks, RankingDB allRatings)
{
    var ranksOfUser = allRatings.GetRanksAndItems(currentUser);
    var remaining = ranksOfUser.Except(ranks);
    return remaining;
}