/// <summary>
/// Randomly distributes the non-zero cells of <paramref name="segment"/> over ten folds and
/// returns one <see cref="Sample"/> per fold.
/// </summary>
/// <remarks>
/// For fold k the evaluation matrix contains only the cells drawn into that fold. The training
/// matrix and the training-root matrix both start from <c>segment.data.Duplicate()</c> and have
/// those same cells set to 0.
/// </remarks>
/// <param name="segment">Segment whose <c>data</c> matrix is split.</param>
/// <returns>Ten samples, in fold order.</returns>
public static List<Sample> Resampling (Segment segment)
{
    const int FoldCount = 10;

    Matrix[] Evaluation_Matrix = new Matrix[FoldCount];
    Matrix[] Training_Segment_Matrix = new Matrix[FoldCount];
    Matrix[] Training_Root_Matrix = new Matrix[FoldCount];
    for (int fold = 0; fold < FoldCount; fold++)
    {
        Evaluation_Matrix[fold] = new Matrix(segment.data.rows, segment.data.cols);
        Training_Segment_Matrix[fold] = segment.data.Duplicate();
        Training_Root_Matrix[fold] = segment.data.Duplicate();
    }

    // A fold accepts at most need + 1 cells, which keeps the folds roughly balanced.
    int need = segment.data.CountCells() / FoldCount;

    // Number of cells already placed in each fold. Tracking this here avoids calling
    // CountCells() on a whole evaluation matrix for every placement attempt.
    // NOTE(review): assumes Matrix.CountCells() counts non-zero cells - confirm.
    int[] cellsInFold = new int[FoldCount];

    Random rand = new Random();
    for (int i = 0; i < segment.data.rows; i++)
    {
        for (int j = 0; j < segment.data.cols; j++)
        {
            if (segment.data[i, j] == 0)
            {
                continue; // empty cell, nothing to place
            }

            // Draw folds until one with spare capacity comes up.
            int k = rand.Next(0, FoldCount);
            while (cellsInFold[k] >= need + 1)
            {
                k = rand.Next(0, FoldCount);
            }

            Evaluation_Matrix[k][i, j] = segment.data[i, j];
            Training_Segment_Matrix[k][i, j] = 0;
            Training_Root_Matrix[k][i, j] = 0;
            cellsInFold[k]++;
        }
    }

    List<Sample> result = new List<Sample>(FoldCount);
    for (int fold = 0; fold < FoldCount; fold++)
    {
        Segment Evaluation_Segment = new Segment(segment, Evaluation_Matrix[fold]);
        Segment Training_Segment = new Segment(segment, Training_Segment_Matrix[fold]);
        Sample sample = new Sample(Evaluation_Segment, Training_Segment);
        sample.Training_root = new Segment(segment, Training_Root_Matrix[fold]);
        result.Add(sample);
    }
    return result;
}
/// <summary>
/// Mean squared error of this regression's expert over the users listed in <c>Uij</c>.
/// </summary>
/// <param name="segment">Segment whose rating matrix supplies the observed ratings.</param>
/// <returns>
/// The mean of the squared residuals, or 0.01 when the squared-residual sum is exactly 0
/// (which includes an empty <c>Uij</c>).
/// </returns>
public double MSE(Segment segment)
{
    double sum = 0;
    for (int u = 0; u < Uij.Length; u++)
    {
        int user = Uij[u];
        // The expert predicts the rating on item i from the rating on item j, so the
        // residual is measured against the observed rating on item i.
        double residual = segment.data[user, i] - ExpertScore(segment.data[user, j], segment);
        sum += residual * residual;
    }

    // Callers in this file divide by values derived from the MSE, so an exact zero is
    // replaced with a small positive constant.
    if (sum == 0)
    {
        return 0.01;
    }
    return sum / Uij.Length;
}
/// <summary>
/// Builds a sample from an evaluation table and a training table.
/// </summary>
/// <remarks>
/// Both tables are read with the same layout: column 0 is the user id, column 1 the user's
/// average rating and columns 2.. hold one item each (the column name is the item id).
/// Row 0 carries the per-item averages and rows 1.. carry one user each.
/// </remarks>
/// <param name="EvalTable">Table converted into the <c>Evaluation</c> segment.</param>
/// <param name="TrainingTable">Table converted into the <c>Training</c> segment.</param>
public Sample(DataTable EvalTable, DataTable TrainingTable)
{
    this.Evaluation = ReadSegment(EvalTable);
    this.Training = ReadSegment(TrainingTable);
}

// Converts one table (layout described on the constructor) into a Segment.
private static Segment ReadSegment(DataTable table)
{
    int userCount = table.Rows.Count - 1;    // row 0 holds the item averages
    int itemCount = table.Columns.Count - 2; // columns 0 and 1 are user id / user average

    Segment segment = new Segment();
    segment.data = new Matrix(userCount, itemCount);
    segment.user_id = new int[userCount];
    segment.item_id = new string[itemCount];
    segment.avgRatingByItem = new double[itemCount];
    segment.avgRatingByUser = new double[userCount];

    // Items
    for (int i = 0; i < segment.item_id.Length; i++)
    {
        segment.item_id[i] = table.Columns[i + 2].ColumnName.Trim();
        segment.avgRatingByItem[i] = CellToDouble(table.Rows[0][i + 2]);
    }

    // Users and their ratings
    for (int i = 0; i < segment.data.rows; i++)
    {
        DataRow row = table.Rows[i + 1];
        segment.user_id[i] = Convert.ToInt32(row[0]);
        segment.avgRatingByUser[i] = CellToDouble(row[1]);
        for (int j = 0; j < segment.data.cols; j++)
        {
            segment.data[i, j] = CellToDouble(row[j + 2]);
        }
    }
    return segment;
}

// Returns 0 for a missing cell (null, DBNull or empty string), otherwise the cell as a double.
// The empty check is by value: comparing an object cell with == "" only compares references.
private static double CellToDouble(object cell)
{
    if (cell == null || cell is DBNull)
    {
        return 0;
    }
    string text = cell as string;
    if (text != null && text.Length == 0)
    {
        return 0;
    }
    return Convert.ToDouble(cell);
}
/// <summary>
/// Evaluates the linear expert <c>alpha * r_aj + beta</c> of this regression.
/// i is the item to be predicted; j is the item the active user has already rated.
/// </summary>
/// <param name="r_aj">Rating value fed into the expert.</param>
/// <param name="segment">Segment providing the rating matrix and the per-item averages.</param>
/// <returns>
/// <c>alpha * r_aj + beta</c>. The result is NaN or infinite when the denominator sum is 0.
/// </returns>
public double ExpertScore(double r_aj, Segment segment)
{
    double meanI = segment.avgRatingByItem[i];
    double meanJ = segment.avgRatingByItem[j];

    double numerator = 0;
    double denominator = 0;
    foreach (int user in Uij)
    {
        double deviationI = segment.data[user, i] - meanI;
        double deviationJ = segment.data[user, j] - meanJ;
        numerator += deviationI * deviationJ;
        // NOTE(review): the denominator sums squared deviations of item i, not item j -
        // confirm this is the intended slope formula.
        denominator += Math.Pow(deviationI, 2);
    }

    double alpha = numerator / denominator;
    double beta = meanI - alpha * meanJ;
    return alpha * r_aj + beta;
}
/// <summary>
/// Creates a segment that takes its context and index metadata from
/// <paramref name="segment"/> but uses <paramref name="data"/> as its rating matrix.
/// </summary>
/// <param name="segment">Source of id, time, budget, companion, weather and the id/average arrays.</param>
/// <param name="data">Rating matrix assigned to the new segment.</param>
public Segment (Segment segment, Matrix data)
{
    // Rating matrix supplied by the caller.
    this.data = data;

    // Context dimensions (familiarity, mood and travelLength are not copied).
    id = segment.id;
    time = segment.time;
    budget = segment.budget;
    companion = segment.companion;
    weather = segment.weather;

    // The arrays are assigned by reference, not cloned.
    user_id = segment.user_id;
    item_id = segment.item_id;
    avgRatingByItem = segment.avgRatingByItem;
    avgRatingByUser = segment.avgRatingByUser;
}
/// <summary>
/// Evaluates this regression's linear expert at the mean rating of item j over the users
/// in <c>Uij</c>. i is the item to be predicted; j is the expert item.
/// </summary>
/// <param name="segment">Segment providing the rating matrix and the per-item averages.</param>
/// <returns>
/// <c>alpha * r_uj + beta</c>, where <c>r_uj</c> is the mean rating of item j over
/// <c>Uij</c>. The result is NaN when <c>Uij</c> is empty or the denominator sum is 0.
/// </returns>
public double ExpertScore(Segment segment)
{
    double formular_alpha_1 = 0, formular_alpha_2 = 0;
    double r_uj = 0; // mean rating of item j over the users who rated both i and j

    for (int u = 0; u < Uij.Length; u++)
    {
        // Uij stores row indices; the loop counter itself is not a row of the matrix.
        int row = Uij[u];
        formular_alpha_1 += (segment.data[row, i] - segment.avgRatingByItem[i]) * (segment.data[row, j] - segment.avgRatingByItem[j]);
        formular_alpha_2 += Math.Pow(segment.data[row, i] - segment.avgRatingByItem[i], 2);
        r_uj += segment.data[row, j];
    }
    r_uj = r_uj / Uij.Length;

    double alpha = formular_alpha_1 / formular_alpha_2;
    double beta = segment.avgRatingByItem[i] - alpha * segment.avgRatingByItem[j];
    return alpha * r_uj + beta;
}
// Row indices (not user ids) of the users who have a non-zero rating for both items.
int[] Uij {get; set;}

/// <summary>
/// Creates the regression for the item pair (i, j) and collects the rows of
/// <paramref name="segment"/> that rate both items.
/// </summary>
/// <param name="i">Column index of the first item in <c>segment.data</c>.</param>
/// <param name="j">Column index of the second item in <c>segment.data</c>.</param>
/// <param name="segment">Segment whose rating matrix is scanned.</param>
public Regression(int i, int j, Segment segment)
{
    this.i = i;
    this.j = j;

    List<int> coRatingRows = new List<int>();
    for (int row = 0; row < segment.data.rows; row++)
    {
        // A value of 0 marks a missing rating; both items must be rated.
        if (segment.data[row, i] == 0)
        {
            continue;
        }
        if (segment.data[row, j] == 0)
        {
            continue;
        }
        coRatingRows.Add(row);
    }
    Uij = coRatingRows.ToArray();
}
/// <summary>
/// Creates a sample from an already built evaluation segment and training segment.
/// </summary>
/// <param name="Eval">Segment stored in <c>Evaluation</c>.</param>
/// <param name="Training">Segment stored in <c>Training</c>.</param>
public Sample(Segment Eval, Segment Training)
{
    this.Training = Training;
    Evaluation = Eval;
}
/// <summary>
/// Computes a correlation value for the experts (i, j) and (i, k):
/// <c>(Cijj + Cikk - E) / (2 * sqrt(Cijj * Cikk))</c>, where Cijj and Cikk are the MSEs of
/// the two regressions and E is the mean squared difference of their expert scores.
/// </summary>
/// <param name="i">Column index of the item shared by both regressions.</param>
/// <param name="j">Column index of the second item of the first regression.</param>
/// <param name="k">Column index of the second item of the second regression.</param>
/// <param name="segment">Segment providing the ratings.</param>
/// <returns>The correlation value; NaN when no user contributes to E.</returns>
public static double Correlation(int i, int j, int k, Segment segment)
{
    Regression R_ij = new Regression(i, j, segment);
    Regression R_ik = new Regression(i, k, segment);
    Regression R_jk = new Regression(j, k, segment);

    double Cijj = R_ij.MSE(segment);
    double Cikk = R_ik.MSE(segment);

    // Mean squared difference of the two expert scores over the users who rated j and k
    // and also have a non-zero rating for i.
    double squaredDifferenceSum = 0;
    int contributingUsers = 0;
    foreach (int u_index in R_jk.Uij)
    {
        if (segment.data[u_index, i] == 0)
        {
            continue;
        }
        // NOTE(review): both experts receive the user's rating of item i, while the
        // ExpertScore parameter is named r_aj - confirm this is intended.
        double scoreFromJ = R_ij.ExpertScore(segment.data[u_index, i], segment);
        double scoreFromK = R_ik.ExpertScore(segment.data[u_index, i], segment);
        squaredDifferenceSum += Math.Pow(scoreFromJ - scoreFromK, 2);
        contributingUsers++;
    }
    double meanSquaredDifference = squaredDifferenceSum / contributingUsers;

    double numerator = Cijj + Cikk - meanSquaredDifference;
    double denominator = 2 * Math.Sqrt(Cijj * Cikk);
    return numerator / denominator;
}
/// <summary>
/// Predicts the rating of <paramref name="active_user_id"/> for <paramref name="item_id"/>
/// as a weighted sum of expert scores, one expert per item the user has already rated.
/// </summary>
/// <param name="active_user_id">User id looked up in <c>segment.user_id</c>.</param>
/// <param name="item_id">Item id looked up (as an integer) in <c>segment.item_id</c>.</param>
/// <param name="segment">Segment providing ratings, ids and <c>Correlation_Avg</c>.</param>
/// <returns>
/// The predicted rating; 0 when the item or the user is not found in the segment, or when
/// the user has no rated item.
/// </returns>
public static double Prediction(int active_user_id, int item_id, Segment segment)
{
    // Column index of item_id in the segment data.
    int item_id_index = -1;
    for (int i = 0; i < segment.item_id.Length; i++)
    {
        if (Convert.ToInt32(segment.item_id[i].Trim()) == item_id)
        {
            item_id_index = i;
            break;
        }
    }
    if (item_id_index < 0)
    {
        // Unknown item: without this guard -1 would be used as a matrix column below.
        return 0;
    }

    // Row index of the active user.
    int a = -1;
    for (int row = 0; row < segment.data.rows; row++)
    {
        if (segment.user_id[row] == active_user_id)
        {
            a = row;
            break;
        }
    }
    if (a < 0)
    {
        return 0;
    }

    // One expert per item rated by the active user; the last column (unknown item) is skipped.
    // The Regression objects are kept so they are not rebuilt for the weighting pass.
    List<int> Ia = new List<int>();                      // column indices of the rated items
    List<Regression> experts = new List<Regression>();
    List<double> L_MSE_ij = new List<double>();
    double MSE_Min = 0;
    for (int j = 0; j < segment.data.cols - 1; j++)
    {
        if (segment.data[a, j] == 0)
        {
            continue;
        }
        Regression r = new Regression(item_id_index, j, segment);
        double MSE_ij = r.MSE(segment);
        // The first value always seeds the minimum, as in the original sentinel logic.
        if (experts.Count == 0 || MSE_ij < MSE_Min)
        {
            MSE_Min = MSE_ij;
        }
        Ia.Add(j);
        experts.Add(r);
        L_MSE_ij.Add(MSE_ij);
    }

    // Diagonal terms {C*_i}_jj and the sum of their inverses.
    double[] MSE_Diag = new double[L_MSE_ij.Count];
    double Inv_Sum_MSE_Diag = 0;
    for (int j = 0; j < MSE_Diag.Length; j++)
    {
        MSE_Diag[j] = L_MSE_ij[j] - segment.Correlation_Avg * MSE_Min;
        Inv_Sum_MSE_Diag += 1 / MSE_Diag[j];
    }

    // Each expert is weighted by its inverse diagonal term, normalised by the inverse sum.
    double predict_ai = 0;
    for (int j = 0; j < MSE_Diag.Length; j++)
    {
        double wji = 1 / (MSE_Diag[j] * Inv_Sum_MSE_Diag);
        predict_ai += wji * experts[j].ExpertScore(segment.data[a, Ia[j]], segment);
    }
    return predict_ai;
}
/// <summary>
/// Tells whether this segment falls under <paramref name="other"/> on the budget,
/// companion and weather dimensions. An id of 0 on <paramref name="other"/> matches any id.
/// </summary>
/// <param name="other">Candidate parent segment.</param>
/// <returns>True when every compared dimension matches or is 0 on <paramref name="other"/>.</returns>
public bool IsChildOf(Segment other)
{
    // familiarity, mood, temperature and travelLength are not part of the check.
    if (!(budget.id == other.budget.id || other.budget.id == 0))
    {
        return false;
    }
    if (!(companion.id == other.companion.id || other.companion.id == 0))
    {
        return false;
    }
    return weather.id == other.weather.id || other.weather.id == 0;
}
/// <summary>
/// Click handler: reads the Movielens rating matrix from the cube, resamples it and exports
/// every sample to an .xlsx file, logging the duration of each phase to txtLogResample.
/// </summary>
private void btnTestML_Click(object sender, EventArgs e)
{
    Stopwatch swglobal = new Stopwatch();
    Stopwatch swtemp = new Stopwatch();
    swglobal.Start();
    swtemp.Start();
    txtLogResample.AppendText("Process started: " + DateTime.Now);
    txtLogResample.AppendText("\r\nReading Data from Cube ... ");

    // Get data
    Segment segment = new Segment();
    string mdx = "with "
        + "member Measures.[Ratings AVG] as"
        + "( "
        + "[Measures].[Rating]/[Measures].[Ratings100k Count]"
        + ")"
        + "select "
        + "[User100k].[Id].Members on rows, "
        + "[Movies100k].[Id].Members on columns "
        + "from Movielens "
        + "where Measures.[Ratings AVG]";
    DataTable result = DbHelper.RunMDXWithDataTable(mdx);

    // Row 0 holds the item averages; columns 0 and 1 hold the user id and user average.
    segment.data = new Matrix(result.Rows.Count - 1, result.Columns.Count - 2);
    segment.user_id = new int[result.Rows.Count - 1];
    segment.item_id = new string[result.Columns.Count - 2];
    segment.avgRatingByItem = new double[result.Columns.Count - 2];
    segment.avgRatingByUser = new double[result.Rows.Count - 1];

    // Items
    for (int i = 0; i < segment.item_id.Length; i++)
    {
        segment.item_id[i] = result.Columns[i + 2].ColumnName.Trim();
        segment.avgRatingByItem[i] = CellValueOrZero(result.Rows[0][i + 2]);
    }

    // Users: iterate over every data row. The previous bound of rows - 1 left the last
    // user unread, unlike the other loaders in this code base.
    for (int i = 0; i < segment.data.rows; i++)
    {
        DataRow row = result.Rows[i + 1];
        segment.user_id[i] = Convert.ToInt32(row[0]);
        segment.avgRatingByUser[i] = CellValueOrZero(row[1]);
        for (int j = 0; j < segment.data.cols; j++)
        {
            segment.data[i, j] = CellValueOrZero(row[j + 2]);
        }
    }
    swtemp.Stop();
    txtLogResample.AppendText(swtemp.Elapsed.ToString());

    txtLogResample.AppendText("\r\nStart resampling (this may take a long time) ... ");
    // Restart (not Start) so the logged time covers this phase only.
    swtemp.Restart();
    List<Sample> resamples = Sample.Resampling(segment);
    swtemp.Stop();
    txtLogResample.AppendText(swtemp.Elapsed.ToString());

    txtLogResample.AppendText("\r\nExporting result ...");
    swtemp.Restart();
    int count = 0;
    foreach (Sample sample in resamples)
    {
        count++;
        sample.Export("D:\\Temp\\TestResampling\\" + count + ".xlsx");
    }
    swtemp.Stop();
    txtLogResample.AppendText(swtemp.Elapsed.ToString());

    swglobal.Stop();
    txtLogResample.AppendText("\r\nProcess ended: " + DateTime.Now + " - Total time: " + swglobal.Elapsed.ToString());

    /*
    //Evaluate
    double performamce_segment = 0;
    double correlation_avg_segment = 0;
    foreach (Sample sample in resamples)
    {
        double temp_corr_avg_segment = sample.Train();
        correlation_avg_segment += temp_corr_avg_segment;
        performamce_segment += sample.Test(temp_corr_avg_segment);
    }
    MessageBox.Show("Done");
    txtPerformance.Text = Convert.ToString(performamce_segment / 10);
    txtCorrelation.Text = Convert.ToString(correlation_avg_segment / 10);
    */
}

// Returns 0 for a missing cell (null, DBNull or empty string), otherwise the cell as a double.
// The empty check is by value: comparing an object cell with == "" only compares references.
private static double CellValueOrZero(object cell)
{
    if (cell == null || cell is DBNull)
    {
        return 0;
    }
    string text = cell as string;
    if (text != null && text.Length == 0)
    {
        return 0;
    }
    return Convert.ToDouble(cell);
}
/*
 * IEquatable
 */
/// <summary>
/// Compares two segments by the ids of their budget, companion and weather dimensions.
/// The time dimension is not part of the comparison.
/// </summary>
/// <param name="other">Segment to compare with; may be null.</param>
/// <returns>True for the same instance or for matching budget, companion and weather ids.</returns>
public bool Equals(Segment other)
{
    // A null argument is never equal.
    if ((object)other == null)
    {
        return false;
    }

    // The same instance is trivially equal.
    if ((object)this == (object)other)
    {
        return true;
    }

    // Otherwise every compared dimension id has to match.
    if (!budget.id.Equals(other.budget.id))
    {
        return false;
    }
    if (!companion.id.Equals(other.companion.id))
    {
        return false;
    }
    return weather.id.Equals(other.weather.id);
}
/// <summary>
/// Loads every row of the <c>segments</c> table, ordered by ascending performance, and
/// turns each row into a <see cref="Segment"/>.
/// </summary>
/// <remarks>
/// Columns are read by position: 0 id, 1 period of day, 2 period of week, 3 season,
/// 4 budget id, 5 companion id, 6 weather id, 7 performance, 8 average correlation.
/// </remarks>
/// <returns>One segment per table row, in query order.</returns>
public static Segment[] GetCandidates()
{
    DataTable data = DbHelper.RunScriptsWithTable("select * from segments order by performance asc", "Data Warehouse");

    int rowCount = data.Rows.Count;
    Segment[] candidates = new Segment[rowCount];
    for (int index = 0; index < rowCount; index++)
    {
        DataRow row = data.Rows[index];
        candidates[index] = new Segment
        {
            id = Convert.ToInt32(row[0]),
            // Time is built from its three textual components.
            time = new Time(Convert.ToString(row[1]), Convert.ToString(row[2]), Convert.ToString(row[3])),
            budget = new Budget(Convert.ToInt32(row[4])),
            companion = new Companion(Convert.ToInt32(row[5])),
            weather = new Weather(Convert.ToInt32(row[6])),
            Performance = Convert.ToDouble(row[7]),
            Correlation_Avg = Convert.ToDouble(row[8])
        };
    }
    return candidates;
}
/// <summary>
/// Enumerates every combination of time, budget, companion and weather except the
/// all-wildcard one, loads its data and keeps the segments with more than 100 counted cells.
/// </summary>
/// <returns>The segments that passed the cell-count threshold.</returns>
public static List<Segment> GetAllSegment()
{
    List<Segment> result = new List<Segment>();

    // familiarity, mood, temperature and travelLength are not enumerated.
    foreach (Time time in Time.GetAll())
    {
        foreach (Budget budget in Budget.GetAllData())
        {
            foreach (Companion companion in Companion.GetAllData())
            {
                foreach (Weather weather in Weather.GetAllData())
                {
                    // The combination where every dimension is "All" / 0 is skipped.
                    bool isAllWildcard = time.period_of_day == Time.Period_Of_Day.All
                        && time.period_of_week == Time.Period_Of_Week.All
                        && time.season == Time.Season.All
                        && budget.id == 0
                        && companion.id == 0
                        && weather.id == 0;
                    if (isAllWildcard)
                    {
                        continue;
                    }

                    Segment segment = new Segment
                    {
                        time = time,
                        budget = budget,
                        companion = companion,
                        weather = weather
                    };
                    segment.GetData();

                    if (segment.data.CountCells() > 100)
                    {
                        result.Add(segment);
                    }
                }
            }
        }
    }
    return result;
}
/*
 * Static Methods
 */
/// <summary>
/// Loads the complete user-by-place average-rating matrix from the cube into a new segment.
/// </summary>
/// <remarks>
/// The result table is read as: column 0 user id, column 1 user average, columns 2.. one
/// item each (the column name is the item id); row 0 holds the per-item averages and
/// rows 1.. one user each. Missing cells are stored as 0.
/// </remarks>
/// <returns>The root segment with data, ids and averages filled in.</returns>
public static Segment GetRoot()
{
    string mdx = "with member Measures.[Avg_Ratings] as "
        + "([Measures].[Sum_Ratings]/[Measures].[Count_Ratings]) "
        + "select "
        + "[Dim Place].[Place Key].Members on columns, "
        + "[Dim User].[User Key].Members on rows "
        + "from [Travel H2V DW]"
        + "where Measures.[Avg_Ratings]";
    DataTable result = DbHelper.RunMDXWithDataTable(mdx);

    int userCount = result.Rows.Count - 1;    // row 0 holds the item averages
    int itemCount = result.Columns.Count - 2; // columns 0 and 1 are user id / user average

    Segment root = new Segment();
    root.data = new Matrix(userCount, itemCount);
    root.user_id = new int[userCount];
    root.item_id = new string[itemCount];
    root.avgRatingByItem = new double[itemCount];
    root.avgRatingByUser = new double[userCount];

    // Items
    for (int i = 0; i < root.item_id.Length; i++)
    {
        root.item_id[i] = result.Columns[i + 2].ColumnName.Trim();
        root.avgRatingByItem[i] = RatingOrZero(result.Rows[0][i + 2]);
    }

    // Users and their ratings
    for (int i = 0; i < root.data.rows; i++)
    {
        DataRow row = result.Rows[i + 1];
        root.user_id[i] = Convert.ToInt32(row[0]);
        root.avgRatingByUser[i] = RatingOrZero(row[1]);
        for (int j = 0; j < root.data.cols; j++)
        {
            root.data[i, j] = RatingOrZero(row[j + 2]);
        }
    }
    return root;
}

// Returns 0 for a missing cell (null, DBNull or empty string), otherwise the cell as a double.
// The empty check is by value: comparing an object cell with == "" only compares references.
private static double RatingOrZero(object cell)
{
    if (cell == null || cell is DBNull)
    {
        return 0;
    }
    string text = cell as string;
    if (text != null && text.Length == 0)
    {
        return 0;
    }
    return Convert.ToDouble(cell);
}
/// <summary>
/// Training phase: averages <c>Correlation(i, j, k, segment)</c> over all item triples
/// i &lt; j &lt; k, leaving out the last item column.
/// </summary>
/// <param name="segment">Segment whose items are combined.</param>
/// <returns>
/// The mean of the finite correlation values; NaN when no triple yields a finite value.
/// </returns>
public static double Correlation_Avg(Segment segment) // Training Phase
{
    double sum = 0;
    int count = 0;
    int itemCount = segment.item_id.Length;

    for (int i = 0; i < itemCount - 3; i++)
    {
        for (int j = i + 1; j < itemCount - 2; j++)
        {
            for (int k = j + 1; k < itemCount - 1; k++)
            {
                // Correlation is deterministic, so a NaN or infinite value is simply
                // skipped; recomputing it would return the same value again.
                double value = Correlation(i, j, k, segment);
                if (Double.IsNaN(value) || Double.IsInfinity(value))
                {
                    continue;
                }
                sum += value;
                count++;
            }
        }
    }

    // With count == 0 this is 0.0 / 0, i.e. NaN.
    return sum / count;
}
/// <summary>
/// Rebuilds the <c>segments</c> table: for every combination of time, budget, companion and
/// weather (except the all-wildcard one) the segment is resampled, trained and tested, and
/// the results are stored through <c>pr_insertSegment</c>. A final 'All' row is inserted
/// after the loop.
/// </summary>
/// <returns>Always true.</returns>
public static bool GetStrongSegments()
{
    // NOTE(review): the returned root segment is not used below; the call is kept so the
    // sequence of cube/database operations stays as it was.
    Segment root = Segment.GetRoot();

    double sum_Correlation_Avg_root = 0;
    int count_Correlation_Avg_root = 0;
    DbHelper.RunScripts("truncate table segments", "Data Warehouse");

    foreach (Time time in Time.GetAll())
    foreach (Budget budget in Budget.GetAllData())
    foreach (Companion companion in Companion.GetAllData())
    foreach (Weather weather in Weather.GetAllData())
    {
        // The all-wildcard combination is inserted once after the loop.
        if (time.period_of_day == Time.Period_Of_Day.All
            && time.period_of_week == Time.Period_Of_Week.All
            && time.season == Time.Season.All
            && budget.id == 0
            && companion.id == 0
            && weather.id == 0)
        {
            continue;
        }

        Segment segment = new Segment();
        segment.time = time;
        segment.budget = budget;
        segment.companion = companion;
        segment.weather = weather;
        segment.GetData();
        if (segment.data.CountCells() < 10)
        {
            continue;
        }

        // Resample the segment and accumulate results over the samples.
        double performamce_segment = 0;
        double performance_root = 0;
        double correlation_avg_segment = 0;
        List<Sample> resamples = Sample.Resampling(segment);
        foreach (Sample sample in resamples)
        {
            double temp_corr_avg_segment = sample.Train();
            if (!Double.IsNaN(temp_corr_avg_segment))
            {
                correlation_avg_segment += temp_corr_avg_segment;
                performamce_segment += sample.Test(temp_corr_avg_segment);
            }

            // Same evaluation cells, but trained on the training-root data.
            Sample sample_root = new Sample(sample.Evaluation, sample.Training_root);
            double temp_corr_root = sample_root.Train();
            if (!Double.IsNaN(temp_corr_root))
            {
                sum_Correlation_Avg_root += temp_corr_root;
                count_Correlation_Avg_root += 1;
                performance_root += sample_root.Test(sum_Correlation_Avg_root / count_Correlation_Avg_root);
            }
        }

        // NaN results are stored as the sentinel -9999 (before the division by 10 below).
        if (Double.IsNaN(performamce_segment))
        {
            performamce_segment = -9999;
        }
        if (Double.IsNaN(correlation_avg_segment))
        {
            correlation_avg_segment = -9999;
        }
        if (Double.IsNaN(performance_root))
        {
            performance_root = -9999;
        }

        string insertSegment = "pr_insertSegment "
            + "'" + segment.time.period_of_day.ToString() + "'"
            + ", '" + segment.time.period_of_week.ToString() + "'"
            + ", '" + segment.time.season.ToString() + "'"
            + ", " + segment.budget.id
            + ", " + segment.companion.id
            + ", " + segment.weather.id
            + ", " + FormatSqlNumber(performamce_segment / 10)
            + ", " + FormatSqlNumber(correlation_avg_segment / 10)
            + ", " + FormatSqlNumber(performance_root / 10)
            + ", " + FormatSqlNumber(sum_Correlation_Avg_root / count_Correlation_Avg_root);
        try
        {
            DbHelper.RunScripts(insertSegment, "Data Warehouse");
        }
        catch (Exception ex)
        {
            // Best effort: a failed insert must not stop the remaining segments, but it
            // is reported instead of being silently discarded.
            System.Diagnostics.Trace.TraceError("Insert failed for [" + insertSegment + "]: " + ex);
        }

        // Removal of child segments that perform worse than their parents is disabled.
    }

    // NOTE(review): the root average is NaN when no root sample trained successfully -
    // confirm how pr_insertSegment should receive that case.
    DbHelper.RunScripts("pr_insertSegment "
        + "'All', 'All', 'All'"
        + ", " + 0
        + ", " + 0
        + ", " + 0
        + ", " + 9999
        + ", " + FormatSqlNumber(sum_Correlation_Avg_root / count_Correlation_Avg_root), "Data Warehouse");
    return true;
}

// Formats a number for embedding in a SQL script. The invariant culture is used so the
// decimal separator is always '.', whatever the current culture of the machine is.
private static string FormatSqlNumber(double value)
{
    return value.ToString(System.Globalization.CultureInfo.InvariantCulture);
}