/// <summary>Fills the correlation entries from the overlap values returned by Overlap.ComputeUShort</summary>
/// <remarks>
/// Only index pairs (x, y) with y &lt; x are assigned by this method; the diagonal is not touched.
/// </remarks>
/// <param name="entity_data">the boolean matrix the overlap values and the per-row entry counts are read from</param>
void ComputeCorrelationsUShortOverlap(IBooleanMatrix entity_data)
{
	var overlap = Overlap.ComputeUShort(entity_data);

	for (int first = 0; first < NumEntities; first++)
	{
		for (int second = 0; second < first; second++)
		{
			// gather the three inputs of the correlation formula, then store the result
			var shared_count = overlap[first, second];
			var first_size   = entity_data.NumEntriesByRow(first);
			var second_size  = entity_data.NumEntriesByRow(second);

			this[first, second] = ComputeCorrelationFromOverlap(shared_count, first_size, second_size);
		}
	}
}
/// <summary>Fills the correlation entries in both directions from the overlap values returned by Overlap.ComputeUInt</summary>
/// <remarks>
/// For every pair (x, y) with y &lt; x two entries are written: [x, y] and [y, x].
/// The second one is computed with the two row sizes passed in swapped order.
/// </remarks>
/// <param name="entity_data">the boolean matrix the overlap values and the per-row entry counts are read from</param>
void ComputeCorrelationsUIntOverlap(IBooleanMatrix entity_data)
{
	var overlap = Overlap.ComputeUInt(entity_data);

	for (int row = 0; row < num_entities; row++)
	{
		for (int col = 0; col < row; col++)
		{
			// direction row -> col
			this[row, col] = ComputeCorrelationFromOverlap(
				overlap[row, col],
				entity_data.NumEntriesByRow(row),
				entity_data.NumEntriesByRow(col));

			// direction col -> row: same overlap value, row sizes swapped
			this[col, row] = ComputeCorrelationFromOverlap(
				overlap[row, col],
				entity_data.NumEntriesByRow(col),
				entity_data.NumEntriesByRow(row));
		}
	}
}
/// <summary>Fills the correlation entries in both directions from the overlap values returned by Overlap.ComputeUShort</summary>
/// <remarks>
/// Each pair (x, y) with y &lt; x yields two writes, [x, y] and [y, x]; the row sizes
/// are handed to ComputeCorrelationFromOverlap in swapped order for the second write.
/// </remarks>
/// <param name="entity_data">the boolean matrix the overlap values and the per-row entry counts are read from</param>
void ComputeCorrelationsUShortOverlap(IBooleanMatrix entity_data)
{
	var overlap = Overlap.ComputeUShort(entity_data);

	for (int a = 0; a < num_entities; a++)
		for (int b = 0; b < a; b++)
		{
			// entry for (a, b)
			this[a, b] = ComputeCorrelationFromOverlap(
				overlap[a, b], entity_data.NumEntriesByRow(a), entity_data.NumEntriesByRow(b));

			// entry for (b, a), using the same overlap cell
			this[b, a] = ComputeCorrelationFromOverlap(
				overlap[a, b], entity_data.NumEntriesByRow(b), entity_data.NumEntriesByRow(a));
		}
}
/// <summary>Fills the correlation entries from the overlap values returned by Overlap.ComputeUInt</summary>
/// <remarks>
/// Only index pairs (x, y) with y &lt; x are assigned by this method; the diagonal is not touched.
/// </remarks>
/// <param name="entity_data">the boolean matrix the overlap values and the per-row entry counts are read from</param>
void ComputeCorrelationsUIntOverlap(IBooleanMatrix entity_data)
{
	var overlap = Overlap.ComputeUInt(entity_data);

	for (int row = 0; row < NumEntities; row++)
		for (int col = 0; col < row; col++)
		{
			// read the inputs in a fixed order, then write the single entry
			var common   = overlap[row, col];
			var row_size = entity_data.NumEntriesByRow(row);
			var col_size = entity_data.NumEntriesByRow(col);

			this[row, col] = ComputeCorrelationFromOverlap(common, row_size, col_size);
		}
}
/// <summary>Computes cosine correlations between the rows of a boolean matrix, using a sparse overlap counter</summary>
/// <remarks>
/// The overlap of two rows is counted by walking over the rows of the transposed matrix.
/// Counts are stored once per unordered pair, at [smaller index, larger index].
/// Only pairs with a non-empty overlap entry receive a correlation value; the diagonal is set to 1.
/// </remarks>
/// <param name="entity_data">the boolean matrix whose rows are compared</param>
public override void ComputeCorrelations(IBooleanMatrix entity_data)
{
	// cast once instead of once per transposed row
	var transpose = (IBooleanMatrix) entity_data.Transpose();

	var overlap = new SparseMatrix<int>(entity_data.NumberOfRows, entity_data.NumberOfRows);

	// go over all (other) entities
	for (int row_id = 0; row_id < transpose.NumberOfRows; row_id++)
	{
		var row = transpose.GetEntriesByRow(row_id);

		for (int i = 0; i < row.Count; i++)
		{
			int x = row[i];

			for (int j = i + 1; j < row.Count; j++)
			{
				int y = row[j];

				// keep every pair in one canonical cell: [min, max]
				if (x < y)
					overlap[x, y]++;
				else
					overlap[y, x]++;
			}
		}
	}

	// the diagonal of the correlation matrix
	for (int i = 0; i < num_entities; i++)
		this[i, i] = 1;

	// compute cosine
	foreach (var index_pair in overlap.NonEmptyEntryIDs)
	{
		int x = index_pair.First;
		int y = index_pair.Second;

		// Multiply in double precision: the product of two row sizes can exceed
		// the int range, which would wrap around and make Math.Sqrt return NaN.
		double size_product = (double) entity_data.NumEntriesByRow(x) * entity_data.NumEntriesByRow(y);

		this[x, y] = (float) (overlap[x, y] / Math.Sqrt(size_product));
	}
}
/// <summary>Optimizes the rows of one factor matrix while the other factor matrix is held fixed</summary>
/// <remarks>
/// For every row u of W, a num_factors x num_factors matrix m = HH + HC_minus_IH + Regularization * I
/// is assembled from H, inverted, and multiplied with the vector HCp to obtain the new row W[u, *].
/// The code comments are phrased in terms of computing the user factors;
/// it works the same with users and items exchanged.
/// </remarks>
/// <param name="data">the boolean matrix whose row u lists the entries belonging to row u of W</param>
/// <param name="inverse_data">the boolean matrix whose per-row entry counts are used for the weighting; its number of rows bounds the HCp summation</param>
/// <param name="W">the factor matrix that is overwritten</param>
/// <param name="H">the factor matrix that is only read</param>
void Optimize(IBooleanMatrix data, IBooleanMatrix inverse_data, Matrix<double> W, Matrix<double> H)
{
	var HH          = new Matrix<double>(num_factors, num_factors);
	var HC_minus_IH = new Matrix<double>(num_factors, num_factors);
	var HCp         = new double[num_factors];

	var m = new MathNet.Numerics.LinearAlgebra.Matrix(num_factors, num_factors);
	MathNet.Numerics.LinearAlgebra.Matrix m_inv;
	// TODO speed up using more parts of that library

	// TODO using properties gives a 3-5% performance penalty

	// Membership flags for the current row, reused for all users.
	// They replace a row.Contains(i) call that was repeated for every factor
	// although the answer does not depend on the factor.
	// NOTE(review): assumes inverse_data.NumberOfRows does not change during this call
	int num_inverse_rows = inverse_data.NumberOfRows;
	var is_in_row = new bool[num_inverse_rows];

	// (1) create HH in O(f^2|Items|)
	// HH is symmetric
	for (int f_1 = 0; f_1 < num_factors; f_1++)
		for (int f_2 = 0; f_2 < num_factors; f_2++)
		{
			double d = 0;
			for (int i = 0; i < H.dim1; i++)
				d += H[i, f_1] * H[i, f_2];
			HH[f_1, f_2] = d;
		}

	// (2) optimize all U
	// HC_minus_IH is symmetric
	for (int u = 0; u < W.dim1; u++)
	{
		var row = data.GetEntriesByRow(u);

		// prepare KDD Cup specific weighting
		int num_user_items = row.Count;
		int user_positive_weight_sum = 0;
		foreach (int i in row)
			user_positive_weight_sum += inverse_data.NumEntriesByRow(i);
		double neg_weight_normalization = (double) (num_user_items * (1 + CPos)) / (Feedback.Count - user_positive_weight_sum);
		// TODO precompute
		// TODO check whether this is correct

		// mark the entries of this row; IDs outside [0, num_inverse_rows) are never
		// visited by the HCp loop below, so they are skipped here as well
		foreach (int i in row)
			if (i >= 0 && i < num_inverse_rows)
				is_in_row[i] = true;

		// create HC_minus_IH in O(f^2|S_u|)
		for (int f_1 = 0; f_1 < num_factors; f_1++)
			for (int f_2 = 0; f_2 < num_factors; f_2++)
			{
				double d = 0;
				foreach (int i in row)
					d += H[i, f_1] * H[i, f_2] * CPos;
				HC_minus_IH[f_1, f_2] = d;
			}

		// create HCp; this visits every row of inverse_data for every factor
		for (int f = 0; f < num_factors; f++)
		{
			double d = 0;
			for (int i = 0; i < num_inverse_rows; i++)
				if (is_in_row[i])
					d += H[i, f] * (1 + CPos);
				else
					d += H[i, f] * inverse_data.NumEntriesByRow(i) * neg_weight_normalization;
			HCp[f] = d;
		}

		// create m = HH + HC_minus_IH + reg*I
		// m is symmetric
		// the inverse m_inv is symmetric
		for (int f_1 = 0; f_1 < num_factors; f_1++)
			for (int f_2 = 0; f_2 < num_factors; f_2++)
			{
				double d = HH[f_1, f_2] + HC_minus_IH[f_1, f_2];
				if (f_1 == f_2)
					d += Regularization;
				m[f_1, f_2] = d;
			}
		m_inv = m.Inverse();

		// write back optimal W
		for (int f = 0; f < num_factors; f++)
		{
			double d = 0;
			for (int f_2 = 0; f_2 < num_factors; f_2++)
				d += m_inv[f, f_2] * HCp[f_2];
			W[u, f] = d;
		}

		// reset the flags so the array is clean for the next user
		foreach (int i in row)
			if (i >= 0 && i < num_inverse_rows)
				is_in_row[i] = false;
	}
}
/// <summary>Computes cosine correlations between the rows of a boolean matrix, counting overlaps in ushort cells</summary>
/// <remarks>
/// The overlap of two rows is counted by walking over the rows of the transposed matrix.
/// The diagonal is set to 1. Pairs for which the product of the row sizes is 0 are left unassigned.
/// NOTE(review): the counters are ushort, so an overlap above 65535 presumably cannot be represented — confirm
/// that callers only use this variant for sufficiently small data.
/// </remarks>
/// <param name="entity_data">the boolean matrix whose rows are compared</param>
void ComputeCorrelationsUShortOverlap(IBooleanMatrix entity_data)
{
	// direct cast: a wrong type fails here with a clear exception instead of a later null dereference
	var transpose = (IBooleanMatrix) entity_data.Transpose();

	var overlap = new SymmetricMatrix<ushort>(entity_data.NumberOfRows);

	// go over all (other) entities
	for (int row_id = 0; row_id < transpose.NumberOfRows; row_id++)
	{
		var row = transpose.GetEntriesByRow(row_id);

		for (int i = 0; i < row.Count; i++)
		{
			int x = row[i];
			for (int j = i + 1; j < row.Count; j++)
				overlap[x, row[j]]++;
		}
	}

	// the diagonal of the correlation matrix
	for (int i = 0; i < num_entities; i++)
		this[i, i] = 1;

	// compute cosine
	for (int x = 0; x < num_entities; x++)
		for (int y = 0; y < x; y++)
		{
			// Widen BEFORE multiplying: int * int is evaluated in 32 bits and may wrap around
			// even though the result is stored in a long.
			long size_product = (long) entity_data.NumEntriesByRow(x) * entity_data.NumEntriesByRow(y);

			if (size_product > 0)
				this[x, y] = (float) (overlap[x, y] / Math.Sqrt(size_product));
		}
}
/// <summary>Optimizes the rows of one factor matrix while the other factor matrix is held fixed</summary>
/// <remarks>
/// For every row u of W, a num_factors x num_factors matrix m = HH + HC_minus_IH + Regularization * I
/// is assembled from H, inverted, and multiplied with the vector HCp to obtain the new row W[u, *].
/// The code comments are phrased in terms of computing the user factors;
/// it works the same with users and items exchanged.
/// </remarks>
/// <param name="data">the boolean matrix whose row u lists the entries belonging to row u of W</param>
/// <param name="inverse_data">the boolean matrix whose per-row entry counts are used for the weighting; its number of rows bounds the HCp summation</param>
/// <param name="W">the factor matrix that is overwritten</param>
/// <param name="H">the factor matrix that is only read</param>
void Optimize(IBooleanMatrix data, IBooleanMatrix inverse_data, Matrix<double> W, Matrix<double> H)
{
	var HH = new Matrix<double>(num_factors, num_factors);
	var HC_minus_IH = new Matrix<double>(num_factors, num_factors);
	var HCp = new double[num_factors];

	var m = new MathNet.Numerics.LinearAlgebra.Matrix(num_factors, num_factors);
	MathNet.Numerics.LinearAlgebra.Matrix m_inv;
	// TODO speed up using more parts of that library

	// TODO using properties gives a 3-5% performance penalty

	// One flag per row of inverse_data, telling whether that ID occurs in the current row of data.
	// Filling it once per user avoids calling row.Contains(i) again for every factor,
	// since the membership does not depend on the factor.
	// NOTE(review): assumes inverse_data.NumberOfRows does not change during this call
	int num_inverse_rows = inverse_data.NumberOfRows;
	var is_in_row = new bool[num_inverse_rows];

	// (1) create HH in O(f^2|Items|)
	// HH is symmetric
	for (int f_1 = 0; f_1 < num_factors; f_1++)
	{
		for (int f_2 = 0; f_2 < num_factors; f_2++)
		{
			double d = 0;
			for (int i = 0; i < H.dim1; i++)
			{
				d += H[i, f_1] * H[i, f_2];
			}
			HH[f_1, f_2] = d;
		}
	}

	// (2) optimize all U
	// HC_minus_IH is symmetric
	for (int u = 0; u < W.dim1; u++)
	{
		var row = data.GetEntriesByRow(u);

		// prepare KDD Cup specific weighting
		int num_user_items = row.Count;
		int user_positive_weight_sum = 0;
		foreach (int i in row)
		{
			user_positive_weight_sum += inverse_data.NumEntriesByRow(i);
		}
		double neg_weight_normalization = (double)(num_user_items * (1 + CPos)) / (Feedback.Count - user_positive_weight_sum);
		// TODO precompute
		// TODO check whether this is correct

		// mark the entries of this row; IDs outside [0, num_inverse_rows) are never
		// visited by the HCp loop below, so they are skipped here as well
		foreach (int i in row)
		{
			if (i >= 0 && i < num_inverse_rows)
			{
				is_in_row[i] = true;
			}
		}

		// create HC_minus_IH in O(f^2|S_u|)
		for (int f_1 = 0; f_1 < num_factors; f_1++)
		{
			for (int f_2 = 0; f_2 < num_factors; f_2++)
			{
				double d = 0;
				foreach (int i in row)
				{
					d += H[i, f_1] * H[i, f_2] * CPos;
				}
				HC_minus_IH[f_1, f_2] = d;
			}
		}

		// create HCp; this visits every row of inverse_data for every factor
		for (int f = 0; f < num_factors; f++)
		{
			double d = 0;
			for (int i = 0; i < num_inverse_rows; i++)
			{
				if (is_in_row[i])
				{
					d += H[i, f] * (1 + CPos);
				}
				else
				{
					d += H[i, f] * inverse_data.NumEntriesByRow(i) * neg_weight_normalization;
				}
			}
			HCp[f] = d;
		}

		// create m = HH + HC_minus_IH + reg*I
		// m is symmetric
		// the inverse m_inv is symmetric
		for (int f_1 = 0; f_1 < num_factors; f_1++)
		{
			for (int f_2 = 0; f_2 < num_factors; f_2++)
			{
				double d = HH[f_1, f_2] + HC_minus_IH[f_1, f_2];
				if (f_1 == f_2)
				{
					d += Regularization;
				}
				m[f_1, f_2] = d;
			}
		}
		m_inv = m.Inverse();

		// write back optimal W
		for (int f = 0; f < num_factors; f++)
		{
			double d = 0;
			for (int f_2 = 0; f_2 < num_factors; f_2++)
			{
				d += m_inv[f, f_2] * HCp[f_2];
			}
			W[u, f] = d;
		}

		// reset the flags so the array is clean for the next user
		foreach (int i in row)
		{
			if (i >= 0 && i < num_inverse_rows)
			{
				is_in_row[i] = false;
			}
		}
	}
}
/// <summary>Computes cosine correlations between the rows of a boolean matrix, using a sparse overlap counter</summary>
/// <remarks>
/// Overlaps are counted per row of the transposed matrix and stored once per unordered pair,
/// in the cell [smaller index, larger index]. The diagonal is set to 1;
/// apart from that, only pairs with a non-empty overlap entry are assigned.
/// </remarks>
/// <param name="entity_data">the boolean matrix whose rows are compared</param>
public override void ComputeCorrelations(IBooleanMatrix entity_data)
{
	// cast once instead of once per transposed row
	var transpose = (IBooleanMatrix) entity_data.Transpose();

	var overlap = new SparseMatrix<int>(entity_data.NumberOfRows, entity_data.NumberOfRows);

	// go over all (other) entities
	for (int row_id = 0; row_id < transpose.NumberOfRows; row_id++)
	{
		var row = transpose.GetEntriesByRow(row_id);

		for (int i = 0; i < row.Count; i++)
		{
			int x = row[i];

			for (int j = i + 1; j < row.Count; j++)
			{
				int y = row[j];

				// keep every pair in one canonical cell: [min, max]
				if (x < y)
					overlap[x, y]++;
				else
					overlap[y, x]++;
			}
		}
	}

	// the diagonal of the correlation matrix
	for (int i = 0; i < num_entities; i++)
		this[i, i] = 1;

	// compute cosine
	foreach (var index_pair in overlap.NonEmptyEntryIDs)
	{
		int x = index_pair.First;
		int y = index_pair.Second;

		// The row sizes are multiplied as doubles; an integer product could exceed
		// the int range, wrap around to a negative value and turn the square root into NaN.
		double size_product = (double) entity_data.NumEntriesByRow(x) * entity_data.NumEntriesByRow(y);

		this[x, y] = (float) (overlap[x, y] / Math.Sqrt(size_product));
	}
}
/// <summary>Computes the Jaccard index between the rows of a boolean matrix</summary>
/// <remarks>
/// For two rows x and y the value is overlap / (size(x) + size(y) - overlap), i.e. the size of the
/// intersection divided by the size of the union. The diagonal is set to 1.
/// Pairs whose union is empty (both rows have no entries) are left unassigned.
/// </remarks>
/// <param name="entity_data">the boolean matrix whose rows are compared</param>
public override void ComputeCorrelations(IBooleanMatrix entity_data)
{
	// direct cast: a wrong type fails here with a clear exception instead of a later null dereference
	var transpose = (IBooleanMatrix) entity_data.Transpose();

	var overlap = new SymmetricMatrix<int>(entity_data.NumberOfRows);

	// go over all (other) entities
	for (int row_id = 0; row_id < transpose.NumberOfRows; row_id++)
	{
		var row = transpose.GetEntriesByRow(row_id);

		for (int i = 0; i < row.Count; i++)
		{
			int x = row[i];
			for (int j = i + 1; j < row.Count; j++)
			{
				int y = row[j];
				overlap[x, y]++;
			}
		}
	}

	// the diagonal of the correlation matrix
	for (int i = 0; i < num_entities; i++)
		this[i, i] = 1;

	// compute Jaccard index
	for (int x = 0; x < num_entities; x++)
		for (int y = 0; y < x; y++)
		{
			int intersection_size = overlap[x, y];
			int union_size = entity_data.NumEntriesByRow(x) + entity_data.NumEntriesByRow(y) - intersection_size;

			// An empty union would mean a division by zero; leave such pairs unassigned.
			if (union_size <= 0)
				continue;

			// Convert to float BEFORE dividing: an integer division would truncate
			// every proper fraction to 0.
			this[x, y] = (float) intersection_size / union_size;
		}
}