public Features(ALSWRFactorizer factorizer) {
    dataModel = factorizer.dataModel;
    numFeatures = factorizer.numFeatures;
    var random = RandomUtils.getRandom();

    // item feature matrix M: numItems x numFeatures
    M = new double[dataModel.GetNumItems()][];
    var itemIDsIterator = dataModel.GetItemIDs();
    while (itemIDsIterator.MoveNext()) {
        long itemID = itemIDsIterator.Current;
        int itemIDIndex = factorizer.itemIndex(itemID);
        M[itemIDIndex] = new double[numFeatures];
        // seed the first feature with the item's average rating
        // (the method name's typo is inherited from upstream Mahout),
        // and the remaining features with small random values
        M[itemIDIndex][0] = averateRating(itemID);
        for (int feature = 1; feature < numFeatures; feature++) {
            M[itemIDIndex][feature] = random.nextDouble() * 0.1;
        }
    }

    // user feature matrix U: numUsers x numFeatures, initialized to zeros
    U = new double[dataModel.GetNumUsers()][];
    for (int i = 0; i < U.Length; i++) {
        U[i] = new double[numFeatures];
    }
}
public override IList<IRecommendedItem> Recommend(long userID, int howMany, IDRescorer rescorer) {
    IDataModel dataModel = GetDataModel();
    int numItems = dataModel.GetNumItems();
    List<IRecommendedItem> result = new List<IRecommendedItem>(howMany);
    while (result.Count < howMany) {
        var it = dataModel.GetItemIDs();
        it.MoveNext();
        // emulate Java's LongPrimitiveIterator.skip(): advance the enumerator
        // to a uniformly random position among the items
        var skipNum = random.nextInt(numItems);
        for (int i = 0; i < skipNum; i++) {
            if (!it.MoveNext()) {
                break;
            }
        }
        long itemID = it.Current;
        // only recommend items the user has not rated yet
        if (dataModel.GetPreferenceValue(userID, itemID) == null) {
            result.Add(new GenericRecommendedItem(itemID, randomPref()));
        }
    }
    return result;
}
protected override FastIDSet doGetCandidateItems(long[] preferredItemIDs, IDataModel dataModel) {
    // start from all known items, then drop the ones the user already prefers
    FastIDSet possibleItemIDs = new FastIDSet(dataModel.GetNumItems());
    var allItemIDs = dataModel.GetItemIDs();
    while (allItemIDs.MoveNext()) {
        possibleItemIDs.Add(allItemIDs.Current);
    }
    possibleItemIDs.RemoveAll(preferredItemIDs);
    return possibleItemIDs;
}
/// <summary>
/// Like <see cref="GenericItemSimilarity(IItemSimilarity, IDataModel)"/>, but will only keep the specified
/// number of similarities from the given <see cref="IDataModel"/>. It keeps those with the highest similarity --
/// those that are therefore most important.
/// </summary>
/// <remarks>
/// Thanks to tsmorton for suggesting this and providing part of the implementation.
/// </remarks>
/// <param name="otherSimilarity">other <see cref="IItemSimilarity"/> to get similarities from</param>
/// <param name="dataModel">data model to get items from</param>
/// <param name="maxToKeep">maximum number of similarities to keep</param>
/// <exception cref="TasteException">if an error occurs while accessing the <see cref="IDataModel"/> items</exception>
public GenericItemSimilarity(IItemSimilarity otherSimilarity, IDataModel dataModel, int maxToKeep) {
    long[] itemIDs = GenericUserSimilarity.longIteratorToList(dataModel.GetItemIDs());
    var it = new DataModelSimilaritiesIterator(otherSimilarity, itemIDs);
    var keptSimilarities = TopItems.GetTopItemItemSimilarities(maxToKeep, it);
    initSimilarityMaps(keptSimilarities.GetEnumerator());
}
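A brief usage sketch of this capped constructor may help; LogLikelihoodSimilarity and the FileDataModel path constructor are assumptions carried over from the Mahout API this port mirrors, not names confirmed in this codebase:

// Keep only the 100 strongest item-item similarities computed by an existing metric.
// LogLikelihoodSimilarity and FileDataModel are assumed equivalents of the Mahout classes.
IDataModel model = new FileDataModel("ratings.csv");
IItemSimilarity rawSimilarity = new LogLikelihoodSimilarity(model);
IItemSimilarity cappedSimilarity = new GenericItemSimilarity(rawSimilarity, model, 100);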
public virtual long[] AllSimilarItemIDs(long itemID) {
    FastIDSet allSimilarItemIDs = new FastIDSet();
    var allItemIDs = dataModel.GetItemIDs();
    while (allItemIDs.MoveNext()) {
        long possiblySimilarItemID = allItemIDs.Current;
        // NaN similarity means "unknown", so keep only items with a defined similarity
        if (!Double.IsNaN(ItemSimilarity(itemID, possiblySimilarItemID))) {
            allSimilarItemIDs.Add(possiblySimilarItemID);
        }
    }
    return allSimilarItemIDs.ToArray();
}
[Test]
public void testGetItems() {
    var it = model.GetItemIDs();
    Assert.NotNull(it);
    Assert.True(it.MoveNext());
    Assert.AreEqual(123, it.Current);
    Assert.True(it.MoveNext());
    Assert.AreEqual(234, it.Current);
    Assert.True(it.MoveNext());
    Assert.AreEqual(456, it.Current);
    Assert.True(it.MoveNext());
    Assert.AreEqual(654, it.Current);
    Assert.True(it.MoveNext());
    Assert.AreEqual(789, it.Current);
    Assert.True(it.MoveNext());
    Assert.AreEqual(999, it.Current);
    // unlike the Java original, which expected a NoSuchElementException here,
    // a C# enumerator simply keeps returning false once it is exhausted
    Assert.False(it.MoveNext());
    Assert.False(it.MoveNext());
}
private void buildMappings() {
    userIDMapping = createIDMapping(dataModel.GetNumUsers(), dataModel.GetUserIDs());
    itemIDMapping = createIDMapping(dataModel.GetNumItems(), dataModel.GetItemIDs());
}
public override Factorization Factorize() {
    log.Info("starting to compute the factorization...");
    Features features = new Features(this);

    // feature maps necessary for solving for implicit feedback
    IDictionary<int, double[]> userY = null;
    IDictionary<int, double[]> itemY = null;

    if (usesImplicitFeedback) {
        userY = userFeaturesMapping(dataModel.GetUserIDs(), dataModel.GetNumUsers(), features.getU());
        itemY = itemFeaturesMapping(dataModel.GetItemIDs(), dataModel.GetNumItems(), features.getM());
    }

    IList<Task> tasks;

    for (int iteration = 0; iteration < numIterations; iteration++) {
        log.Info("iteration {0}", iteration);

        // fix M - compute U
        tasks = new List<Task>();
        var userIDsIterator = dataModel.GetUserIDs();
        try {
            ImplicitFeedbackAlternatingLeastSquaresSolver implicitFeedbackSolver = usesImplicitFeedback
                ? new ImplicitFeedbackAlternatingLeastSquaresSolver(numFeatures, lambda, alpha, itemY)
                : null;

            while (userIDsIterator.MoveNext()) {
                long userID = userIDsIterator.Current;
                var itemIDsFromUser = dataModel.GetItemIDsFromUser(userID).GetEnumerator();
                IPreferenceArray userPrefs = dataModel.GetPreferencesFromUser(userID);
                tasks.Add(Task.Factory.StartNew(() => {
                    // collect the item feature vectors this user has rated,
                    // then solve for the user's feature column
                    List<double[]> featureVectors = new List<double[]>();
                    while (itemIDsFromUser.MoveNext()) {
                        long itemID = itemIDsFromUser.Current;
                        featureVectors.Add(features.getItemFeatureColumn(itemIndex(itemID)));
                    }
                    var userFeatures = usesImplicitFeedback
                        ? implicitFeedbackSolver.solve(sparseUserRatingVector(userPrefs))
                        : AlternatingLeastSquaresSolver.solve(featureVectors, ratingVector(userPrefs), lambda, numFeatures);
                    features.setFeatureColumnInU(userIndex(userID), userFeatures);
                }));
            }
        } finally {
            try {
                // wait at most one second per user for the tasks to finish
                Task.WaitAll(tasks.ToArray(), 1000 * dataModel.GetNumUsers());
            } catch (AggregateException e) {
                log.Warn("Error when computing user features", e);
                throw;
            }
        }

        // fix U - compute M
        tasks = new List<Task>();
        var itemIDsIterator = dataModel.GetItemIDs();
        try {
            ImplicitFeedbackAlternatingLeastSquaresSolver implicitFeedbackSolver = usesImplicitFeedback
                ? new ImplicitFeedbackAlternatingLeastSquaresSolver(numFeatures, lambda, alpha, userY)
                : null;

            while (itemIDsIterator.MoveNext()) {
                long itemID = itemIDsIterator.Current;
                IPreferenceArray itemPrefs = dataModel.GetPreferencesForItem(itemID);
                tasks.Add(Task.Factory.StartNew(() => {
                    // collect the feature vectors of users who rated this item,
                    // then solve for the item's feature column
                    var featureVectors = new List<double[]>();
                    foreach (IPreference pref in itemPrefs) {
                        long userID = pref.GetUserID();
                        featureVectors.Add(features.getUserFeatureColumn(userIndex(userID)));
                    }
                    var itemFeatures = usesImplicitFeedback
                        ? implicitFeedbackSolver.solve(sparseItemRatingVector(itemPrefs))
                        : AlternatingLeastSquaresSolver.solve(featureVectors, ratingVector(itemPrefs), lambda, numFeatures);
                    features.setFeatureColumnInM(itemIndex(itemID), itemFeatures);
                }));
            }
        } finally {
            try {
                // wait at most one second per item for the tasks to finish
                Task.WaitAll(tasks.ToArray(), 1000 * dataModel.GetNumItems());
            } catch (AggregateException e) {
                log.Warn("Error when computing item features", e);
                throw;
            }
        }
    }

    log.Info("finished computation of the factorization...");
    return createFactorization(features.getU(), features.getM());
}
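For orientation, here is a minimal sketch of how this factorizer is typically wired into a recommender; the ALSWRFactorizer constructor arguments, the FileDataModel source, and the SVDRecommender wrapper follow the Mahout originals and are assumptions about this port:

// Factorize with 10 features, lambda = 0.065, 15 iterations,
// then serve recommendations from the learned factors.
IDataModel dataModel = new FileDataModel("ratings.csv");                  // assumed data source
IFactorizer factorizer = new ALSWRFactorizer(dataModel, 10, 0.065, 15);   // assumed ctor signature
IRecommender recommender = new SVDRecommender(dataModel, factorizer);
IList<IRecommendedItem> top5 = recommender.Recommend(42L, 5);             // top-5 items for user 42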
/// <summary>
/// Builds a list of item-item similarities given an <see cref="IItemSimilarity"/> implementation and an
/// <see cref="IDataModel"/>, rather than a list of <see cref="GenericItemSimilarity.ItemItemSimilarity"/>s.
/// </summary>
/// <remarks>
/// It's valid to build a <see cref="GenericItemSimilarity"/> this way, but it perhaps misses some of the point
/// of an item-based recommender. Item-based recommenders rest on the assumption that item-item similarities
/// are relatively fixed, and might already be known independent of user preferences. Hence it is useful to
/// inject that information directly, using the constructor that accepts precomputed similarities.
/// </remarks>
/// <param name="otherSimilarity">other <see cref="IItemSimilarity"/> to get similarities from</param>
/// <param name="dataModel">data model to get items from</param>
/// <exception cref="TasteException">if an error occurs while accessing the <see cref="IDataModel"/> items</exception>
public GenericItemSimilarity(IItemSimilarity otherSimilarity, IDataModel dataModel) {
    long[] itemIDs = GenericUserSimilarity.longIteratorToList(dataModel.GetItemIDs());
    initSimilarityMaps(new DataModelSimilaritiesIterator(otherSimilarity, itemIDs));
}
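To make the injection point concrete, a minimal sketch of the precomputed-similarities constructor mentioned above; the ItemItemSimilarity inner type and its (itemID1, itemID2, value) constructor mirror the Mahout original and are assumptions about this port:

// Supply fixed, precomputed item-item similarities directly,
// bypassing any on-line similarity computation.
var precomputed = new List<GenericItemSimilarity.ItemItemSimilarity> {
    new GenericItemSimilarity.ItemItemSimilarity(123L, 234L, 0.9),
    new GenericItemSimilarity.ItemItemSimilarity(123L, 456L, 0.4)
};
IItemSimilarity similarity = new GenericItemSimilarity(precomputed);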
public virtual IEnumerator<long> GetItemIDs() {
    // ignores items that only the "plus-one" anonymous user knows about,
    // which can't really happen in practice
    return _delegate.GetItemIDs();
}
public void testFactorizerWithToyData() {
    setUpToyData();

    var stopWatch = new System.Diagnostics.Stopwatch();
    stopWatch.Start();

    factorizer = new ParallelSGDFactorizer(dataModel, rank, lambda, numIterations, 0.01, 1, 0, 0);
    Factorization factorization = factorizer.Factorize();

    stopWatch.Stop();
    long duration = stopWatch.ElapsedMilliseconds;

    // a hold-out test would be better, but this is just a toy example, so we only check that the
    // factorization is close to the original matrix
    IRunningAverage avg = new FullRunningAverage();
    var userIDs = dataModel.GetUserIDs();
    IEnumerator<long> itemIDs;

    while (userIDs.MoveNext()) {
        long userID = userIDs.Current;
        foreach (IPreference pref in dataModel.GetPreferencesFromUser(userID)) {
            double rating = pref.GetValue();
            var userVector = factorization.getUserFeatures(userID);
            var itemVector = factorization.getItemFeatures(pref.GetItemID());
            double estimate = vectorDot(userVector, itemVector);
            double err = rating - estimate;
            avg.AddDatum(err * err);
        }
    }

    // accumulate the L2 regularization term over all user and item feature vectors
    double sum = 0.0;

    userIDs = dataModel.GetUserIDs();
    while (userIDs.MoveNext()) {
        long userID = userIDs.Current;
        var userVector = factorization.getUserFeatures(userID);
        sum += vectorDot(userVector, userVector);
    }

    itemIDs = dataModel.GetItemIDs();
    while (itemIDs.MoveNext()) {
        long itemID = itemIDs.Current;
        var itemVector = factorization.getItemFeatures(itemID);
        sum += vectorDot(itemVector, itemVector);
    }

    double rmse = Math.Sqrt(avg.GetAverage());
    double loss = avg.GetAverage() / 2 + lambda / 2 * sum;
    logger.Info("RMSE: " + rmse + ";\tLoss: " + loss + ";\tTime Used: " + duration);
    Assert.True(rmse < 0.2);
}
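For reference, the loss computed at the end of this test is the regularized squared-error objective of the factorization; the first term divides by the number of preferences n because avg is a running average rather than a sum:

\[
\mathcal{L} \;=\; \frac{1}{2n}\sum_{(u,i)}\bigl(r_{ui} - \mathbf{u}_u^{\top}\mathbf{m}_i\bigr)^2 \;+\; \frac{\lambda}{2}\Bigl(\sum_{u}\lVert\mathbf{u}_u\rVert^2 \;+\; \sum_{i}\lVert\mathbf{m}_i\rVert^2\Bigr)
\]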
public override IEnumerator<long> GetItemIDs() {
    return _delegate.GetItemIDs();
}