/// <summary>
        /// Read a Movie data file and register its Movie, its Reviews and their Reviewers.
        ///
        /// Format of file:
        ///    First line has MovieId followed by a colon.
        ///    Subsequent lines have: ReviewerId,Rating,ReviewDate
        ///
        /// Blank lines are ignored. The whole file is parsed before anything is registered,
        /// so a file that cannot be read or parsed adds nothing to Movies or ReviewersById.
        /// Any failure is logged through Logger.Error and reported as a false return value.
        /// </summary>
        /// <param name="dataFile">Name of file to read.</param>
        /// <returns>True if successfully read, false if no such file or failed to read.</returns>
        public bool LoadFile(string dataFile)
        {
            try
            {
                // The file holds machine-written integers, so parse them independently of the current culture.
                var culture = System.Globalization.CultureInfo.InvariantCulture;

                // Read the file exactly once (enumerating File.ReadLines twice would open it twice)
                // and drop blank lines so stray whitespace does not fail the whole load.
                var lines = File.ReadLines(dataFile)
                            .Select(line => line.Trim())
                            .Where(line => line.Length > 0)
                            .ToList();

                if (lines.Count == 0)
                {
                    throw new InvalidOperationException("File contains no MovieId line.");
                }

                var movieId = Int32.Parse(lines[0].Replace(":", ""), culture);

                // Parse every review line up front; a malformed line throws here, before any state is touched.
                var reviews = lines
                              .Skip(1)
                              .Select(line =>
                              {
                                  var fields = line.Split(',');
                                  if (fields.Length < 2)
                                  {
                                      throw new FormatException($"Expected 'ReviewerId,Rating,ReviewDate' but found '{line}'.");
                                  }
                                  // Skip the ReviewDate. Not yet using this information.
                                  return new Review {
                                      ReviewerId = Int32.Parse(fields[0], culture),
                                      Rating     = Int32.Parse(fields[1], culture)
                                  };
                              })
                              .ToList();

                // Everything parsed cleanly: now register the movie, its reviews and their reviewers.
                var movie = new Movie(movieId);
                Movies.Add(movie);
                foreach (var review in reviews)
                {
                    movie.Add(review);
                    if (!ReviewersById.TryGetValue(review.ReviewerId, out Reviewer reviewer))
                    {
                        // First time this reviewer is seen in any file.
                        reviewer = new Reviewer(review.ReviewerId);
                        ReviewersById[review.ReviewerId] = reviewer;
                    }
                    reviewer.Add(movieId, (uint)review.Rating);
                }
            }
            catch (Exception e)
            {
                Logger.Error($"LoadFile failed for file {dataFile} with error: {e.Message}");
                return false;
            }
            return true;
        }
        // Example no. 2 (0)
        /// <summary>
        /// From among the Reviewers who have reviewed this Movie, find those that have the
        /// most similar opinions about other movies, if such an overlap exists.
        /// </summary>
        /// <param name="reviewer">Reviewer whose opinion of the Movie we do not know and hope to guess.
        /// Its Point and HilbertOrder must already have been computed.</param>
        /// <param name="k">Number of similar Reviewers to search for.</param>
        /// <param name="window">Half of the maximum number of Reviewers to compare.
        /// If the number of candidate Reviewers is not greater than twice the larger of k and window,
        /// all candidates will be checked.
        /// Otherwise, a Binary search by Hilbert position finds where the given Reviewer would be inserted,
        /// and only the candidates from window positions before that point onward are checked,
        /// up to a total of twice the window size.
        /// </param>
        /// <param name="reviewersById">This can be all the known Reviewers or just those drawn from a single cluster if the data has been clustered.
        /// This set contains not just those that have reviewed this Movie.</param>
        /// <returns>Up to K Reviewers whose opinions are similar to the given Reviewer, drawn from an intersection
        /// between reviewersById and the set of reviewers for this movie,
        /// sorted from nearest to farthest, associated with the square distance between the Reviewer and its neighbor.
        /// The List may be empty, if no overlap exists.</returns>
        /// <exception cref="NullReferenceException">The given Reviewer has no Point yet.</exception>
        /// <exception cref="ApplicationException">The given Reviewer has a negative HilbertOrder.</exception>
        public List <Tuple <Reviewer, long> > KNearest(Reviewer reviewer, int k, int window, Dictionary <int, Reviewer> reviewersById)
        {
            var point = reviewer.Point;

            // NOTE: This assumes that both the Point and the HilbertOrder for each Reviewer has already been computed.
            if (point == null)
            {
                throw new NullReferenceException($"The Point for reviewer {reviewer.ReviewerId} has not yet been calculated.");
            }
            if (reviewer.HilbertOrder < 0)
            {
                throw new ApplicationException($"Reviewer {reviewer.ReviewerId} has not yet had its HilbertOrder calculated.");
            }

            // Keep only this movie's reviewers that are also present in reviewersById,
            // preserving the order of ReviewerIdsSorted and resolving each id with a single lookup.
            var candidates = new List <Reviewer>();
            foreach (var id in ReviewerIdsSorted)
            {
                Reviewer candidate;
                if (reviewersById.TryGetValue(id, out candidate))
                {
                    candidates.Add(candidate);
                }
            }

            if (candidates.Count == 0)
            {
                return new List <Tuple <Reviewer, long> >();
            }

            IEnumerable <Reviewer> reviewersToMeasure;
            if (candidates.Count <= 2L * Math.Max(k, window))
            {
                // Few enough candidates: measure the distance to every one of them.
                reviewersToMeasure = candidates;
            }
            else
            {
                // Only measure some reviewers, those within window positions on either side
                // of the insertion point where this reviewer would go if added to the list
                // in Hilbert order.
                // The search must compare Hilbert orders with Hilbert orders; searching the
                // reviewer ids themselves for a Hilbert order would compare unrelated values.
                // NOTE(review): relies on ReviewerIdsSorted being in ascending Hilbert order - confirm.
                var hilbertOrders = candidates.Select(c => c.HilbertOrder).ToList();
                var insertionPosition = hilbertOrders.BinarySearch(reviewer.HilbertOrder);
                if (insertionPosition < 0)
                {
                    // No exact match: the bitwise complement is the index of the next larger element.
                    insertionPosition = ~insertionPosition;
                }
                var lowPosition = Math.Max(0, insertionPosition - window);
                reviewersToMeasure = candidates.Skip(lowPosition).Take(2 * window);
            }

            return reviewersToMeasure
                   .Select(r => new Tuple <Reviewer, long>(r, point.Measure(r.Point)))
                   .OrderBy(tup => tup.Item2)
                   .Take(k)
                   .ToList();
        }
        // Example no. 3 (0)
        /// <summary>
        /// Simple way of inferring a rating, weighting each neighboring Review's Rating
        /// in proportion to how close it is to the test reviewer, then taking the weighted average.
        ///
        /// Example:
        ///   If k = 10, the most similar Reviewer is weighted ten, the next is weighted nine,
        ///   on to the least similar Reviewer, whose weight is one.
        ///
        /// If fewer than k neighbors are found, the weights run from the number actually found
        /// down to one, so the result is always a weighted average of real Ratings.
        /// If the Reviewer has already reviewed this Movie, that actual Rating is returned.
        /// If no neighbors are found at all, MeanRating is returned.
        /// </summary>
        /// <param name="reviewer">Reviewer whose Rating for this Movie is sought.</param>
        /// <param name="k">Number of near neighbors to include in the computation.</param>
        /// <param name="window">The k nearest by Euclidean distance will be chosen from among the
        /// 2*window nearest according to the Hilbert ordering.</param>
        /// <param name="reviewersById">This can be all the known Reviewers or just those drawn from a single cluster if the data has been clustered.
        /// This set contains not just those that have reviewed this Movie.</param>
        /// <returns>An estimated Rating between one and five.</returns>
        public double InferRating(Reviewer reviewer, int k, int window, Dictionary <int, Reviewer> reviewersById)
        {
            if (Reviews.ContainsKey(reviewer.ReviewerId))
            {
                // No need to estimate; we have the actual value here!
                return Reviews[reviewer.ReviewerId].Rating;
            }

            var nearest = KNearest(reviewer, k, window, reviewersById);
            var neighborCount = nearest.Count;

            if (neighborCount == 0)
            {
                return MeanRating;
            }

            // Weights are neighborCount, neighborCount - 1, ..., 1 (nearest first);
            // their sum is neighborCount * (neighborCount + 1) / 2, which normalizes the average.
            // Both must be based on the number of neighbors actually returned, which can be
            // smaller than k, otherwise the result is no longer a weighted average.
            double denominator = (double)neighborCount * (neighborCount + 1) / 2.0;
            double numerator   = 0.0;
            for (var i = 0; i < neighborCount; i++)
            {
                var weight = neighborCount - i;
                numerator += Reviews[nearest[i].Item1.ReviewerId].Rating * weight;
            }

            return numerator / denominator;
        }