/// <summary>
/// Gets a page of videos related to the specified video by running a Solr MoreLikeThis (MLT) query
/// against the "killrvideo.videos" core.
/// </summary>
/// <param name="query">
/// Identifies the video to find related videos for, the page size, and an optional paging state. The paging
/// state is the zero-based start index of the requested page, formatted as a string; a missing, unparseable
/// or negative value is treated as the first page.
/// </param>
/// <returns>
/// The related videos for the requested page along with the paging state for the next page (null when there
/// are no more results). If the request fails, returns a non-OK HTTP status, or yields a response that cannot
/// be read, the problem is logged and an empty result is returned.
/// </returns>
public async Task<RelatedVideos> GetRelatedVideos(RelatedVideosQuery query)
{
    // Set the base URL of the REST client to use the first node in the Cassandra cluster
    string nodeIp = _session.Cluster.AllHosts().First().Address.Address.ToString();
    _restClient.BaseUrl = new Uri(string.Format("http://{0}:8983/solr", nodeIp));

    // Example of the resulting request:
    //   http://127.0.2.15:8983/solr/killrvideo.videos/mlt?q=videoid%3Asome-uuid&wt=json&mlt.fl=name&mlt.mindf=1&mlt.mintf=1
    var request = new RestRequest("killrvideo.videos/mlt");
    request.AddParameter("q", string.Format("videoid:\"{0}\"", query.VideoId));
    request.AddParameter("wt", "json");

    // Paging information: the paging state is machine-generated, so parse it with the invariant culture and
    // never send a negative start index to Solr
    int start;
    bool parsed = int.TryParse(query.PagingState, System.Globalization.NumberStyles.Integer,
                               System.Globalization.CultureInfo.InvariantCulture, out start);
    if (!parsed || start < 0)
    {
        start = 0;
    }

    request.AddParameter("start", start);
    request.AddParameter("rows", query.PageSize);

    // MLT fields to consider when looking for similar documents
    request.AddParameter("mlt.fl", "name,description,tags");

    // MLT minimum document frequency: ignore words which do not occur in at least this many docs
    request.AddParameter("mlt.mindf", 2);

    // MLT minimum term frequency: ignore terms which occur fewer than this many times in the source doc
    request.AddParameter("mlt.mintf", 2);

    IRestResponse<MLTQueryResult> response = await _restClient.ExecuteTaskAsync<MLTQueryResult>(request).ConfigureAwait(false);

    // Check for network/timeout errors
    if (response.ResponseStatus != ResponseStatus.Completed)
    {
        Logger.Error(response.ErrorException, "Error while querying Solr video suggestions from {host} for {query}", nodeIp, query);
        return CreateEmptyRelatedVideos(query);
    }

    // Check for HTTP error codes
    if (response.StatusCode != HttpStatusCode.OK)
    {
        Logger.Error("HTTP status code {code} while querying Solr video suggestions from {host} for {query}",
                     (int) response.StatusCode, nodeIp, query);
        return CreateEmptyRelatedVideos(query);
    }

    // A completed 200 response can still carry a body that did not deserialize into the expected shape
    if (response.Data == null || response.Data.Response == null || response.Data.Response.Docs == null)
    {
        Logger.Error("Unreadable response body while querying Solr video suggestions from {host} for {query}", nodeIp, query);
        return CreateEmptyRelatedVideos(query);
    }

    // Success
    var page = response.Data.Response;
    int nextPageStartIndex = page.Start + page.Docs.Count;

    // Only hand out a paging state when this page made progress and results remain; otherwise a caller
    // following the paging state could loop forever on an empty page or request past the end
    bool hasMoreResults = page.Docs.Count > 0 && nextPageStartIndex < page.NumFound;
    string pagingState = hasMoreResults
                             ? nextPageStartIndex.ToString(System.Globalization.CultureInfo.InvariantCulture)
                             : null;

    return new RelatedVideos
    {
        VideoId = query.VideoId,
        Videos = page.Docs,
        PagingState = pagingState
    };
}

/// <summary>
/// Creates the result returned when no related videos are available for the query (no videos, no paging state).
/// </summary>
private static RelatedVideos CreateEmptyRelatedVideos(RelatedVideosQuery query)
{
    return new RelatedVideos
    {
        VideoId = query.VideoId,
        Videos = Enumerable.Empty<VideoPreview>(),
        PagingState = null
    };
}
/// <summary>
/// Finds videos related to the specified video by looking up other videos that share its tags. Returns at
/// most <c>RelatedVideosToReturn</c> videos and does not support paging (the paging state is always null).
/// </summary>
/// <param name="queryParams">Identifies the video to find related videos for.</param>
/// <returns>
/// The related videos, excluding the video itself and any duplicates. The result is empty when the video
/// cannot be found or has no tags.
/// </returns>
public async Task<RelatedVideos> GetRelatedVideos(RelatedVideosQuery queryParams)
{
    // How many tag queries are started before waiting on their results
    const int tagsPerBatch = 3;

    // Ask for twice as many rows as we ultimately want, to leave room for filtering out the video we're
    // using as the basis for the query as well as duplicates
    const int rowsPerTag = RelatedVideosToReturn * 2;

    // Step 1: find the tags of the video we were asked about
    PreparedStatement tagLookup = await _statementCache.NoContext.GetOrAddAsync("SELECT tags FROM videos WHERE videoid = ?");
    RowSet tagLookupRows = await _session.ExecuteAsync(tagLookup.Bind(queryParams.VideoId)).ConfigureAwait(false);
    Row videoRow = tagLookupRows.SingleOrDefault();
    if (videoRow == null)
    {
        return new RelatedVideos
        {
            VideoId = queryParams.VideoId,
            Videos = Enumerable.Empty<VideoPreview>(),
            PagingState = null
        };
    }

    IEnumerable<string> rawTags = videoRow.GetValue<IEnumerable<string>>("tags");
    List<string> videoTags = (rawTags ?? Enumerable.Empty<string>()).ToList();

    // Without tags there is nothing to match other videos on
    if (videoTags.Count == 0)
    {
        return new RelatedVideos
        {
            VideoId = queryParams.VideoId,
            Videos = Enumerable.Empty<VideoPreview>(),
            PagingState = null
        };
    }

    // Step 2: query videos by tag, a few tags at a time, until enough distinct videos have been collected
    var found = new Dictionary<Guid, VideoPreview>();
    Func<bool> haveEnough = () => found.Count >= RelatedVideosToReturn;

    PreparedStatement byTag = await _statementCache.NoContext.GetOrAddAsync("SELECT * FROM videos_by_tag WHERE tag = ? LIMIT ?");

    for (int batchStart = 0; batchStart < videoTags.Count; batchStart += tagsPerBatch)
    {
        // Start one query per tag in this batch, then wait for the whole batch to finish
        int batchEnd = Math.Min(batchStart + tagsPerBatch, videoTags.Count);
        var pending = new List<Task<RowSet>>();
        for (int t = batchStart; t < batchEnd; t++)
        {
            IStatement tagQuery = byTag.Bind(videoTags[t], rowsPerTag);
            pending.Add(_session.ExecuteAsync(tagQuery));
        }

        RowSet[] batchResults = await Task.WhenAll(pending).ConfigureAwait(false);

        foreach (RowSet tagResult in batchResults)
        {
            foreach (Row candidateRow in tagResult)
            {
                VideoPreview candidate = MapRowToVideoPreview(candidateRow);

                // Ignore the source video itself and anything already collected
                if (candidate.VideoId == queryParams.VideoId || found.ContainsKey(candidate.VideoId))
                {
                    continue;
                }

                found.Add(candidate.VideoId, candidate);

                // Stop reading rows once the requirement is met
                if (haveEnough())
                {
                    break;
                }
            }

            // Stop reading further result sets once the requirement is met
            if (haveEnough())
            {
                break;
            }
        }

        // No need to query the remaining tags once the requirement is met
        if (haveEnough())
        {
            break;
        }
    }

    return new RelatedVideos
    {
        VideoId = queryParams.VideoId,
        Videos = found.Values,
        PagingState = null
    };
}