/// <summary>
/// Retrieves the best match from the entity cache. If no matches exist this will attempt to download the file.
/// </summary>
/// <remarks>returns a cloned copy</remarks>
/// <param name="originalToken">Token to improve; may be null when called for a bare uri (see Ensure).</param>
/// <param name="entity">The @id uri of the entity to resolve.</param>
/// <param name="properties">Expanded-form properties that must be present on the result.</param>
/// <returns>The entity cache result or the original token if it cannot be improved.</returns>
private async Task <JToken> GetEntityHelper(JToken originalToken, Uri entity, IEnumerable <Uri> properties)
{
    // FetchNeeded is tri-state: true = page missing, false = page present but worth
    // re-querying the cache, null = no work to do / cache gave up on this entity
    bool?fetch = await _entityCache.FetchNeeded(entity, properties);

    JToken token = originalToken;

    if (fetch == true)
    {
        // we are missing properties and do not have the page
        DataTraceSources.Verbose("[DataClient] GetFile required to Ensure {0}", entity.AbsoluteUri);
        await EnsureFile(entity);
    }

    // null means either there is no work to do, or that we gave up, return the original token here
    // if the original token is null, meaning this came from Ensure(uri, uri[]) then we need to get the token from the cache
    if (fetch != null || originalToken == null)
    {
        JToken entityCacheResult = await _entityCache.GetEntity(entity);

        // If the entity cache is unable to improve the result, return the original
        if (entityCacheResult != null)
        {
            // clone so the caller cannot mutate the cache's copy
            token = entityCacheResult.DeepClone();
        }
    }

    return (token);
}
/// <summary>
/// Converts a json stream into a JObject. Parse failures do not throw; instead the
/// raw text that was read is returned under a "raw" property.
/// </summary>
/// <param name="stream">Stream to read from its beginning. May be null.</param>
/// <returns>
/// The parsed JObject, a JObject of the form {"raw": text} when parsing fails,
/// or null when <paramref name="stream"/> is null.
/// </returns>
private static async Task <JObject> StreamToJson(Stream stream)
{
    // NOTE: modifier order fixed from "async static" to the conventional
    // "static async"; no behavior change.
    JObject jObj = null;
    string json = string.Empty;

    if (stream != null)
    {
        try
        {
            // rewind in case the stream has already been read (e.g. after caching);
            // guard with CanSeek so a forward-only stream is read from its current
            // position instead of tripping the catch below
            if (stream.CanSeek)
            {
                stream.Seek(0, SeekOrigin.Begin);
            }

            using (var reader = new StreamReader(stream))
            {
                json = await reader.ReadToEndAsync();
                jObj = JObject.Parse(json);
            }
        }
        catch (Exception ex)
        {
            // best effort by design: hand back whatever text was read rather than failing
            DataTraceSources.Verbose("[StreamToJson] Failed {0}", ex.ToString());
            jObj = new JObject();
            jObj.Add("raw", json);
        }
    }
    else
    {
        DataTraceSources.Verbose("[StreamToJson] Null stream!");
    }

    return (jObj);
}
/// <summary>
/// Returns a JToken for the given entity, downloading its backing file when the
/// entity cache has no information for it yet.
/// </summary>
/// <param name="entity">JToken @id</param>
/// <returns>A cloned copy of the entity json, or null when the entity cannot be found.</returns>
public async Task <JToken> GetEntity(Uri entity)
{
    if (entity == null)
    {
        throw new ArgumentNullException("entity");
    }

    JToken cached = await _entityCache.GetEntity(entity);

    if (cached == null)
    {
        // the cache knows nothing about this entity - fetch the file and ask again
        await EnsureFile(entity);

        cached = await _entityCache.GetEntity(entity);

        if (cached == null)
        {
            DataTraceSources.Verbose("[EntityCache] Unable to get entity {0}", entity.AbsoluteUri);
            Debug.Fail("Unable to get entity");
        }
    }

    // never hand out the cache's own instance
    return (cached == null ? null : cached.DeepClone());
}
/// <summary>
/// Return the entity from the graph
/// </summary>
/// <param name="entity">Subject uri to look up.</param>
/// <returns>The best matching json node for the subject, or null when none exists.</returns>
private JToken GetEntityFromGraph(Uri entity)
{
    DataTraceSources.Verbose("[EntityCache] GetEntity {0}", entity.AbsoluteUri);

    // make sure everything added at this point has gone into the master graph
    WaitForTasks();

    JsonLdTripleCollection matches = null;

    lock (this)
    {
        // every triple currently known for this subject
        matches = _masterGraph.SelectSubject(entity);

        // touch the contributing pages so clean up does not evict them
        foreach (var page in matches.Select(t => t.Page).Distinct())
        {
            page.UpdateLastUsed();
        }
    }

    // prefer a node whose @id matches its url; triples without json are useless here
    JsonLdTriple best = matches
        .Where(t => t.JsonNode != null)
        .OrderByDescending(t => t.HasIdMatchingUrl ? 1 : 0)
        .FirstOrDefault();

    return (best == null ? null : best.JsonNode);
}
/// <summary>
/// Sends the request, serving it from the file cache when the request is a
/// CacheEnabledRequestMessage that allows file caching; otherwise passes through.
/// </summary>
/// <param name="request">Request to send; may be a CacheEnabledRequestMessage.</param>
/// <param name="cancellationToken">Cancels the wait on the uri lock or the network send.</param>
/// <returns>The cached response when available, otherwise the network response.</returns>
protected override async Task <HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
{
    HttpResponseMessage response = null;

    CacheEnabledRequestMessage cacheRequest = request as CacheEnabledRequestMessage;

    if (cacheRequest != null && cacheRequest.CacheOptions.UseFileCache)
    {
        Uri uri = request.RequestUri;

        // serialize access per-uri so only one caller downloads a given file
        using (var uriLock = new UriLock(uri, cancellationToken))
        {
            cancellationToken.ThrowIfCancellationRequested();

            // check cache
            Stream stream = null;
            if (_fileCache.TryGet(uri, out stream))
            {
                DataTraceSources.Verbose("[HttpClient] Cached Length: {0}", "" + stream.Length);
                response = new CacheResponse(stream);
            }
            else
            {
                // get the item and add it to the cache
                DataTraceSources.Verbose("[HttpClient] GET {0}", uri.AbsoluteUri);
                response = await base.SendAsync(request, cancellationToken);

                if (response.IsSuccessStatusCode)
                {
                    // BUGFIX: the format string was previously logged with no {0} argument
                    DataTraceSources.Verbose("[HttpClient] Caching {0}", uri.AbsoluteUri);

                    // NOTE(review): the response content stream is given to the cache while
                    // the response itself is still returned to the caller - presumably
                    // _fileCache.Add copies the stream; confirm against its implementation
                    _fileCache.Add(uri, cacheRequest.CacheOptions.MaxCacheLife, await response.Content.ReadAsStreamAsync());
                }
            }
        }
    }

    if (response == null)
    {
        // skip cache
        DataTraceSources.Verbose("[HttpClient] GET {0}", request.RequestUri.AbsoluteUri);
        response = await base.SendAsync(request, cancellationToken);
    }

    return (response);
}
/// <summary>
/// Retrieves a json file as a JObject, using the file cache when cacheTime allows.
/// Request failures do not throw; they are returned as JObjects describing the error.
/// </summary>
/// <param name="uri">Address of the json document; any #hash is stripped before the request.</param>
/// <param name="cacheTime">Cache life; a non-positive span bypasses the file cache.</param>
/// <param name="cacheInGraph">True to also add the result to the entity cache graph.</param>
/// <param name="cloneJson">False skips the defensive clone, for callers that discard the result.</param>
/// <returns>The parsed json, an error-describing JObject, or null when nothing was retrieved.</returns>
private async Task <JObject> GetFileInternal(Uri uri, TimeSpan cacheTime, bool cacheInGraph = true, bool cloneJson = true)
{
    if (uri == null)
    {
        throw new ArgumentNullException("uri");
    }

    // BUGFIX: removed the former (cacheTime == null) guard - TimeSpan is a
    // non-nullable struct, so that comparison was always false (dead code).

    bool cache = cacheTime.TotalSeconds > 0;

    // request the root document
    Uri fixedUri = Utility.GetUriWithoutHash(uri);

    Stream stream = null;
    JObject result = null;
    JObject clonedResult = null; // the copy we give the caller

    try
    {
        using (var uriLock = new UriLock(fixedUri))
        {
            if (!cache || !_fileCache.TryGet(fixedUri, out stream))
            {
                // the stream was not in the cache or we are skipping the cache
                int tries = 0;

                // try up to 5 times to be a little more robust
                while (stream == null && tries < 5)
                {
                    tries++;

                    try
                    {
                        HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Get, fixedUri.AbsoluteUri);
                        DataTraceSources.Verbose("[HttpClient] GET {0}", fixedUri.AbsoluteUri);
                        var response = await _httpClient.SendAsync(request);

                        Debug.Assert(response.StatusCode == HttpStatusCode.OK, "Received non-OK status code response from " + request.RequestUri.ToString());

                        if (response.StatusCode == HttpStatusCode.OK)
                        {
                            stream = await response.Content.ReadAsStreamAsync();

                            if (stream != null)
                            {
                                if (cache)
                                {
                                    // BUGFIX: the format string was previously logged with no {0} argument
                                    DataTraceSources.Verbose("[HttpClient] Caching {0}", fixedUri.AbsoluteUri);

                                    // NOTE(review): this caches with the _lifeSpan field rather than the
                                    // cacheTime parameter - confirm which lifetime is intended
                                    _fileCache.Add(fixedUri, _lifeSpan, stream);
                                }

                                DataTraceSources.Verbose("[HttpClient] 200 OK Length: {0}", "" + stream.Length);
                                result = await StreamToJson(stream);
                            }
                        }
                        else
                        {
                            DataTraceSources.Verbose("[HttpClient] FAILED {0}", "" + (int)response.StatusCode);
                            result = new JObject();
                            result.Add("HttpStatusCode", (int)response.StatusCode);
                        }
                    }
                    catch (HttpRequestException ex)
                    {
                        Debug.Fail("WebRequest failed: " + ex.ToString());
                        DataTraceSources.Verbose("[HttpClient] FAILED {0}", ex.ToString());

                        // request error
                        result = new JObject();
                        result.Add("HttpRequestException", ex.ToString());
                    }
                }
            }
            else
            {
                // the stream was in the cache
                DataTraceSources.Verbose("[HttpClient] Cached Length: {0}", "" + stream.Length);
                result = await StreamToJson(stream);
            }
        }
    }
    finally
    {
        if (stream != null)
        {
            stream.Dispose();
        }
    }

    if (result != null)
    {
        // this must be called before the entity cache thread starts using it
        if (cloneJson)
        {
            clonedResult = result.DeepClone() as JObject;
        }
        else
        {
            // in some scenarios we can skip cloning, such as when we are throwing away the result
            clonedResult = result;
        }

        if (cacheInGraph)
        {
            // this call is only blocking if the cache is overloaded
            _entityCache.Add(result, fixedUri);
        }
    }

    return (clonedResult);
}
/// <summary>
/// Ensures that the given properties are on the JToken. If they are not inlined they will be fetched.
/// Other data may appear in the returned JToken, but the root level will stay the same.
/// </summary>
/// <param name="token">The JToken to expand. This should have an @id.</param>
/// <param name="properties">Expanded form properties that are needed on JToken.</param>
/// <returns>The same JToken if it already exists, otherwise the fetched JToken.</returns>
public async Task <JToken> Ensure(JToken token, IEnumerable <Uri> properties)
{
    if (token == null)
    {
        throw new ArgumentNullException("token");
    }

    if (properties == null)
    {
        throw new ArgumentNullException("properties");
    }

    var jObject = token as JObject;

    if (jObject == null)
    {
        if (token.Type == JTokenType.String)
        {
            // a bare string is just a url, so it always has to come from the cache
            var tokenString = token.ToString();

            // make sure it is a url
            if (!String.IsNullOrEmpty(tokenString) && tokenString.StartsWith("http", StringComparison.OrdinalIgnoreCase))
            {
                // the entity cache should either find the child entity or if this url is a root url the full page will be returned
                return (await GetEntityHelper(token, new Uri(tokenString), properties));
            }
        }
        else
        {
            DataTraceSources.Verbose("[EntityCache] Non-JObject, unable to use this!");
        }

        // give the original token back
        return (token);
    }

    var compactEntity = new CompactEntityReader(jObject);

    // an entity found on its originating page is already complete in compact form
    if (compactEntity.IsFromPage != false)
    {
        return (token);
    }

    if (compactEntity.EntityUri == null)
    {
        DataTraceSources.Verbose("[EntityCache] Unable to find entity @id!");
        return (token);
    }

    // basic inspection: does the compact token already carry the requested predicates?
    if (compactEntity.HasPredicates(properties) != true)
    {
        // the compact token is missing properties - download the file it lives on, or find
        // it in the entity cache; if the entity does not exist or is corrupted the helper
        // returns the original token unchanged after trying all possible methods
        return (await GetEntityHelper(token, compactEntity.EntityUri, properties));
    }

    // give the original token back
    return (token);
}
/// <summary>
/// Removes all pages not used within the given time span.
/// </summary>
/// <param name="keepPagesUsedWithin">Pages last used within this span of now are kept.</param>
private void CleanUp(TimeSpan keepPagesUsedWithin)
{
    DateTime cutOff = DateTime.UtcNow.Subtract(keepPagesUsedWithin);

    // lock to keep any new pages from being added during this
    lock (this)
    {
        // just in case we show really late
        if (_disposed)
        {
            return;
        }

        // create a working set of pages that can be considered locked
        JsonLdPage[] pages = _pages.Values.ToArray();

        // if pages are still loading we should skip the clean up
        // TODO: post-preview this should force a clean up if the graph is huge
        if (pages.All(p => p.IsLoaded))
        {
            // check if a clean up is needed
            if (pages.Any(p => !p.UsedAfter(cutOff)))
            {
                List <JsonLdPage> keep = new List <JsonLdPage>(pages.Length);
                List <JsonLdPage> remove = new List <JsonLdPage>(pages.Length);

                // pages could potentially change last accessed times, so make the decisions in one shot
                foreach (var page in pages)
                {
                    if (page.UsedAfter(cutOff))
                    {
                        keep.Add(page);
                    }
                    else
                    {
                        remove.Add(page);
                    }
                }

                // second check to make sure we need to do this
                if (remove.Count > 0)
                {
                    DataTraceSources.Verbose("[EntityCache] EntityCache rebuild started.");

                    // rebuild the master graph from only the surviving pages rather
                    // than trying to remove triples from the existing graph
                    JsonLdGraph graph = new JsonLdGraph();

                    // graph merge
                    foreach (var page in keep)
                    {
                        graph.Merge(page.Graph);
                    }

                    _masterGraph = graph;

                    DataTraceSources.Verbose("[EntityCache] EntityCache rebuild complete.");

                    // remove and dispose of the old pages; disposal happens only after
                    // the new master graph is in place so readers never see a disposed page
                    foreach (var page in remove)
                    {
                        JsonLdPage removedPage = null;
                        if (_pages.TryRemove(page.Uri, out removedPage))
                        {
                            Debug.Assert(!removedPage.UsedAfter(cutOff), "Someone used a page that was scheduled to be removed. This should have been locked.");
                            removedPage.Dispose();
                        }
                        else
                        {
                            Debug.Fail(page.Uri.AbsoluteUri + " disappeared from the page cache.");
                        }
                    }
                }
            }
        }
    }
}
/// <summary>
/// Parses this page's compacted json into a json-ld graph and hands the page to the
/// callback for merging into the master graph. Always marks the page loaded, even on
/// failure, so waiters are released; a failed parse leaves an empty graph.
/// </summary>
/// <param name="callback">Invoked with this page after a successful graph load; expected to merge it into the master graph.</param>
private void Load(Action <JsonLdPage> callback)
{
    JObject workingCopy = _compacted;

    try
    {
        if (!Utility.IsValidJsonLd(workingCopy))
        {
            DataTraceSources.Verbose("[EntityCache] Invalid JsonLd skipping {0}", Uri.AbsoluteUri);

            // we can't parse this page, load a blank graph
            _graph = new JsonLdGraph();
            return;
        }

        // we have to modify the json to create the graph, since other people are free to use
        // _compacted during this time we have to make a copy, after we finish we will throw
        // away _compacted and provide the copy we used instead.
        workingCopy = _compacted.DeepClone() as JObject;

        Uri rootUri = Utility.GetEntityUri(workingCopy);

        if (rootUri == null)
        {
            // remove the blank node by assigning a synthetic unique @id
            string blankUrl = "http://blanknode.nuget.org/" + Guid.NewGuid().ToString();
            workingCopy["@id"] = blankUrl;
            DataTraceSources.Verbose("[EntityCache] BlankNode Doc {0}", blankUrl);
        }

        DataTraceSources.Verbose("[EntityCache] Added {0}", Uri.AbsoluteUri);

        // Load
        _graph = JsonLdGraph.Load(workingCopy, this);

        // make the callback which should merge us into the master graph
        callback(this);
    }
    catch (Exception ex)
    {
        // Something horrible happened when parsing the json-ld.
        // The original file may be corrupted. The best option here is to leave the page
        // out of the entity cache. Requests for entities from the page should default to just returning
        // the compacted JTokens back. Those jtokens have as much info as we can get in this bad state.
        DataTraceSources.Verbose("[EntityCache] Unable to load!! {0} {1}", Uri.AbsoluteUri, ex.ToString());
    }
    finally
    {
        // guarantee a non-null graph even when parsing threw before _graph was assigned
        if (_graph == null)
        {
            _graph = new JsonLdGraph();
        }

        // replace the original with the copy we used for the graph
        _compacted = workingCopy;

        // release anyone blocked in WaitForTasks/_loadWait and publish the loaded flag last
        _loadWait.Set();
        _isLoaded = true;
    }
}