Example #1
0
        /// <summary>
        /// Resolves an entity through the entity cache, downloading the file for the entity first
        /// when the cache reports that a fetch is needed.
        /// </summary>
        /// <param name="originalToken">The token the caller already holds. May be null.</param>
        /// <param name="entity">Uri of the entity to resolve.</param>
        /// <param name="properties">Properties the caller needs on the entity.</param>
        /// <remarks>A result taken from the entity cache is returned as a deep clone.</remarks>
        /// <returns>
        /// A clone of the cached entity, or <paramref name="originalToken"/> when the cache
        /// cannot provide anything better.
        /// </returns>
        private async Task<JToken> GetEntityHelper(JToken originalToken, Uri entity, IEnumerable<Uri> properties)
        {
            bool? fetchNeeded = await _entityCache.FetchNeeded(entity, properties);

            if (fetchNeeded == true)
            {
                // properties are missing and the page has not been loaded yet
                DataTraceSources.Verbose("[DataClient] GetFile required to Ensure {0}", entity.AbsoluteUri);
                await EnsureFile(entity);
            }

            // A null answer means there is nothing to do (or we gave up). In that case the caller's
            // token is handed back untouched, unless there is no token at all, in which case the
            // cache is the only possible source.
            if (fetchNeeded == null && originalToken != null)
            {
                return originalToken;
            }

            JToken cached = await _entityCache.GetEntity(entity);

            if (cached == null)
            {
                // the cache could not improve on what the caller already had
                return originalToken;
            }

            return cached.DeepClone();
        }
Example #2
0
        /// <summary>
        /// Reads a stream to the end and parses its content as a JSON object.
        /// </summary>
        /// <param name="stream">The stream holding the JSON text. May be null.</param>
        /// <remarks>
        /// The stream is rewound to the beginning when it supports seeking; non-seekable streams are
        /// read from their current position. The reader created here disposes the stream when done.
        /// </remarks>
        /// <returns>
        /// The parsed object; an object with a single "raw" property holding whatever text was read
        /// when reading or parsing fails; or null when <paramref name="stream"/> is null.
        /// </returns>
        private static async Task<JObject> StreamToJson(Stream stream)
        {
            if (stream == null)
            {
                DataTraceSources.Verbose("[StreamToJson] Null stream!");
                return null;
            }

            string json = string.Empty;

            try
            {
                // Seeking a non-seekable stream throws NotSupportedException, which would otherwise be
                // swallowed below and turn perfectly readable content into an empty "raw" result.
                if (stream.CanSeek)
                {
                    stream.Seek(0, SeekOrigin.Begin);
                }

                using (var reader = new StreamReader(stream))
                {
                    json = await reader.ReadToEndAsync();

                    return JObject.Parse(json);
                }
            }
            catch (Exception ex)
            {
                // best effort: hand back the raw text so the caller can still inspect it
                DataTraceSources.Verbose("[StreamToJson] Failed {0}", ex.ToString());

                JObject failure = new JObject();
                failure.Add("raw", json);
                return failure;
            }
        }
Example #3
0
        /// <summary>
        /// Gets the json for an entity, downloading the file for it when the entity cache has no entry yet.
        /// </summary>
        /// <param name="entity">The @id of the entity.</param>
        /// <returns>A deep clone of the cached entity json, or null if it could not be found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="entity"/> is null.</exception>
        public async Task<JToken> GetEntity(Uri entity)
        {
            if (entity == null)
            {
                throw new ArgumentNullException("entity");
            }

            JToken cached = await _entityCache.GetEntity(entity);

            if (cached == null)
            {
                // nothing known about this entity yet: download it, then ask the cache again
                await EnsureFile(entity);
                cached = await _entityCache.GetEntity(entity);
            }

            if (cached == null)
            {
                DataTraceSources.Verbose("[EntityCache] Unable to get entity {0}", entity.AbsoluteUri);
                Debug.Fail("Unable to get entity");
                return null;
            }

            // callers get their own copy, never the instance held by the cache
            return cached.DeepClone();
        }
Example #4
0
        /// <summary>
        /// Looks up the json node for an entity in the master graph.
        /// </summary>
        /// <param name="entity">Uri of the subject to look up.</param>
        /// <returns>
        /// The json node of the preferred triple for the subject (triples whose id matches the url
        /// are ranked first), or null when no triple with a json node exists.
        /// </returns>
        private JToken GetEntityFromGraph(Uri entity)
        {
            DataTraceSources.Verbose("[EntityCache] GetEntity {0}", entity.AbsoluteUri);

            // everything queued so far has to be in the master graph before we query it
            WaitForTasks();

            JsonLdTripleCollection matches;

            lock (this)
            {
                matches = _masterGraph.SelectSubject(entity);

                // record that the pages backing these triples were just used
                var touchedPages = matches.Select(t => t.Page).Distinct();

                foreach (var touched in touchedPages)
                {
                    touched.UpdateLastUsed();
                }
            }

            // only triples that carry a json node are usable; prefer the one whose id matches the url
            var candidates = matches.Where(n => n.JsonNode != null);
            JsonLdTriple best = candidates.OrderByDescending(t => t.HasIdMatchingUrl ? 1 : 0).FirstOrDefault();

            if (best == null)
            {
                return null;
            }

            return best.JsonNode;
        }
Example #5
0
        /// <summary>
        /// Sends the request, answering from the file cache when the request opts in to file caching.
        /// </summary>
        /// <param name="request">
        /// The outgoing request. The file cache is only consulted when this is a
        /// <c>CacheEnabledRequestMessage</c> whose cache options have <c>UseFileCache</c> set.
        /// </param>
        /// <param name="cancellationToken">Token used to cancel the operation.</param>
        /// <returns>
        /// A <c>CacheResponse</c> wrapping the cached stream on a cache hit, otherwise the response
        /// produced by the base handler.
        /// </returns>
        protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
        {
            HttpResponseMessage response = null;

            CacheEnabledRequestMessage cacheRequest = request as CacheEnabledRequestMessage;

            if (cacheRequest != null && cacheRequest.CacheOptions.UseFileCache)
            {
                Uri uri = request.RequestUri;

                // held across the cache lookup and any download that follows
                using (var uriLock = new UriLock(uri, cancellationToken))
                {
                    cancellationToken.ThrowIfCancellationRequested();

                    // check cache
                    Stream stream = null;
                    if (_fileCache.TryGet(uri, out stream))
                    {
                        DataTraceSources.Verbose("[HttpClient] Cached Length: {0}", "" + stream.Length);

                        response = new CacheResponse(stream);
                    }
                    else
                    {
                        // get the item and add it to the cache
                        DataTraceSources.Verbose("[HttpClient] GET {0}", uri.AbsoluteUri);

                        response = await base.SendAsync(request, cancellationToken);

                        if (response.IsSuccessStatusCode)
                        {
                            // the format string has a {0} placeholder, so the uri must be supplied
                            DataTraceSources.Verbose("[HttpClient] Caching {0}", uri.AbsoluteUri);
                            _fileCache.Add(uri, cacheRequest.CacheOptions.MaxCacheLife, await response.Content.ReadAsStreamAsync());
                        }
                    }
                }
            }

            if (response == null)
            {
                // skip cache
                DataTraceSources.Verbose("[HttpClient] GET {0}", request.RequestUri.AbsoluteUri);
                response = await base.SendAsync(request, cancellationToken);
            }

            return response;
        }
Example #6
0
        /// <summary>
        /// Retrieves the json document at the given uri, from the file cache when allowed and
        /// otherwise over http, and optionally adds it to the entity cache.
        /// </summary>
        /// <param name="uri">Uri of the document. Any fragment is stripped before the request is made.</param>
        /// <param name="cacheTime">A value of zero seconds or less bypasses the file cache entirely.</param>
        /// <param name="cacheInGraph">When true the result is added to the entity cache.</param>
        /// <param name="cloneJson">When true the caller receives a deep clone, otherwise the parsed instance itself.</param>
        /// <returns>
        /// The parsed document. When the server answers with a non-OK status the object contains an
        /// "HttpStatusCode" property, and when the request throws it contains an "HttpRequestException"
        /// property. May be null when no result was produced.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
        private async Task<JObject> GetFileInternal(Uri uri, TimeSpan cacheTime, bool cacheInGraph = true, bool cloneJson = true)
        {
            if (uri == null)
            {
                throw new ArgumentNullException("uri");
            }

            // cacheTime is a TimeSpan (a value type) and can never be null, so no null check is needed

            bool cache = cacheTime.TotalSeconds > 0;

            // request the root document
            Uri fixedUri = Utility.GetUriWithoutHash(uri);

            Stream  stream       = null;
            JObject result       = null;
            JObject clonedResult = null; // the copy we give the caller

            try
            {
                using (var uriLock = new UriLock(fixedUri))
                {
                    if (!cache || !_fileCache.TryGet(fixedUri, out stream))
                    {
                        // the stream was not in the cache or we are skipping the cache
                        int tries = 0;

                        // try up to 5 times to be a little more robust
                        while (stream == null && tries < 5)
                        {
                            tries++;

                            try
                            {
                                HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Get, fixedUri.AbsoluteUri);

                                DataTraceSources.Verbose("[HttpClient] GET {0}", fixedUri.AbsoluteUri);

                                var response = await _httpClient.SendAsync(request);

                                Debug.Assert(response.StatusCode == HttpStatusCode.OK, "Received non-OK status code response from " + request.RequestUri.ToString());
                                if (response.StatusCode == HttpStatusCode.OK)
                                {
                                    stream = await response.Content.ReadAsStreamAsync();

                                    if (stream != null)
                                    {
                                        if (cache)
                                        {
                                            // the format string has a {0} placeholder, so the uri must be supplied
                                            DataTraceSources.Verbose("[HttpClient] Caching {0}", fixedUri.AbsoluteUri);

                                            // NOTE(review): the cache entry uses _lifeSpan rather than the
                                            // cacheTime argument - confirm this is intentional
                                            _fileCache.Add(fixedUri, _lifeSpan, stream);
                                        }

                                        DataTraceSources.Verbose("[HttpClient] 200 OK Length: {0}", "" + stream.Length);
                                        result = await StreamToJson(stream);
                                    }
                                }
                                else
                                {
                                    // stream stays null here, so the loop retries until the attempts run out
                                    DataTraceSources.Verbose("[HttpClient] FAILED {0}", "" + (int)response.StatusCode);
                                    result = new JObject();
                                    result.Add("HttpStatusCode", (int)response.StatusCode);
                                }
                            }
                            catch (HttpRequestException ex)
                            {
                                Debug.Fail("WebRequest failed: " + ex.ToString());
                                DataTraceSources.Verbose("[HttpClient] FAILED {0}", ex.ToString());

                                // request error
                                result = new JObject();
                                result.Add("HttpRequestException", ex.ToString());
                            }
                        }
                    }
                    else
                    {
                        // the stream was in the cache
                        DataTraceSources.Verbose("[HttpClient] Cached Length: {0}", "" + stream.Length);
                        result = await StreamToJson(stream);
                    }
                }
            }
            finally
            {
                if (stream != null)
                {
                    stream.Dispose();
                }
            }

            if (result != null)
            {
                // this must be called before the entity cache thread starts using it
                if (cloneJson)
                {
                    clonedResult = result.DeepClone() as JObject;
                }
                else
                {
                    // in some scenarios we can skip cloning, such as when we are throwing away the result
                    clonedResult = result;
                }

                if (cacheInGraph)
                {
                    // this call is only blocking if the cache is overloaded
                    _entityCache.Add(result, fixedUri);
                }
            }

            return clonedResult;
        }
Example #7
0
        /// <summary>
        /// Ensures that the given properties are on the JToken. If they are not inlined they will be fetched.
        /// Other data may appear in the returned JToken, but the root level will stay the same.
        /// </summary>
        /// <param name="token">The JToken to expand. This should be an object with an @id, or a string holding an http(s) url.</param>
        /// <param name="properties">Expanded form properties that are needed on the token.</param>
        /// <returns>
        /// The original token when it is already complete or cannot be improved, otherwise the
        /// token obtained through the entity cache.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="token"/> or <paramref name="properties"/> is null.
        /// </exception>
        public async Task<JToken> Ensure(JToken token, IEnumerable<Uri> properties)
        {
            if (token == null)
            {
                throw new ArgumentNullException("token");
            }

            if (properties == null)
            {
                throw new ArgumentNullException("properties");
            }

            JObject jObject = token as JObject;

            if (jObject != null)
            {
                CompactEntityReader compactEntity = new CompactEntityReader(jObject);

                // if the entity is found on its originating page we know it is already complete in this compact form
                if (compactEntity.IsFromPage == false)
                {
                    if (compactEntity.EntityUri != null)
                    {
                        // inspect the compact entity on a basic level to determine if it already has the properties it asked for
                        if (compactEntity.HasPredicates(properties) != true)
                        {
                            // at this point we know the compact token does not include the needed properties,
                            // we need to either download the file it lives on, or find it in the entity cache
                            // if the token is for an entity that just does not exist or is corrupted in some way
                            // the original token will be returned since the entity cache cannot improve it after
                            // trying all possible methods.
                            return await GetEntityHelper(token, compactEntity.EntityUri, properties);
                        }
                    }
                    else
                    {
                        DataTraceSources.Verbose("[EntityCache] Unable to find entity @id!");
                    }
                }
            }
            else if (token.Type == JTokenType.String)
            {
                // It's just a URL, so we definitely need to fetch it from the cache
                string tokenString = token.ToString();

                // make sure it is a url
                if (!String.IsNullOrEmpty(tokenString) && tokenString.StartsWith("http", StringComparison.OrdinalIgnoreCase))
                {
                    // A string can start with "http" without being a well-formed absolute uri;
                    // TryCreate lets such a value fall through to "return the original token"
                    // instead of failing with a UriFormatException.
                    Uri entityUrl;

                    if (Uri.TryCreate(tokenString, UriKind.Absolute, out entityUrl))
                    {
                        // the entity cache should either find the child entity or if this url is a root url the full page will be returned
                        return await GetEntityHelper(token, entityUrl, properties);
                    }
                }
            }
            else
            {
                DataTraceSources.Verbose("[EntityCache] Non-JObject, unable to use this!");
            }

            // give the original token back
            return token;
        }
Example #8
0
        /// <summary>
        /// Drops every page that has not been used within the given time span and rebuilds the
        /// master graph from the pages that remain.
        /// </summary>
        /// <param name="keepPagesUsedWithin">Pages used within this span, counted back from now, are kept.</param>
        private void CleanUp(TimeSpan keepPagesUsedWithin)
        {
            DateTime cutOff = DateTime.UtcNow.Subtract(keepPagesUsedWithin);

            // hold the lock for the whole operation so the page set stays stable underneath us
            lock (this)
            {
                // we may have been scheduled before a dispose and only run now
                if (_disposed)
                {
                    return;
                }

                // snapshot of the pages this pass will work on
                JsonLdPage[] snapshot = _pages.Values.ToArray();

                // skip the clean up entirely while any page is still loading
                // TODO: post-preview this should force a clean up if the graph is huge
                if (!snapshot.All(p => p.IsLoaded))
                {
                    return;
                }

                // nothing has expired, so there is nothing to do
                if (snapshot.All(p => p.UsedAfter(cutOff)))
                {
                    return;
                }

                var keep   = new List<JsonLdPage>(snapshot.Length);
                var remove = new List<JsonLdPage>(snapshot.Length);

                // last-used times can move while we look at them, so each page is classified exactly once
                foreach (JsonLdPage candidate in snapshot)
                {
                    List<JsonLdPage> bucket = candidate.UsedAfter(cutOff) ? keep : remove;
                    bucket.Add(candidate);
                }

                // a page may have been touched since the check above; re-verify before rebuilding
                if (remove.Count == 0)
                {
                    return;
                }

                DataTraceSources.Verbose("[EntityCache] EntityCache rebuild started.");

                // build a fresh master graph out of the surviving pages
                JsonLdGraph rebuilt = new JsonLdGraph();

                foreach (JsonLdPage survivor in keep)
                {
                    rebuilt.Merge(survivor.Graph);
                }

                _masterGraph = rebuilt;

                DataTraceSources.Verbose("[EntityCache] EntityCache rebuild complete.");

                // take the expired pages out of the page cache and dispose them
                foreach (JsonLdPage expired in remove)
                {
                    JsonLdPage removedPage = null;

                    if (!_pages.TryRemove(expired.Uri, out removedPage))
                    {
                        Debug.Fail(expired.Uri.AbsoluteUri + " disappeared from the page cache.");
                        continue;
                    }

                    Debug.Assert(!removedPage.UsedAfter(cutOff), "Someone used a page that was scheduled to be removed. This should have been locked.");
                    removedPage.Dispose();
                }
            }
        }
Example #9
0
        /// <summary>
        /// Builds the graph for this page from its compacted json and then invokes the callback.
        /// </summary>
        /// <param name="callback">
        /// Invoked with this page once the graph has been loaded. It is not invoked when the json
        /// is not valid JSON-LD or when loading throws.
        /// </param>
        /// <remarks>
        /// Whatever happens, the page ends up with a non-null graph, is marked as loaded, and the
        /// load wait handle is signalled.
        /// </remarks>
        private void Load(Action<JsonLdPage> callback)
        {
            JObject workingCopy = _compacted;

            try
            {
                if (!Utility.IsValidJsonLd(workingCopy))
                {
                    DataTraceSources.Verbose("[EntityCache] Invalid JsonLd skipping {0}", Uri.AbsoluteUri);

                    // we can't parse this page, load a blank graph
                    _graph = new JsonLdGraph();

                    return;
                }

                // we have to modify the json to create the graph, since other people are free to use
                // _compacted during this time we have to make a copy, after we finish we will throw
                // away _compacted and provide the copy we used instead.
                workingCopy = _compacted.DeepClone() as JObject;

                Uri rootUri = Utility.GetEntityUri(workingCopy);

                if (rootUri == null)
                {
                    // the root has no entity uri: give it a unique generated @id so it is no longer a blank node
                    string blankUrl = "http://blanknode.nuget.org/" + Guid.NewGuid().ToString();
                    workingCopy["@id"] = blankUrl;
                    DataTraceSources.Verbose("[EntityCache] BlankNode Doc {0}", blankUrl);
                }

                DataTraceSources.Verbose("[EntityCache] Added {0}", Uri.AbsoluteUri);

                // Load
                _graph = JsonLdGraph.Load(workingCopy, this);

                // make the callback which should merge us into the master graph
                callback(this);
            }
            catch (Exception ex)
            {
                // Something horrible happened when parsing the json-ld.
                // The original file may be corrupted. The best option here is to leave the page
                // out of the entity cache. Requests for entities from the page should default to just returning
                // the compacted JTokens back. Those jtokens have as much info as we can get in this bad state.

                DataTraceSources.Verbose("[EntityCache] Unable to load!! {0} {1}", Uri.AbsoluteUri, ex.ToString());
            }
            finally
            {
                if (_graph == null)
                {
                    _graph = new JsonLdGraph();
                }

                // replace the original with the copy we used for the graph
                _compacted = workingCopy;

                // Publish the loaded flag before releasing waiters; signalling first leaves a window
                // in which a thread woken by the wait handle still sees the page as not loaded.
                _isLoaded = true;
                _loadWait.Set();
            }
        }