Esempio n. 1
0
        /// <summary>
        /// Starts the crawl of the store contained in the specified directory
        /// </summary>
        /// <remarks>
        /// The store header is read directly from the data file at the offset reported by the master file,
        /// then the store is opened and each index is crawled in turn. Every registered analyzer is notified
        /// before the first index is crawled and again after the last one.
        /// </remarks>
        /// <param name="storePath">The full path to the directory that contains the store to be crawled</param>
        /// <exception cref="FileNotFoundException">The data file or the master file does not exist in <paramref name="storePath"/></exception>
        /// <exception cref="EndOfStreamException">The data file ends before the complete store location string could be read</exception>
        /// <exception cref="System.InvalidOperationException">The configured store manager is not an <c>AbstractStoreManager</c>, or the opened store is not a <c>Store</c></exception>
        public void Run(string storePath)
        {
            var dataFile   = new FileInfo(Path.Combine(storePath, AbstractStoreManager.DataFileName));
            var masterFile = new FileInfo(Path.Combine(storePath, AbstractStoreManager.MasterFileName));

            if (!dataFile.Exists)
            {
                throw new FileNotFoundException("Cannot find data file", dataFile.FullName);
            }
            if (!masterFile.Exists)
            {
                // Fail the same way as for the data file rather than relying on whatever the store manager does
                throw new FileNotFoundException("Cannot find master file", masterFile.FullName);
            }

            string storeLocation;
            ulong  nextObjectId, resourceIdIndexObjectId, graphUriToIdObjectId;
            PredicateIndexResourceToObjectIdIndex propertyTypeSubjectIndex;
            PredicateIndexResourceToObjectIdIndex propertyTypeObjectIndex;

            var sm = StoreManagerFactory.GetStoreManager() as AbstractStoreManager;
            if (sm == null)
            {
                // Without this check a different store manager implementation would surface as a NullReferenceException
                throw new System.InvalidOperationException(
                    "The configured store manager is not an AbstractStoreManager; the store cannot be crawled.");
            }
            var offset = sm.GetLatestStorePositionFromMasterFile(masterFile.FullName);

            // We need to introspect the datastream directly first because Store does not currently surface direct access to index object ids
            using (
                var dataStream =
                    new BinaryReader(new FileStream(dataFile.FullName, FileMode.Open, FileAccess.Read,
                                                    FileShare.ReadWrite)))
            {
                dataStream.BaseStream.Seek((long)offset, SeekOrigin.Begin);

                // The first varint of the header is not needed here; it is read only to advance the stream
                SerializationUtils.ReadVarint(dataStream);

                var storeLocationSize = (int)SerializationUtils.ReadVarint(dataStream);
                var locationBytes     = dataStream.ReadBytes(storeLocationSize);
                if (locationBytes.Length != storeLocationSize)
                {
                    // ReadBytes returns a shorter array when the stream ends early; report that explicitly
                    throw new EndOfStreamException(
                        "Unexpected end of data file while reading the store location from " + dataFile.FullName);
                }
                storeLocation           = Encoding.UTF8.GetString(locationBytes, 0, storeLocationSize);
                nextObjectId            = SerializationUtils.ReadVarint(dataStream);
                resourceIdIndexObjectId = SerializationUtils.ReadVarint(dataStream);
                graphUriToIdObjectId    = SerializationUtils.ReadVarint(dataStream);
                _objectLocationManager  = new ObjectLocationManager();
                _objectLocationManager.Read(dataStream);

                // Read order matters: the object index precedes the subject index in the stream
                propertyTypeObjectIndex = new PredicateIndexResourceToObjectIdIndex();
                propertyTypeObjectIndex.Read(dataStream);
                propertyTypeSubjectIndex = new PredicateIndexResourceToObjectIdIndex();
                propertyTypeSubjectIndex.Read(dataStream);
            }

            _store = sm.OpenStore(storePath, true) as Store;
            if (_store == null)
            {
                throw new System.InvalidOperationException(
                    "The store at '" + storePath + "' could not be opened as a Store instance.");
            }
            var lastCommit = _store.GetCommitPoints().First();

            foreach (var a in _analyzers)
            {
                a.OnStoreStart(_store.ObjectId, storeLocation, nextObjectId, lastCommit.CommitTime);
            }
            CrawlBTree <Bucket>(resourceIdIndexObjectId, "Resource String to Resource ID Index");
            CrawlBTree <Bucket>(graphUriToIdObjectId, "Graph URI to Resource ID Index");
            CrawlPredicateIndex(propertyTypeSubjectIndex, "Property Type Subject Index");
            CrawlPredicateIndex(propertyTypeObjectIndex, "Property Type Object Index");
            foreach (var a in _analyzers)
            {
                a.OnStoreEnd(_store.ObjectId);
            }
        }
Esempio n. 2
0
 /// <summary>
 /// Crawls the B-tree referenced by each entry of a predicate index, notifying every analyzer
 /// before the first entry is processed and after the last one.
 /// </summary>
 /// <param name="predicateIndex">The predicate index whose entries are to be crawled</param>
 /// <param name="indexName">The display name of the index; used as the prefix of each crawled B-tree's name</param>
 private void CrawlPredicateIndex(PredicateIndexResourceToObjectIdIndex predicateIndex, string indexName)
 {
     // Count the entries once instead of re-enumerating them for every analyzer
     var entryCount = predicateIndex.Entries.Count();
     foreach (var a in _analyzers)
     {
         a.OnPredicateIndexStart(indexName, entryCount);
     }
     foreach (var entry in predicateIndex.Entries)
     {
         var indexId  = entry.IndexObjectId;
         // Resolve the resource so the B-tree can be labelled with its lexical value
         var resource = _store.Resolve(entry.ResourceId);
         CrawlBTree <ObjectRef>(indexId, indexName + " : " + resource.LexicalValue);
     }
     foreach (var a in _analyzers)
     {
         a.OnPredicateIndexEnd(indexName);
     }
 }