/// <summary>
/// Starts the crawl of the store contained in the specified directory
/// </summary>
/// <param name="storePath">The full path to the directory that contains the store to be crawled</param>
/// <exception cref="FileNotFoundException">Raised if the store data file cannot be found under <paramref name="storePath"/></exception>
/// <exception cref="System.InvalidOperationException">
/// Raised if the configured store manager is not an <see cref="AbstractStoreManager"/>
/// or if the opened store is not a <see cref="Store"/> instance
/// </exception>
public void Run(string storePath)
{
    var dataFile = new FileInfo(Path.Combine(storePath, AbstractStoreManager.DataFileName));
    var masterFile = new FileInfo(Path.Combine(storePath, AbstractStoreManager.MasterFileName));
    if (!dataFile.Exists)
    {
        throw new FileNotFoundException("Cannot find data file", dataFile.FullName);
    }

    string storeLocation;
    ulong nextObjectId, resourceIdIndexObjectId, graphUriToIdObjectId;
    PredicateIndexResourceToObjectIdIndex propertyTypeSubjectIndex;
    PredicateIndexResourceToObjectIdIndex propertyTypeObjectIndex;

    // Fail with a descriptive error instead of a NullReferenceException when the
    // configured store manager is not of the type this crawler knows how to inspect.
    var sm = StoreManagerFactory.GetStoreManager() as AbstractStoreManager;
    if (sm == null)
    {
        throw new System.InvalidOperationException(
            "The configured store manager is not an AbstractStoreManager and cannot be crawled.");
    }

    var offset = sm.GetLatestStorePositionFromMasterFile(masterFile.FullName);

    // We need to introspect the datastream directly first because Store does not
    // currently surface direct access to index object ids
    using (var dataStream = new BinaryReader(
        new FileStream(dataFile.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)))
    {
        dataStream.BaseStream.Seek((long)offset, SeekOrigin.Begin);

        // The first varint of the store header is read and discarded; it is not needed here.
        SerializationUtils.ReadVarint(dataStream);

        // Length-prefixed UTF-8 store location string.
        var storeLocationSize = (int)SerializationUtils.ReadVarint(dataStream);
        var locationBytes = dataStream.ReadBytes(storeLocationSize);
        storeLocation = Encoding.UTF8.GetString(locationBytes, 0, storeLocationSize);

        nextObjectId = SerializationUtils.ReadVarint(dataStream);
        resourceIdIndexObjectId = SerializationUtils.ReadVarint(dataStream);
        graphUriToIdObjectId = SerializationUtils.ReadVarint(dataStream);

        _objectLocationManager = new ObjectLocationManager();
        _objectLocationManager.Read(dataStream);

        // The reads below consume the stream sequentially, so the object index
        // must be read before the subject index.
        propertyTypeObjectIndex = new PredicateIndexResourceToObjectIdIndex();
        propertyTypeObjectIndex.Read(dataStream);
        propertyTypeSubjectIndex = new PredicateIndexResourceToObjectIdIndex();
        propertyTypeSubjectIndex.Read(dataStream);
    }

    // Same reasoning as for the store manager: report a type mismatch explicitly.
    _store = sm.OpenStore(storePath, true) as Store;
    if (_store == null)
    {
        throw new System.InvalidOperationException(
            "The store at '" + storePath + "' could not be opened as a Store instance.");
    }

    var lastCommit = _store.GetCommitPoints().First();

    foreach (var a in _analyzers)
    {
        a.OnStoreStart(_store.ObjectId, storeLocation, nextObjectId, lastCommit.CommitTime);
    }

    CrawlBTree<Bucket>(resourceIdIndexObjectId, "Resource String to Resource ID Index");
    CrawlBTree<Bucket>(graphUriToIdObjectId, "Graph URI to Resource ID Index");
    CrawlPredicateIndex(propertyTypeSubjectIndex, "Property Type Subject Index");
    CrawlPredicateIndex(propertyTypeObjectIndex, "Property Type Object Index");

    foreach (var a in _analyzers)
    {
        a.OnStoreEnd(_store.ObjectId);
    }
}
/// <summary>
/// Walks every entry of a predicate index, crawling the B-tree that each entry
/// points to, and notifies the registered analyzers before and after the walk.
/// </summary>
/// <param name="predicateIndex">The predicate index whose entries are to be crawled</param>
/// <param name="indexName">The display name reported to the analyzers for this index</param>
private void CrawlPredicateIndex(PredicateIndexResourceToObjectIdIndex predicateIndex, string indexName)
{
    // Announce the index, together with its entry count, to every analyzer.
    foreach (var analyzer in _analyzers)
    {
        analyzer.OnPredicateIndexStart(indexName, predicateIndex.Entries.Count());
    }

    foreach (var indexEntry in predicateIndex.Entries)
    {
        var btreeRootId = indexEntry.IndexObjectId;

        // Resolve the entry's resource id so the per-predicate B-tree gets a readable label.
        var predicateResource = _store.Resolve(indexEntry.ResourceId);
        var btreeName = indexName + " : " + predicateResource.LexicalValue;

        CrawlBTree<ObjectRef>(btreeRootId, btreeName);
    }

    // Signal completion of this index to every analyzer.
    foreach (var analyzer in _analyzers)
    {
        analyzer.OnPredicateIndexEnd(indexName);
    }
}