//-------------------------------------------------------------------------
// Loads the auxiliary header file of the index, creating it on first use.
// If the header file does not exist yet, it is allocated and stamped with
// the index signature; otherwise its stored version is validated against
// the currently implemented one.
//
// Throws:
//   Exception        - the blob file system handed out a handle other than
//                      the well-known header handle (inconsistent state).
//   FormatException  - the on-disk index version does not match Version;
//                      the caller is expected to force index reconstruction.
//-------------------------------------------------------------------------
public virtual void Load()
{
    if (!_indexFile.IsValidHandle(HandleOfHeaderFile))
    {
        // First run (or header lost): allocate the auxiliary file and write
        // the signature so subsequent loads can validate it.
        int handle;
        using (BinaryWriter writer = _indexFile.AllocFile(out handle))
        {
            // The header must always live at the well-known handle; any other
            // handle means the blob file system is not in a pristine state.
            if (HandleOfHeaderFile != handle)
            {
                throw new Exception("BlobFileSystem allocated an unexpected handle for the auxiliary file.");
            }
            IndexConstructor.WriteSignature(writer);
        }
    }
    else
    {
        using (BinaryReader header = _indexFile.GetFileReader(HandleOfHeaderFile))
        {
            header.ReadInt64(); // skip date
            int version = header.ReadInt32();
            if (version != Version)
            {
                // Fixed: the original message had an unbalanced parenthesis
                // ("...implemented(" + version + ":" + Version + ". ...").
                throw new FormatException("Version of current index (" + version + ") is not consistent with the currently implemented one (" + Version + "). Force index reconstruction.");
            }
        }
    }
    isErrorFlagRaised = false;
}
//-------------------------------------------------------------------------
// Parses a plain sequence of bytes into the entries and their instances,
// appending each live entry to the shared listTemporaryStorage.
// Comment: Some entries may be marked as "removed", which means the
//          corresponding documents no longer exist. Thus the field
//          "DocsNumber" counts *ALL* entries - valid and removed - since
//          we do not have the ability to physically strip the sequence of
//          bytes. Non-existing documents are marked with "-1" as DocID.
//          Thus we have to allocate actual space only AFTER the number of
//          entries is known.
//
// Stream layout per entry (read order is significant):
//   DocIndex (varint) | TfIdf (float32) | instanceCount-1 (varint)
//   | instanceCount * { Offset (uint32), CompoundInfo (uint32) }
//
// Throws FormatException on a negative instance count or when allocating
// the offsets array blows up (both treated as index corruption).
//-------------------------------------------------------------------------
protected static void ParseEntry(BinaryReader reader)
{
    int instancesNumber;
    Entry new_ = new Entry();
    new_.DocIndex = IndexConstructor.ReadCount(reader);
    new_.TfIdf = reader.ReadSingle();
    // Instance count is stored off-by-one (count - 1) by the writer;
    // restore the actual count here.
    instancesNumber = IndexConstructor.ReadCount(reader) + 1;
    // A negative value here (e.g. after overflow of a corrupt varint)
    // signals index corruption.
    if (instancesNumber < 0)
    {
        throw new FormatException("TermIndexRecord -- Illegal number of instances for a TermIndex record (" + instancesNumber + ") - possible index corruption");
    }
    // NB: Discuss an OpenAPI issue for getting the current maximal value of document Id
    //     from the ResourceStore.
    // if( new_.DocIndex >= 10000000 )
    //     throw( new IndexConstructor.TextIndexCorruption( "[DocIndex=" + new_.DocIndex + "] value in [TermIndex record Entry] is greater than a reasonable number of documents - possible index corruption" ));
    //-----------------------------------------------------------------
    try
    {
        if (new_.DocIndex != -1)
        {
            // Allocation happens only now that the count is known (see
            // header comment). OutOfMemory here is treated as corruption.
            InstanceOffset[] Offsets = new InstanceOffset[instancesNumber];
            for (int j = 0; j < instancesNumber; j++)
            {
                Offsets[j].Offset = reader.ReadUInt32();
                Offsets[j].CompoundInfo = reader.ReadUInt32();
            }
            new_.Offsets = Offsets;
            listTemporaryStorage.Add(new_);
        }
        else
        {
            // This entry has been "removed"; do not use it in subsequent
            // processing.
            // NOTE(review): this branch does NOT consume the instance
            // offsets from the stream - it assumes removed entries are
            // written without their offset payload. Verify against the
            // removal/rewrite path; otherwise the next ParseEntry call
            // would start mid-payload and misparse.
            new_ = null;
        }
    }
    catch (OutOfMemoryException)
    {
        throw new FormatException("TermIndexRecord - illegal number of term instances: [" + instancesNumber + "]");
    }
}
//-------------------------------------------------------------------------
// Serializes this record's entries to the writer. Assumes the caller has
// already positioned the binary stream at the correct offset.
//-------------------------------------------------------------------------
public void Save(BinaryWriter writer)
{
    Debug.Assert(DocsNumber > 0);

    // Record header: HC precedes the entry list.
    IndexConstructor.WriteCount(writer, HC);

    //---------------------------------------------------------------------
    for (int entryIndex = 0; entryIndex < DocsNumber; entryIndex++)
    {
        Entry entry = GetEntryAt(entryIndex);

        IndexConstructor.WriteCount(writer, entry.DocIndex);
        writer.Write(entry.TfIdf);

        // The instance count is persisted off-by-one (count - 1); the
        // reader adds the 1 back when parsing.
        IndexConstructor.WriteCount(writer, entry.Count - 1);

        InstanceOffset[] offsets = entry.Offsets;
        for (int j = 0; j < offsets.Length; j++)
        {
            writer.Write(offsets[j].Offset);
            writer.Write(offsets[j].CompoundInfo);
        }
    }
}
//-------------------------------------------------------------------------
// Reads a complete term-index record from the reader. The record stores no
// explicit entry count, so entries are parsed until the underlying stream
// is exhausted: EndOfStreamException is the deliberate loop terminator,
// not an error condition.
//-------------------------------------------------------------------------
public TermIndexRecord(BinaryReader reader)
{
    try
    {
        // NOTE(review): listTemporaryStorage is a shared scratch list
        // (cleared here, filled by ParseEntry) - this assumes records are
        // constructed single-threadedly; confirm no concurrent readers.
        listTemporaryStorage.Clear();
        HC = IndexConstructor.ReadCount(reader);
        while (true)
        {
            ParseEntry(reader);
            // _chainsCount++;
        }
    }
    catch (EndOfStreamException)
    {
        // Normal termination: copy the accumulated live entries into the
        // record's own array. aEntries stays null for an empty record.
        // NOTE(review): if the stream ends mid-entry, the partial entry is
        // silently dropped but earlier entries are kept - presumably
        // acceptable for this format; verify.
        if (listTemporaryStorage.Count > 0)
        {
            aEntries = new Entry[listTemporaryStorage.Count];
            listTemporaryStorage.CopyTo(aEntries);
        }
    }
}
//-------------------------------------------------------------------------
// Appends an entry (docId + term instances) to the index record of the
// given term, allocating a new record file on the term's first occurrence.
// Returns the termId that was written.
//
// Throws ApplicationException if the index file accessor has not been
// initialized yet.
//-------------------------------------------------------------------------
public int AddRecord(int docId, int termId, object instances, int maxTermInDoc)
{
    #region Preconditions
    if (_indexFile == null)
    {
        throw new ApplicationException("Aplication has not initialized Accessor yet");
    }
    #endregion Preconditions

    int recordHandle = GetRecordHandle(termId);
    BinaryWriter writer;
    // NOTE(review): recordKey is not assigned anywhere in this method -
    // presumably it is a field populated as a side effect of
    // GetRecordHandle(termId); confirm, since the InsertKey/DeleteKey
    // calls below depend on it matching this termId.
    if (recordHandle <= 0)
    {
        // First entry for this term: allocate a fresh record file, map the
        // term to its handle, and stamp the termId at the record head.
        writer = _indexFile.AllocFile(out recordHandle);
        TermId2RecordHandle.InsertKey(recordKey, recordHandle);
        IndexConstructor.WriteCount(writer, termId);
    }
    else
    {
        // Existing record: append to its last cluster. The negated termId
        // looks up the cached last-cluster handle; if appending moved the
        // record to a new cluster, the mapping is updated accordingly.
        int lastClusterHandle = GetRecordHandle(-termId);
        int saved = lastClusterHandle;
        writer = _indexFile.AppendFile(recordHandle, ref lastClusterHandle);
        if (saved != lastClusterHandle)
        {
            if (saved > 0)
            {
                TermId2RecordHandle.DeleteKey(recordKey, saved);
            }
            TermId2RecordHandle.InsertKey(recordKey, lastClusterHandle);
        }
    }
    ++_savedRecords;
    // Dispose the writer as soon as the entry is flushed.
    using ( writer )
    {
        IndexConstructor.WriteEntry(writer, docId, termId, instances, maxTermInDoc);
    }
    return(termId);
}
//-------------------------------------------------------------------------
// Flushes the tokens accumulated for the current document into the text
// index, records the doc as a pending addition, and queues a job to mark
// the resource as indexed. On an IO failure the whole text index is
// discarded and the exception is rethrown wrapped.
//-------------------------------------------------------------------------
private void ManageIndexChunk()
{
    // Nothing was accumulated for this chunk - no work to do.
    if (_tokens.Count == 0)
    {
        return;
    }

    try
    {
        IndexConstructor.FlushDocument(TermIndexAccessor, _lastDocID, _termMaxFrequency, _tokens);

        IResource resource = Core.ResourceStore.TryLoadResource(_lastDocID);
        if (resource != null)
        {
            // Update the pending bookkeeping under the lock: the doc is now
            // an addition and no longer a deletion candidate.
            _pendingLock.Enter();
            try
            {
                _pendingAddends.Add(_lastDocID);
                _pendingDeletions.Remove(_lastDocID);
            }
            finally
            {
                _pendingLock.Exit();
            }

            Core.ResourceAP.QueueJob(JobPriority.Immediate, _cJobName, new ResourceDelegate(SetIndexedProps), resource);
        }
    }
    catch (IOException ex)
    {
        // Fatal: an inconsistent on-disk index is worse than no index.
        Trace.WriteLineIf(!_suppTrace, "-- FullTextIndexer -- Fatal IO Exception occured while constructing text index.");
        DiscardTextIndex();
        throw new IOException("FullTextIndexer -- IO Exception in chunk construction", ex);
    }
}