//-------------------------------------------------------------------------
//  Decodes a single entry (document reference plus its term instances)
//  from the raw byte sequence and, if the entry is still valid, appends it
//  to "listTemporaryStorage".
//
//  Layout consumed from the reader, in order:
//    - document index   (IndexConstructor.ReadCount)
//    - TfIdf weight     (ReadSingle)
//    - instance count   (IndexConstructor.ReadCount; stored value + 1)
//    - for valid entries only: [instance count] pairs of UInt32 values
//      (Offset, CompoundInfo)
//
//  Comment: some entries may be marked as "removed", which means that the
//           corresponding documents no longer exist. Removed entries cannot
//           be physically stripped from the byte sequence, so they stay in
//           the stream and are marked with "-1" as their document index.
//           Because of that the final array of entries can be allocated
//           only AFTER all entries have been parsed and the number of valid
//           ones is known.
//
//  Throws FormatException when the instance count is negative, or when the
//  array of instances can not be allocated (OutOfMemoryException).
//-------------------------------------------------------------------------
protected static void ParseEntry(BinaryReader reader)
{
    Entry entry = new Entry();
    entry.DocIndex = IndexConstructor.ReadCount(reader);
    entry.TfIdf = reader.ReadSingle();

    // The stored value is one less than the real number of instances.
    int instanceCount = IndexConstructor.ReadCount(reader) + 1;
    if (instanceCount < 0)
    {
        throw new FormatException("TermIndexRecord -- Illegal number of instances for a TermIndex record (" + instanceCount + ") - possible index corruption");
    }

    //  NB: Discuss an OpenAPI issue for getting the current maximal value of
    //      a document Id from the ResourceStore. Until then the upper-bound
    //      sanity check on DocIndex (rejecting values >= 10000000 as index
    //      corruption) stays disabled.
    //-----------------------------------------------------------------
    try
    {
        if (entry.DocIndex == -1)
        {
            // This entry has been "removed" - it is not stored and takes no
            // part in subsequent processing.
            // NOTE(review): no instance pairs are consumed from the reader in
            // this case - confirm this matches how removed entries are
            // laid out in the stream.
            return;
        }

        InstanceOffset[] instances = new InstanceOffset[instanceCount];
        int slot = 0;
        while (slot < instanceCount)
        {
            instances[slot].Offset = reader.ReadUInt32();
            instances[slot].CompoundInfo = reader.ReadUInt32();
            slot++;
        }

        entry.Offsets = instances;
        listTemporaryStorage.Add(entry);
    }
    catch (OutOfMemoryException)
    {
        // An absurdly large instance count is reported as a format error.
        throw new FormatException("TermIndexRecord - illegal number of term instances: [" + instanceCount + "]");
    }
}
//-------------------------------------------------------------------------
//  Builds a record from the reader: first the "HC" value is read, then
//  entries are parsed one after another until the reader runs out of data.
//  Reaching the end of the stream (EndOfStreamException) is the normal way
//  the parsing loop terminates.
//
//  Parsed entries are accumulated in "listTemporaryStorage" and copied into
//  "aEntries" once their number is known; "aEntries" is not assigned when
//  no valid entry has been read.
//-------------------------------------------------------------------------
public TermIndexRecord(BinaryReader reader)
{
    try
    {
        listTemporaryStorage.Clear();
        HC = IndexConstructor.ReadCount(reader);

        // Intentionally endless: only an exception leaves this loop.
        for (;;)
        {
            ParseEntry(reader);
        }
    }
    catch (EndOfStreamException)
    {
        int parsedCount = listTemporaryStorage.Count;
        if (parsedCount > 0)
        {
            aEntries = new Entry[parsedCount];
            listTemporaryStorage.CopyTo(aEntries);
        }
    }
}