Beispiel #1
0
        //-------------------------------------------------------------------------
        //  Parses a plain sequence of bytes into entries and their instances.
        //  Comment: Some entries may be marked as "removed", which means that
        //           the corresponding documents no longer exist. Thus the field
        //           "DocsNumber" counts *ALL* entries - valid and removed - since
        //           we cannot physically strip bytes out of the sequence.
        //           Non-existent documents are marked with "-1" as DocID.
        //           Therefore the actual space can be allocated only AFTER the
        //           number of entries is known.
        //-------------------------------------------------------------------------

        /// <summary>
        /// Reads one entry from <paramref name="reader"/> and, unless the entry is flagged
        /// as removed (<c>DocIndex == -1</c>), appends it to <c>listTemporaryStorage</c>.
        /// </summary>
        /// <remarks>
        /// Values are consumed in this order: the document index, a single-precision value
        /// stored into <c>TfIdf</c>, a count to which one is added to obtain the number of
        /// instances, and then - for non-removed entries only - one pair of unsigned 32-bit
        /// values (offset, compound info) per instance.
        /// </remarks>
        /// <param name="reader">Reader positioned at the first byte of an entry.</param>
        /// <exception cref="FormatException">
        /// The computed number of instances is negative, or an
        /// <see cref="OutOfMemoryException"/> was raised while the instances were being
        /// allocated, read or stored.
        /// </exception>
        protected static void ParseEntry(BinaryReader reader)
        {
            Entry entry = new Entry();
            entry.DocIndex = IndexConstructor.ReadCount(reader);
            entry.TfIdf = reader.ReadSingle();

            // The instance count is taken as the value read from the stream plus one.
            int instanceCount = IndexConstructor.ReadCount(reader) + 1;
            if (instanceCount < 0)
            {
                throw new FormatException("TermIndexRecord -- Illegal number of instances for a TermIndex record (" + instanceCount + ") - possible index corruption");
            }

            // NB: Discuss an OpenAPI issue for getting the current maximal value of a
            //     document Id from the ResourceStore, so that an implausibly large
            //     DocIndex could be rejected here as index corruption as well.

            // A removed entry is dropped and takes no part in subsequent processing.
            // NOTE(review): no offset data is consumed for a removed entry - presumably
            // the stream carries none in that case; confirm against the index writer.
            if (entry.DocIndex == -1)
            {
                return;
            }

            try
            {
                InstanceOffset[] instanceOffsets = new InstanceOffset[instanceCount];

                int slot = 0;
                while (slot < instanceCount)
                {
                    instanceOffsets[slot].Offset = reader.ReadUInt32();
                    instanceOffsets[slot].CompoundInfo = reader.ReadUInt32();
                    slot++;
                }

                entry.Offsets = instanceOffsets;
                listTemporaryStorage.Add(entry);
            }
            catch (OutOfMemoryException)
            {
                // An allocation failure here is reported as a malformed record.
                throw new FormatException("TermIndexRecord - illegal number of term instances: [" + instanceCount + "]");
            }
        }
Beispiel #2
0
        /// <summary>
        /// Builds a record by reading a leading count into <c>HC</c> and then parsing
        /// entries from <paramref name="reader"/> until the stream is exhausted.
        /// </summary>
        /// <remarks>
        /// The end of the data is detected solely through
        /// <see cref="EndOfStreamException"/>; a stream that ends in the middle of an entry
        /// is therefore treated the same way as a clean end, and only the entries that were
        /// completely parsed before that point are kept. When no entry was collected,
        /// <c>aEntries</c> is left untouched.
        /// </remarks>
        /// <param name="reader">Reader positioned at the start of the record data.</param>
        public TermIndexRecord(BinaryReader reader)
        {
            try
            {
                listTemporaryStorage.Clear();
                HC = IndexConstructor.ReadCount(reader);

                // This loop has no exit condition of its own: it runs until a read
                // past the end of the stream throws.
                for (;;)
                {
                    ParseEntry(reader);
                }
            }
            catch (EndOfStreamException)
            {
                // Expected way out of the loop above - all available data was consumed.
            }

            if (listTemporaryStorage.Count <= 0)
            {
                return;
            }

            // The final array can be sized only now that the number of surviving
            // entries is known.
            aEntries = new Entry[listTemporaryStorage.Count];
            listTemporaryStorage.CopyTo(aEntries);
        }