Esempio n. 1
0
 /// <summary>
 ///  Initializes the index header file. If the header handle is not yet valid,
 ///  a new auxiliary file is allocated and the index signature is written into
 ///  it; otherwise the existing header is read and its stored version is
 ///  checked against the currently implemented <c>Version</c>.
 /// </summary>
 /// <exception cref="InvalidOperationException">
 ///  The BlobFileSystem allocated a handle different from the expected
 ///  <c>HandleOfHeaderFile</c>.
 /// </exception>
 /// <exception cref="FormatException">
 ///  The persisted index version does not match the implemented one; the index
 ///  must be reconstructed.
 /// </exception>
 public virtual void Load()
 {
     if (!_indexFile.IsValidHandle(HandleOfHeaderFile))
     {
         int handle;
         using (BinaryWriter writer = _indexFile.AllocFile(out handle))
         {
             if (HandleOfHeaderFile != handle)
             {
                 // Specific exception type instead of the bare Exception the
                 // original threw; message wording fixed ("unexpectable").
                 throw new InvalidOperationException("BlobFileSystem allocated an unexpected handle for the auxiliary file.");
             }
             IndexConstructor.WriteSignature(writer);
         }
     }
     else
     {
         using (BinaryReader header = _indexFile.GetFileReader(HandleOfHeaderFile))
         {
             header.ReadInt64(); // skip the stored date stamp
             int version = header.ReadInt32();
             if (version != Version)
             {
                 // Original message had an unbalanced parenthesis; fixed here.
                 throw new FormatException("Version of current index is not consistent with currently implemented (" +
                                           version + ":" + Version + "). Force index reconstruction");
             }
         }
     }
     isErrorFlagRaised = false;
 }
Esempio n. 2
0
        //-------------------------------------------------------------------------
        //  Parses a plain sequence of bytes into entries and their instances.
        //  Comment: Some entries may be marked as "removed", which means that the
        //           corresponding documents no longer exist. Thus the field
        //           "DocsNumber" counts *ALL* entries - valid and removed - since
        //           we do not have the ability to physically strip the sequence of
        //           bytes. Non-existing documents are marked with "-1" as DocID.
        //           Thus we have to allocate actual space only AFTER the number of
        //           entries is known.
        //-------------------------------------------------------------------------

        /// <summary>
        ///  Reads one entry from the binary stream: DocIndex, TfIdf, the instance
        ///  count (stored as count-1), then the instance offsets. Valid entries are
        ///  appended to <c>listTemporaryStorage</c>; entries whose DocIndex is -1
        ///  are "removed" documents and are discarded.
        /// </summary>
        /// <param name="reader">Positioned binary stream to read the entry from.</param>
        /// <exception cref="FormatException">
        ///  The instance count is negative, or allocating the instance array
        ///  exhausted memory — both treated as index corruption.
        /// </exception>
        protected static void ParseEntry(BinaryReader reader)
        {
            Entry entry = new Entry();

            // Field order on disk: doc index, tf-idf weight, (count - 1).
            entry.DocIndex = IndexConstructor.ReadCount(reader);
            entry.TfIdf    = reader.ReadSingle();
            int instancesNumber = IndexConstructor.ReadCount(reader) + 1;

            if (instancesNumber < 0)
            {
                throw new FormatException("TermIndexRecord -- Illegal number of instances for a TermIndex record (" + instancesNumber + ") - possible index corruption");
            }

            // NB: Discuss an OpenAPI issue for getting the current maximal value of
            //     document Id from the ResourceStore.
            //            if( entry.DocIndex >= 10000000 )
            //                throw( new IndexConstructor.TextIndexCorruption( "[DocIndex=" + entry.DocIndex + "] value in [TermIndex record Entry] is greater than a reasonable number of documents - possible index corruption" ));

            //-----------------------------------------------------------------
            try
            {
                if (entry.DocIndex == -1)
                {
                    // "Removed" entry — skip it; nothing is added to the list.
                    entry = null;
                }
                else
                {
                    InstanceOffset[] offsets = new InstanceOffset[instancesNumber];
                    for (int i = 0; i < instancesNumber; ++i)
                    {
                        offsets[i].Offset       = reader.ReadUInt32();
                        offsets[i].CompoundInfo = reader.ReadUInt32();
                    }
                    entry.Offsets = offsets;
                    listTemporaryStorage.Add(entry);
                }
            }
            catch (OutOfMemoryException)
            {
                // A corrupt count can be huge; surface it as a format problem.
                throw new FormatException("TermIndexRecord - illegal number of term instances: [" + instancesNumber + "]");
            }
        }
Esempio n. 3
0
        //-------------------------------------------------------------------------
        //  Assumes that the caller has already set the necessary offset in the
        //  binary stream.
        //-------------------------------------------------------------------------
        /// <summary>
        ///  Serializes this record: the HC header value followed by every entry
        ///  (DocIndex, TfIdf, count-1, then each instance's offset/compound pair).
        /// </summary>
        /// <param name="writer">Destination stream, already positioned by the caller.</param>
        public void Save(BinaryWriter writer)
        {
            Debug.Assert(DocsNumber > 0);

            IndexConstructor.WriteCount(writer, HC);
            //---------------------------------------------------------------------
            for (int index = 0; index < DocsNumber; ++index)
            {
                Entry entry = GetEntryAt(index);

                IndexConstructor.WriteCount(writer, entry.DocIndex);
                writer.Write(entry.TfIdf);
                // The on-disk format stores (count - 1); ParseEntry adds it back.
                IndexConstructor.WriteCount(writer, entry.Count - 1);

                foreach (InstanceOffset instance in entry.Offsets)
                {
                    writer.Write(instance.Offset);
                    writer.Write(instance.CompoundInfo);
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        ///  Builds a record by reading the HC header and then parsing entries until
        ///  the stream ends. End-of-stream is the intentional loop terminator: the
        ///  entry count is not stored, so EndOfStreamException signals completion.
        /// </summary>
        /// <param name="reader">Stream positioned at the start of the record.</param>
        public TermIndexRecord(BinaryReader reader)
        {
            try
            {
                listTemporaryStorage.Clear();
                HC = IndexConstructor.ReadCount(reader);
                for (;;)
                {
                    ParseEntry(reader);
//                _chainsCount++;
                }
            }
            catch (EndOfStreamException)
            {
                // Normal termination — materialize the parsed entries, if any.
                int parsed = listTemporaryStorage.Count;
                if (parsed > 0)
                {
                    aEntries = new Entry[parsed];
                    listTemporaryStorage.CopyTo(aEntries);
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        ///  Appends one (docId, term) entry to the term's record in the blob file.
        ///  A new record file is allocated for a term seen for the first time
        ///  (recordHandle &lt;= 0); otherwise the entry is appended to the existing
        ///  record, updating the cached last-cluster handle when it moves.
        /// </summary>
        /// <param name="docId">Document the term occurs in.</param>
        /// <param name="termId">Term identifier; also the return value.</param>
        /// <param name="instances">Opaque per-occurrence data forwarded to WriteEntry.</param>
        /// <param name="maxTermInDoc">Forwarded to WriteEntry (frequency normalization input).</param>
        /// <returns>The <paramref name="termId"/> passed in.</returns>
        /// <exception cref="ApplicationException">The Accessor has not been initialized.</exception>
        public int AddRecord(int docId, int termId, object instances, int maxTermInDoc)
        {
            #region Preconditions
            if (_indexFile == null)
            {
                // Typo fixed: "Aplication" -> "Application".
                throw new ApplicationException("Application has not initialized Accessor yet");
            }
            #endregion Preconditions

            // NOTE(review): recordKey is a field declared outside this excerpt —
            // presumably derived from termId; verify against the class definition.
            int          recordHandle = GetRecordHandle(termId);
            BinaryWriter writer;
            if (recordHandle <= 0)
            {
                // First entry for this term: allocate its record file and map it.
                writer = _indexFile.AllocFile(out recordHandle);
                TermId2RecordHandle.InsertKey(recordKey, recordHandle);
                IndexConstructor.WriteCount(writer, termId);
            }
            else
            {
                // Existing term: the last cluster is cached under -termId; refresh
                // the mapping if appending moved the record to a new cluster.
                int lastClusterHandle = GetRecordHandle(-termId);
                int saved             = lastClusterHandle;
                writer = _indexFile.AppendFile(recordHandle, ref lastClusterHandle);
                if (saved != lastClusterHandle)
                {
                    if (saved > 0)
                    {
                        TermId2RecordHandle.DeleteKey(recordKey, saved);
                    }
                    TermId2RecordHandle.InsertKey(recordKey, lastClusterHandle);
                }
            }
            ++_savedRecords;

            using ( writer )
            {
                IndexConstructor.WriteEntry(writer, docId, termId, instances, maxTermInDoc);
            }
            return termId;
        }
Esempio n. 6
0
        /// <summary>
        ///  Flushes the tokens accumulated for the current document into the term
        ///  index, records the document as a pending addend (removing any pending
        ///  deletion), and queues a job to mark the resource as indexed. A fatal
        ///  IOException during flushing discards the whole text index and is
        ///  rethrown wrapped with context.
        /// </summary>
        /// <exception cref="IOException">
        ///  Rethrown (wrapped) when chunk construction fails with an I/O error;
        ///  the text index has been discarded by then.
        /// </exception>
        private void ManageIndexChunk()
        {
            // Nothing buffered for this document — nothing to flush.
            if (_tokens.Count == 0)
            {
                return;
            }
            try
            {
                IndexConstructor.FlushDocument(TermIndexAccessor, _lastDocID, _termMaxFrequency, _tokens);

                IResource doc = Core.ResourceStore.TryLoadResource(_lastDocID);
                if (doc != null)
                {
                    #region Pending Data Update
                    // The document is now indexed: it becomes a pending addend and
                    // can no longer be a pending deletion.
                    _pendingLock.Enter();
                    try
                    {
                        _pendingAddends.Add(_lastDocID);
                        _pendingDeletions.Remove(_lastDocID);
                    }
                    finally
                    {
                        _pendingLock.Exit();
                    }
                    #endregion Pending Data Update

                    Core.ResourceAP.QueueJob(JobPriority.Immediate, _cJobName, new ResourceDelegate(SetIndexedProps), doc);
                }
            }
            catch (IOException e)
            {
                // Typo fixed in the trace message: "occured" -> "occurred".
                Trace.WriteLineIf(!_suppTrace, "-- FullTextIndexer -- Fatal IO Exception occurred while constructing text index.");
                DiscardTextIndex();

                // Wrap with context; the original exception is preserved as inner.
                throw new IOException("FullTextIndexer -- IO Exception in chunk construction", e);
            }
        }