Exemplo n.º 1
0
        //-private DiskTag diskTag = new DiskTag();
        /// <summary>
        /// Checks whether <paramref name="searchText"/> is present in the data file by probing the
        /// hashtable stored in the index file and, on a mismatch, walking the slot's tag chain.
        /// </summary>
        /// <param name="searchText">Text to look for; it is normalized first when a normalization function is set.</param>
        /// <returns><c>true</c> if a record equal to the (normalized) search text was found; otherwise <c>false</c>.</returns>
        private bool IsExistsDiskRoutine(string searchText)
        {
            // Normalization is optional: it is applied only when a function has been supplied.
            var normalize = _NormlizeTextFunction;
            if (normalize != null)
            {
                searchText = normalize(searchText);
            }

            // The hash of the search text is the index of its slot in the on-disk hashtable.
            uint slotIndex = IndexFileHelper.HashFunction(searchText, this.IndexHeader.HashtableSize);

            // Slots are stored right after the index header, one after another.
            var slotOffset = this.IndexHeader.SizeOf + slotIndex * DiskSlot.SizeOf;
            DiskSlot slot = _IndexFileBinaryReader.SeekAndReadDiskSlot(slotOffset, _DiskSlotReadBuffer);

            // A free slot means nothing was ever hashed here, so the text cannot exist.
            if (slot.PositionInDataFile == Consts.FREE)
            {
                return false;
            }

            // Step 1: compare against the record the slot itself points to.
            var candidate = _DataFileTextLineReader.SeekAndRead4Searching(slot.PositionInDataFile);
            if (normalize != null)
            {
                candidate = normalize(candidate);
            }
            if (candidate == searchText)
            {
                return true;
            }

            // Step 2: follow the collision chain; a non-positive next offset terminates it.
            var tag = new DiskTag()
            {
                NextTagOffset = slot.FirstTagOffset
            };
            while (tag.NextTagOffset > 0)
            {
                tag = _IndexFileBinaryReader.SeekAndReadDiskTag(tag.NextTagOffset, _DiskTagReadBuffer);

                candidate = _DataFileTextLineReader.SeekAndRead4Searching(tag.PositionInDataFile);
                if (normalize != null)
                {
                    candidate = normalize(candidate);
                }
                if (candidate == searchText)
                {
                    return true;
                }
            }

            // Slot record and every chained record were checked without a match.
            return false;
        }
        /// <summary>
        /// Builds the hashtable index of a text data file in memory and then writes it to the index file
        /// (Int64 file-pointer version).
        /// </summary>
        /// <remarks>
        /// Index file layout produced by this method: the serialized header, followed by one
        /// <c>DiskSlot</c> per hashtable entry, followed by the tag area holding the collision chains.
        /// </remarks>
        /// <param name="dataFileFullName">Full name of the data file to index.</param>
        /// <param name="dataFileEncoding">Encoding used to read the data file.</param>
        /// <param name="hashTableSize">Requested hashtable size; the size actually used is the one reported by the created index header.</param>
        /// <param name="normlizeTextFunction">Optional normalization applied to every line before hashing; may be <c>null</c>.</param>
        private static void BuildIndexInMemory(string dataFileFullName, Encoding dataFileEncoding, uint hashTableSize, NormlizeTextFunction normlizeTextFunction)
        {
            var indexHeader = new HashtableIndexFileHeader
                              (
                dataFileFullName,
                dataFileEncoding,
                hashTableSize
                              );

            // One (possibly null) slot per hashtable entry; null means "nothing hashed here".
            var hashTable = new MemorySlot[indexHeader.HashtableSize];

            using (var dataFileTextLineReader = new TextLineReader(indexHeader.DataFileFullName, indexHeader.DataFileEncoding))
            {
                #region [.Calculate index in memory.]
                int dataRecordCount     = 0;
                int dataRecordMaxLenght = 0;
                while (!dataFileTextLineReader.EndOfStream)
                {
                    // Remember where the line starts before reading it: this is what the index stores.
                    fpos_t position = dataFileTextLineReader.StreamPosition;

                    var text = dataFileTextLineReader.ReadLine4Indexing();
                    if (text.IsEmptyOrNull())
                    {
                        continue;
                    }

                    // Normalize text if allowed; lines that normalize to nothing are not indexed.
                    if (normlizeTextFunction != null)
                    {
                        text = normlizeTextFunction(text);

                        if (text.IsEmptyOrNull())
                        {
                            continue;
                        }
                    }

                    uint hashCode = IndexFileHelper.HashFunction(text, indexHeader.HashtableSize);

                    MemorySlot memorySlot = hashTable[hashCode];
                    if (memorySlot == null)
                    {
                        // First record for this hash code lives in the slot itself.
                        hashTable[hashCode] = new MemorySlot(position);
                    }
                    else
                    {
                        // Collision: append the record to the end of the slot's tag chain.
                        MemoryTag newMemoryTag = new MemoryTag(position);

                        if (memorySlot.FirstMemoryTag == null)
                        {
                            memorySlot.FirstMemoryTag = newMemoryTag;
                        }
                        else
                        {
                            MemoryTag lastMemoryTag = memorySlot.FirstMemoryTag;
                            while (lastMemoryTag.NextMemoryTag != null)
                            {
                                lastMemoryTag = lastMemoryTag.NextMemoryTag;
                            }
                            lastMemoryTag.NextMemoryTag = newMemoryTag;
                        }
                    }

                    dataRecordCount++;

                    // NOTE(review): the length measured here is that of the normalized text, not of the
                    // raw line in the data file - confirm this is what readers of the header expect.
                    if (dataRecordMaxLenght < text.Length)
                    {
                        dataRecordMaxLenght = text.Length;
                    }
                }

                indexHeader.SetDataRecordCount(dataRecordCount);
                indexHeader.SetDataRecordMaxBytesLenght(indexHeader.DataFileEncoding.GetMaxByteCount(dataRecordMaxLenght));
                #endregion
            }

            #region [.Calculate tag collision statistics.]
            // Done before the header is serialized below, while the statistics object is still being filled.
            foreach (var memorySlot in hashTable)
            {
                if (memorySlot == null)
                {
                    // Empty slots are counted under the key -1.
                    indexHeader.TagCollisionStatistica.IncremetByKey(-1);
                }
                else
                {
                    // Depth = number of tags chained to the slot (the slot's own record is not counted).
                    int currentTagChainDepth = 0;
                    for (MemoryTag memoryTag = memorySlot.FirstMemoryTag; memoryTag != null; memoryTag = memoryTag.NextMemoryTag)
                    {
                        currentTagChainDepth++;
                    }

                    indexHeader.TagCollisionStatistica.IncremetByKey(currentTagChainDepth);
                }
            }
            #endregion

            using (var indexFileBinaryWriter = IndexFileHelper.CreateBinaryWriterRandomAccess(indexHeader.GetIndexFileFullName()))
            {
                #region [.Write hash table on disk.]
                DiskSlot diskSlot           = new DiskSlot();
                DiskTag  diskTag            = new DiskTag();
                byte[]   emptyDiskSlotBytes = DiskSlot.GetEmptyDiskSlot().StructureToByteArray();

                // Write header at the very beginning of the index file.
                indexFileBinaryWriter.SeekFromBegin(0);
                indexHeader.SerializeIndexHeader(indexFileBinaryWriter);

                // The tag area starts right after the last slot; it grows as chains are written.
                fpos_t tagAreaOffset = indexHeader.SizeOf + indexHeader.HashtableSize * DiskSlot.SizeOf;

                uint hashTableItemIndex = 0;
                foreach (var memorySlot in hashTable)
                {
                    // Position of this slot in the index file (same formula for empty and used slots).
                    var slotOffset = indexHeader.SizeOf + hashTableItemIndex * DiskSlot.SizeOf;

                    if (memorySlot == null)
                    {
                        // Empty hash-table slot.
                        indexFileBinaryWriter.SeekAndWrite(slotOffset, emptyDiskSlotBytes);
                    }
                    else
                    {
                        // Used slot: FirstTagOffset is 0 when there is no collision chain,
                        // otherwise it points at the next free position in the tag area.
                        diskSlot.PositionInDataFile = memorySlot.PositionInDataFile;
                        diskSlot.FirstTagOffset     = (memorySlot.FirstMemoryTag != null) ? tagAreaOffset : 0;

                        indexFileBinaryWriter.SeekAndWrite(slotOffset, diskSlot.StructureToByteArray());

                        // Tags of one chain are written back to back, so the "next" tag is always
                        // the one immediately following the current tag; 0 terminates the chain.
                        MemoryTag memoryTag = memorySlot.FirstMemoryTag;
                        while (memoryTag != null)
                        {
                            diskTag.PositionInDataFile = memoryTag.PositionInDataFile;
                            diskTag.NextTagOffset      = (memoryTag.NextMemoryTag != null) ? tagAreaOffset + DiskTag.SizeOf : 0;

                            indexFileBinaryWriter.SeekAndWrite
                            (
                                tagAreaOffset,
                                diskTag.StructureToByteArray()
                            );

                            tagAreaOffset += DiskTag.SizeOf;
                            memoryTag = memoryTag.NextMemoryTag;
                        }
                    }

                    hashTableItemIndex++;
                }
                #endregion
            }

            // Free memory: drop the reference to the in-memory table and force a collection.
            // Kept from the original implementation as a deliberate choice of the author.
            hashTable = null;
            GC.Collect();
        }