/// <summary>
/// Checks whether <paramref name="searchText"/> is present in the data file by probing the hash table
/// stored in the index file and comparing against the text read back from the data file.
/// </summary>
/// <param name="searchText">Text to look for; it is passed through the normalize function first when one is set.</param>
/// <returns>
/// <c>true</c> when the record referenced by the slot, or by any tag in the slot's chain, equals the
/// (normalized) search text; otherwise <c>false</c>.
/// </returns>
private bool IsExistsDiskRoutine(string searchText)
{
    //Compare in normalized form (only when a normalize function is set)
    if (_NormlizeTextFunction != null)
    {
        searchText = _NormlizeTextFunction(searchText);
    }

    //The hash code selects the slot inside the hash table of the index file
    uint slotIndex = IndexFileHelper.HashFunction(searchText, this.IndexHeader.HashtableSize);

    //Slots follow the index header, DiskSlot.SizeOf bytes apart
    var slotOffset = this.IndexHeader.SizeOf + slotIndex * DiskSlot.SizeOf;
    DiskSlot slot = _IndexFileBinaryReader.SeekAndReadDiskSlot(slotOffset, _DiskSlotReadBuffer);

    //Free slot => search text not found
    if (slot.PositionInDataFile == Consts.FREE)
    {
        return false;
    }

    //First candidate: the data-file record the slot itself points to
    var candidate = _DataFileTextLineReader.SeekAndRead4Searching(slot.PositionInDataFile);
    if (_NormlizeTextFunction != null)
    {
        candidate = _NormlizeTextFunction(candidate);
    }
    if (candidate == searchText)
    {
        //Found on the first step
        return true;
    }

    //Remaining candidates: follow the slot's tag chain while the next offset is positive
    var tagOffset = slot.FirstTagOffset;
    while (tagOffset > 0)
    {
        DiskTag tag = _IndexFileBinaryReader.SeekAndReadDiskTag(tagOffset, _DiskTagReadBuffer);

        candidate = _DataFileTextLineReader.SeekAndRead4Searching(tag.PositionInDataFile);
        if (_NormlizeTextFunction != null)
        {
            candidate = _NormlizeTextFunction(candidate);
        }
        if (candidate == searchText)
        {
            //Found inside the tag chain
            return true;
        }

        tagOffset = tag.NextTagOffset;
    }

    //Chain exhausted => not found
    return false;
}
/// <summary>
/// Builds the hash-table index of a text data file in memory and then writes it to the index file
/// (Int64 file-pointer version).
/// </summary>
/// <remarks>
/// Works in three phases: (1) read the data file line by line and chain the start position of every
/// non-empty line under its hash code; (2) record the tag-chain depth of every slot in the header's
/// collision statistics (key -1 for an empty slot); (3) write the header, the slot table and the tag
/// area to the index file.
/// </remarks>
/// <param name="dataFileFullName">Data file name handed to the index header; the header's <c>DataFileFullName</c> is the file that is read.</param>
/// <param name="dataFileEncoding">Data file encoding handed to the index header; the header's <c>DataFileEncoding</c> is used for reading.</param>
/// <param name="hashTableSize">Hash-table size handed to the index header; the table itself is sized by the header's <c>HashtableSize</c>.</param>
/// <param name="normlizeTextFunction">Optional function applied to every line before hashing; pass <c>null</c> to index the lines as read.</param>
private static void BuildIndexInMemory(string dataFileFullName, Encoding dataFileEncoding, uint hashTableSize, NormlizeTextFunction normlizeTextFunction)
{
    var indexHeader = new HashtableIndexFileHeader
        (
        dataFileFullName,
        dataFileEncoding,
        hashTableSize
        );

    var hashTable = new MemorySlot[indexHeader.HashtableSize];

    using (var dataFileTextLineReader = new TextLineReader(indexHeader.DataFileFullName, indexHeader.DataFileEncoding))
    {
        #region [.1 calc index in memory.]
        int dataRecordCount = 0;
        int dataRecordMaxLenght = 0;

        while (!dataFileTextLineReader.EndOfStream)
        {
            //Take the position before reading, so it refers to the start of the line
            fpos_t position = dataFileTextLineReader.StreamPosition;

            var text = dataFileTextLineReader.ReadLine4Indexing();
            if (text.IsEmptyOrNull())
            {
                continue;
            }

            //Normalize text if allowed; lines that normalize to nothing are not indexed
            if (normlizeTextFunction != null)
            {
                text = normlizeTextFunction(text);
                if (text.IsEmptyOrNull())
                {
                    continue;
                }
            }

            uint hashCode = IndexFileHelper.HashFunction(text, indexHeader.HashtableSize);

            MemorySlot memorySlot = hashTable[hashCode];
            if (memorySlot == null)
            {
                //First record with this hash code lives in the slot itself
                hashTable[hashCode] = new MemorySlot(position);
            }
            else
            {
                //Collision: append a tag at the end of the slot's chain.
                //NOTE(review): the append walks the whole chain, so cost grows with chain length.
                MemoryTag newMemoryTag = new MemoryTag(position);
                if (memorySlot.FirstMemoryTag == null)
                {
                    memorySlot.FirstMemoryTag = newMemoryTag;
                }
                else
                {
                    MemoryTag lastMemoryTag = memorySlot.FirstMemoryTag;
                    while (lastMemoryTag.NextMemoryTag != null)
                    {
                        lastMemoryTag = lastMemoryTag.NextMemoryTag;
                    }
                    lastMemoryTag.NextMemoryTag = newMemoryTag;
                }
            }

            dataRecordCount++;

            //NOTE(review): the length is measured on the normalized text, not on the raw line - confirm
            //that this is what consumers of the header's max record length expect.
            if (dataRecordMaxLenght < text.Length)
            {
                dataRecordMaxLenght = text.Length;
            }
        }

        indexHeader.SetDataRecordCount(dataRecordCount);
        indexHeader.SetDataRecordMaxBytesLenght(indexHeader.DataFileEncoding.GetMaxByteCount(dataRecordMaxLenght));
        #endregion
    }

    #region [.2 calculate tag collision statistics.]
    foreach (var statisticaSlot in hashTable)
    {
        if (statisticaSlot == null)
        {
            //Empty slots of the hash table are counted under key -1
            indexHeader.TagCollisionStatistica.IncremetByKey(-1);
            continue;
        }

        //Depth = number of tags chained behind the slot (0 when there was no collision)
        int tagChainDepth = 0;
        for (MemoryTag chainTag = statisticaSlot.FirstMemoryTag; chainTag != null; chainTag = chainTag.NextMemoryTag)
        {
            tagChainDepth++;
        }
        indexHeader.TagCollisionStatistica.IncremetByKey(tagChainDepth);
    }
    #endregion

    using (var indexFileBinaryWriter = IndexFileHelper.CreateBinaryWriterRandomAccess(indexHeader.GetIndexFileFullName()))
    {
        #region [.3 write hash table on disk.]
        //Both structures are reused for every record; all fields set below are overwritten each time
        DiskSlot diskSlot = new DiskSlot();
        DiskTag diskTag = new DiskTag();
        byte[] emptyDiskSlotBytes = DiskSlot.GetEmptyDiskSlot().StructureToByteArray();

        //Write header in index file
        indexFileBinaryWriter.SeekFromBegin(0);
        indexHeader.SerializeIndexHeader(indexFileBinaryWriter);

        //The tag area starts right behind the last slot of the table
        fpos_t tagAreaOffset = indexHeader.SizeOf + indexHeader.HashtableSize * DiskSlot.SizeOf;

        uint hashTableItemIndex = 0;
        foreach (var memorySlot in hashTable)
        {
            //Slots are stored behind the header, DiskSlot.SizeOf bytes apart
            var slotOffset = indexHeader.SizeOf + hashTableItemIndex * DiskSlot.SizeOf;

            if (memorySlot == null)
            {
                //Empty hash-table slot
                indexFileBinaryWriter.SeekAndWrite
                    (
                    slotOffset,
                    emptyDiskSlotBytes
                    );
            }
            else
            {
                //Data slot: points to its record and, if present, to the first tag of its chain
                diskSlot.PositionInDataFile = memorySlot.PositionInDataFile;
                diskSlot.FirstTagOffset = (memorySlot.FirstMemoryTag != null) ? tagAreaOffset : 0;

                indexFileBinaryWriter.SeekAndWrite
                    (
                    slotOffset,
                    diskSlot.StructureToByteArray()
                    );

                //The chain is written as consecutive tags; offset 0 terminates it
                MemoryTag memoryTag = memorySlot.FirstMemoryTag;
                while (memoryTag != null)
                {
                    diskTag.PositionInDataFile = memoryTag.PositionInDataFile;
                    diskTag.NextTagOffset = (memoryTag.NextMemoryTag != null)
                                            ? tagAreaOffset + DiskTag.SizeOf
                                            : 0;

                    indexFileBinaryWriter.SeekAndWrite
                        (
                        tagAreaOffset,
                        diskTag.StructureToByteArray()
                        );

                    //Advance to the place of the next tag
                    tagAreaOffset += DiskTag.SizeOf;
                    memoryTag = memoryTag.NextMemoryTag;
                }
            }

            hashTableItemIndex++;
        }
        #endregion
    }

    //Free memory.
    //NOTE(review): a forced collection is normally discouraged; kept as in the original because the
    //table can be large - confirm it is still wanted.
    hashTable = null;
    GC.Collect();
}