//======================================================//
/// <summary>
/// Builds an on-disk hashtable index (Int32 slot type) for a line-oriented text data file.
/// </summary>
/// <remarks>
/// Index file layout produced by this method, in write order:
/// <list type="number">
/// <item><description>the serialized <c>HashtableIndexFileHeader</c>;</description></item>
/// <item><description><paramref name="hashTableSize"/> slots, initially all "empty";</description></item>
/// <item><description>the tag area: collision tags appended one after another.</description></item>
/// </list>
/// The first record that hashes to a slot is stored in the slot itself; every further record
/// with the same hash is stored as a tag, and the tags of one slot form a singly linked chain
/// (<c>FirstTagOffset</c> in the slot, <c>NextTagOffset</c> in each tag, 0 terminates the chain).
/// The header is written twice: once up front, and once more at the end after the record count
/// and the maximum record byte length have been set on it.
/// </remarks>
/// <param name="dataFileFullName">Full name of the data file to index (passed to the index header).</param>
/// <param name="dataFileEncoding">Encoding used to read the data file.</param>
/// <param name="hashTableSize">Number of slots in the disk hashtable.</param>
/// <param name="normlizeTextFunction">
/// Optional text normalizer applied to every non-empty line before hashing;
/// lines that are empty after normalization are skipped. May be <c>null</c>.
/// </param>
/// <exception cref="InvalidOperationException">
/// Thrown when the tag-area offset or a data-file position exceeds <c>MemorySlotInt32.MaxValue</c>.
/// </exception>
public static void BuildIndexInt32OnDisk(string dataFileFullName, Encoding dataFileEncoding, uint hashTableSize, NormlizeTextFunction normlizeTextFunction = null)
{
    //create header-of-disk-hashtable
    var indexHeader = new HashtableIndexFileHeader
    (
        dataFileFullName,
        dataFileEncoding,
        hashTableSize,
        HashtableIndexFileHeader.SlotType.Int32
    );

    //create index-file
    using (var indexFileStream = IndexFileHelper.CreateFileStreamReadWrite(indexHeader.GetIndexFileFullName()))
    using (var diskSlotInt32ReadBuffer = new ReadBuffer<DiskSlotInt32>())
    using (var diskTagInt32ReadBuffer = new ReadBuffer<DiskTagInt32>())
    {
        //Write header in index file
        indexFileStream.SeekFromBegin(0);
        indexHeader.SerializeIndexHeader(indexFileStream);

        //Tag area starts right after the header and the slot table.
        //The arithmetic is forced to 64 bit so that the guard below cannot be bypassed by a
        //wrap-around in case the operands are narrower integer types (no-op if already 64 bit).
        Int64 tagAreaOffset = (Int64)indexHeader.SizeOf + (Int64)indexHeader.HashtableSize * (Int64)DiskSlotInt32.SizeOf;

        //Guard from tagAreaOffset > MemorySlotInt32.MaxValue
        if (tagAreaOffset > MemorySlotInt32.MaxValue)
        {
            throw (new InvalidOperationException("tag area offset more then allow possible value => tag area offset: " + tagAreaOffset + ", possible value: " + MemorySlotInt32.MaxValue));
        }

        //fill disk-hashtable empty slot's
        byte[] emptyDiskSlotBytes = DiskSlotInt32.GetEmptyDiskSlot().StructureToByteArray();
        for (var i = 0U; i < hashTableSize; i++)
        {
            indexFileStream.WriteBytes(emptyDiskSlotBytes);
        }

        DiskTagInt32 diskTag = new DiskTagInt32();

        //open data-file
        using (var dataFileTextLineReader = new TextLineReader(indexHeader.DataFileFullName, indexHeader.DataFileEncoding))
        {
            #region [.build index on disk.]
            int dataRecordCount = 0;
            int dataRecordMaxLenght = 0;

            while (!dataFileTextLineReader.EndOfStream)
            {
                //Position is captured BEFORE the line is read, so it refers to the line about to be read
                Int64 _posInt64 = dataFileTextLineReader.StreamPosition;

                //Guard from file-pointer > MemorySlotInt32.MaxValue
                if (_posInt64 > MemorySlotInt32.MaxValue)
                {
                    throw (new InvalidOperationException("file-pointer more then allow possible value => file-pointer: " + _posInt64 + ", possible value: " + MemorySlotInt32.MaxValue));
                }
                Int32 position = (Int32)_posInt64;

                var text = dataFileTextLineReader.ReadLine4Indexing();
                if (text.IsEmptyOrNull())
                {
                    continue;
                }

                //Normlize text if allowed
                if (normlizeTextFunction != null)
                {
                    text = normlizeTextFunction(text);
                    if (text.IsEmptyOrNull())
                    {
                        continue;
                    }
                }

                uint hashCode = IndexFileHelper.HashFunction(text, indexHeader.HashtableSize);

                #region [.code.]
                DiskSlotInt32 diskSlot = indexFileStream.SeekAndReadDiskSlotInt32
                (
                    indexHeader.SizeOf + hashCode * DiskSlotInt32.SizeOf,
                    diskSlotInt32ReadBuffer
                );

                //Free slot => search text not found
                if (diskSlot.PositionInDataFile == Consts.FREE_INT32)
                {
                    //Calc data-slot properties
                    diskSlot.PositionInDataFile = position;
                    diskSlot.FirstTagOffset = 0;

                    //Write in index-file data-slot (step back over the slot that was just read)
                    indexFileStream.SeekFromCurrentAndWrite
                    (
                        -diskSlotInt32ReadBuffer.Size,
                        diskSlot.StructureToByteArray()
                    );
                }
                else
                {
                    if (diskSlot.FirstTagOffset == 0)
                    {
                        //First collision for this slot: the new tag becomes the head of the chain
                        diskSlot.FirstTagOffset = (Int32)tagAreaOffset;

                        //ReWrite in index-file data-slot
                        indexFileStream.SeekFromCurrentAndWrite
                        (
                            -diskSlotInt32ReadBuffer.Size,
                            diskSlot.StructureToByteArray()
                        );
                    }
                    else //if ( diskSlot.FirstTagOffset != 0 )
                    {
                        //Search in hashtable tag chain's: walk to the last tag (NextTagOffset == 0)
                        diskTag.NextTagOffset = diskSlot.FirstTagOffset;
                        while (0 < diskTag.NextTagOffset)
                        {
                            diskTag = indexFileStream.SeekAndReadDiskTagInt32(diskTag.NextTagOffset, diskTagInt32ReadBuffer);
                        }

                        //Link the last tag to the new one.
                        //Only NextTagOffset is changed: the tail tag must keep its own
                        //PositionInDataFile, otherwise the record it points to is lost from the index.
                        diskTag.NextTagOffset = (Int32)tagAreaOffset;

                        //ReWrite previous tag in chain (step back over the tag that was just read)
                        indexFileStream.SeekFromCurrentAndWrite
                        (
                            -diskTagInt32ReadBuffer.Size,
                            diskTag.StructureToByteArray()
                        );
                    }

                    //Calc disk-tag properties of the NEW tag (end of chain)
                    diskTag.PositionInDataFile = position;
                    diskTag.NextTagOffset = 0;

                    //Write in index-file
                    indexFileStream.SeekAndWrite
                    (
                        tagAreaOffset,
                        diskTag.StructureToByteArray()
                    );

                    //Calc disk-tag offset
                    tagAreaOffset += DiskTagInt32.SizeOf;

                    //Guard from tagAreaOffset > MemorySlotInt32.MaxValue
                    if (tagAreaOffset > MemorySlotInt32.MaxValue)
                    {
                        throw (new InvalidOperationException("tag area offset more then allow possible value => tag area offset: " + tagAreaOffset + ", possible value: " + MemorySlotInt32.MaxValue));
                    }
                }
                #endregion

                dataRecordCount++;
                if (dataRecordMaxLenght < text.Length)
                {
                    dataRecordMaxLenght = text.Length;
                }
            }

            indexHeader.SetDataRecordCount(dataRecordCount);
            //Longest indexed text (in chars) converted to the encoding's worst-case byte count
            indexHeader.SetDataRecordMaxBytesLenght(indexHeader.DataFileEncoding.GetMaxByteCount(dataRecordMaxLenght));
            #endregion
        }

        //one more time - Write header in index file (now with record count / max length filled in)
        indexFileStream.SeekFromBegin(0);
        indexHeader.SerializeIndexHeader(indexFileStream);
    }
}