/// <summary>
/// Constructs a reader from a null source name and null text, then checks
/// that it reports no source name and is already at end of input.
/// </summary>
public void NullSourceNameAndText()
{
    var lineReader = new TextLineReader(null, null);

    Assert.IsNull(lineReader.SourceName);
    AssertReaderAtEnd(lineReader);
}
/// <summary>
/// Counts the non-empty lines (records) in a data file.
/// </summary>
/// <param name="dataFileFullName">Full name of the data file to scan.</param>
/// <param name="dataFileEncoding">Encoding used to read the data file.</param>
/// <returns>The number of lines for which the reader returned a non-empty string.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="dataFileFullName"/> is null or empty, or
/// <paramref name="dataFileEncoding"/> is null.
/// </exception>
public static uint CalulateAllRecordCountInDataFile(string dataFileFullName, Encoding dataFileEncoding)
{
    if (dataFileFullName.IsEmptyOrNull())
    {
        // The reported name must match the actual parameter name exactly.
        throw (new ArgumentNullException("dataFileFullName"));
    }
    if (dataFileEncoding == null)
    {
        throw (new ArgumentNullException("dataFileEncoding"));
    }

    uint dataRecordCount = 0;
    using (var dataFileTextLineReader = new TextLineReader(dataFileFullName, dataFileEncoding))
    {
        while (!dataFileTextLineReader.EndOfStream)
        {
            // ReadLine4Searching() is used for speed: the exact file-pointer
            // position is not needed when only counting records.
            if (!dataFileTextLineReader.ReadLine4Searching().IsEmptyOrNull())
            {
                dataRecordCount++;
            }
        }
    }
    return(dataRecordCount);
}
/// <summary>
/// Constructs a reader from a source name and null text, then checks that
/// the name is preserved and the reader is already at end of input.
/// </summary>
public void SourceNameAndNullText()
{
    const string expectedName = "text input";

    var lineReader = new TextLineReader(expectedName, null);

    Assert.AreEqual(expectedName, lineReader.SourceName);
    AssertReaderAtEnd(lineReader);
}
//---------------------------------------------------------------------

/// <summary>
/// Builds a reader over the given lines, configured to skip blank lines
/// and to skip comment lines marked with ">".
/// </summary>
private TextLineReader MakeTableReader(params string[] lines)
{
    TextLineReader tableReader = MakeReader(lines);

    tableReader.SkipBlankLines = true;
    tableReader.CommentLineMarker = ">";
    tableReader.SkipCommentLines = true;

    return tableReader;
}
/// <summary>
/// Parses input that supplies only the integer and string variables and
/// checks that the float value is left at 0.
/// </summary>
public void IntStr()
{
    reader = MakeReader(
        "IntVariable -78",
        "StringVariable /usr/local/bin");

    ParseResult parsed = parser.Parse(reader);

    Assert.IsNotNull(parsed);
    Assert.AreEqual(-78, parsed.IntValue);
    Assert.AreEqual(0, parsed.FloatValue);
    Assert.AreEqual("/usr/local/bin", parsed.StringValue);
}
/// <summary>
/// Constructs a reader from a single string and checks that it yields that
/// string as line 1, has no source name, and then reaches end of input.
/// </summary>
public void String()
{
    string onlyLine = "Four score and seven years ago";

    TextLineReader lineReader = new TextLineReader(onlyLine);

    Assert.IsNull(lineReader.SourceName);
    Assert.AreEqual(onlyLine, lineReader.ReadLine());
    Assert.AreEqual(1, lineReader.LineNumber);
    AssertReaderAtEnd(lineReader);
}
/// <summary>
/// Opens a data file together with its hashtable index file for searching.
/// </summary>
/// <param name="dataFileFullName">Full name of the indexed data file.</param>
/// <param name="normlizeTextFunction">
/// Optional text normalization function; stored for later use. May be null.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="dataFileFullName"/> is null or empty.
/// </exception>
/// <exception cref="DiskSearchEngineException">
/// The index file header has an invalid sign, or the data file's size or
/// modification time does not match what the index header recorded.
/// </exception>
/// <exception cref="ArgumentException">
/// The index header declares a slot type other than Int64 or Int32.
/// </exception>
public HashtableSearcher(string dataFileFullName, NormlizeTextFunction normlizeTextFunction = null)
{
    if (dataFileFullName.IsEmptyOrNull())
    {
        throw (new ArgumentNullException("dataFileFullName"));
    }

    var indexFileFullName = IndexFileHelper.GetIndexFileFullName(dataFileFullName);

    this.IndexHeader = HashtableIndexer.GetIndexHeaderByIndexFile(indexFileFullName);
    if (!this.IndexHeader.IsSignValid())
    {
        throw (new DiskSearchEngineException("Invalid sign of index file: '" + indexFileFullName + "'."));
    }
    if (!this.IndexHeader.IsCorrectSizeAndModifyTime(dataFileFullName))
    {
        throw (new DiskSearchEngineException("Data file was modify after last indexing."));
    }

    try
    {
        // The reader buffer is sized from the longest indexed record plus
        // the byte count of 10 extra characters in the data file's encoding.
        this._DataFileTextLineReader = new TextLineReader
                                       (
            dataFileFullName,
            this.IndexHeader.DataFileEncoding,
            this.IndexHeader.DataRecordMaxBytesLenght + this.IndexHeader.DataFileEncoding.GetMaxByteCount(10)
                                       );
        this._IndexFileBinaryReader = IndexFileHelper.CreateBinaryReaderRandomAccess(indexFileFullName);

        this._DiskSlotReadBuffer = new ReadBuffer <DiskSlot>();
        this._DiskTagReadBuffer = new ReadBuffer <DiskTag>();

        this._DiskSlotInt32ReadBuffer = new ReadBuffer <DiskSlotInt32>();
        this._DiskTagInt32ReadBuffer = new ReadBuffer <DiskTagInt32>();

        this._NormlizeTextFunction = normlizeTextFunction;

        // Select the lookup routine matching the slot width stored in the index.
        switch (this.IndexHeader.HashtableSlotType)
        {
            case HashtableIndexFileHeader.SlotType.Int64:
                this._IsExistsFunction = IsExistsDiskRoutine;
                break;

            case HashtableIndexFileHeader.SlotType.Int32:
                this._IsExistsFunction = IsExistsDiskRoutineIn32;
                break;

            default:
                throw (new ArgumentException("this.IndexHeader.HashtableSlotType"));
        }
    }
    catch
    {
        // A constructor that throws never hands the instance to the caller,
        // so nobody could dispose what was opened so far. Release it here
        // to avoid leaking file handles and buffers, then rethrow unchanged.
        if (this._DataFileTextLineReader != null)
        {
            this._DataFileTextLineReader.Dispose();
            this._DataFileTextLineReader = null;
        }
        if (this._IndexFileBinaryReader != null)
        {
            this._IndexFileBinaryReader.BaseStream.Dispose();
            this._IndexFileBinaryReader = null;
        }
        if (this._DiskSlotReadBuffer != null)
        {
            this._DiskSlotReadBuffer.Dispose();
            this._DiskSlotReadBuffer = null;
        }
        if (this._DiskTagReadBuffer != null)
        {
            this._DiskTagReadBuffer.Dispose();
            this._DiskTagReadBuffer = null;
        }
        if (this._DiskSlotInt32ReadBuffer != null)
        {
            this._DiskSlotInt32ReadBuffer.Dispose();
            this._DiskSlotInt32ReadBuffer = null;
        }
        if (this._DiskTagInt32ReadBuffer != null)
        {
            this._DiskTagInt32ReadBuffer.Dispose();
            this._DiskTagInt32ReadBuffer = null;
        }
        throw;
    }
}
/// <summary>
/// Parses input that supplies all three variables followed by blank and
/// whitespace-only lines, and checks the parsed values.
/// </summary>
public void IntFloatStr_BlankLines()
{
    reader = MakeReader(
        "IntVariable -78",
        "FloatVariable .099",
        "StringVariable /usr/local/bin",
        "",
        " \t \n",
        "\n");

    ParseResult parsed = parser.Parse(reader);

    Assert.IsNotNull(parsed);
    Assert.AreEqual(-78, parsed.IntValue);
    Assert.AreEqual(.099, parsed.FloatValue, .00001);
    Assert.AreEqual("/usr/local/bin", parsed.StringValue);
}
/// <summary>
/// Constructs a reader from the shared string array and checks that each
/// element comes back in order with an increasing 1-based line number.
/// </summary>
public void StringArray()
{
    var lineReader = new TextLineReader(array);
    Assert.IsNull(lineReader.SourceName);

    int linesRead = 0;
    foreach (string expectedLine in array)
    {
        Assert.AreEqual(expectedLine, lineReader.ReadLine());

        linesRead++;
        Assert.AreEqual(linesRead, lineReader.LineNumber);
    }

    AssertReaderAtEnd(lineReader);
}
//---------------------------------------------------------------------

/// <summary>
/// Runs the table parser over the given lines. Any exception is logged
/// and rethrown; if it is a <c>LineReaderException</c>, its line number
/// is first checked against <paramref name="expectedLineNum"/>.
/// </summary>
private void TryParseTable(int expectedLineNum, params string[] lines)
{
    reader = MakeTableReader(lines);

    try
    {
        // Only the thrown exception matters here; the parse result is ignored.
        tableParser.Parse(reader);
    }
    catch (System.Exception caught)
    {
        Data.Output.WriteLine(caught.Message);

        LineReaderException lineReaderError = caught as LineReaderException;
        if (lineReaderError != null)
        {
            Assert.AreEqual(expectedLineNum, lineReaderError.LineNumber);
        }

        throw;
    }
}
/// <summary>
/// Constructs a reader from a <c>MultiLineText</c> built over the shared
/// array and checks that each line comes back in order with an increasing
/// 1-based line number.
/// </summary>
public void MultiLineText()
{
    MultiLineText sourceText = new MultiLineText(array);
    TextLineReader lineReader = new TextLineReader(sourceText);

    Assert.IsNull(lineReader.SourceName);

    int linesRead = 0;
    foreach (string expectedLine in sourceText)
    {
        Assert.AreEqual(expectedLine, lineReader.ReadLine());

        linesRead++;
        Assert.AreEqual(linesRead, lineReader.LineNumber);
    }

    AssertReaderAtEnd(lineReader);
}
/// <summary>
/// Reads a text file and converts every correctly formatted line into a
/// tracking record tagged with the file it came from.
/// </summary>
/// <param name="fileNameWithPath">Path of the file to read.</param>
/// <returns>The records parsed from the file; lines with an incorrect format are skipped.</returns>
public List <ITrackingRecordEntity> CreateFromFile(string fileNameWithPath)
{
    var records = new List <ITrackingRecordEntity>();

    TextLineReader.SetText(FileManager.ReadAllText(fileNameWithPath));

    foreach (var line in TextLineReader.GetLines())
    {
        if (!TextFileTrackingRecordFormatManager.IsFormatCorrect(line))
        {
            continue;
        }

        var record = TextFileTrackingRecordFormatManager.GetTrackingRecordFromText(line);
        record.FileNameWithPath = fileNameWithPath;
        records.Add(record);
    }

    return records;
}
/// <summary>
/// Parses a table with a name line, a blank line, a ">" header line and
/// three data rows, then checks each parsed row against the expected values.
/// </summary>
public void Table_3Rows()
{
    ParseResult[] expectedRows = new ParseResult[]
    {
        new ParseResult(1, -123.45f, "Maine"),
        new ParseResult(2, 0.00098f, "Hawaii"),
        new ParseResult(3, 7e+8f, "Florida")
    };

    reader = MakeTableReader(
        "TableName",
        "",
        "> StrVar FloatVar IntVar",
        expectedRows[0].ToString(),
        expectedRows[1].ToString(),
        expectedRows[2].ToString());

    ParseResult[] actualRows = tableParser.Parse(reader);

    Assert.AreEqual(expectedRows.Length, actualRows.Length);
    for (int row = 0; row < expectedRows.Length; row++)
    {
        Assert.AreEqual(expectedRows[row].IntValue, actualRows[row].IntValue);
        Assert.AreEqual(expectedRows[row].FloatValue, actualRows[row].FloatValue);
        Assert.AreEqual(expectedRows[row].StringValue, actualRows[row].StringValue);
    }
}
/// <summary>
/// Copies the shared array into a list, constructs a reader from that list
/// and checks that each element comes back in order with an increasing
/// 1-based line number.
/// </summary>
public void StringList()
{
    List <string> sourceLines = new List <string>(array.Length);
    foreach (string item in array)
    {
        sourceLines.Add(item);
    }

    TextLineReader lineReader = new TextLineReader(sourceLines);
    Assert.IsNull(lineReader.SourceName);

    int linesRead = 0;
    foreach (string expectedLine in sourceLines)
    {
        Assert.AreEqual(expectedLine, lineReader.ReadLine());

        linesRead++;
        Assert.AreEqual(linesRead, lineReader.LineNumber);
    }

    AssertReaderAtEnd(lineReader);
}
/// <summary>
/// Releases the data file reader, the index file stream and the four read
/// buffers. Each field is set to null after it is disposed, so calling
/// this method again skips anything already released.
/// </summary>
public void Dispose()
{
    // Data file reader.
    var dataReader = _DataFileTextLineReader;
    if (dataReader != null)
    {
        dataReader.Dispose();
        _DataFileTextLineReader = null;
    }

    // Index file: the underlying stream is what gets disposed.
    var indexReader = _IndexFileBinaryReader;
    if (indexReader != null)
    {
        indexReader.BaseStream.Dispose();
        _IndexFileBinaryReader = null;
    }

    // Read buffers for the Int64 slot/tag layout.
    var slotBuffer = _DiskSlotReadBuffer;
    if (slotBuffer != null)
    {
        slotBuffer.Dispose();
        _DiskSlotReadBuffer = null;
    }

    var tagBuffer = _DiskTagReadBuffer;
    if (tagBuffer != null)
    {
        tagBuffer.Dispose();
        _DiskTagReadBuffer = null;
    }

    // Read buffers for the Int32 slot/tag layout.
    var slotInt32Buffer = _DiskSlotInt32ReadBuffer;
    if (slotInt32Buffer != null)
    {
        slotInt32Buffer.Dispose();
        _DiskSlotInt32ReadBuffer = null;
    }

    var tagInt32Buffer = _DiskTagInt32ReadBuffer;
    if (tagInt32Buffer != null)
    {
        tagInt32Buffer.Dispose();
        _DiskTagInt32ReadBuffer = null;
    }
}
/// <summary>
/// Resets the shared test state: clears the reader and creates a fresh
/// parser and table parser.
/// </summary>
public void Init()
{
    reader = null;

    parser = new Parser();
    tableParser = new TableParser();
}
//---------------------------------------------------------------------

/// <summary>
/// Wraps a reader over the given lines in a new <c>InputLine</c>.
/// </summary>
private InputLine MakeInputLine(params string[] lines)
{
    return new InputLine(MakeReader(lines));
}
/// <summary>
/// Parses a table that has only a name line and checks that no rows are returned.
/// </summary>
public void Table_Empty()
{
    reader = MakeTableReader("TableName");

    ParseResult[] rows = tableParser.Parse(reader);

    Assert.AreEqual(0, rows.Length);
}
//======================================================//

/// <summary>
/// Builds the hashtable index file for a data file directly on disk, using
/// Int32 slots and tags.
/// </summary>
/// <remarks>
/// The index file consists of the header, then <paramref name="hashTableSize"/>
/// slots, then a tag area. The first record hashing to a slot is stored in
/// the slot itself; every further record for that slot is appended to the
/// tag area and linked to the end of the slot's tag chain. The header is
/// written once up front and once more at the end, after the record count
/// and maximum record byte length are known.
/// </remarks>
/// <param name="dataFileFullName">Full name of the data file to index.</param>
/// <param name="dataFileEncoding">Encoding used to read the data file.</param>
/// <param name="hashTableSize">Number of slots in the hashtable.</param>
/// <param name="normlizeTextFunction">
/// Optional function applied to each line before hashing; lines that are
/// empty after normalization are skipped. May be null.
/// </param>
/// <exception cref="InvalidOperationException">
/// The tag area offset or a data file position exceeds <c>MemorySlotInt32.MaxValue</c>.
/// </exception>
public static void BuildIndexInt32OnDisk(string dataFileFullName, Encoding dataFileEncoding, uint hashTableSize, NormlizeTextFunction normlizeTextFunction = null)
{
    //create header-of-disk-hashtable
    var indexHeader = new HashtableIndexFileHeader
                      (
        dataFileFullName,
        dataFileEncoding,
        hashTableSize,
        HashtableIndexFileHeader.SlotType.Int32
                      );

    //create index-file
    using (var indexFileStream = IndexFileHelper.CreateFileStreamReadWrite(indexHeader.GetIndexFileFullName()))
    using (var diskSlotInt32ReadBuffer = new ReadBuffer <DiskSlotInt32>())
    using (var diskTagInt32ReadBuffer = new ReadBuffer <DiskTagInt32>())
    {
        //Write header in index file
        indexFileStream.SeekFromBegin(0);
        indexHeader.SerializeIndexHeader(indexFileStream);

        //The tag area starts right after the header and the slot table
        Int64 tagAreaOffset = (indexHeader.SizeOf + indexHeader.HashtableSize * DiskSlotInt32.SizeOf);

        //Guard from tagAreaOffset > MemorySlotInt32.MaxValue
        if (tagAreaOffset > MemorySlotInt32.MaxValue)
        {
            throw (new InvalidOperationException("tag area offset more then allow possible value => tag area offset: " + tagAreaOffset + ", possible value: " + MemorySlotInt32.MaxValue));
        }

        //fill disk-hashtable with empty slots
        byte[] emptyDiskSlotBytes = DiskSlotInt32.GetEmptyDiskSlot().StructureToByteArray();
        for (var i = 0U; i < hashTableSize; i++)
        {
            indexFileStream.WriteBytes(emptyDiskSlotBytes);
        }

        DiskTagInt32 diskTag = new DiskTagInt32();

        //open data-file
        using (var dataFileTextLineReader = new TextLineReader(indexHeader.DataFileFullName, indexHeader.DataFileEncoding))
        {
            #region [.build index on disk.]
            int dataRecordCount = 0;
            int dataRecordMaxLenght = 0;
            while (!dataFileTextLineReader.EndOfStream)
            {
                //Position of the record is taken before the line is read
                Int64 _posInt64 = dataFileTextLineReader.StreamPosition;

                //Guard from file-pointer > MemorySlotInt32.MaxValue
                if (_posInt64 > MemorySlotInt32.MaxValue)
                {
                    throw (new InvalidOperationException("file-pointer more then allow possible value => file-pointer: " + _posInt64 + ", possible value: " + MemorySlotInt32.MaxValue));
                }

                Int32 position = (Int32)_posInt64;
                var text = dataFileTextLineReader.ReadLine4Indexing();
                if (text.IsEmptyOrNull())
                {
                    continue;
                }

                //Normalize text if allowed
                if (normlizeTextFunction != null)
                {
                    text = normlizeTextFunction(text);
                    if (text.IsEmptyOrNull())
                    {
                        continue;
                    }
                }

                uint hashCode = IndexFileHelper.HashFunction(text, indexHeader.HashtableSize);

                #region [.code.]
                DiskSlotInt32 diskSlot = indexFileStream.SeekAndReadDiskSlotInt32
                                         (
                    indexHeader.SizeOf + hashCode * DiskSlotInt32.SizeOf,
                    diskSlotInt32ReadBuffer
                                         );

                //Free slot => first record with this hash code
                if (diskSlot.PositionInDataFile == Consts.FREE_INT32)
                {
                    //Calc data-slot properties
                    diskSlot.PositionInDataFile = position;
                    diskSlot.FirstTagOffset = 0;

                    //Write in index-file data-slot (step back over the slot just read)
                    indexFileStream.SeekFromCurrentAndWrite
                    (
                        -diskSlotInt32ReadBuffer.Size,
                        diskSlot.StructureToByteArray()
                    );
                }
                else
                {
                    if (diskSlot.FirstTagOffset == 0)
                    {
                        //First collision for this slot: the new tag becomes the chain head
                        diskSlot.FirstTagOffset = (Int32)tagAreaOffset;

                        //ReWrite in index-file data-slot
                        indexFileStream.SeekFromCurrentAndWrite
                        (
                            -diskSlotInt32ReadBuffer.Size,
                            diskSlot.StructureToByteArray()
                        );
                    }
                    else //if ( diskSlot.FirstTagOffset != 0 )
                    {
                        //Walk the tag chain to its last tag
                        diskTag.NextTagOffset = diskSlot.FirstTagOffset;
                        while (0 < diskTag.NextTagOffset)
                        {
                            diskTag = indexFileStream.SeekAndReadDiskTagInt32(diskTag.NextTagOffset, diskTagInt32ReadBuffer);
                        }

                        //Link the last tag to the tag about to be appended.
                        //Its PositionInDataFile must stay untouched: it belongs to the
                        //record indexed earlier, and overwriting it would drop that
                        //record from the index.
                        diskTag.NextTagOffset = (Int32)tagAreaOffset;

                        //ReWrite previous tag in chain (step back over the tag just read)
                        indexFileStream.SeekFromCurrentAndWrite
                        (
                            -diskTagInt32ReadBuffer.Size,
                            diskTag.StructureToByteArray()
                        );
                    }

                    //Calc properties of the new disk-tag (new end of chain)
                    diskTag.PositionInDataFile = position;
                    diskTag.NextTagOffset = 0;

                    //Write in index-file
                    indexFileStream.SeekAndWrite
                    (
                        tagAreaOffset,
                        diskTag.StructureToByteArray()
                    );

                    //Calc disk-tag offset
                    tagAreaOffset += DiskTagInt32.SizeOf;

                    //Guard from tagAreaOffset > MemorySlotInt32.MaxValue
                    if (tagAreaOffset > MemorySlotInt32.MaxValue)
                    {
                        throw (new InvalidOperationException("tag area offset more then allow possible value => tag area offset: " + tagAreaOffset + ", possible value: " + MemorySlotInt32.MaxValue));
                    }
                }
                #endregion

                dataRecordCount++;
                if (dataRecordMaxLenght < text.Length)
                {
                    dataRecordMaxLenght = text.Length;
                }
            }

            indexHeader.SetDataRecordCount(dataRecordCount);
            indexHeader.SetDataRecordMaxBytesLenght(indexHeader.DataFileEncoding.GetMaxByteCount(dataRecordMaxLenght));
            #endregion
        }

        //one more time - Write header in index file, now with the record statistics
        indexFileStream.SeekFromBegin(0);
        indexHeader.SerializeIndexHeader(indexFileStream);
    }
}
/// <summary>
/// Int64 file-pointer version.
/// Builds the whole hashtable in memory from the data file, records
/// collision statistics in the index header, then writes the header,
/// slot table and tag area to the index file.
/// </summary>
/// <param name="dataFileFullName">Full name of the data file to index.</param>
/// <param name="dataFileEncoding">Encoding used to read the data file.</param>
/// <param name="hashTableSize">Requested number of slots; passed to the index header.</param>
/// <param name="normlizeTextFunction">
/// Function applied to each line before hashing when it is not null;
/// lines that are empty after normalization are skipped.
/// </param>
private static void BuildIndexInMemory(string dataFileFullName, Encoding dataFileEncoding, uint hashTableSize, NormlizeTextFunction normlizeTextFunction)
{
    // Index header describing the data file and the hashtable.
    var indexHeader = new HashtableIndexFileHeader
                      (
        dataFileFullName,
        dataFileEncoding,
        hashTableSize
                      );

    // In-memory hashtable; a null element is an empty slot.
    var hashTable = new MemorySlot[indexHeader.HashtableSize];

    // Pass 1: read the data file and fill the in-memory hashtable.
    using (var dataFileTextLineReader = new TextLineReader(indexHeader.DataFileFullName, indexHeader.DataFileEncoding))
    {
        #region [.2 calc index in memory.]
        int dataRecordCount = 0;
        int dataRecordMaxLenght = 0;
        while (!dataFileTextLineReader.EndOfStream)
        {
            // The record position is taken before the line is read.
            fpos_t position = dataFileTextLineReader.StreamPosition;
            var text = dataFileTextLineReader.ReadLine4Indexing();
            if (text.IsEmptyOrNull())
            {
                continue;
            }

            // Normalize text if a normalization function was supplied.
            if (normlizeTextFunction != null)
            {
                text = normlizeTextFunction(text);
                if (text.IsEmptyOrNull())
                {
                    continue;
                }
            }

            uint hashCode = IndexFileHelper.HashFunction(text, indexHeader.HashtableSize);

            MemorySlot memorySlot = hashTable[hashCode];
            if (memorySlot == null)
            {
                // First record for this hash code goes into the slot itself.
                hashTable[hashCode] = new MemorySlot(position);
            }
            else
            {
                // Collision: append a new tag to the end of the slot's chain.
                MemoryTag newMemoryTag = new MemoryTag(position);

                if (memorySlot.FirstMemoryTag == null)
                {
                    memorySlot.FirstMemoryTag = newMemoryTag;
                }
                else
                {
                    // Walk to the last tag in the chain.
                    MemoryTag memoryTag = memorySlot.FirstMemoryTag;
                    while (memoryTag.NextMemoryTag != null)
                    {
                        memoryTag = memoryTag.NextMemoryTag;
                    }
                    memoryTag.NextMemoryTag = newMemoryTag;
                }
            }

            dataRecordCount++;
            // Track the longest (possibly normalized) record, in characters.
            if (dataRecordMaxLenght < text.Length)
            {
                dataRecordMaxLenght = text.Length;
            }
        }

        indexHeader.SetDataRecordCount(dataRecordCount);
        // Convert the longest record length from characters to a maximum byte count.
        indexHeader.SetDataRecordMaxBytesLenght(indexHeader.DataFileEncoding.GetMaxByteCount(dataRecordMaxLenght));
        #endregion
    }

    // Pass 2: collision statistics, keyed by tag chain depth (-1 for an empty slot).
    #region [.Calulate Tag collision statistica.]
    foreach (var memorySlot in hashTable)
    {
        if (memorySlot == null)
        {
            // Empty slot in the hashtable.
            indexHeader.TagCollisionStatistica.IncremetByKey(-1);
        }
        else
        {
            // Count the tags chained to this slot (0 when there was no collision).
            int currentTagChainDepth = 0;
            // Start from the first tag.
            MemoryTag memoryTag = memorySlot.FirstMemoryTag;
            while (memoryTag != null)
            {
                // Move to the next tag.
                memoryTag = memoryTag.NextMemoryTag;
                // One more tag in the chain.
                currentTagChainDepth++;
            }

            // Record the chain depth of this slot.
            indexHeader.TagCollisionStatistica.IncremetByKey(currentTagChainDepth);
        }
    }
    #endregion

    // Pass 3: write the header, the slot table and the tag area to the index file.
    using (var indexFileBinaryWriter = IndexFileHelper.CreateBinaryWriterRandomAccess(indexHeader.GetIndexFileFullName()))
    {
        #region [.3 write hash table on disk.]
        DiskSlot diskSlot = new DiskSlot();
        DiskTag diskTag = new DiskTag();
        byte[] emptyDiskSlotBytes = DiskSlot.GetEmptyDiskSlot().StructureToByteArray();

        // Write the header at the start of the index file.
        indexFileBinaryWriter.SeekFromBegin(0);
        indexHeader.SerializeIndexHeader(indexFileBinaryWriter);

        // The tag area starts right after the header and the slot table.
        fpos_t tagAreaOffset = indexHeader.SizeOf + indexHeader.HashtableSize * DiskSlot.SizeOf;
        uint hashTableItemIndex = 0;
        foreach (var memorySlot in hashTable)
        {
            // Empty hashtable slot.
            if (memorySlot == null)
            {
                // Write an empty slot at this slot's position.
                indexFileBinaryWriter.SeekAndWrite
                (
                    indexHeader.SizeOf + hashTableItemIndex * DiskSlot.SizeOf,
                    emptyDiskSlotBytes
                );
            }
            // Slot that holds data.
            else
            {
                // Fill the disk slot; FirstTagOffset is 0 when the slot has no tag chain.
                diskSlot.PositionInDataFile = memorySlot.PositionInDataFile;
                diskSlot.FirstTagOffset = (memorySlot.FirstMemoryTag != null) ? tagAreaOffset : 0;

                // Write the data slot at this slot's position.
                indexFileBinaryWriter.SeekAndWrite
                (
                    indexHeader.SizeOf + hashTableItemIndex * DiskSlot.SizeOf,
                    diskSlot.StructureToByteArray()
                );

                // NOTE(review): this counter is incremented below but never read in
                // this pass; the statistics are recorded in the earlier pass.
                int currentTagChainDepth = 0;
                // Start from the first tag.
                MemoryTag memoryTag = memorySlot.FirstMemoryTag;
                while (memoryTag != null)
                {
                    // Fill the disk tag; tags of one chain are written back to back,
                    // so the next tag (if any) directly follows this one.
                    diskTag.PositionInDataFile = memoryTag.PositionInDataFile;
                    diskTag.NextTagOffset = (memoryTag.NextMemoryTag != null) ? tagAreaOffset + DiskTag.SizeOf : 0;

                    // Write the tag into the tag area.
                    indexFileBinaryWriter.SeekAndWrite
                    (
                        tagAreaOffset,
                        diskTag.StructureToByteArray()
                    );

                    // Advance to the next free tag position.
                    tagAreaOffset += DiskTag.SizeOf;

                    // Move to the next tag.
                    memoryTag = memoryTag.NextMemoryTag;

                    // One more tag in the chain.
                    currentTagChainDepth++;
                }

                // A statistics update using currentTagChainDepth used to live here
                // and was left commented out by the original author.
            }

            hashTableItemIndex++;
        }
        #endregion
    }

    // Free memory: drop the in-memory hashtable and force a collection.
    hashTable = null;
    GC.Collect();
}
//---------------------------------------------------------------------

/// <summary>
/// Asserts that the reader has no more lines: <c>ReadLine()</c> returns
/// null and the line number equals <c>LineReader.EndOfInput</c>.
/// </summary>
private void AssertReaderAtEnd(TextLineReader reader)
{
    Assert.IsNull(reader.ReadLine());

    // Expected value goes first so a failure message labels the values correctly.
    Assert.AreEqual(LineReader.EndOfInput, reader.LineNumber);
}