/// <summary>
/// Constructs a reader with a null source name and null text, then checks that
/// <c>SourceName</c> is null and that the reader is already at the end of its input.
/// </summary>
public void NullSourceNameAndText()
        {
            // Both constructor arguments are deliberately null.
            TextLineReader nullReader = new TextLineReader(null, null);

            Assert.IsNull(nullReader.SourceName);
            AssertReaderAtEnd(nullReader);
        }
        /// <summary>
        /// Counts the non-empty records (lines) contained in a data file.
        /// </summary>
        /// <param name="dataFileFullName">Full name of the data file to scan.</param>
        /// <param name="dataFileEncoding">Encoding used to read the data file.</param>
        /// <returns>
        /// The number of lines for which <c>ReadLine4Searching()</c> returned a value that is
        /// neither null nor empty.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="dataFileFullName"/> is null or empty, or
        /// <paramref name="dataFileEncoding"/> is null.
        /// </exception>
        public static uint CalulateAllRecordCountInDataFile(string dataFileFullName, Encoding dataFileEncoding)
        {
            if (dataFileFullName.IsEmptyOrNull())
            {
                // The reported name must match the parameter exactly (it was previously
                // misspelled as "dataFileFullname"), so ParamName is usable by callers.
                throw (new ArgumentNullException("dataFileFullName"));
            }
            if (dataFileEncoding == null)
            {
                throw (new ArgumentNullException("dataFileEncoding"));
            }

            uint dataRecordCount = 0;

            using (var dataFileTextLineReader = new TextLineReader(dataFileFullName, dataFileEncoding))
            {
                while (!dataFileTextLineReader.EndOfStream)
                {
                    // ReadLine4Searching() is used for speed: the exact file-pointer
                    // position is not needed when only counting records.
                    if (!dataFileTextLineReader.ReadLine4Searching().IsEmptyOrNull())
                    {
                        dataRecordCount++;
                    }
                }
            }
            return(dataRecordCount);
        }
        /// <summary>
        /// Constructs a reader with a source name but null text, then checks that the
        /// name is reported unchanged and that the reader is already at the end of its input.
        /// </summary>
        public void SourceNameAndNullText()
        {
            string expectedName = "text input";

            // Only the text argument is null here.
            TextLineReader namedReader = new TextLineReader(expectedName, null);

            Assert.AreEqual(expectedName, namedReader.SourceName);
            AssertReaderAtEnd(namedReader);
        }
Ejemplo n.º 4
0
        //---------------------------------------------------------------------

        /// <summary>
        /// Builds a reader over <paramref name="lines"/> via <c>MakeReader</c> and configures it
        /// for table input: <c>SkipBlankLines</c> and <c>SkipCommentLines</c> are set to true and
        /// the comment line marker is set to "&gt;".
        /// </summary>
        /// <param name="lines">Lines of input handed to <c>MakeReader</c>.</param>
        /// <returns>The configured reader.</returns>
        private TextLineReader MakeTableReader(params string[] lines)
        {
            TextLineReader tableReader = MakeReader(lines);

            tableReader.SkipBlankLines = true;
            tableReader.CommentLineMarker = ">";
            tableReader.SkipCommentLines = true;

            return tableReader;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Parses input that supplies only the int and the string variable and checks the
        /// parsed values; the float value of the result is expected to be 0.
        /// </summary>
        public void IntStr()
        {
            reader = MakeReader(
                "IntVariable -78",
                "StringVariable /usr/local/bin");

            ParseResult parsed = parser.Parse(reader);

            Assert.IsNotNull(parsed);
            Assert.AreEqual(-78, parsed.IntValue);
            // No float variable appears in the input.
            Assert.AreEqual(0, parsed.FloatValue);
            Assert.AreEqual("/usr/local/bin", parsed.StringValue);
        }
        /// <summary>
        /// Constructs a reader from a single string and checks that it has no source name,
        /// yields that string as line 1, and is then at the end of its input.
        /// </summary>
        public void String()
        {
            string singleLine = "Four score and seven years ago";
            TextLineReader lineReader = new TextLineReader(singleLine);

            Assert.IsNull(lineReader.SourceName);

            // The only line comes back unchanged and is numbered 1.
            Assert.AreEqual(singleLine, lineReader.ReadLine());
            Assert.AreEqual(1, lineReader.LineNumber);

            AssertReaderAtEnd(lineReader);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Opens a searcher over a data file using its hashtable index file.
        /// </summary>
        /// <param name="dataFileFullName">
        /// Full name of the data file; the index file name is derived from it through
        /// <c>IndexFileHelper.GetIndexFileFullName</c>.
        /// </param>
        /// <param name="normlizeTextFunction">
        /// Optional text normalization function stored on the instance; may be null.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="dataFileFullName"/> is null or empty.
        /// </exception>
        /// <exception cref="DiskSearchEngineException">
        /// The index header sign is invalid, or the header's size / modify-time check against
        /// the data file fails.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The index header declares a slot type other than Int64 or Int32.
        /// </exception>
        public HashtableSearcher(string dataFileFullName, NormlizeTextFunction normlizeTextFunction = null)
        {
            if (dataFileFullName.IsEmptyOrNull())
            {
                throw (new ArgumentNullException("dataFileFullName"));
            }


            var indexFileFullName = IndexFileHelper.GetIndexFileFullName(dataFileFullName);

            this.IndexHeader = HashtableIndexer.GetIndexHeaderByIndexFile(indexFileFullName);

            if (!this.IndexHeader.IsSignValid())
            {
                throw (new DiskSearchEngineException("Invalid sign of index file: '" + indexFileFullName + "'."));
            }
            if (!this.IndexHeader.IsCorrectSizeAndModifyTime(dataFileFullName))
            {
                throw (new DiskSearchEngineException("Data file was modify after last indexing."));
            }


            // Third argument: the header's maximum record byte length plus the maximum byte
            // count of 10 extra characters in the data file encoding.
            this._DataFileTextLineReader = new TextLineReader
                                           (
                dataFileFullName,
                this.IndexHeader.DataFileEncoding,
                this.IndexHeader.DataRecordMaxBytesLenght + this.IndexHeader.DataFileEncoding.GetMaxByteCount(10)
                                           );
            try
            {
                this._IndexFileBinaryReader = IndexFileHelper.CreateBinaryReaderRandomAccess(indexFileFullName);

                this._DiskSlotReadBuffer = new ReadBuffer <DiskSlot>();
                this._DiskTagReadBuffer  = new ReadBuffer <DiskTag>();

                this._DiskSlotInt32ReadBuffer = new ReadBuffer <DiskSlotInt32>();
                this._DiskTagInt32ReadBuffer  = new ReadBuffer <DiskTagInt32>();

                this._NormlizeTextFunction = normlizeTextFunction;

                // Pick the lookup routine matching the slot width recorded in the index header.
                switch (this.IndexHeader.HashtableSlotType)
                {
                case HashtableIndexFileHeader.SlotType.Int64:
                    this._IsExistsFunction = IsExistsDiskRoutine;
                    break;

                case HashtableIndexFileHeader.SlotType.Int32:
                    this._IsExistsFunction = IsExistsDiskRoutineIn32;
                    break;

                default:
                    throw (new ArgumentException("this.IndexHeader.HashtableSlotType"));
                }
            }
            catch
            {
                // Construction failed after resources were acquired. The caller never receives
                // an instance to dispose, so release everything opened so far, then rethrow.
                if (this._IndexFileBinaryReader != null)
                {
                    this._IndexFileBinaryReader.BaseStream.Dispose();
                    this._IndexFileBinaryReader = null;
                }
                if (this._DiskSlotReadBuffer != null)
                {
                    this._DiskSlotReadBuffer.Dispose();
                    this._DiskSlotReadBuffer = null;
                }
                if (this._DiskTagReadBuffer != null)
                {
                    this._DiskTagReadBuffer.Dispose();
                    this._DiskTagReadBuffer = null;
                }
                if (this._DiskSlotInt32ReadBuffer != null)
                {
                    this._DiskSlotInt32ReadBuffer.Dispose();
                    this._DiskSlotInt32ReadBuffer = null;
                }
                if (this._DiskTagInt32ReadBuffer != null)
                {
                    this._DiskTagInt32ReadBuffer.Dispose();
                    this._DiskTagInt32ReadBuffer = null;
                }

                this._DataFileTextLineReader.Dispose();
                this._DataFileTextLineReader = null;
                throw;
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Parses input holding all three variables followed by empty and whitespace-only
        /// lines, and checks the parsed int, float and string values.
        /// </summary>
        public void IntFloatStr_BlankLines()
        {
            reader = MakeReader(
                "IntVariable -78",
                "FloatVariable  .099",
                "StringVariable /usr/local/bin",
                "",
                " \t \n",
                "\n");

            ParseResult parsed = parser.Parse(reader);

            Assert.IsNotNull(parsed);
            Assert.AreEqual(-78, parsed.IntValue);
            // The float value is compared with a tolerance of .00001.
            Assert.AreEqual(.099, parsed.FloatValue, .00001);
            Assert.AreEqual("/usr/local/bin", parsed.StringValue);
        }
        /// <summary>
        /// Constructs a reader from the fixture's <c>array</c> and checks that it has no source
        /// name, returns each element in order with an incrementing line number, and then
        /// reports end of input.
        /// </summary>
        public void StringArray()
        {
            TextLineReader lineReader = new TextLineReader(array);

            Assert.IsNull(lineReader.SourceName);

            // Line numbers are 1-based: after reading the first element the number is 1.
            int linesRead = 0;

            foreach (string expectedLine in array)
            {
                Assert.AreEqual(expectedLine, lineReader.ReadLine());
                Assert.AreEqual(++linesRead, lineReader.LineNumber);
            }

            AssertReaderAtEnd(lineReader);
        }
Ejemplo n.º 10
0
        //---------------------------------------------------------------------

        /// <summary>
        /// Runs the table parser over <paramref name="lines"/>. Any exception is written to
        /// <c>Data.Output</c> and rethrown; when it is a <c>LineReaderException</c>, its line
        /// number is first checked against <paramref name="expectedLineNum"/>.
        /// </summary>
        /// <param name="expectedLineNum">Line number a <c>LineReaderException</c> must report.</param>
        /// <param name="lines">Lines of table input.</param>
        private void TryParseTable(int expectedLineNum,
                                   params string[] lines)
        {
            reader = MakeTableReader(lines);
            try
            {
                // The parse result itself is not inspected here.
                tableParser.Parse(reader);
            }
            catch (System.Exception exc)
            {
                Data.Output.WriteLine(exc.Message);

                LineReaderException lineReaderExc = exc as LineReaderException;
                if (lineReaderExc != null)
                {
                    Assert.AreEqual(expectedLineNum, lineReaderExc.LineNumber);
                }

                // Rethrow so the caller still observes the original exception.
                throw;
            }
        }
        /// <summary>
        /// Constructs a reader from a <c>MultiLineText</c> built over the fixture's <c>array</c>
        /// and checks that it has no source name, returns each line of the text in order with
        /// an incrementing line number, and then reports end of input.
        /// </summary>
        public void MultiLineText()
        {
            MultiLineText sourceText = new MultiLineText(array);
            TextLineReader lineReader = new TextLineReader(sourceText);

            Assert.IsNull(lineReader.SourceName);

            int linesRead = 0;

            foreach (string expectedLine in sourceText)
            {
                Assert.AreEqual(expectedLine, lineReader.ReadLine());
                Assert.AreEqual(++linesRead, lineReader.LineNumber);
            }

            AssertReaderAtEnd(lineReader);
        }
        /// <summary>
        /// Reads the whole file, splits it into lines through <c>TextLineReader</c>, and converts
        /// every line accepted by <c>TextFileTrackingRecordFormatManager.IsFormatCorrect</c> into
        /// a tracking record.
        /// </summary>
        /// <param name="fileNameWithPath">
        /// File to read; also stored on every record as <c>FileNameWithPath</c>.
        /// </param>
        /// <returns>The records created from correctly formatted lines, in line order.</returns>
        public List <ITrackingRecordEntity> CreateFromFile(string fileNameWithPath)
        {
            var records  = new List <ITrackingRecordEntity>();
            var fileText = FileManager.ReadAllText(fileNameWithPath);

            TextLineReader.SetText(fileText);

            foreach (var line in TextLineReader.GetLines())
            {
                // Lines that do not match the tracking-record format are skipped.
                if (!TextFileTrackingRecordFormatManager.IsFormatCorrect(line))
                {
                    continue;
                }

                var record = TextFileTrackingRecordFormatManager.GetTrackingRecordFromText(line);
                record.FileNameWithPath = fileNameWithPath;
                records.Add(record);
            }

            return records;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Parses a table with a name line, a blank line, a "&gt;"-marked header line and three
        /// data rows, and checks that the parser returns those three rows with matching int,
        /// float and string values.
        /// </summary>
        public void Table_3Rows()
        {
            ParseResult[] expectedRows =
            {
                new ParseResult(1, -123.45f, "Maine"),
                new ParseResult(2, 0.00098f, "Hawaii"),
                new ParseResult(3, 7e+8f, "Florida")
            };

            // Each data row is the ToString() rendering of the expected result.
            reader = MakeTableReader(
                "TableName",
                "",
                "> StrVar   FloatVar   IntVar",
                expectedRows[0].ToString(),
                expectedRows[1].ToString(),
                expectedRows[2].ToString());

            ParseResult[] actualRows = tableParser.Parse(reader);

            Assert.AreEqual(expectedRows.Length, actualRows.Length);
            for (int row = 0; row < expectedRows.Length; ++row)
            {
                Assert.AreEqual(expectedRows[row].IntValue, actualRows[row].IntValue);
                Assert.AreEqual(expectedRows[row].FloatValue, actualRows[row].FloatValue);
                Assert.AreEqual(expectedRows[row].StringValue, actualRows[row].StringValue);
            }
        }
        /// <summary>
        /// Copies the fixture's <c>array</c> into a <c>List&lt;string&gt;</c>, constructs a reader
        /// from that list, and checks that it has no source name, returns each element in order
        /// with an incrementing line number, and then reports end of input.
        /// </summary>
        public void StringList()
        {
            List <string> lines = new List <string>(array.Length);

            foreach (string item in array)
            {
                lines.Add(item);
            }

            TextLineReader lineReader = new TextLineReader(lines);

            Assert.IsNull(lineReader.SourceName);

            // Element at index i is expected as line number i + 1.
            for (int index = 0; index < lines.Count; index++)
            {
                Assert.AreEqual(lines[index], lineReader.ReadLine());
                Assert.AreEqual(index + 1, lineReader.LineNumber);
            }

            AssertReaderAtEnd(lineReader);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Releases the data-file reader, the index-file stream and the four read buffers.
        /// Each field is set to null after it is released, so a second call does nothing.
        /// </summary>
        public void Dispose()
        {
            // Data file reader.
            if (this._DataFileTextLineReader != null)
            {
                this._DataFileTextLineReader.Dispose();
                this._DataFileTextLineReader = null;
            }

            // Index file: the underlying stream of the binary reader is disposed.
            if (this._IndexFileBinaryReader != null)
            {
                this._IndexFileBinaryReader.BaseStream.Dispose();
                this._IndexFileBinaryReader = null;
            }

            // Read buffers for DiskSlot / DiskTag.
            if (this._DiskSlotReadBuffer != null)
            {
                this._DiskSlotReadBuffer.Dispose();
                this._DiskSlotReadBuffer = null;
            }
            if (this._DiskTagReadBuffer != null)
            {
                this._DiskTagReadBuffer.Dispose();
                this._DiskTagReadBuffer = null;
            }

            // Read buffers for DiskSlotInt32 / DiskTagInt32.
            if (this._DiskSlotInt32ReadBuffer != null)
            {
                this._DiskSlotInt32ReadBuffer.Dispose();
                this._DiskSlotInt32ReadBuffer = null;
            }
            if (this._DiskTagInt32ReadBuffer != null)
            {
                this._DiskTagInt32ReadBuffer.Dispose();
                this._DiskTagInt32ReadBuffer = null;
            }
        }
Ejemplo n.º 16
0
 /// <summary>
 /// Resets the shared test state: clears <c>reader</c> and creates a fresh
 /// <c>Parser</c> and <c>TableParser</c>.
 /// </summary>
 public void Init()
 {
     // No reader until a test builds one.
     reader      = null;
     parser      = new Parser();
     tableParser = new TableParser();
 }
Ejemplo n.º 17
0
        //---------------------------------------------------------------------

        /// <summary>
        /// Wraps a reader built over <paramref name="lines"/> (via <c>MakeReader</c>) in a new
        /// <c>InputLine</c>.
        /// </summary>
        /// <param name="lines">Lines of input handed to <c>MakeReader</c>.</param>
        /// <returns>The new <c>InputLine</c>.</returns>
        private InputLine MakeInputLine(params string[] lines)
        {
            return new InputLine(MakeReader(lines));
        }
Ejemplo n.º 18
0
 /// <summary>
 /// Parses a table that consists of the table name only and checks that the parser
 /// returns zero rows.
 /// </summary>
 public void Table_Empty()
 {
     reader = MakeTableReader("TableName");

     ParseResult[] rows = tableParser.Parse(reader);

     Assert.AreEqual(0, rows.Length);
 }
        //======================================================//
        /// <summary>
        /// Builds the hashtable index file for a data file directly on disk, using Int32 slots
        /// and tags.
        /// </summary>
        /// <remarks>
        /// Steps: write the index header, fill the slot area with empty slots, then read the
        /// data file line by line. For each non-empty (optionally normalized) line the hash
        /// slot is read back from the index file; a free slot receives the line's position,
        /// otherwise a new tag holding the position is appended to the slot's tag chain in the
        /// tag area that follows the slots. Finally the header is written a second time with
        /// the record count and maximum record byte length.
        /// </remarks>
        /// <param name="dataFileFullName">Full name of the data file to index.</param>
        /// <param name="dataFileEncoding">Encoding used to read the data file.</param>
        /// <param name="hashTableSize">Requested hashtable size, passed to the index header.</param>
        /// <param name="normlizeTextFunction">
        /// Optional function applied to each line before hashing; lines that become empty after
        /// normalization are skipped. May be null.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// The tag area offset or a data-file position exceeds <c>MemorySlotInt32.MaxValue</c>.
        /// </exception>
        public static void BuildIndexInt32OnDisk(string dataFileFullName, Encoding dataFileEncoding, uint hashTableSize, NormlizeTextFunction normlizeTextFunction = null)
        {
            //create header-of-disk-hashtable
            var indexHeader = new HashtableIndexFileHeader
                              (
                dataFileFullName,
                dataFileEncoding,
                hashTableSize,
                HashtableIndexFileHeader.SlotType.Int32
                              );

            //create index-file
            using (var indexFileStream = IndexFileHelper.CreateFileStreamReadWrite(indexHeader.GetIndexFileFullName()))
                using (var diskSlotInt32ReadBuffer = new ReadBuffer <DiskSlotInt32>())
                    using (var diskTagInt32ReadBuffer = new ReadBuffer <DiskTagInt32>())
                    {
                        //Write header in index file
                        indexFileStream.SeekFromBegin(0);
                        indexHeader.SerializeIndexHeader(indexFileStream);

                        //The tag area starts right after the header and the slot area
                        Int64 tagAreaOffset = (indexHeader.SizeOf + indexHeader.HashtableSize * DiskSlotInt32.SizeOf);

                        //Guard from tagAreaOffset > MemorySlotInt32.MaxValue
                        if (tagAreaOffset > MemorySlotInt32.MaxValue)
                        {
                            throw (new InvalidOperationException("tag area offset more then allow possible value => tag area offset: " + tagAreaOffset + ", possible value: " + MemorySlotInt32.MaxValue));
                        }

                        //fill disk-hashtable empty slot's
                        byte[] emptyDiskSlotBytes = DiskSlotInt32.GetEmptyDiskSlot().StructureToByteArray();
                        for (var i = 0U; i < hashTableSize; i++)
                        {
                            indexFileStream.WriteBytes(emptyDiskSlotBytes);
                        }

                        DiskTagInt32 diskTag = new DiskTagInt32();

                        //open data-file
                        using (var dataFileTextLineReader = new TextLineReader(indexHeader.DataFileFullName, indexHeader.DataFileEncoding))
                        {
                            #region [.build index on disk.]
                            int dataRecordCount     = 0;
                            int dataRecordMaxLenght = 0;
                            while (!dataFileTextLineReader.EndOfStream)
                            {
                                //Position of the line about to be read (captured before ReadLine4Indexing)
                                Int64 _posInt64 = dataFileTextLineReader.StreamPosition;

                                //Guard from file-pointer > MemorySlotInt32.MaxValue
                                if (_posInt64 > MemorySlotInt32.MaxValue)
                                {
                                    throw (new InvalidOperationException("file-pointer more then allow possible value => file-pointer: " + _posInt64 + ", possible value: " + MemorySlotInt32.MaxValue));
                                }

                                Int32 position = (Int32)_posInt64;

                                var text = dataFileTextLineReader.ReadLine4Indexing();
                                if (text.IsEmptyOrNull())
                                {
                                    continue;
                                }
                                //Normlize text if allowed
                                if (normlizeTextFunction != null)
                                {
                                    text = normlizeTextFunction(text);

                                    if (text.IsEmptyOrNull())
                                    {
                                        continue;
                                    }
                                }

                                uint hashCode = IndexFileHelper.HashFunction(text, indexHeader.HashtableSize);

                                #region [.code.]
                                DiskSlotInt32 diskSlot = indexFileStream.SeekAndReadDiskSlotInt32
                                                         (
                                    indexHeader.SizeOf + hashCode * DiskSlotInt32.SizeOf,
                                    diskSlotInt32ReadBuffer
                                                         );

                                //Free slot => search text not found
                                if (diskSlot.PositionInDataFile == Consts.FREE_INT32)
                                {
                                    //Calc data-slot properties
                                    diskSlot.PositionInDataFile = position;
                                    diskSlot.FirstTagOffset     = 0;

                                    //Write in index-file data-slot (seek back over the slot just read)
                                    indexFileStream.SeekFromCurrentAndWrite
                                    (
                                        -diskSlotInt32ReadBuffer.Size,
                                        diskSlot.StructureToByteArray()
                                    );
                                }
                                else
                                {
                                    if (diskSlot.FirstTagOffset == 0)
                                    {
                                        //Slot has no chain yet: the new tag becomes its first tag
                                        diskSlot.FirstTagOffset = (Int32)tagAreaOffset;

                                        //ReWrite in index-file data-slot
                                        indexFileStream.SeekFromCurrentAndWrite
                                        (
                                            -diskSlotInt32ReadBuffer.Size,
                                            diskSlot.StructureToByteArray()
                                        );
                                    }
                                    else //if ( diskSlot.FirstTagOffset != 0 )
                                    {
                                        //Walk the tag chain until its last tag (NextTagOffset == 0) has been read
                                        diskTag.NextTagOffset = diskSlot.FirstTagOffset;
                                        while (0 < diskTag.NextTagOffset)
                                        {
                                            diskTag = indexFileStream.SeekAndReadDiskTagInt32(diskTag.NextTagOffset, diskTagInt32ReadBuffer);
                                        }

                                        //Link the last tag to the new tag. Only NextTagOffset is changed:
                                        //the last tag must keep its own PositionInDataFile, otherwise the
                                        //record it refers to would be dropped from the index and replaced
                                        //by a duplicate of the current record.
                                        diskTag.NextTagOffset = (Int32)tagAreaOffset;

                                        //ReWrite previous tag in chain
                                        indexFileStream.SeekFromCurrentAndWrite
                                        (
                                            -diskTagInt32ReadBuffer.Size,
                                            diskTag.StructureToByteArray()
                                        );
                                    }

                                    //Calc disk-tag properties of the new (last) tag
                                    diskTag.PositionInDataFile = position;
                                    diskTag.NextTagOffset      = 0;

                                    //Write in index-file
                                    indexFileStream.SeekAndWrite
                                    (
                                        tagAreaOffset,
                                        diskTag.StructureToByteArray()
                                    );

                                    //Calc disk-tag offset
                                    tagAreaOffset += DiskTagInt32.SizeOf;

                                    //Guard from tagAreaOffset > MemorySlotInt32.MaxValue
                                    if (tagAreaOffset > MemorySlotInt32.MaxValue)
                                    {
                                        throw (new InvalidOperationException("tag area offset more then allow possible value => tag area offset: " + tagAreaOffset + ", possible value: " + MemorySlotInt32.MaxValue));
                                    }
                                }
                                #endregion

                                dataRecordCount++;

                                if (dataRecordMaxLenght < text.Length)
                                {
                                    dataRecordMaxLenght = text.Length;
                                }
                            }

                            indexHeader.SetDataRecordCount(dataRecordCount);
                            indexHeader.SetDataRecordMaxBytesLenght(indexHeader.DataFileEncoding.GetMaxByteCount(dataRecordMaxLenght));
                            #endregion
                        }

                        //one more time - Write header in index file (now with record count and max length)
                        indexFileStream.SeekFromBegin(0);
                        indexHeader.SerializeIndexHeader(indexFileStream);
                    }
        }
        /// <summary>
        /// Int64 file-pointer version.
        /// Builds the whole hashtable in memory from the data file, then writes the index
        /// header, the slot area and the tag area to the index file.
        /// </summary>
        /// <param name="dataFileFullName">Full name of the data file to index.</param>
        /// <param name="dataFileEncoding">Encoding used to read the data file.</param>
        /// <param name="hashTableSize">Requested hashtable size, passed to the index header.</param>
        /// <param name="normlizeTextFunction">
        /// Optional function applied to each line before hashing; lines that become empty after
        /// normalization are skipped. May be null.
        /// </param>
        private static void BuildIndexInMemory(string dataFileFullName, Encoding dataFileEncoding, uint hashTableSize, NormlizeTextFunction normlizeTextFunction)
        {
            //Create the index header
            var indexHeader = new HashtableIndexFileHeader
                              (
                dataFileFullName,
                dataFileEncoding,
                hashTableSize
                              );
            //In-memory hashtable, sized by the header's HashtableSize; null entry = empty slot
            var hashTable = new MemorySlot[indexHeader.HashtableSize];

            //Read the data file and fill the in-memory hashtable
            using (var dataFileTextLineReader = new TextLineReader(indexHeader.DataFileFullName, indexHeader.DataFileEncoding))
            {
                #region [.2 calc index in memory.]
                int dataRecordCount     = 0;
                int dataRecordMaxLenght = 0;
                while (!dataFileTextLineReader.EndOfStream)
                {
                    //Position of the line about to be read (captured before ReadLine4Indexing)
                    fpos_t position = dataFileTextLineReader.StreamPosition;

                    var text = dataFileTextLineReader.ReadLine4Indexing();
                    if (text.IsEmptyOrNull())
                    {
                        continue;
                    }
                    //Normlize text if allowed
                    if (normlizeTextFunction != null)
                    {
                        text = normlizeTextFunction(text);

                        if (text.IsEmptyOrNull())
                        {
                            continue;
                        }
                    }

                    uint hashCode = IndexFileHelper.HashFunction(text, indexHeader.HashtableSize);

                    MemorySlot memorySlot = hashTable[hashCode];
                    if (memorySlot == null)
                    {
                        //First record for this hash: it occupies the slot itself
                        hashTable[hashCode] = new MemorySlot(position);
                    }
                    else
                    {
                        //Collision: append a new tag to the end of the slot's tag chain
                        MemoryTag newMemoryTag = new MemoryTag(position);

                        if (memorySlot.FirstMemoryTag == null)
                        {
                            memorySlot.FirstMemoryTag = newMemoryTag;
                        }
                        else
                        {
                            MemoryTag memoryTag = memorySlot.FirstMemoryTag;
                            while (memoryTag.NextMemoryTag != null)
                            {
                                memoryTag = memoryTag.NextMemoryTag;
                            }
                            memoryTag.NextMemoryTag = newMemoryTag;
                        }
                    }

                    dataRecordCount++;

                    if (dataRecordMaxLenght < text.Length)
                    {
                        dataRecordMaxLenght = text.Length;                                      //dataRecordMaxLenght = Math.Max( dataRecordMaxLenght, text.Length );
                    }
                }

                //Header values are set before the header is serialized further below
                indexHeader.SetDataRecordCount(dataRecordCount);
                indexHeader.SetDataRecordMaxBytesLenght(indexHeader.DataFileEncoding.GetMaxByteCount(dataRecordMaxLenght));
                #endregion
            }

            #region [.Calulate Tag collision statistica.]
            foreach (var memorySlot in hashTable)
            {
                if (memorySlot == null)
                {
                    //empty slot's in hash-table are counted under key -1
                    indexHeader.TagCollisionStatistica.IncremetByKey(-1);
                }
                else
                {
                    //Current tag chain depth (0 when the slot has no tags)
                    int currentTagChainDepth = 0;

                    //Get first memory-tag
                    MemoryTag memoryTag = memorySlot.FirstMemoryTag;
                    while (memoryTag != null)
                    {
                        //Get next tag
                        memoryTag = memoryTag.NextMemoryTag;

                        //Current tag chain depth
                        currentTagChainDepth++;
                    }

                    //Count this slot under its chain depth
                    indexHeader.TagCollisionStatistica.IncremetByKey(currentTagChainDepth);
                }
            }
            #endregion

            //Write header, slots and tags to the index file
            using (var indexFileBinaryWriter = IndexFileHelper.CreateBinaryWriterRandomAccess(indexHeader.GetIndexFileFullName()))
            {
                #region [.3 write hash table on disk.]
                DiskSlot diskSlot           = new DiskSlot();
                DiskTag  diskTag            = new DiskTag();
                byte[]   emptyDiskSlotBytes = DiskSlot.GetEmptyDiskSlot().StructureToByteArray();

                //Write header in index file
                indexFileBinaryWriter.SeekFromBegin(0);
                indexHeader.SerializeIndexHeader(indexFileBinaryWriter);

                //The tag area starts right after the header and the slot area
                fpos_t tagAreaOffset = indexHeader.SizeOf + indexHeader.HashtableSize * DiskSlot.SizeOf;

                uint hashTableItemIndex = 0;
                foreach (var memorySlot in hashTable)
                {
                    //Empty hash-table slot
                    if (memorySlot == null)
                    {
                        //Write in index-file empty slot
                        indexFileBinaryWriter.SeekAndWrite
                        (
                            indexHeader.SizeOf + hashTableItemIndex * DiskSlot.SizeOf,
                            emptyDiskSlotBytes
                        );
                    }
                    //Have a data hash-table slot
                    else
                    {
                        //Calc data-slot properties; FirstTagOffset is 0 when the slot has no tag chain
                        diskSlot.PositionInDataFile = memorySlot.PositionInDataFile;
                        diskSlot.FirstTagOffset     = (memorySlot.FirstMemoryTag != null) ? tagAreaOffset : 0;

                        //Write in index-file data-slot
                        indexFileBinaryWriter.SeekAndWrite
                        (
                            indexHeader.SizeOf + hashTableItemIndex * DiskSlot.SizeOf,
                            diskSlot.StructureToByteArray()
                        );

                        //Calc Tag collision statistica (only incremented here; the statistic itself
                        //is recorded in the separate pass above)
                        int currentTagChainDepth = 0;

                        //Get first memory-tag
                        MemoryTag memoryTag = memorySlot.FirstMemoryTag;
                        while (memoryTag != null)
                        {
                            //Calc disk-tag properties; a chain's tags are written back to back,
                            //so the next tag (if any) directly follows this one
                            diskTag.PositionInDataFile = memoryTag.PositionInDataFile;
                            diskTag.NextTagOffset      = (memoryTag.NextMemoryTag != null) ? tagAreaOffset + DiskTag.SizeOf : 0;

                            //Write in index-file
                            indexFileBinaryWriter.SeekAndWrite
                            (
                                tagAreaOffset,
                                diskTag.StructureToByteArray()
                            );
                            //Calc disk-tag offset
                            tagAreaOffset += DiskTag.SizeOf;
                            //Get next tag
                            memoryTag = memoryTag.NextMemoryTag;

                            //Calc Tag collision statistica
                            currentTagChainDepth++;
                        }

                        /*
                         * //Calulate Tag collision statistica
                         * //---indexHeader.TagCollisionStatistica.IncremetByKey( currentTagChainDepth /*memoryHash.GetTagChainDepth()* / );
                         */
                    }

                    hashTableItemIndex++;
                }
                #endregion
            }

            //Free memory: drop the table reference and force a collection
            hashTable = null;
            GC.Collect();
        }
        //---------------------------------------------------------------------

        /// <summary>
        /// Asserts that <paramref name="reader"/> has no more input: <c>ReadLine()</c> returns
        /// null and <c>LineNumber</c> equals <c>LineReader.EndOfInput</c>.
        /// </summary>
        /// <param name="reader">Reader expected to be at the end of its input.</param>
        private void AssertReaderAtEnd(TextLineReader reader)
        {
            Assert.IsNull(reader.ReadLine());

            // Expected value first, actual value second, so that a failure message
            // reports the two values the right way round.
            Assert.AreEqual(LineReader.EndOfInput, reader.LineNumber);
        }