예제 #1
0
 /// <summary>
 /// Constructor
 /// </summary>
 /// <param name="input">Char stream.</param>
 /// <param name="normalizeKanji">Indicates whether kanji iteration marks should be normalized.</param>
 /// <param name="normalizeKana">Indicates whether kana iteration marks should be normalized.</param>
 public JapaneseIterationMarkCharFilter(TextReader input, bool normalizeKanji, bool normalizeKana)
     : base(input)
 {
     this.normalizeKanji = normalizeKanji;
     this.normalizeKana  = normalizeKana;
     buffer.Reset(input);
 }
예제 #2
0
        /// <summary>
        /// Default constructor that takes a <seealso cref="TextReader"/>. </summary>
        public MappingCharFilter(NormalizeCharMap normMap, TextReader @in) : base(@in)
        {
            buffer.Reset(@in);

            map            = normMap.map;
            cachedRootArcs = normMap.cachedRootArcs;

            if (map != null)
            {
                fstReader = map.BytesReader;
            }
            else
            {
                fstReader = null;
            }
        }
예제 #3
0
        /// <summary>
        /// Default constructor that takes a <seealso cref="TextReader"/>. </summary>
        public MappingCharFilter(NormalizeCharMap normMap, TextReader @in) : base(@in)
        {
            //LUCENENET support to reset the reader.
            _input = GetBufferedReader(@in);
            _input.Mark(BufferedCharFilter.defaultCharBufferSize);
            buffer.Reset(_input);
            //buffer.Reset(@in);

            map            = normMap.map;
            cachedRootArcs = normMap.cachedRootArcs;

            if (map != null)
            {
                fstReader = map.BytesReader;
            }
            else
            {
                fstReader = null;
            }
        }
예제 #4
0
        /// <summary>
        /// Default constructor that takes a <see cref="TextReader"/>. </summary>
        public MappingCharFilter(NormalizeCharMap normMap, TextReader @in)
            : base(@in)
        {
            //LUCENENET support to reset the reader.
            _input = GetBufferedReader(@in);
            _input.Mark(BufferedCharFilter.DEFAULT_CHAR_BUFFER_SIZE);
            buffer.Reset(_input);
            //buffer.Reset(@in);

            map            = normMap.map;
            cachedRootArcs = normMap.cachedRootArcs;

            if (map != null)
            {
                fstReader = map.GetBytesReader();
            }
            else
            {
                fstReader = null;
            }
        }
예제 #5
0
        public virtual void Test()
        {
            var ITERS = AtLeast(1000);

            var buffer = new RollingCharBuffer();

            var random = Random();

            for (var iter = 0; iter < ITERS; iter++)
            {
                var stringLen = random.NextBoolean() ? random.Next(50) : random.Next(20000);

                string s;
                if (stringLen == 0)
                {
                    s = "";
                }
                else
                {
                    s = TestUtil.RandomUnicodeString(random, stringLen);
                }
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter + " s.length()=" + s.Length);
                }
                buffer.Reset(new StringReader(s));
                var nextRead   = 0;
                var availCount = 0;
                while (nextRead < s.Length)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("  cycle nextRead=" + nextRead + " avail=" + availCount);
                    }
                    if (availCount == 0 || random.NextBoolean())
                    {
                        // Read next char
                        if (VERBOSE)
                        {
                            Console.WriteLine("    new char");
                        }
                        assertEquals(s[nextRead], buffer.Get(nextRead));
                        nextRead++;
                        availCount++;
                    }
                    else if (random.NextBoolean())
                    {
                        // Read previous char
                        var pos = TestUtil.NextInt(random, nextRead - availCount, nextRead - 1);
                        if (VERBOSE)
                        {
                            Console.WriteLine("    old char pos=" + pos);
                        }
                        assertEquals(s[pos], buffer.Get(pos));
                    }
                    else
                    {
                        // Read slice
                        int length;
                        if (availCount == 1)
                        {
                            length = 1;
                        }
                        else
                        {
                            length = TestUtil.NextInt(random, 1, availCount);
                        }
                        int start;
                        if (length == availCount)
                        {
                            start = nextRead - availCount;
                        }
                        else
                        {
                            start = nextRead - availCount + random.Next(availCount - length);
                        }
                        if (VERBOSE)
                        {
                            Console.WriteLine("    slice start=" + start + " length=" + length);
                        }
                        assertEquals(s.Substring(start, length), new string(buffer.Get(start, length)));
                    }

                    if (availCount > 0 && random.Next(20) == 17)
                    {
                        var toFree = random.Next(availCount);
                        if (VERBOSE)
                        {
                            Console.WriteLine("    free " + toFree + " (avail=" + (availCount - toFree) + ")");
                        }
                        buffer.FreeBefore(nextRead - (availCount - toFree));
                        availCount -= toFree;
                    }
                }
            }
        }