コード例 #1
0
        public virtual void TestRandomMaps2()
        {
            Random random        = Random();
            int    numIterations = AtLeast(3);

            for (int iter = 0; iter < numIterations; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST iter=" + iter);
                }

                char endLetter = (char)TestUtil.NextInt(random, 'b', 'z');
                IDictionary <string, string> map     = new Dictionary <string, string>();
                NormalizeCharMap.Builder     builder = new NormalizeCharMap.Builder();
                int numMappings = AtLeast(5);
                if (VERBOSE)
                {
                    Console.WriteLine("  mappings:");
                }
                while (map.Count < numMappings)
                {
                    string key = TestUtil.RandomSimpleStringRange(random, 'a', endLetter, 7);
                    if (key.Length != 0 && !map.ContainsKey(key))
                    {
                        string value = TestUtil.RandomSimpleString(random);
                        map[key] = value;
                        builder.Add(key, value);
                        if (VERBOSE)
                        {
                            Console.WriteLine("    " + key + " -> " + value);
                        }
                    }
                }

                NormalizeCharMap charMap = builder.Build();

                if (VERBOSE)
                {
                    Console.WriteLine("  test random documents...");
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    string content = TestUtil.RandomSimpleStringRange(random, 'a', endLetter, AtLeast(1000));

                    if (VERBOSE)
                    {
                        Console.WriteLine("  content=" + content);
                    }

                    // Do stupid dog-slow mapping:

                    // Output string:
                    StringBuilder output = new StringBuilder();

                    // Maps output offset to input offset:
                    IList <int?> inputOffsets = new List <int?>();

                    int cumDiff = 0;
                    int charIdx = 0;
                    while (charIdx < content.Length)
                    {
                        int    matchLen  = -1;
                        string matchRepl = null;

                        foreach (KeyValuePair <string, string> ent in map)
                        {
                            string match = ent.Key;
                            if (charIdx + match.Length <= content.Length)
                            {
                                int  limit   = charIdx + match.Length;
                                bool matches = true;
                                for (int charIdx2 = charIdx; charIdx2 < limit; charIdx2++)
                                {
                                    if (match[charIdx2 - charIdx] != content[charIdx2])
                                    {
                                        matches = false;
                                        break;
                                    }
                                }

                                if (matches)
                                {
                                    string repl = ent.Value;
                                    if (match.Length > matchLen)
                                    {
                                        // Greedy: longer match wins
                                        matchLen  = match.Length;
                                        matchRepl = repl;
                                    }
                                }
                            }
                        }

                        if (matchLen != -1)
                        {
                            // We found a match here!
                            if (VERBOSE)
                            {
                                Console.WriteLine("    match=" + content.Substring(charIdx, matchLen) + " @ off=" + charIdx + " repl=" + matchRepl);
                            }
                            output.Append(matchRepl);
                            int minLen = Math.Min(matchLen, matchRepl.Length);

                            // Common part, directly maps back to input
                            // offset:
                            for (int outIdx = 0; outIdx < minLen; outIdx++)
                            {
                                inputOffsets.Add(output.Length - matchRepl.Length + outIdx + cumDiff);
                            }

                            cumDiff += matchLen - matchRepl.Length;
                            charIdx += matchLen;

                            if (matchRepl.Length < matchLen)
                            {
                                // Replacement string is shorter than matched
                                // input: nothing to do
                            }
                            else if (matchRepl.Length > matchLen)
                            {
                                // Replacement string is longer than matched
                                // input: for all the "extra" chars we map
                                // back to a single input offset:
                                for (int outIdx = matchLen; outIdx < matchRepl.Length; outIdx++)
                                {
                                    inputOffsets.Add(output.Length + cumDiff - 1);
                                }
                            }
                            else
                            {
                                // Same length: no change to offset
                            }

                            Debug.Assert(inputOffsets.Count == output.Length, "inputOffsets.size()=" + inputOffsets.Count + " vs output.length()=" + output.Length);
                        }
                        else
                        {
                            inputOffsets.Add(output.Length + cumDiff);
                            output.Append(content[charIdx]);
                            charIdx++;
                        }
                    }

                    string expected = output.ToString();
                    if (VERBOSE)
                    {
                        Console.Write("    expected:");
                        for (int charIdx2 = 0; charIdx2 < expected.Length; charIdx2++)
                        {
                            Console.Write(" " + expected[charIdx2] + "/" + inputOffsets[charIdx2]);
                        }
                        Console.WriteLine();
                    }

                    MappingCharFilter mapFilter          = new MappingCharFilter(charMap, new StringReader(content));
                    StringBuilder     actualBuilder      = new StringBuilder();
                    IList <int?>      actualInputOffsets = new List <int?>();

                    // Now consume the actual mapFilter, somewhat randomly:
                    while (true)
                    {
                        if (random.Next(0, 1) == 1)
                        {
                            int ch = mapFilter.Read();
                            if (ch == -1)
                            {
                                break;
                            }
                            actualBuilder.Append((char)ch);
                        }
                        else
                        {
                            char[] buffer = new char[TestUtil.NextInt(random, 1, 100)];
                            int    off    = buffer.Length == 1 ? 0 : random.Next(buffer.Length - 1);
                            int    count  = mapFilter.Read(buffer, off, buffer.Length - off);
                            if (count == -1)
                            {
                                break;
                            }
                            else
                            {
                                actualBuilder.Append(buffer, off, count);
                            }
                        }

                        if (random.Next(10) == 7)
                        {
                            // Map offsets
                            while (actualInputOffsets.Count < actualBuilder.Length)
                            {
                                actualInputOffsets.Add(mapFilter.CorrectOffset(actualInputOffsets.Count));
                            }
                        }
                    }

                    // Finish mappping offsets
                    while (actualInputOffsets.Count < actualBuilder.Length)
                    {
                        actualInputOffsets.Add(mapFilter.CorrectOffset(actualInputOffsets.Count));
                    }

                    string actual = actualBuilder.ToString();

                    // Verify:
                    assertEquals(expected, actual);
                    assertEquals(inputOffsets, actualInputOffsets);
                }
            }
        }
コード例 #2
0
        public virtual void TestRandomMaps2()
        {
            Random random = Random();
            int numIterations = AtLeast(3);
            for (int iter = 0; iter < numIterations; iter++)
            {

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST iter=" + iter);
                }

                char endLetter = (char)TestUtil.NextInt(random, 'b', 'z');
                IDictionary<string, string> map = new Dictionary<string, string>();
                NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
                int numMappings = AtLeast(5);
                if (VERBOSE)
                {
                    Console.WriteLine("  mappings:");
                }
                while (map.Count < numMappings)
                {
                    string key = TestUtil.RandomSimpleStringRange(random, 'a', endLetter, 7);
                    if (key.Length != 0 && !map.ContainsKey(key))
                    {
                        string value = TestUtil.RandomSimpleString(random);
                        map[key] = value;
                        builder.Add(key, value);
                        if (VERBOSE)
                        {
                            Console.WriteLine("    " + key + " -> " + value);
                        }
                    }
                }

                NormalizeCharMap charMap = builder.Build();

                if (VERBOSE)
                {
                    Console.WriteLine("  test random documents...");
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    string content = TestUtil.RandomSimpleStringRange(random, 'a', endLetter, AtLeast(1000));

                    if (VERBOSE)
                    {
                        Console.WriteLine("  content=" + content);
                    }

                    // Do stupid dog-slow mapping:

                    // Output string:
                    StringBuilder output = new StringBuilder();

                    // Maps output offset to input offset:
                    IList<int?> inputOffsets = new List<int?>();

                    int cumDiff = 0;
                    int charIdx = 0;
                    while (charIdx < content.Length)
                    {

                        int matchLen = -1;
                        string matchRepl = null;

                        foreach (KeyValuePair<string, string> ent in map)
                        {
                            string match = ent.Key;
                            if (charIdx + match.Length <= content.Length)
                            {
                                int limit = charIdx + match.Length;
                                bool matches = true;
                                for (int charIdx2 = charIdx; charIdx2 < limit; charIdx2++)
                                {
                                    if (match[charIdx2 - charIdx] != content[charIdx2])
                                    {
                                        matches = false;
                                        break;
                                    }
                                }

                                if (matches)
                                {
                                    string repl = ent.Value;
                                    if (match.Length > matchLen)
                                    {
                                        // Greedy: longer match wins
                                        matchLen = match.Length;
                                        matchRepl = repl;
                                    }
                                }
                            }
                        }

                        if (matchLen != -1)
                        {
                            // We found a match here!
                            if (VERBOSE)
                            {
                                Console.WriteLine("    match=" + content.Substring(charIdx, matchLen) + " @ off=" + charIdx + " repl=" + matchRepl);
                            }
                            output.Append(matchRepl);
                            int minLen = Math.Min(matchLen, matchRepl.Length);

                            // Common part, directly maps back to input
                            // offset:
                            for (int outIdx = 0; outIdx < minLen; outIdx++)
                            {
                                inputOffsets.Add(output.Length - matchRepl.Length + outIdx + cumDiff);
                            }

                            cumDiff += matchLen - matchRepl.Length;
                            charIdx += matchLen;

                            if (matchRepl.Length < matchLen)
                            {
                                // Replacement string is shorter than matched
                                // input: nothing to do
                            }
                            else if (matchRepl.Length > matchLen)
                            {
                                // Replacement string is longer than matched
                                // input: for all the "extra" chars we map
                                // back to a single input offset:
                                for (int outIdx = matchLen; outIdx < matchRepl.Length; outIdx++)
                                {
                                    inputOffsets.Add(output.Length + cumDiff - 1);
                                }
                            }
                            else
                            {
                                // Same length: no change to offset
                            }

                            Debug.Assert(inputOffsets.Count == output.Length, "inputOffsets.size()=" + inputOffsets.Count + " vs output.length()=" + output.Length);
                        }
                        else
                        {
                            inputOffsets.Add(output.Length + cumDiff);
                            output.Append(content[charIdx]);
                            charIdx++;
                        }
                    }

                    string expected = output.ToString();
                    if (VERBOSE)
                    {
                        Console.Write("    expected:");
                        for (int charIdx2 = 0; charIdx2 < expected.Length; charIdx2++)
                        {
                            Console.Write(" " + expected[charIdx2] + "/" + inputOffsets[charIdx2]);
                        }
                        Console.WriteLine();
                    }

                    MappingCharFilter mapFilter = new MappingCharFilter(charMap, new StringReader(content));
                    StringBuilder actualBuilder = new StringBuilder();
                    IList<int?> actualInputOffsets = new List<int?>();

                    // Now consume the actual mapFilter, somewhat randomly:
                    while (true)
                    {
                        if (random.Next(0, 1) == 1)
                        {
                            int ch = mapFilter.Read();
                            if (ch == -1)
                            {
                                break;
                            }
                            actualBuilder.Append((char)ch);
                        }
                        else
                        {
                            char[] buffer = new char[TestUtil.NextInt(random, 1, 100)];
                            int off = buffer.Length == 1 ? 0 : random.Next(buffer.Length - 1);
                            int count = mapFilter.Read(buffer, off, buffer.Length - off);
                            if (count == -1)
                            {
                                break;
                            }
                            else
                            {
                                actualBuilder.Append(buffer, off, count);
                            }
                        }

                        if (random.Next(10) == 7)
                        {
                            // Map offsets
                            while (actualInputOffsets.Count < actualBuilder.Length)
                            {
                                actualInputOffsets.Add(mapFilter.CorrectOffset(actualInputOffsets.Count));
                            }
                        }
                    }

                    // Finish mappping offsets
                    while (actualInputOffsets.Count < actualBuilder.Length)
                    {
                        actualInputOffsets.Add(mapFilter.CorrectOffset(actualInputOffsets.Count));
                    }

                    string actual = actualBuilder.ToString();

                    // Verify:
                    assertEquals(expected, actual);
                    assertEquals(inputOffsets, actualInputOffsets);
                }
            }
        }