/// <summary>
/// Randomized test: builds a random set of string mappings, computes the
/// expected mapped output and output-to-input offset table with a brute-force
/// greedy (longest match wins) reference implementation, then reads the same
/// content through a <c>MappingCharFilter</c> using a random mix of
/// single-char and buffered reads and checks that both the text and the
/// corrected offsets agree with the reference.
/// </summary>
public virtual void TestRandomMaps2()
{
    Random random = Random();
    int numIterations = AtLeast(3);
    for (int iter = 0; iter < numIterations; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST iter=" + iter);
        }

        char endLetter = (char)TestUtil.NextInt(random, 'b', 'z');

        // Keep the mappings both in a plain dictionary (for the reference
        // implementation below) and in the builder (for the filter under test).
        IDictionary<string, string> map = new Dictionary<string, string>();
        NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
        int numMappings = AtLeast(5);
        if (VERBOSE)
        {
            Console.WriteLine(" mappings:");
        }
        while (map.Count < numMappings)
        {
            string key = TestUtil.RandomSimpleStringRange(random, 'a', endLetter, 7);
            // Only non-empty keys that have not been used yet are added.
            if (key.Length != 0 && !map.ContainsKey(key))
            {
                string value = TestUtil.RandomSimpleString(random);
                map[key] = value;
                builder.Add(key, value);
                if (VERBOSE)
                {
                    Console.WriteLine(" " + key + " -> " + value);
                }
            }
        }

        NormalizeCharMap charMap = builder.Build();

        if (VERBOSE)
        {
            Console.WriteLine(" test random documents...");
        }

        for (int iter2 = 0; iter2 < 100; iter2++)
        {
            string content = TestUtil.RandomSimpleStringRange(random, 'a', endLetter, AtLeast(1000));

            if (VERBOSE)
            {
                Console.WriteLine(" content=" + content);
            }

            // Do stupid dog-slow mapping:

            // Output string:
            StringBuilder output = new StringBuilder();

            // Maps output offset to input offset:
            IList<int?> inputOffsets = new List<int?>();

            // Running difference between consumed input length and produced output length.
            int cumDiff = 0;
            int charIdx = 0;
            while (charIdx < content.Length)
            {
                int matchLen = -1;
                string matchRepl = null;

                foreach (KeyValuePair<string, string> ent in map)
                {
                    string match = ent.Key;
                    if (charIdx + match.Length <= content.Length)
                    {
                        int limit = charIdx + match.Length;
                        bool matches = true;
                        for (int charIdx2 = charIdx; charIdx2 < limit; charIdx2++)
                        {
                            if (match[charIdx2 - charIdx] != content[charIdx2])
                            {
                                matches = false;
                                break;
                            }
                        }

                        if (matches && match.Length > matchLen)
                        {
                            // Greedy: longer match wins
                            matchLen = match.Length;
                            matchRepl = ent.Value;
                        }
                    }
                }

                if (matchLen != -1)
                {
                    // We found a match here!
                    if (VERBOSE)
                    {
                        Console.WriteLine(" match=" + content.Substring(charIdx, matchLen) + " @ off=" + charIdx + " repl=" + matchRepl);
                    }
                    output.Append(matchRepl);
                    int minLen = Math.Min(matchLen, matchRepl.Length);

                    // Common part, directly maps back to input offset:
                    for (int outIdx = 0; outIdx < minLen; outIdx++)
                    {
                        inputOffsets.Add(output.Length - matchRepl.Length + outIdx + cumDiff);
                    }

                    cumDiff += matchLen - matchRepl.Length;
                    charIdx += matchLen;

                    // A replacement that is shorter than, or the same length as,
                    // the matched input needs no additional offset entries.
                    if (matchRepl.Length > matchLen)
                    {
                        // Replacement string is longer than matched input: for all
                        // the "extra" chars we map back to a single input offset:
                        for (int outIdx = matchLen; outIdx < matchRepl.Length; outIdx++)
                        {
                            inputOffsets.Add(output.Length + cumDiff - 1);
                        }
                    }

                    Debug.Assert(inputOffsets.Count == output.Length, "inputOffsets.size()=" + inputOffsets.Count + " vs output.length()=" + output.Length);
                }
                else
                {
                    // No mapping applies at this position: copy the char through.
                    inputOffsets.Add(output.Length + cumDiff);
                    output.Append(content[charIdx]);
                    charIdx++;
                }
            }

            string expected = output.ToString();
            if (VERBOSE)
            {
                Console.Write(" expected:");
                for (int charIdx2 = 0; charIdx2 < expected.Length; charIdx2++)
                {
                    Console.Write(" " + expected[charIdx2] + "/" + inputOffsets[charIdx2]);
                }
                Console.WriteLine();
            }

            MappingCharFilter mapFilter = new MappingCharFilter(charMap, new StringReader(content));

            StringBuilder actualBuilder = new StringBuilder();
            IList<int?> actualInputOffsets = new List<int?>();

            // Now consume the actual mapFilter, somewhat randomly:
            while (true)
            {
                // Coin flip between the two read paths. Random.Next(min, max)
                // excludes max, so the previous Next(0, 1) always returned 0 and
                // the single-char Read() path was never exercised.
                if (random.Next(2) == 1)
                {
                    int ch = mapFilter.Read();
                    if (ch == -1)
                    {
                        break;
                    }
                    actualBuilder.Append((char)ch);
                }
                else
                {
                    char[] buffer = new char[TestUtil.NextInt(random, 1, 100)];
                    int off = buffer.Length == 1 ? 0 : random.Next(buffer.Length - 1);
                    int count = mapFilter.Read(buffer, off, buffer.Length - off);
                    if (count == -1)
                    {
                        break;
                    }
                    actualBuilder.Append(buffer, off, count);
                }

                // Occasionally query offsets mid-stream rather than only at the end.
                if (random.Next(10) == 7)
                {
                    // Map offsets
                    while (actualInputOffsets.Count < actualBuilder.Length)
                    {
                        actualInputOffsets.Add(mapFilter.CorrectOffset(actualInputOffsets.Count));
                    }
                }
            }

            // Finish mapping offsets
            while (actualInputOffsets.Count < actualBuilder.Length)
            {
                actualInputOffsets.Add(mapFilter.CorrectOffset(actualInputOffsets.Count));
            }

            string actual = actualBuilder.ToString();

            // Verify:
            assertEquals(expected, actual);
            assertEquals(inputOffsets, actualInputOffsets);
        }
    }
}
/// <summary>
/// Randomized test: builds a random set of string mappings, computes the
/// expected mapped output and output-to-input offset table with a brute-force
/// greedy (longest match wins) reference implementation, then reads the same
/// content through a <c>MappingCharFilter</c> using a random mix of
/// single-char and buffered reads and checks that both the text and the
/// corrected offsets agree with the reference.
/// </summary>
public virtual void TestRandomMaps2()
{
    Random random = Random();
    int numIterations = AtLeast(3);
    for (int iter = 0; iter < numIterations; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST iter=" + iter);
        }

        char endLetter = (char)TestUtil.NextInt(random, 'b', 'z');

        // Keep the mappings both in a plain dictionary (for the reference
        // implementation below) and in the builder (for the filter under test).
        IDictionary<string, string> map = new Dictionary<string, string>();
        NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
        int numMappings = AtLeast(5);
        if (VERBOSE)
        {
            Console.WriteLine(" mappings:");
        }
        while (map.Count < numMappings)
        {
            string key = TestUtil.RandomSimpleStringRange(random, 'a', endLetter, 7);
            // Only non-empty keys that have not been used yet are added.
            if (key.Length != 0 && !map.ContainsKey(key))
            {
                string value = TestUtil.RandomSimpleString(random);
                map[key] = value;
                builder.Add(key, value);
                if (VERBOSE)
                {
                    Console.WriteLine(" " + key + " -> " + value);
                }
            }
        }

        NormalizeCharMap charMap = builder.Build();

        if (VERBOSE)
        {
            Console.WriteLine(" test random documents...");
        }

        for (int iter2 = 0; iter2 < 100; iter2++)
        {
            string content = TestUtil.RandomSimpleStringRange(random, 'a', endLetter, AtLeast(1000));

            if (VERBOSE)
            {
                Console.WriteLine(" content=" + content);
            }

            // Do stupid dog-slow mapping:

            // Output string:
            StringBuilder output = new StringBuilder();

            // Maps output offset to input offset:
            IList<int?> inputOffsets = new List<int?>();

            // Running difference between consumed input length and produced output length.
            int cumDiff = 0;
            int charIdx = 0;
            while (charIdx < content.Length)
            {
                int matchLen = -1;
                string matchRepl = null;

                foreach (KeyValuePair<string, string> ent in map)
                {
                    string match = ent.Key;
                    if (charIdx + match.Length <= content.Length)
                    {
                        int limit = charIdx + match.Length;
                        bool matches = true;
                        for (int charIdx2 = charIdx; charIdx2 < limit; charIdx2++)
                        {
                            if (match[charIdx2 - charIdx] != content[charIdx2])
                            {
                                matches = false;
                                break;
                            }
                        }

                        if (matches && match.Length > matchLen)
                        {
                            // Greedy: longer match wins
                            matchLen = match.Length;
                            matchRepl = ent.Value;
                        }
                    }
                }

                if (matchLen != -1)
                {
                    // We found a match here!
                    if (VERBOSE)
                    {
                        Console.WriteLine(" match=" + content.Substring(charIdx, matchLen) + " @ off=" + charIdx + " repl=" + matchRepl);
                    }
                    output.Append(matchRepl);
                    int minLen = Math.Min(matchLen, matchRepl.Length);

                    // Common part, directly maps back to input offset:
                    for (int outIdx = 0; outIdx < minLen; outIdx++)
                    {
                        inputOffsets.Add(output.Length - matchRepl.Length + outIdx + cumDiff);
                    }

                    cumDiff += matchLen - matchRepl.Length;
                    charIdx += matchLen;

                    // A replacement that is shorter than, or the same length as,
                    // the matched input needs no additional offset entries.
                    if (matchRepl.Length > matchLen)
                    {
                        // Replacement string is longer than matched input: for all
                        // the "extra" chars we map back to a single input offset:
                        for (int outIdx = matchLen; outIdx < matchRepl.Length; outIdx++)
                        {
                            inputOffsets.Add(output.Length + cumDiff - 1);
                        }
                    }

                    Debug.Assert(inputOffsets.Count == output.Length, "inputOffsets.size()=" + inputOffsets.Count + " vs output.length()=" + output.Length);
                }
                else
                {
                    // No mapping applies at this position: copy the char through.
                    inputOffsets.Add(output.Length + cumDiff);
                    output.Append(content[charIdx]);
                    charIdx++;
                }
            }

            string expected = output.ToString();
            if (VERBOSE)
            {
                Console.Write(" expected:");
                for (int charIdx2 = 0; charIdx2 < expected.Length; charIdx2++)
                {
                    Console.Write(" " + expected[charIdx2] + "/" + inputOffsets[charIdx2]);
                }
                Console.WriteLine();
            }

            MappingCharFilter mapFilter = new MappingCharFilter(charMap, new StringReader(content));

            StringBuilder actualBuilder = new StringBuilder();
            IList<int?> actualInputOffsets = new List<int?>();

            // Now consume the actual mapFilter, somewhat randomly:
            while (true)
            {
                // Coin flip between the two read paths. Random.Next(min, max)
                // excludes max, so the previous Next(0, 1) always returned 0 and
                // the single-char Read() path was never exercised.
                if (random.Next(2) == 1)
                {
                    int ch = mapFilter.Read();
                    if (ch == -1)
                    {
                        break;
                    }
                    actualBuilder.Append((char)ch);
                }
                else
                {
                    char[] buffer = new char[TestUtil.NextInt(random, 1, 100)];
                    int off = buffer.Length == 1 ? 0 : random.Next(buffer.Length - 1);
                    int count = mapFilter.Read(buffer, off, buffer.Length - off);
                    if (count == -1)
                    {
                        break;
                    }
                    actualBuilder.Append(buffer, off, count);
                }

                // Occasionally query offsets mid-stream rather than only at the end.
                if (random.Next(10) == 7)
                {
                    // Map offsets
                    while (actualInputOffsets.Count < actualBuilder.Length)
                    {
                        actualInputOffsets.Add(mapFilter.CorrectOffset(actualInputOffsets.Count));
                    }
                }
            }

            // Finish mapping offsets
            while (actualInputOffsets.Count < actualBuilder.Length)
            {
                actualInputOffsets.Add(mapFilter.CorrectOffset(actualInputOffsets.Count));
            }

            string actual = actualBuilder.ToString();

            // Verify:
            assertEquals(expected, actual);
            assertEquals(inputOffsets, actualInputOffsets);
        }
    }
}