This class converts alphabetic, numeric, and symbolic Unicode characters that fall outside the first 127 ASCII characters (the "Basic Latin" Unicode block) into their ASCII equivalents, if such equivalents exist. Only characters with reasonable ASCII alternatives are converted; for example, 'à' is replaced by 'a'. See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
Inheritance: TokenFilter
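For orientation before the test code below, here is a minimal usage sketch (not part of the tests), assuming the Lucene.Net 4.8-style WhitespaceTokenizer, ASCIIFoldingFilter, and ICharTermAttribute APIs:

 using System;
 using System.IO;
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.Miscellaneous;
 using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Util;

 // Fold accented input down to ASCII: "À la chaîne" -> "A", "la", "chaine".
 TokenStream stream = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, new StringReader("À la chaîne"));
 stream = new ASCIIFoldingFilter(stream);
 ICharTermAttribute termAtt = stream.GetAttribute<ICharTermAttribute>();
 stream.Reset();
 while (stream.IncrementToken())
 {
     Console.WriteLine(termAtt.ToString());
 }
 stream.End();
 stream.Dispose();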
 /// <summary>
 /// Pop one input token's worth of tokens off the filter and verify that they are as expected.
 /// </summary>
 internal virtual void assertNextTerms(string expectedUnfolded, string expectedFolded, ASCIIFoldingFilter filter, ICharTermAttribute termAtt)
 {
     // The folded form is always emitted first.
     assertTrue(filter.IncrementToken());
     assertEquals(expectedFolded, termAtt.ToString());
     // With preserveOriginal enabled, the unfolded original follows at the
     // same position, unless folding left the token unchanged.
     if (filter.PreserveOriginal && !expectedUnfolded.Equals(expectedFolded))
     {
         assertTrue(filter.IncrementToken());
         assertEquals(expectedUnfolded, termAtt.ToString());
     }
 }
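Note on ordering: when the filter is constructed with preserveOriginal = true, it emits the folded term first and then restores the original term at the same position (position increment 0). That is why the helper checks the folded form before the unfolded one, and skips the second check when folding was a no-op.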
        public virtual void testLatin1Accents()
        {
            TokenStream stream = new MockTokenizer(new StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ" + " Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij" + " ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl"), MockTokenizer.WHITESPACE, false);
            ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream, Random().nextBoolean());

            ICharTermAttribute termAtt = filter.GetAttribute<ICharTermAttribute>();
            filter.Reset();
            assertNextTerms("Des", "Des", filter, termAtt);
            assertNextTerms("mot", "mot", filter, termAtt);
            assertNextTerms("clés", "cles", filter, termAtt);
            assertNextTerms("À", "A", filter, termAtt);
            assertNextTerms("LA", "LA", filter, termAtt);
            assertNextTerms("CHAÎNE", "CHAINE", filter, termAtt);
            assertNextTerms("À", "A", filter, termAtt);
            assertNextTerms("Á", "A", filter, termAtt);
            assertNextTerms("Â", "A", filter, termAtt);
            assertNextTerms("Ã", "A", filter, termAtt);
            assertNextTerms("Ä", "A", filter, termAtt);
            assertNextTerms("Å", "A", filter, termAtt);
            assertNextTerms("Æ", "AE", filter, termAtt);
            assertNextTerms("Ç", "C", filter, termAtt);
            assertNextTerms("È", "E", filter, termAtt);
            assertNextTerms("É", "E", filter, termAtt);
            assertNextTerms("Ê", "E", filter, termAtt);
            assertNextTerms("Ë", "E", filter, termAtt);
            assertNextTerms("Ì", "I", filter, termAtt);
            assertNextTerms("Í", "I", filter, termAtt);
            assertNextTerms("Î", "I", filter, termAtt);
            assertNextTerms("Ï", "I", filter, termAtt);
            assertNextTerms("IJ", "IJ", filter, termAtt);
            assertNextTerms("Ð", "D", filter, termAtt);
            assertNextTerms("Ñ", "N", filter, termAtt);
            assertNextTerms("Ò", "O", filter, termAtt);
            assertNextTerms("Ó", "O", filter, termAtt);
            assertNextTerms("Ô", "O", filter, termAtt);
            assertNextTerms("Õ", "O", filter, termAtt);
            assertNextTerms("Ö", "O", filter, termAtt);
            assertNextTerms("Ø", "O", filter, termAtt);
            assertNextTerms("Œ", "OE", filter, termAtt);
            assertNextTerms("Þ", "TH", filter, termAtt);
            assertNextTerms("Ù", "U", filter, termAtt);
            assertNextTerms("Ú", "U", filter, termAtt);
            assertNextTerms("Û", "U", filter, termAtt);
            assertNextTerms("Ü", "U", filter, termAtt);
            assertNextTerms("Ý", "Y", filter, termAtt);
            assertNextTerms("Ÿ", "Y", filter, termAtt);
            assertNextTerms("à", "a", filter, termAtt);
            assertNextTerms("á", "a", filter, termAtt);
            assertNextTerms("â", "a", filter, termAtt);
            assertNextTerms("ã", "a", filter, termAtt);
            assertNextTerms("ä", "a", filter, termAtt);
            assertNextTerms("å", "a", filter, termAtt);
            assertNextTerms("æ", "ae", filter, termAtt);
            assertNextTerms("ç", "c", filter, termAtt);
            assertNextTerms("è", "e", filter, termAtt);
            assertNextTerms("é", "e", filter, termAtt);
            assertNextTerms("ê", "e", filter, termAtt);
            assertNextTerms("ë", "e", filter, termAtt);
            assertNextTerms("ì", "i", filter, termAtt);
            assertNextTerms("í", "i", filter, termAtt);
            assertNextTerms("î", "i", filter, termAtt);
            assertNextTerms("ï", "i", filter, termAtt);
            assertNextTerms("ij", "ij", filter, termAtt);
            assertNextTerms("ð", "d", filter, termAtt);
            assertNextTerms("ñ", "n", filter, termAtt);
            assertNextTerms("ò", "o", filter, termAtt);
            assertNextTerms("ó", "o", filter, termAtt);
            assertNextTerms("ô", "o", filter, termAtt);
            assertNextTerms("õ", "o", filter, termAtt);
            assertNextTerms("ö", "o", filter, termAtt);
            assertNextTerms("ø", "o", filter, termAtt);
            assertNextTerms("œ", "oe", filter, termAtt);
            assertNextTerms("ß", "ss", filter, termAtt);
            assertNextTerms("þ", "th", filter, termAtt);
            assertNextTerms("ù", "u", filter, termAtt);
            assertNextTerms("ú", "u", filter, termAtt);
            assertNextTerms("û", "u", filter, termAtt);
            assertNextTerms("ü", "u", filter, termAtt);
            assertNextTerms("ý", "y", filter, termAtt);
            assertNextTerms("ÿ", "y", filter, termAtt);
            assertNextTerms("fi", "fi", filter, termAtt);
            assertNextTerms("fl", "fl", filter, termAtt);
            assertFalse(filter.IncrementToken());
        }
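The filter also composes with other token filters inside an analyzer chain. The following CreateComponents override feeds the folded tokens into an edge n-gram filter: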
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
     #pragma warning disable 612, 618
     filters = new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
     #pragma warning restore 612, 618
     return new TokenStreamComponents(tokenizer, filters);
 }
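The pragma pair suppresses obsolete-API warnings (CS0612/CS0618): the Side-based EdgeNGramTokenFilter constructor is deprecated, and passing LuceneVersion.LUCENE_43 keeps the legacy pre-4.4 front-edge n-gram behavior.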
        public virtual void testAllFoldings()
        {
            // Alternating strings of:
            //   1. All non-ASCII characters to be folded, concatenated together as a
            //      single string.
            //   2. The string of ASCII characters to which each of the above
            //      characters should be folded.
            string[] foldings = new string[] { "À" + "Á" + "Â" + "Ã" + "Ä" + "Å" + "Ā" + "Ă" + "Ą" + "Ə" + "Ǎ" + "Ǟ" + "Ǡ" + "Ǻ" + "Ȁ" + "Ȃ" + "Ȧ" + "Ⱥ" + "ᴀ" + "Ḁ" + "Ạ" + "Ả" + "Ấ" + "Ầ" + "Ẩ" + "Ẫ" + "Ậ" + "Ắ" + "Ằ" + "Ẳ" + "Ẵ" + "Ặ" + "Ⓐ" + "A", "A", "à" + "á" + "â" + "ã" + "ä" + "å" + "ā" + "ă" + "ą" + "ǎ" + "ǟ" + "ǡ" + "ǻ" + "ȁ" + "ȃ" + "ȧ" + "ɐ" + "ə" + "ɚ" + "ᶏ" + "ḁ" + "ᶕ" + "ẚ" + "ạ" + "ả" + "ấ" + "ầ" + "ẩ" + "ẫ" + "ậ" + "ắ" + "ằ" + "ẳ" + "ẵ" + "ặ" + "ₐ" + "ₔ" + "ⓐ" + "ⱥ" + "Ɐ" + "a", "a", "Ꜳ", "AA", "Æ" + "Ǣ" + "Ǽ" + "ᴁ", "AE", "Ꜵ", "AO", "Ꜷ", "AU", "Ꜹ" + "Ꜻ", "AV", "Ꜽ", "AY", "⒜", "(a)", "ꜳ", "aa", "æ" + "ǣ" + "ǽ" + "ᴂ", "ae", "ꜵ", "ao", "ꜷ", "au", "ꜹ" + "ꜻ", "av", "ꜽ", "ay", "Ɓ" + "Ƃ" + "Ƀ" + "ʙ" + "ᴃ" + "Ḃ" + "Ḅ" + "Ḇ" + "Ⓑ" + "B", "B", "ƀ" + "ƃ" + "ɓ" + "ᵬ" + "ᶀ" + "ḃ" + "ḅ" + "ḇ" + "ⓑ" + "b", "b", "⒝", "(b)", "Ç" + "Ć" + "Ĉ" + "Ċ" + "Č" + "Ƈ" + "Ȼ" + "ʗ" + "ᴄ" + "Ḉ" + "Ⓒ" + "C", "C", "ç" + "ć" + "ĉ" + "ċ" + "č" + "ƈ" + "ȼ" + "ɕ" + "ḉ" + "ↄ" + "ⓒ" + "Ꜿ" + "ꜿ" + "c", "c", "⒞", "(c)", "Ð" + "Ď" + "Đ" + "Ɖ" + "Ɗ" + "Ƌ" + "ᴅ" + "ᴆ" + "Ḋ" + "Ḍ" + "Ḏ" + "Ḑ" + "Ḓ" + "Ⓓ" + "Ꝺ" + "D", "D", "ð" + "ď" + "đ" + "ƌ" + "ȡ" + "ɖ" + "ɗ" + "ᵭ" + "ᶁ" + "ᶑ" + "ḋ" + "ḍ" + "ḏ" + "ḑ" + "ḓ" + "ⓓ" + "ꝺ" + "d", "d", "DŽ" + "DZ", "DZ", "Dž" + "Dz", "Dz", "⒟", "(d)", "ȸ", "db", "dž" + "dz" + "ʣ" + "ʥ", "dz", "È" + "É" + "Ê" + "Ë" + "Ē" + "Ĕ" + "Ė" + "Ę" + "Ě" + "Ǝ" + "Ɛ" + "Ȅ" + "Ȇ" + "Ȩ" + "Ɇ" + "ᴇ" + "Ḕ" + "Ḗ" + "Ḙ" + "Ḛ" + "Ḝ" + "Ẹ" + "Ẻ" + "Ẽ" + "Ế" + "Ề" + "Ể" + "Ễ" + "Ệ" + "Ⓔ" + "ⱻ" + "E", "E", "è" + "é" + "ê" + "ë" + "ē" + "ĕ" + "ė" + "ę" + "ě" + "ǝ" + "ȅ" + "ȇ" + "ȩ" + "ɇ" + "ɘ" + "ɛ" + "ɜ" + "ɝ" + "ɞ" + "ʚ" + "ᴈ" + "ᶒ" + "ᶓ" + "ᶔ" + "ḕ" + "ḗ" + "ḙ" + "ḛ" + "ḝ" + "ẹ" + "ẻ" + "ẽ" + "ế" + "ề" + "ể" + "ễ" + "ệ" + "ₑ" + "ⓔ" + "ⱸ" + "e", "e", "⒠", "(e)", "Ƒ" + "Ḟ" + "Ⓕ" + "ꜰ" + "Ꝼ" + "ꟻ" + "F", "F", "ƒ" + "ᵮ" + "ᶂ" + "ḟ" + "ẛ" + "ⓕ" + "ꝼ" + "f", "f", "⒡", "(f)", "ff", "ff", "ffi", "ffi", "ffl", "ffl", "fi", "fi", "fl", "fl", "Ĝ" + "Ğ" + "Ġ" + "Ģ" + "Ɠ" + "Ǥ" + "ǥ" + "Ǧ" + "ǧ" + "Ǵ" + "ɢ" + "ʛ" + "Ḡ" + "Ⓖ" + "Ᵹ" + "Ꝿ" + "G", "G", "ĝ" + "ğ" + "ġ" + "ģ" + "ǵ" + "ɠ" + "ɡ" + "ᵷ" + "ᵹ" + "ᶃ" + "ḡ" + "ⓖ" + "ꝿ" + "g", "g", "⒢", "(g)", "Ĥ" + "Ħ" + "Ȟ" + "ʜ" + "Ḣ" + "Ḥ" + "Ḧ" + "Ḩ" + "Ḫ" + "Ⓗ" + "Ⱨ" + "Ⱶ" + "H", "H", "ĥ" + "ħ" + "ȟ" + "ɥ" + "ɦ" + "ʮ" + "ʯ" + "ḣ" + "ḥ" + "ḧ" + "ḩ" + "ḫ" + "ẖ" + "ⓗ" + "ⱨ" + "ⱶ" + "h", "h", "Ƕ", "HV", "⒣", "(h)", "ƕ", "hv", "Ì" + "Í" + "Î" + "Ï" + "Ĩ" + "Ī" + "Ĭ" + "Į" + "İ" + "Ɩ" + "Ɨ" + "Ǐ" + "Ȉ" + "Ȋ" + "ɪ" + "ᵻ" + "Ḭ" + "Ḯ" + "Ỉ" + "Ị" + "Ⓘ" + "ꟾ" + "I", "I", "ì" + "í" + "î" + "ï" + "ĩ" + "ī" + "ĭ" + "į" + "ı" + "ǐ" + "ȉ" + "ȋ" + "ɨ" + "ᴉ" + "ᵢ" + "ᵼ" + "ᶖ" + "ḭ" + "ḯ" + "ỉ" + "ị" + "ⁱ" + "ⓘ" + "i", "i", "IJ", "IJ", "⒤", "(i)", "ij", "ij", "Ĵ" + "Ɉ" + "ᴊ" + "Ⓙ" + "J", "J", "ĵ" + "ǰ" + "ȷ" + "ɉ" + "ɟ" + "ʄ" + "ʝ" + "ⓙ" + "ⱼ" + "j", "j", "⒥", "(j)", "Ķ" + "Ƙ" + "Ǩ" + "ᴋ" + "Ḱ" + "Ḳ" + "Ḵ" + "Ⓚ" + "Ⱪ" + "Ꝁ" + "Ꝃ" + "Ꝅ" + "K", "K", "ķ" + "ƙ" + "ǩ" + "ʞ" + "ᶄ" + "ḱ" + "ḳ" + "ḵ" + "ⓚ" + "ⱪ" + "ꝁ" + "ꝃ" + "ꝅ" + "k", "k", "⒦", "(k)", "Ĺ" + "Ļ" + "Ľ" + "Ŀ" + "Ł" + "Ƚ" + "ʟ" + "ᴌ" + "Ḷ" + "Ḹ" + "Ḻ" + "Ḽ" + "Ⓛ" + "Ⱡ" + "Ɫ" + "Ꝇ" + "Ꝉ" + "Ꞁ" + "L", "L", "ĺ" + "ļ" + "ľ" + "ŀ" + "ł" + "ƚ" + "ȴ" + "ɫ" + "ɬ" + "ɭ" + "ᶅ" + "ḷ" + "ḹ" + "ḻ" + "ḽ" + "ⓛ" + "ⱡ" + "ꝇ" + "ꝉ" + "ꞁ" + "l", "l", "LJ", "LJ", "Ỻ", "LL", "Lj", "Lj", "⒧", "(l)", "lj", "lj", "ỻ", "ll", "ʪ", "ls", "ʫ", "lz", "Ɯ" + "ᴍ" + "Ḿ" + "Ṁ" + "Ṃ" + "Ⓜ" + "Ɱ" + "ꟽ" + "ꟿ" + "M", "M", "ɯ" + "ɰ" + "ɱ" + "ᵯ" + "ᶆ" + "ḿ" + "ṁ" + "ṃ" + "ⓜ" + "m", "m", "⒨", "(m)", "Ñ" + "Ń" + "Ņ" + "Ň" + "Ŋ" + "Ɲ" + "Ǹ" + "Ƞ" + "ɴ" + "ᴎ" + "Ṅ" + "Ṇ" + "Ṉ" + 
"Ṋ" + "Ⓝ" + "N", "N", "ñ" + "ń" + "ņ" + "ň" + "ʼn" + "ŋ" + "ƞ" + "ǹ" + "ȵ" + "ɲ" + "ɳ" + "ᵰ" + "ᶇ" + "ṅ" + "ṇ" + "ṉ" + "ṋ" + "ⁿ" + "ⓝ" + "n", "n", "NJ", "NJ", "Nj", "Nj", "⒩", "(n)", "nj", "nj", "Ò" + "Ó" + "Ô" + "Õ" + "Ö" + "Ø" + "Ō" + "Ŏ" + "Ő" + "Ɔ" + "Ɵ" + "Ơ" + "Ǒ" + "Ǫ" + "Ǭ" + "Ǿ" + "Ȍ" + "Ȏ" + "Ȫ" + "Ȭ" + "Ȯ" + "Ȱ" + "ᴏ" + "ᴐ" + "Ṍ" + "Ṏ" + "Ṑ" + "Ṓ" + "Ọ" + "Ỏ" + "Ố" + "Ồ" + "Ổ" + "Ỗ" + "Ộ" + "Ớ" + "Ờ" + "Ở" + "Ỡ" + "Ợ" + "Ⓞ" + "Ꝋ" + "Ꝍ" + "O", "O", "ò" + "ó" + "ô" + "õ" + "ö" + "ø" + "ō" + "ŏ" + "ő" + "ơ" + "ǒ" + "ǫ" + "ǭ" + "ǿ" + "ȍ" + "ȏ" + "ȫ" + "ȭ" + "ȯ" + "ȱ" + "ɔ" + "ɵ" + "ᴖ" + "ᴗ" + "ᶗ" + "ṍ" + "ṏ" + "ṑ" + "ṓ" + "ọ" + "ỏ" + "ố" + "ồ" + "ổ" + "ỗ" + "ộ" + "ớ" + "ờ" + "ở" + "ỡ" + "ợ" + "ₒ" + "ⓞ" + "ⱺ" + "ꝋ" + "ꝍ" + "o", "o", "Œ" + "ɶ", "OE", "Ꝏ", "OO", "Ȣ" + "ᴕ", "OU", "⒪", "(o)", "œ" + "ᴔ", "oe", "ꝏ", "oo", "ȣ", "ou", "Ƥ" + "ᴘ" + "Ṕ" + "Ṗ" + "Ⓟ" + "Ᵽ" + "Ꝑ" + "Ꝓ" + "Ꝕ" + "P", "P", "ƥ" + "ᵱ" + "ᵽ" + "ᶈ" + "ṕ" + "ṗ" + "ⓟ" + "ꝑ" + "ꝓ" + "ꝕ" + "ꟼ" + "p", "p", "⒫", "(p)", "Ɋ" + "Ⓠ" + "Ꝗ" + "Ꝙ" + "Q", "Q", "ĸ" + "ɋ" + "ʠ" + "ⓠ" + "ꝗ" + "ꝙ" + "q", "q", "⒬", "(q)", "ȹ", "qp", "Ŕ" + "Ŗ" + "Ř" + "Ȑ" + "Ȓ" + "Ɍ" + "ʀ" + "ʁ" + "ᴙ" + "ᴚ" + "Ṙ" + "Ṛ" + "Ṝ" + "Ṟ" + "Ⓡ" + "Ɽ" + "Ꝛ" + "Ꞃ" + "R", "R", "ŕ" + "ŗ" + "ř" + "ȑ" + "ȓ" + "ɍ" + "ɼ" + "ɽ" + "ɾ" + "ɿ" + "ᵣ" + "ᵲ" + "ᵳ" + "ᶉ" + "ṙ" + "ṛ" + "ṝ" + "ṟ" + "ⓡ" + "ꝛ" + "ꞃ" + "r", "r", "⒭", "(r)", "Ś" + "Ŝ" + "Ş" + "Š" + "Ș" + "Ṡ" + "Ṣ" + "Ṥ" + "Ṧ" + "Ṩ" + "Ⓢ" + "ꜱ" + "ꞅ" + "S", "S", "ś" + "ŝ" + "ş" + "š" + "ſ" + "ș" + "ȿ" + "ʂ" + "ᵴ" + "ᶊ" + "ṡ" + "ṣ" + "ṥ" + "ṧ" + "ṩ" + "ẜ" + "ẝ" + "ⓢ" + "Ꞅ" + "s", "s", "ẞ", "SS", "⒮", "(s)", "ß", "ss", "st", "st", "Ţ" + "Ť" + "Ŧ" + "Ƭ" + "Ʈ" + "Ț" + "Ⱦ" + "ᴛ" + "Ṫ" + "Ṭ" + "Ṯ" + "Ṱ" + "Ⓣ" + "Ꞇ" + "T", "T", "ţ" + "ť" + "ŧ" + "ƫ" + "ƭ" + "ț" + "ȶ" + "ʇ" + "ʈ" + "ᵵ" + "ṫ" + "ṭ" + "ṯ" + "ṱ" + "ẗ" + "ⓣ" + "ⱦ" + "t", "t", "Þ" + "Ꝧ", "TH", "Ꜩ", "TZ", "⒯", "(t)", "ʨ", "tc", "þ" + "ᵺ" + "ꝧ", "th", "ʦ", "ts", "ꜩ", "tz", "Ù" + "Ú" + "Û" + "Ü" + "Ũ" + "Ū" + "Ŭ" + "Ů" + "Ű" + "Ų" + "Ư" + "Ǔ" + "Ǖ" + "Ǘ" + "Ǚ" + "Ǜ" + "Ȕ" + "Ȗ" + "Ʉ" + "ᴜ" + "ᵾ" + "Ṳ" + "Ṵ" + "Ṷ" + "Ṹ" + "Ṻ" + "Ụ" + "Ủ" + "Ứ" + "Ừ" + "Ử" + "Ữ" + "Ự" + "Ⓤ" + "U", "U", "ù" + "ú" + "û" + "ü" + "ũ" + "ū" + "ŭ" + "ů" + "ű" + "ų" + "ư" + "ǔ" + "ǖ" + "ǘ" + "ǚ" + "ǜ" + "ȕ" + "ȗ" + "ʉ" + "ᵤ" + "ᶙ" + "ṳ" + "ṵ" + "ṷ" + "ṹ" + "ṻ" + "ụ" + "ủ" + "ứ" + "ừ" + "ử" + "ữ" + "ự" + "ⓤ" + "u", "u", "⒰", "(u)", "ᵫ", "ue", "Ʋ" + "Ʌ" + "ᴠ" + "Ṽ" + "Ṿ" + "Ỽ" + "Ⓥ" + "Ꝟ" + "Ꝩ" + "V", "V", "ʋ" + "ʌ" + "ᵥ" + "ᶌ" + "ṽ" + "ṿ" + "ⓥ" + "ⱱ" + "ⱴ" + "ꝟ" + "v", "v", "Ꝡ", "VY", "⒱", "(v)", "ꝡ", "vy", "Ŵ" + "Ƿ" + "ᴡ" + "Ẁ" + "Ẃ" + "Ẅ" + "Ẇ" + "Ẉ" + "Ⓦ" + "Ⱳ" + "W", "W", "ŵ" + "ƿ" + "ʍ" + "ẁ" + "ẃ" + "ẅ" + "ẇ" + "ẉ" + "ẘ" + "ⓦ" + "ⱳ" + "w", "w", "⒲", "(w)", "Ẋ" + "Ẍ" + "Ⓧ" + "X", "X", "ᶍ" + "ẋ" + "ẍ" + "ₓ" + "ⓧ" + "x", "x", "⒳", "(x)", "Ý" + "Ŷ" + "Ÿ" + "Ƴ" + "Ȳ" + "Ɏ" + "ʏ" + "Ẏ" + "Ỳ" + "Ỵ" + "Ỷ" + "Ỹ" + "Ỿ" + "Ⓨ" + "Y", "Y", "ý" + "ÿ" + "ŷ" + "ƴ" + "ȳ" + "ɏ" + "ʎ" + "ẏ" + "ẙ" + "ỳ" + "ỵ" + "ỷ" + "ỹ" + "ỿ" + "ⓨ" + "y", "y", "⒴", "(y)", "Ź" + "Ż" + "Ž" + "Ƶ" + "Ȝ" + "Ȥ" + "ᴢ" + "Ẑ" + "Ẓ" + "Ẕ" + "Ⓩ" + "Ⱬ" + "Ꝣ" + "Z", "Z", "ź" + "ż" + "ž" + "ƶ" + "ȝ" + "ȥ" + "ɀ" + "ʐ" + "ʑ" + "ᵶ" + "ᶎ" + "ẑ" + "ẓ" + "ẕ" + "ⓩ" + "ⱬ" + "ꝣ" + "z", "z", "⒵", "(z)", "⁰" + "₀" + "⓪" + "⓿" + "0", "0", "¹" + "₁" + "①" + "⓵" + "❶" + "➀" + "➊" + "1", "1", "⒈", "1.", "⑴", "(1)", "²" + "₂" + "②" + "⓶" + "❷" + "➁" + "➋" + "2", "2", "⒉", "2.", "⑵", "(2)", "³" + "₃" + "③" + "⓷" + "❸" + "➂" + "➌" + "3", "3", "⒊", "3.", "⑶", "(3)", "⁴" + "₄" + "④" + "⓸" + "❹" + "➃" + "➍" + "4", "4", "⒋", "4.", "⑷", "(4)", "⁵" + "₅" + "⑤" + 
"⓹" + "❺" + "➄" + "➎" + "5", "5", "⒌", "5.", "⑸", "(5)", "⁶" + "₆" + "⑥" + "⓺" + "❻" + "➅" + "➏" + "6", "6", "⒍", "6.", "⑹", "(6)", "⁷" + "₇" + "⑦" + "⓻" + "❼" + "➆" + "➐" + "7", "7", "⒎", "7.", "⑺", "(7)", "⁸" + "₈" + "⑧" + "⓼" + "❽" + "➇" + "➑" + "8", "8", "⒏", "8.", "⑻", "(8)", "⁹" + "₉" + "⑨" + "⓽" + "❾" + "➈" + "➒" + "9", "9", "⒐", "9.", "⑼", "(9)", "⑩" + "⓾" + "❿" + "➉" + "➓", "10", "⒑", "10.", "⑽", "(10)", "⑪" + "⓫", "11", "⒒", "11.", "⑾", "(11)", "⑫" + "⓬", "12", "⒓", "12.", "⑿", "(12)", "⑬" + "⓭", "13", "⒔", "13.", "⒀", "(13)", "⑭" + "⓮", "14", "⒕", "14.", "⒁", "(14)", "⑮" + "⓯", "15", "⒖", "15.", "⒂", "(15)", "⑯" + "⓰", "16", "⒗", "16.", "⒃", "(16)", "⑰" + "⓱", "17", "⒘", "17.", "⒄", "(17)", "⑱" + "⓲", "18", "⒙", "18.", "⒅", "(18)", "⑲" + "⓳", "19", "⒚", "19.", "⒆", "(19)", "⑳" + "⓴", "20", "⒛", "20.", "⒇", "(20)", "«" + "»" + "“" + "”" + "„" + "″" + "‶" + "❝" + "❞" + "❮" + "❯" + """, "\"", "‘" + "’" + "‚" + "‛" + "′" + "‵" + "‹" + "›" + "❛" + "❜" + "'", "'", "‐" + "‑" + "‒" + "–" + "—" + "⁻" + "₋" + "-", "-", "⁅" + "❲" + "[", "[", "⁆" + "❳" + "]", "]", "⁽" + "₍" + "❨" + "❪" + "(", "(", "⸨", "((", "⁾" + "₎" + "❩" + "❫" + ")", ")", "⸩", "))", "❬" + "❰" + "<", "<", "❭" + "❱" + ">", ">", "❴" + "{", "{", "❵" + "}", "}", "⁺" + "₊" + "+", "+", "⁼" + "₌" + "=", "=", "!", "!", "‼", "!!", "⁉", "!?", "#", "#", "$", "$", "⁒" + "%", "%", "&", "&", "⁎" + "*", "*", ",", ",", ".", ".", "⁄" + "/", "/", ":", ":", "⁏" + ";", ";", "?", "?", "⁇", "??", "⁈", "?!", "@", "@", "\", "\\", "‸" + "^", "^", "_", "_", "⁓" + "~", "~" };

            // Construct input text and expected output tokens
            IList<string> expectedUnfoldedTokens = new List<string>();
            IList<string> expectedFoldedTokens = new List<string>();
            StringBuilder inputText = new StringBuilder();
            for (int n = 0; n < foldings.Length; n += 2)
            {
                if (n > 0)
                {
                    inputText.Append(' '); // Space between tokens
                }
                inputText.Append(foldings[n]);

                // Construct the expected output tokens: the unfolded input itself, plus
                // the folded string repeated once per input character, since every
                // character in a group folds to the same ASCII replacement.
                StringBuilder expected = new StringBuilder();
                int numChars = foldings[n].Length;
                for (int m = 0; m < numChars; ++m)
                {
                    expected.Append(foldings[n + 1]);
                }
                expectedUnfoldedTokens.Add(foldings[n]);
                expectedFoldedTokens.Add(expected.ToString());
            }

            TokenStream stream = new MockTokenizer(new StringReader(inputText.ToString()), MockTokenizer.WHITESPACE, false);
            ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream, Random().nextBoolean());
            ICharTermAttribute termAtt = filter.GetAttribute<ICharTermAttribute>();
            IEnumerator<string> unfoldedIter = expectedUnfoldedTokens.GetEnumerator();
            IEnumerator<string> foldedIter = expectedFoldedTokens.GetEnumerator();
            filter.Reset();
            while (foldedIter.MoveNext())
            {
                unfoldedIter.MoveNext();
                assertNextTerms(unfoldedIter.Current, foldedIter.Current, filter, termAtt);
            }
            assertFalse(filter.IncrementToken());
        }
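A second components factory chains the folding filter into a fixed-size NGramTokenFilter, so folding happens before the bigrams are generated: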
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
     filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
     return new TokenStreamComponents(tokenizer, filters);
 }
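As a rough sketch of how such a factory might be exercised, assuming the Lucene.Net test framework's Analyzer.NewAnonymous and BaseTokenStreamTestCase.AssertAnalyzesTo helpers are available:

 // Illustrative sketch (assumes test-framework helpers): wrap the factory in an
 // anonymous Analyzer and check that folding runs before bigramming, so the
 // input "és" folds to "es" and yields the single bigram "es".
 Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
 {
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
     filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
     return new TokenStreamComponents(tokenizer, filters);
 });
 AssertAnalyzesTo(analyzer, "és", new string[] { "es" });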