/// <summary>
/// Checks that toggling the detector's input filter changes which language
/// is reported for the same bytes: a short French sentence surrounded by
/// angle-bracketed English words, encoded as ISO-8859-1.
/// </summary>
/// <remarks>
/// Failures are reported through <c>Errln</c>; the method does not stop at
/// the first failed check.
/// </remarks>
public void TestInputFilter()
{
    String markedUpText = "<a> <lot> <of> <English> <inside> <the> <markup> Un tr\u00E8s petit peu de Fran\u00E7ais. <to> <confuse> <the> <detector>";
    byte[] encodedText = ILOG.J2CsMapping.Util.StringUtil.GetBytes(markedUpText, "ISO-8859-1");
    CharsetDetector detector = new CharsetDetector();

    // Pass 1: filter on. The flag must read back as enabled, and the
    // detected language is expected to be French.
    detector.EnableInputFilter(true);
    bool filterIsOn = detector.InputFilterEnabled();
    if (!filterIsOn)
    {
        Errln("input filter should be enabled");
    }

    detector.SetText(encodedText);
    CharsetMatch filteredMatch = detector.Detect();
    String filteredLanguage = filteredMatch.GetLanguage();
    if (!filteredLanguage.Equals("fr"))
    {
        Errln("input filter did not strip markup!");
    }

    // Pass 2: filter off, same bytes re-submitted. The detected language
    // is now expected to be English.
    detector.EnableInputFilter(false);
    detector.SetText(encodedText);
    CharsetMatch unfilteredMatch = detector.Detect();
    String unfilteredLanguage = unfilteredMatch.GetLanguage();
    if (!unfilteredLanguage.Equals("en"))
    {
        Errln("unfiltered input did not detect as English!");
    }
}