/** * @param inputStream * @param errorHandler * @param locator * @throws IOException * @throws SAXException */ public HtmlInputStreamReader(Stream inputStream, ErrorHandler errorHandler, Tokenizer tokenizer, Driver driver, Heuristics heuristics) { this.inputStream = inputStream; this.errorHandler = errorHandler; this.tokenizer = tokenizer; this.driver = driver; this.sniffing = true; Encoding encoding = (new BomSniffer(this)).sniff(); if (encoding == null) { position = 0; encoding = (new MetaSniffer(errorHandler, this)).sniff(this); if (encoding == null && (heuristics == Heuristics.CHARDET || heuristics == Heuristics.ALL)) { encoding = (new ChardetSniffer(byteArray, limit)).sniff(); } if (encoding == null && (heuristics == Heuristics.ICU || heuristics == Heuristics.ALL)) { position = 0; encoding = (new IcuDetectorSniffer(this)).sniff(); } sniffing = false; if (encoding == null) { encoding = Encoding.WINDOWS1252; } if (driver != null) { driver.setEncoding(encoding, Confidence.TENTATIVE); } } else { if (encoding == Encoding.UTF8) { if (driver != null) { driver.setEncoding(Encoding.UTF8, Confidence.CERTAIN); } } else { if (driver != null) { driver.setEncoding(Encoding.UTF16, Confidence.CERTAIN); } } } this.decoder = encoding.newDecoder(); sniffing = false; position = 0; bytesRead = 0; byteBuffer.position(position); byteBuffer.limit(limit); initDecoder(); }