////////////////////////////////////////////////////////////////
// methods
////////////////////////////////////////////////////////////////

/// <summary>
/// Creates a prober for the given sequence model, with the
/// <c>reversed</c> flag off and no name prober attached.
/// </summary>
/// <param name="model">Sequence model stored on this prober.</param>
public SingleByteCharsetProber(SequenceModel model)
{
    // The parameterless base constructor runs implicitly.
    this.seqCounters = new int[NUMBER_OF_SEQ_CAT];
    this.nameProber = null;
    this.reversed = false;
    this.model = model;

    // reset() is invoked last, once every field above has been assigned.
    reset();
}
////////////////////////////////////////////////////////////////
// methods
////////////////////////////////////////////////////////////////

/// <summary>
/// Builds a prober around <paramref name="model"/>. The prober is
/// marked as not reversed and has no associated name prober.
/// </summary>
/// <param name="model">Sequence model assigned to this instance.</param>
public SingleByteCharsetProber(SequenceModel model)
    : base()
{
    this.model = model;

    // Defaults used by the single-argument form.
    this.nameProber = null;
    this.reversed = false;

    // One counter slot per sequence category.
    this.seqCounters = new int[NUMBER_OF_SEQ_CAT];

    reset();
}
/// <summary>
/// Creates a prober for the given sequence model with an explicit
/// <c>reversed</c> flag and name prober.
/// </summary>
/// <param name="model">Sequence model stored on this prober.</param>
/// <param name="reversed">Value stored in the <c>reversed</c> field.</param>
/// <param name="nameProber">Prober stored in the <c>nameProber</c> field.</param>
public SingleByteCharsetProber(SequenceModel model, bool reversed, CharsetProber nameProber)
{
    // The parameterless base constructor runs implicitly.
    this.seqCounters = new int[NUMBER_OF_SEQ_CAT];
    this.nameProber = nameProber;
    this.reversed = reversed;
    this.model = model;

    // reset() is invoked last, once every field above has been assigned.
    reset();
}
/// <summary>
/// Builds a prober around <paramref name="model"/>, recording the
/// supplied <paramref name="reversed"/> flag and
/// <paramref name="nameProber"/> as given.
/// </summary>
/// <param name="model">Sequence model assigned to this instance.</param>
/// <param name="reversed">Stored unchanged in the <c>reversed</c> field.</param>
/// <param name="nameProber">Stored unchanged in the <c>nameProber</c> field.</param>
public SingleByteCharsetProber(
    SequenceModel model,
    bool reversed,
    CharsetProber nameProber)
    : base()
{
    this.model = model;
    this.nameProber = nameProber;
    this.reversed = reversed;

    // One counter slot per sequence category.
    this.seqCounters = new int[NUMBER_OF_SEQ_CAT];

    reset();
}
////////////////////////////////////////////////////////////////
// methods
////////////////////////////////////////////////////////////////

/// <summary>
/// Creates a detector with no escape-sequence prober and three empty
/// prober slots, then calls <c>Reset()</c>.
/// </summary>
/// <param name="listener">
/// Listener object that is notified of the detected encoding. Can be null.
/// </param>
public UniversalDetector(ICharsetListener listener)
{
    this.listener = listener;
    this.escCharsetProber = null;

    // Elements of a newly allocated reference-type array are already null,
    // so the three slots need no explicit clearing.
    this.probers = new CharsetProber[3];

    Reset();
}
/// <summary>
/// Feeds one chunk of raw bytes to the detector.
/// </summary>
/// <remarks>
/// The call does nothing once detection has finished (<c>done</c> is set).
/// The first non-empty chunk is checked for a byte-order mark; a match
/// sets the detected charset and finishes detection immediately.
/// Otherwise the chunk is scanned to update the input state and is then
/// handed to the escape-sequence prober or to the three high-byte
/// probers, depending on that state.
/// </remarks>
/// <param name="buf">Buffer containing the bytes to examine.</param>
/// <param name="offset">Index in <paramref name="buf"/> of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine, starting at <paramref name="offset"/>.</param>
public void HandleData(byte[] buf, int offset, int length)
{
    if (this.done)
    {
        return;
    }

    if (length > 0)
    {
        this.gotData = true;
    }

    // The BOM check is one-shot. Only a call that actually carries bytes may
    // consume it; an empty first call must not disable the check for the
    // data that follows.
    if (this.start && length > 0)
    {
        this.start = false;

        if (length >= 3)
        {
            int b1 = buf[offset] & 0xFF;
            int b2 = buf[offset + 1] & 0xFF;
            int b3 = buf[offset + 2] & 0xFF;

            if (b1 == 0xEF && b2 == 0xBB && b3 == 0xBF)
            {
                // EF BB BF: the UTF-8 BOM is complete after three bytes, so
                // it is recognised even when the chunk holds nothing else.
                this.detectedCharset = Constants.CHARSET_UTF_8;
            }
            else if (length > 3)
            {
                // The remaining signatures need a fourth byte to tell the
                // 16-bit forms from the 32-bit ones.
                int b4 = buf[offset + 3] & 0xFF;

                switch (b1)
                {
                    case 0xFE:
                        if (b2 == 0xFF && b3 == 0x00 && b4 == 0x00)
                        {
                            // FE FF 00 00
                            this.detectedCharset = Constants.CHARSET_X_ISO_10646_UCS_4_3412;
                        }
                        else if (b2 == 0xFF)
                        {
                            // FE FF
                            this.detectedCharset = Constants.CHARSET_UTF_16BE;
                        }
                        break;

                    case 0x00:
                        if (b2 == 0x00 && b3 == 0xFE && b4 == 0xFF)
                        {
                            // 00 00 FE FF
                            this.detectedCharset = Constants.CHARSET_UTF_32BE;
                        }
                        else if (b2 == 0x00 && b3 == 0xFF && b4 == 0xFE)
                        {
                            // 00 00 FF FE
                            this.detectedCharset = Constants.CHARSET_X_ISO_10646_UCS_4_2143;
                        }
                        break;

                    case 0xFF:
                        if (b2 == 0xFE && b3 == 0x00 && b4 == 0x00)
                        {
                            // FF FE 00 00
                            this.detectedCharset = Constants.CHARSET_UTF_32LE;
                        }
                        else if (b2 == 0xFE)
                        {
                            // FF FE
                            this.detectedCharset = Constants.CHARSET_UTF_16LE;
                        }
                        break;
                }
            }

            if (this.detectedCharset != null)
            {
                this.done = true;
                return;
            }
        }
    }

    // Scan the chunk to classify the input seen so far.
    int maxPos = offset + length;
    for (int i = offset; i < maxPos; ++i)
    {
        int c = buf[i] & 0xFF;

        if ((c & 0x80) != 0 && c != 0xA0)
        {
            // A byte with the high bit set (other than 0xA0) switches the
            // detector to the high-byte state.
            if (this.inputState != InputState.HIGHBYTE)
            {
                this.inputState = InputState.HIGHBYTE;

                // The escape-sequence prober is dropped in this state.
                if (this.escCharsetProber != null)
                {
                    this.escCharsetProber = null;
                }

                // Create whichever of the three probers does not exist yet.
                if (this.probers[0] == null)
                {
                    this.probers[0] = new MBCSGroupProber();
                }
                if (this.probers[1] == null)
                {
                    this.probers[1] = new SBCSGroupProber();
                }
                if (this.probers[2] == null)
                {
                    this.probers[2] = new Latin1Prober();
                }
            }
        }
        else
        {
            // While the input is still pure ASCII, an ESC byte (0x1B) or the
            // byte pair 0x7E 0x7B ("~{") switches to the escape state.
            if (this.inputState == InputState.PURE_ASCII
                && (c == 0x1B || (c == 0x7B && this.lastChar == 0x7E)))
            {
                this.inputState = InputState.ESC_ASCII;
            }

            // lastChar is only updated on this branch, i.e. for bytes that
            // did not count as high bytes above.
            this.lastChar = buf[i];
        }
    }

    // Hand the chunk to the prober(s) that match the current input state.
    CharsetProber.ProbingState st;
    if (this.inputState == InputState.ESC_ASCII)
    {
        if (this.escCharsetProber == null)
        {
            this.escCharsetProber = new EscCharsetProber();
        }

        st = this.escCharsetProber.handleData(buf, offset, length);
        if (st == CharsetProber.ProbingState.FOUND_IT)
        {
            this.done = true;
            this.detectedCharset = this.escCharsetProber.getCharSetName();
        }
    }
    else if (this.inputState == InputState.HIGHBYTE)
    {
        // The first prober that reports FOUND_IT decides the charset.
        for (int i = 0; i < this.probers.Length; ++i)
        {
            st = this.probers[i].handleData(buf, offset, length);
            if (st == CharsetProber.ProbingState.FOUND_IT)
            {
                this.done = true;
                this.detectedCharset = this.probers[i].getCharSetName();
                return;
            }
        }
    }
    else
    {
        // Pure ASCII so far: nothing to probe.
    }
}
/// <summary>
/// Stores the two probers this instance refers to as its logical and
/// visual probers.
/// </summary>
/// <param name="logicalProber">Prober stored in the <c>logicalProber</c> field.</param>
/// <param name="visualProber">Prober stored in the <c>visualProber</c> field.</param>
public void setModalProbers(CharsetProber logicalProber, CharsetProber visualProber)
{
    this.visualProber = visualProber;
    this.logicalProber = logicalProber;
}
////////////////////////////////////////////////////////////////
// methods
////////////////////////////////////////////////////////////////

/// <summary>
/// Creates a prober with neither a logical nor a visual prober attached,
/// then calls <c>reset()</c>.
/// </summary>
public HebrewProber()
{
    // The parameterless base constructor runs implicitly.
    this.visualProber = null;
    this.logicalProber = null;

    reset();
}
/// <summary>
/// Records the probers to use as the logical and the visual prober.
/// Both references are stored as given; no validation is performed.
/// </summary>
/// <param name="logicalProber">New value of the <c>logicalProber</c> field.</param>
/// <param name="visualProber">New value of the <c>visualProber</c> field.</param>
public void setModalProbers(
    CharsetProber logicalProber,
    CharsetProber visualProber)
{
    this.logicalProber = logicalProber;
    this.visualProber = visualProber;
}
////////////////////////////////////////////////////////////////
// methods
////////////////////////////////////////////////////////////////

/// <summary>
/// Default constructor: clears both modal prober references and then
/// calls <c>reset()</c>.
/// </summary>
public HebrewProber()
    : base()
{
    // Both probers start unset.
    this.logicalProber = null;
    this.visualProber = null;

    reset();
}