/// <summary>
/// Feeds one byte into the UTF-8 state machine, updating <c>_state</c>,
/// <c>_multibytesRemaining</c> and <c>_uncertainty</c>.
/// </summary>
/// <param name="b">The next byte of the input being examined.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when called after the machine has entered <c>Utf8State.Error</c>,
/// or when <c>_state</c> holds a value this method does not recognise.
/// </exception>
public void Consume(byte b)
{
    // TODO BOM
    // NOTE(review): any byte in 0x80..0xBF is accepted as a continuation, so
    // overlong forms (e.g. E0 80 80) and surrogates (ED A0 80) are not rejected here.
    switch (_state)
    {
        case Utf8State.Start:
            if (b >= 0x80)
            {
                // 0x80..0xC1 and 0xF5..0xFF are not accepted as lead bytes.
                if (b < 0xc2 || b > 0xf4)
                {
                    _state = Utf8State.Error;
                    break;
                }

                // Lead byte of a multi-byte sequence: record how many
                // continuation bytes must follow.
                _state = Utf8State.Multibyte;
                if (b <= 0xdf)
                {
                    _multibytesRemaining = 1;
                }
                else if (b <= 0xef)
                {
                    _multibytesRemaining = 2;
                }
                else
                {
                    _multibytesRemaining = 3;
                }
            }

            // Reached for single-byte values (< 0x80) and for valid lead bytes.
            // Presumably a fixed-point multiply scaled by MaxCertaintyBits - confirm.
            _uncertainty = (_uncertainty * InitialByteProbability) >> Detector.MaxCertaintyBits;
            break;

        case Utf8State.Multibyte:
            // Anything outside 0x80..0xBF is not a continuation byte.
            if (b < 0x80 || b > 0xbf)
            {
                _state = Utf8State.Error;
                break;
            }

            // Sequence finished: the next byte starts a new character.
            if (--_multibytesRemaining == 0)
            {
                _state = Utf8State.Start;
            }

            _uncertainty = (_uncertainty * ContinuationByteProbability) >> Detector.MaxCertaintyBits;
            break;

        case Utf8State.Error:
            throw new InvalidOperationException("Utf8Detector.Consume called while invalid");

        default:
            throw new InvalidOperationException("Unknown UTF-8 state");
    }
}
/// <summary>
/// Advances the UTF-8 state machine by a single byte. Depending on the current
/// <c>_state</c> this may change <c>_state</c>, set or decrement
/// <c>_multibytesRemaining</c>, and rescale <c>_uncertainty</c>.
/// </summary>
/// <param name="b">The byte to examine.</param>
/// <exception cref="InvalidOperationException">
/// Thrown if the machine is already in <c>Utf8State.Error</c>, or if
/// <c>_state</c> is not one of the states handled below.
/// </exception>
public void Consume(byte b)
{
    // TODO BOM
    // NOTE(review): continuation bytes are only range-checked (0x80..0xBF);
    // overlong encodings and surrogate code points pass through - confirm intended.
    switch (_state)
    {
        case Utf8State.Error:
            throw new InvalidOperationException("Utf8Detector.Consume called while invalid");

        case Utf8State.Multibyte:
        {
            bool isContinuation = b >= 0x80 && b <= 0xbf;
            if (!isContinuation)
            {
                _state = Utf8State.Error;
                break;
            }

            // Last expected continuation byte returns the machine to Start.
            if (--_multibytesRemaining == 0)
            {
                _state = Utf8State.Start;
            }

            _uncertainty = (_uncertainty*ContinuationByteProbability) >> Detector.MaxCertaintyBits;
            break;
        }

        case Utf8State.Start:
        {
            if (b >= 0x80)
            {
                // Only 0xC2..0xF4 may begin a multi-byte sequence.
                bool isLeadByte = b >= 0xc2 && b <= 0xf4;
                if (!isLeadByte)
                {
                    _state = Utf8State.Error;
                    break;
                }

                // Remember how many continuation bytes are still owed.
                if (b >= 0xf0)
                {
                    _multibytesRemaining = 3;
                }
                else if (b >= 0xe0)
                {
                    _multibytesRemaining = 2;
                }
                else
                {
                    _multibytesRemaining = 1;
                }

                _state = Utf8State.Multibyte;
            }

            // Applied to bytes below 0x80 and to accepted lead bytes alike.
            _uncertainty = (_uncertainty*InitialByteProbability) >> Detector.MaxCertaintyBits;
            break;
        }

        default:
            throw new InvalidOperationException("Unknown UTF-8 state");
    }
}