/// <summary>
/// Runs the given bytes through the coding state machine, counts multi-byte
/// characters and updates the probing state.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    int end = offset + length;
    int pos = offset;
    while (pos < end)
    {
        int smState = this.codingSM.nextState(buf[pos]);
        if (smState == SMModel.ERROR)
        {
            // The state machine rejected the byte: rule this prober out.
            this.state = ProbingState.NOT_ME;
            break;
        }
        if (smState == SMModel.ITSME)
        {
            this.state = ProbingState.FOUND_IT;
            break;
        }
        if (smState == SMModel.START && this.codingSM.getCurrentCharLen() >= 2)
        {
            // Back in the START state with a character length of two or more.
            ++this.numOfMBChar;
        }
        ++pos;
    }

    // Shortcut: while still undecided, a high enough confidence settles it.
    bool undecided = this.state == ProbingState.DETECTING;
    if (undecided && getConfidence() > SHORTCUT_THRESHOLD)
    {
        this.state = ProbingState.FOUND_IT;
    }
    return this.state;
}
/// <summary>
/// Feeds every byte to each still-active coding state machine. A machine that
/// reports ERROR is moved out of the active range; the first machine that
/// reports ITSME decides the charset.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    int end = offset + length;
    for (int pos = offset; pos < end && this.state == ProbingState.DETECTING; ++pos)
    {
        // Walk the active machines from the back so a swap never skips one.
        for (int idx = this.activeSM - 1; idx >= 0; --idx)
        {
            int smState = this.codingSM[idx].nextState(buf[pos]);
            if (smState == SMModel.ERROR)
            {
                --this.activeSM;
                if (this.activeSM <= 0)
                {
                    // No machine left that accepts the input.
                    this.state = ProbingState.NOT_ME;
                    return this.state;
                }
                if (idx != this.activeSM)
                {
                    // Exchange the failed machine with the last active one.
                    CodingStateMachine displaced = this.codingSM[this.activeSM];
                    this.codingSM[this.activeSM] = this.codingSM[idx];
                    this.codingSM[idx] = displaced;
                }
                continue;
            }
            if (smState == SMModel.ITSME)
            {
                this.state = ProbingState.FOUND_IT;
                this.detectedCharset = this.codingSM[idx].getCodingStateMachine();
                return this.state;
            }
        }
    }
    return this.state;
}
/// <summary>
/// Returns the prober to its initial state: resets the state machine and both
/// analyzers, sets the state to DETECTING and zeroes the carried-over bytes.
/// </summary>
public override void reset()
{
    this.codingSM.reset();
    this.state = ProbingState.DETECTING;
    this.contextAnalyzer.reset();
    this.distributionAnalyzer.reset();

    // Zero every element of the carry-over buffer.
    for (int k = 0; k < this.lastChar.Length; ++k)
    {
        this.lastChar[k] = 0;
    }
}
/// <summary>
/// Returns the prober to its initial state: resets the state machine and the
/// distribution analyzer, sets the state to DETECTING and zeroes the
/// carried-over bytes.
/// </summary>
public override void reset()
{
    this.codingSM.reset();
    this.state = ProbingState.DETECTING;
    this.distributionAnalyzer.reset();

    // Clear the whole carry-over buffer.
    int carriedLength = this.lastChar.Length;
    Array.Clear(this.lastChar, 0, carriedLength);
}
/// <summary>
/// Filters the input down to high bytes (plus the single byte following each
/// high-byte run) and hands the result to every active child prober.
/// </summary>
/// <param name="buffer">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The group's probing state after the children have run.</returns>
public override ProbingState HandleData(byte[] buffer, int offset, int length)
{
    // Filter first to reduce the load on the child probers.
    byte[] filtered = new byte[length];
    int filteredCount = 0;

    // Starts as true so the very first byte is kept; at worst this adds noise.
    bool keepNext = true;
    int end = offset + length;
    for (int pos = offset; pos < end; pos++)
    {
        byte current = buffer[pos];
        bool isHighByte = (current & 0x80) != 0;
        if (isHighByte || keepNext)
        {
            // A low byte is kept only directly after a high byte.
            filtered[filteredCount++] = current;
            keepNext = isHighByte;
        }
    }

    for (int index = 0; index < this.probers.Length; index++)
    {
        if (!this.isActive[index])
        {
            continue;
        }

        ProbingState result = this.probers[index].HandleData(filtered, 0, filteredCount);
        if (result == ProbingState.Detected)
        {
            this.bestGuess = index;
            this.State = ProbingState.Detected;
            break;
        }

        if (result == ProbingState.NegativeDetection)
        {
            // This child ruled itself out; give up once none remain.
            this.isActive[index] = false;
            this.activeNum--;
            if (this.activeNum <= 0)
            {
                this.State = ProbingState.NegativeDetection;
                break;
            }
        }
    }

    return this.State;
}
/// <summary>
/// Returns the prober to its initial state: DETECTING, last character class
/// OTH, and all frequency counters at zero.
/// </summary>
public override void reset()
{
    this.state = ProbingState.DETECTING;
    this.lastCharClass = OTH;

    int slot = 0;
    while (slot < this.freqCounter.Length)
    {
        this.freqCounter[slot] = 0;
        ++slot;
    }
}
/// <summary>
/// Filters the input down to high bytes (plus the single byte following each
/// high-byte run) and hands the result to every active child prober.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The group's probing state after the children have run.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    bool keepNext = true;
    byte[] filtered = new byte[length];
    int filteredCount = 0;
    int end = offset + length;

    for (int pos = offset; pos < end; ++pos)
    {
        byte current = buf[pos];
        bool isHighByte = (current & 0x80) != 0;
        if (!isHighByte && !keepNext)
        {
            // A low byte that does not directly follow a high byte is dropped.
            continue;
        }
        filtered[filteredCount++] = current;
        keepNext = isHighByte;
    }

    for (int index = 0; index < this.probers.Length; ++index)
    {
        if (this.isActive[index])
        {
            ProbingState result = this.probers[index].handleData(filtered, 0, filteredCount);
            if (result == ProbingState.FOUND_IT)
            {
                this.bestGuess = index;
                this.state = ProbingState.FOUND_IT;
                break;
            }
            if (result == ProbingState.NOT_ME)
            {
                // Deactivate the child; give up once none remain.
                this.isActive[index] = false;
                --this.activeNum;
                if (this.activeNum <= 0)
                {
                    this.state = ProbingState.NOT_ME;
                    break;
                }
            }
        }
    }

    return this.state;
}
/// <summary>
/// Returns the prober to its initial state: DETECTING, every coding state
/// machine reset and active again, and no detected charset.
/// </summary>
public override void reset()
{
    this.state = ProbingState.DETECTING;

    int machine = 0;
    while (machine < this.codingSM.Length)
    {
        this.codingSM[machine].reset();
        ++machine;
    }

    // All machines count as active again.
    this.activeSM = this.codingSM.Length;
    this.detectedCharset = null;
}
/// <summary>
/// Returns the prober to its initial state: Detecting, last character class
/// OTH, the first FREQ_CAT_NUM frequency counters at zero, and active.
/// </summary>
public override void Reset()
{
    mState = ProbingState.Detecting;
    mLastCharClass = OTH;

    int category = 0;
    while (category < FREQ_CAT_NUM)
    {
        mFreqCounter[category] = 0;
        category++;
    }

    active = true;
}
/// <summary>
/// Filters the input down to high bytes (plus the single byte following each
/// high-byte run) and hands the result to every active child prober.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="len">Number of bytes to examine.</param>
/// <returns>The group's probing state after the children have run.</returns>
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
    // Filter first to reduce the load on the child probers.
    byte[] filtered = new byte[len];
    int filteredCount = 0;

    // Starts as true so the very first byte is kept; at worst this adds noise.
    bool keepNext = true;
    int end = offset + len;
    for (int pos = offset; pos < end; pos++)
    {
        byte current = buf[pos];
        bool isHighByte = (current & 0x80) != 0;
        if (isHighByte || keepNext)
        {
            // A low byte is kept only directly after a high byte.
            filtered[filteredCount++] = current;
            keepNext = isHighByte;
        }
    }

    for (int index = 0; index < probers.Length; index++)
    {
        if (!isActive[index])
        {
            continue;
        }

        ProbingState result = probers[index].HandleData(filtered, 0, filteredCount);
        if (result == ProbingState.FoundIt)
        {
            bestGuess = index;
            state = ProbingState.FoundIt;
            break;
        }

        if (result == ProbingState.NotMe)
        {
            // Deactivate the child; give up once none remain.
            isActive[index] = false;
            activeNum--;
            if (activeNum <= 0)
            {
                state = ProbingState.NotMe;
                break;
            }
        }
    }

    return state;
}
/// <summary>
/// Returns the prober to its initial state: DETECTING, last order 255, and
/// all sequence and character counters at zero.
/// </summary>
public override void reset()
{
    this.state = ProbingState.DETECTING;
    this.lastOrder = 255;

    int category = 0;
    while (category < NUMBER_OF_SEQ_CAT)
    {
        this.seqCounters[category] = 0;
        ++category;
    }

    this.totalSeqs = 0;
    this.totalChar = 0;
    this.freqChar = 0;
}
/// <summary>
/// Resets every child prober, marks each one active, recounts the active
/// probers, clears the best guess and sets the state to DETECTING.
/// </summary>
public override void reset()
{
    this.activeNum = 0;

    int index = 0;
    while (index < this.probers.Length)
    {
        this.probers[index].reset();
        this.isActive[index] = true;
        this.activeNum += 1;
        ++index;
    }

    // -1 marks "no best guess yet".
    this.bestGuess = -1;
    this.state = ProbingState.DETECTING;
}
/// <summary>
/// Returns the prober to its initial state: Detecting, last order 255, all
/// sequence and character counters at zero, and active.
/// </summary>
public void Reset()
{
    this.currentState = ProbingState.Detecting;
    this.lastOrder = 255;

    int category = 0;
    while (category < NUMBER_OF_SEQ_CAT)
    {
        seqCounters[category] = 0;
        category++;
    }

    this.totalSeqs = 0;
    this.totalChar = 0;
    this.freqChar = 0;
    this.active = true;
}
/// <summary>
/// Returns the prober to its initial state: Detecting, last order 255, all
/// sequence and character counters at zero, and active.
/// </summary>
public override void Reset()
{
    mState = ProbingState.Detecting;
    mLastOrder = 255;

    int category = 0;
    while (category < NUMBER_OF_SEQ_CAT)
    {
        mSeqCounters[category] = 0;
        category++;
    }

    mTotalSeqs = 0;
    mTotalChar = 0;
    mFreqChar = 0;
    active = true;
}
/// <summary>
/// Maps each byte to its order via the model, updates the character and
/// sequence counters, and applies the confidence shortcuts once enough
/// sequences have been seen.
/// </summary>
/// <param name="aBuf">Buffer holding the bytes to examine.</param>
/// <param name="length">Maximum number of bytes to examine, capped at the buffer size.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState HandleData(byte[] aBuf, int length)
{
    // Never read past the buffer, even if the caller claims more bytes.
    int limit = aBuf.Length < length ? aBuf.Length : length;

    for (int pos = 0; pos < limit; pos++)
    {
        byte order = mModel.charToOrderMap[aBuf[pos]];

        if (order < SYMBOL_CAT_ORDER)
        {
            mTotalChar++;
        }

        if (order < SAMPLE_SIZE)
        {
            mFreqChar++;
            if (mLastOrder < SAMPLE_SIZE)
            {
                mTotalSeqs++;

                // In reversed mode the two letters swap roles in the lookup.
                int cell = mReversed
                    ? order * SAMPLE_SIZE + mLastOrder
                    : mLastOrder * SAMPLE_SIZE + order;
                ++mSeqCounters[mModel.precedenceMatrix[cell]];
            }
        }

        mLastOrder = order;
    }

    if (mState == ProbingState.Detecting && mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)
    {
        float confidence = GetConfidence();
        if (confidence > POSITIVE_SHORTCUT_THRESHOLD)
        {
            mState = ProbingState.FoundIt;
        }
        else if (confidence < NEGATIVE_SHORTCUT_THRESHOLD)
        {
            mState = ProbingState.NotMe;
        }
    }

    return mState;
}
/// <summary>
/// Maps each byte to its order via the model, updates the character and
/// sequence counters, and applies the confidence shortcuts once enough
/// sequences have been seen.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    int end = offset + length;
    int pos = offset;
    while (pos < end)
    {
        short order = this.model.getOrder(buf[pos]);

        if (order < SYMBOL_CAT_ORDER)
        {
            ++this.totalChar;
        }

        if (order < SAMPLE_SIZE)
        {
            ++this.freqChar;
            if (this.lastOrder < SAMPLE_SIZE)
            {
                ++this.totalSeqs;
                if (this.reversed)
                {
                    // Reversed mode swaps the two letters in the lookup.
                    ++(this.seqCounters[this.model.getPrecedence(order * SAMPLE_SIZE + this.lastOrder)]);
                }
                else
                {
                    ++(this.seqCounters[this.model.getPrecedence(this.lastOrder * SAMPLE_SIZE + order)]);
                }
            }
        }

        this.lastOrder = order;
        ++pos;
    }

    if (this.state == ProbingState.DETECTING && this.totalSeqs > SB_ENOUGH_REL_THRESHOLD)
    {
        float confidence = getConfidence();
        if (confidence > POSITIVE_SHORTCUT_THRESHOLD)
        {
            this.state = ProbingState.FOUND_IT;
        }
        else if (confidence < NEGATIVE_SHORTCUT_THRESHOLD)
        {
            this.state = ProbingState.NOT_ME;
        }
    }

    return this.state;
}
/// <summary>
/// Runs the given bytes through the coding state machine and feeds every
/// completed character to the context and distribution analyzers.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine; zero is allowed and leaves the carried-over byte untouched.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    int maxPos = offset + length;

    for (int i = offset; i < maxPos; ++i)
    {
        int codingState = this.codingSM.nextState(buf[i]);
        if (codingState == SMModel.ERROR)
        {
            this.state = ProbingState.NOT_ME;
            break;
        }
        if (codingState == SMModel.ITSME)
        {
            this.state = ProbingState.FOUND_IT;
            break;
        }
        if (codingState == SMModel.START)
        {
            int charLen = this.codingSM.getCurrentCharLen();
            if (i == offset)
            {
                // The character may have started in the previous call: pair the
                // byte saved in lastChar[0] with the first byte of this call.
                this.lastChar[1] = buf[offset];
                this.contextAnalyzer.handleOneChar(this.lastChar, 0, charLen);
                this.distributionAnalyzer.handleOneChar(this.lastChar, 0, charLen);
            }
            else
            {
                this.contextAnalyzer.handleOneChar(buf, i - 1, charLen);
                this.distributionAnalyzer.handleOneChar(buf, i - 1, charLen);
            }
        }
    }

    // Save the final byte for the next call. With no input there is no final
    // byte, and buf[maxPos - 1] would lie outside the range or the array.
    if (length > 0)
    {
        this.lastChar[0] = buf[maxPos - 1];
    }

    if (this.state == ProbingState.DETECTING)
    {
        if (this.contextAnalyzer.gotEnoughData() && getConfidence() > SHORTCUT_THRESHOLD)
        {
            this.state = ProbingState.FOUND_IT;
        }
    }

    return this.state;
}
/// <summary>
/// Filters the input down to high bytes (plus the single byte following each
/// high-byte run) and hands the result to every active child prober. All
/// arithmetic runs in a checked context.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="len">Number of bytes to examine.</param>
/// <returns>The group's probing state after the children have run.</returns>
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
    byte[] filtered = new byte[len];
    int filteredCount = 0;
    bool keepNext = true;

    checked
    {
        int end = offset + len;
        for (int pos = offset; pos < end; pos++)
        {
            byte current = buf[pos];
            bool isHighByte = (current & 128) != 0;
            if (isHighByte || keepNext)
            {
                // A low byte is kept only directly after a high byte.
                filtered[filteredCount++] = current;
                keepNext = isHighByte;
            }
        }

        for (int index = 0; index < this.probers.Length; index++)
        {
            if (!this.isActive[index])
            {
                continue;
            }

            ProbingState result = this.probers[index].HandleData(filtered, 0, filteredCount);
            if (result == ProbingState.FoundIt)
            {
                this.bestGuess = index;
                this.state = ProbingState.FoundIt;
                break;
            }

            if (result == ProbingState.NotMe)
            {
                // Deactivate the child; give up once none remain.
                this.isActive[index] = false;
                this.activeNum--;
                if (this.activeNum <= 0)
                {
                    this.state = ProbingState.NotMe;
                    break;
                }
            }
        }

        return this.state;
    }
}
/// <summary>
/// Runs the given bytes through the coding state machine, feeds the
/// distribution analyser, and updates the probing state.
/// </summary>
/// <param name="aBuf">Buffer holding the bytes to examine.</param>
/// <param name="aLen">Maximum number of bytes to examine, capped at the buffer size; zero is allowed.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState HandleData(byte[] aBuf, int aLen)
{
    // Number of bytes that can actually be read from the buffer.
    int count = aBuf.Length < aLen ? aBuf.Length : aLen;

    for (int i = 0; i < count; i++)
    {
        SMState codingState = mCodingSM.NextState(aBuf[i]);
        if (codingState == SMState.Error)
        {
            mState = ProbingState.NotMe;
            break;
        }
        if (codingState == SMState.ItsMe)
        {
            mState = ProbingState.FoundIt;
            break;
        }
        if (codingState == SMState.Start)
        {
            int charLen = mCodingSM.CurrentCharLen;
            if (i == 0)
            {
                // The character may have started in the previous call: pair the
                // byte saved in mLastChar[0] with the first byte of this call.
                mLastChar[1] = aBuf[0];
                mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
            }
            // NOTE(review): characters completing at i > 0 are not passed to any
            // analyser, and the context analyser is never fed here although
            // GotEnoughData() is consulted below. The disabled calls were
            //   mContextAnalyser.HandleOneChar(mLastChar, charLen)       (i == 0)
            //   mContextAnalyser.HandleOneChar(aBuf+i-1, charLen)        (i > 0)
            //   mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen)   (i > 0)
            // TODO confirm whether they should be restored with an offset-taking overload.
        }
    }

    // Save the final byte for the next call. Skipped for empty input, where the
    // previous aBuf[aLen - 1] threw; capped so that aLen > aBuf.Length is safe.
    if (count > 0)
    {
        mLastChar[0] = aBuf[count - 1];
    }

    if (mState == ProbingState.Detecting)
    {
        if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
        {
            mState = ProbingState.FoundIt;
        }
    }

    return mState;
}
/// <summary>
/// Passes the given range to every active child prober and updates the
/// group's state from their answers.
/// </summary>
/// <param name="buffer">Buffer holding the bytes to examine.</param>
/// <param name="start">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The group's probing state after the children have run.</returns>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="start"/> is negative.</exception>
public ProbingState HandleData(byte[] buffer, int start, int length)
{
    if (buffer == null)
    {
        throw new ArgumentNullException("buffer", Properties.Resources.NullBufferExceptionMessage);
    }

    if (start < 0)
    {
        throw new ArgumentException(Properties.Resources.NegativeStartIndexExceptionMessage, "start");
    }

    // An inactive group has nothing left to do.
    if (!this.isActive)
    {
        return this.state;
    }

    // Otherwise keep probing, even if a decision was already made.
    foreach (ICharSetProber candidate in this.probers)
    {
        if (candidate.IsActive)
        {
            ProbingState result = candidate.HandleData(buffer, start, length);
            if (result == ProbingState.FoundIt)
            {
                this.bestGuess = candidate;
                this.state = ProbingState.FoundIt;
                break;
            }

            if (result == ProbingState.NotMe)
            {
                // Deactivate the child; the group goes inactive once none remain.
                candidate.IsActive = false;
                this.activeNum--;
                if (this.activeNum <= 0)
                {
                    this.state = ProbingState.NotMe;
                    this.isActive = false;
                    break;
                }
            }
        }
    }

    return this.state;
}
/// <summary>
/// Runs the given bytes through the coding state machine and feeds every
/// completed character to the distribution analyzer.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine; zero is allowed and leaves the carried-over byte untouched.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    int maxPos = offset + length;

    for (int i = offset; i < maxPos; ++i)
    {
        int codingState = this.codingSM.nextState(buf[i]);
        if (codingState == SMModel.ERROR)
        {
            this.state = ProbingState.NOT_ME;
            break;
        }
        if (codingState == SMModel.ITSME)
        {
            this.state = ProbingState.FOUND_IT;
            break;
        }
        if (codingState == SMModel.START)
        {
            int charLen = this.codingSM.getCurrentCharLen();
            if (i == offset)
            {
                // The character may have started in the previous call: pair the
                // byte saved in lastChar[0] with the first byte of this call.
                this.lastChar[1] = buf[offset];
                this.distributionAnalyzer.handleOneChar(this.lastChar, 0, charLen);
            }
            else
            {
                this.distributionAnalyzer.handleOneChar(buf, i - 1, charLen);
            }
        }
    }

    // Save the final byte for the next call. With no input there is no final
    // byte, and buf[maxPos - 1] would lie outside the range or the array.
    if (length > 0)
    {
        this.lastChar[0] = buf[maxPos - 1];
    }

    if (this.state == ProbingState.DETECTING)
    {
        if (this.distributionAnalyzer.gotEnoughData() && getConfidence() > SHORTCUT_THRESHOLD)
        {
            this.state = ProbingState.FOUND_IT;
        }
    }

    return this.state;
}
/// <summary>
/// Filters the input with FilterWithoutEnglishLetters and hands the result to
/// every active child prober.
/// </summary>
/// <param name="buffer">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The group's probing state after the children have run.</returns>
public override ProbingState HandleData(byte[] buffer, int offset, int length)
{
    // The same filter is applied for all children, regardless of each prober's
    // KeepEnglishLetters setting: as of now no prober in this group recognizes
    // a language written with English characters.
    byte[] filtered = buffer.FilterWithoutEnglishLetters(offset, length);
    if (filtered.Length == 0)
    {
        // Nothing survived the filter; the state stays as it is.
        return this.State;
    }

    for (int index = 0; index < NumberOProbes; index++)
    {
        if (this.isActive[index])
        {
            ProbingState result = this.probers[index].HandleData(filtered, 0, filtered.Length);
            if (result == ProbingState.Detected)
            {
                this.bestGuess = index;
                this.State = ProbingState.Detected;
                break;
            }

            if (result == ProbingState.NegativeDetection)
            {
                // Deactivate the child; give up once none remain.
                this.isActive[index] = false;
                this.activeNum--;
                if (this.activeNum <= 0)
                {
                    this.State = ProbingState.NegativeDetection;
                    break;
                }
            }
        }
    }

    return this.State;
}
/// <summary>
/// Filters the input with FilterWithoutEnglishLetters and hands the result to
/// every active child prober.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="len">Number of bytes to examine.</param>
/// <returns>The group's probing state after the children have run.</returns>
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
    // The same filter is applied for all children, regardless of each prober's
    // KeepEnglishLetters setting: as of now no prober in this group recognizes
    // a language written with English characters.
    byte[] filtered = FilterWithoutEnglishLetters(buf, offset, len);
    if (filtered.Length == 0)
    {
        // Nothing survived the filter; the state stays as it is.
        return state;
    }

    for (int index = 0; index < PROBERS_NUM; index++)
    {
        if (isActive[index])
        {
            ProbingState result = probers[index].HandleData(filtered, 0, filtered.Length);
            if (result == ProbingState.FoundIt)
            {
                bestGuess = index;
                state = ProbingState.FoundIt;
                break;
            }

            if (result == ProbingState.NotMe)
            {
                // Deactivate the child; give up once none remain.
                isActive[index] = false;
                activeNum--;
                if (activeNum <= 0)
                {
                    state = ProbingState.NotMe;
                    break;
                }
            }
        }
    }

    return state;
}
/// <summary>
/// Filters the input with filterWithoutEnglishLetters and hands the result to
/// every active child prober.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The group's probing state after the children have run.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    ByteBuffer filtered = filterWithoutEnglishLetters(buf, offset, length);
    if (filtered.Position == 0)
    {
        // Nothing survived the filter; the state stays as it is.
        return this.state;
    }

    for (int index = 0; index < this.probers.Length; ++index)
    {
        if (this.isActive[index])
        {
            // The byte array is requested anew for each active child.
            ProbingState result = this.probers[index].handleData(filtered.ToByteArray(), 0, filtered.Position);
            if (result == ProbingState.FOUND_IT)
            {
                this.bestGuess = index;
                this.state = ProbingState.FOUND_IT;
                break;
            }

            if (result == ProbingState.NOT_ME)
            {
                // Deactivate the child; give up once none remain.
                this.isActive[index] = false;
                --this.activeNum;
                if (this.activeNum <= 0)
                {
                    this.state = ProbingState.NOT_ME;
                    break;
                }
            }
        }
    }

    return this.state;
}
/// <summary>
/// Feeds every byte to each still-active coding state machine. A machine that
/// reports ERROR is moved out of the active range; the first machine that
/// reports ITSME decides the charset.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    int end = offset + length;
    int pos = offset;
    while (pos < end && this.state == ProbingState.DETECTING)
    {
        // Walk the active machines from the back so a swap never skips one.
        int idx = this.activeSM - 1;
        while (idx >= 0)
        {
            int smState = this.codingSM[idx].nextState(buf[pos]);
            if (smState == SMModel.ERROR)
            {
                --this.activeSM;
                if (this.activeSM <= 0)
                {
                    // No machine left that accepts the input.
                    this.state = ProbingState.NOT_ME;
                    return this.state;
                }
                if (idx != this.activeSM)
                {
                    // Exchange the failed machine with the last active one.
                    CodingStateMachine lastActive = this.codingSM[this.activeSM];
                    this.codingSM[this.activeSM] = this.codingSM[idx];
                    this.codingSM[idx] = lastActive;
                }
            }
            else if (smState == SMModel.ITSME)
            {
                this.state = ProbingState.FOUND_IT;
                this.detectedCharset = this.codingSM[idx].getCodingStateMachine();
                return this.state;
            }
            --idx;
        }
        ++pos;
    }
    return this.state;
}
/// <summary>
/// Runs the given bytes through the coding state machine, counts multi-byte
/// characters and updates the probing state.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    int end = offset + length;
    for (int pos = offset; pos < end; ++pos)
    {
        int smState = this.codingSM.nextState(buf[pos]);
        if (smState == SMModel.ERROR)
        {
            // The state machine rejected the byte: rule this prober out.
            this.state = ProbingState.NOT_ME;
            break;
        }
        else if (smState == SMModel.ITSME)
        {
            this.state = ProbingState.FOUND_IT;
            break;
        }
        else if (smState == SMModel.START)
        {
            bool isMultiByte = this.codingSM.getCurrentCharLen() >= 2;
            if (isMultiByte)
            {
                ++this.numOfMBChar;
            }
        }
    }

    // Shortcut: while still undecided, a high enough confidence settles it.
    if (this.state == ProbingState.DETECTING && getConfidence() > SHORTCUT_THRESHOLD)
    {
        this.state = ProbingState.FOUND_IT;
    }

    return this.state;
}
/// <summary>
/// Runs the given bytes through the coding state machine, counts multi-byte
/// characters and updates the probing state.
/// </summary>
/// <param name="buffer">Buffer holding the bytes to examine.</param>
/// <param name="length">Maximum number of bytes to examine, capped at the buffer size.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState HandleData(byte[] buffer, int length)
{
    // Never read past the buffer, even if the caller claims more bytes.
    int limit = buffer.Length < length ? buffer.Length : length;

    int pos = 0;
    while (pos < limit)
    {
        SMState next = mCodingSM.NextState(buffer[pos]);
        if (next == SMState.Error)
        {
            mState = ProbingState.NotMe;
            break;
        }
        if (next == SMState.ItsMe)
        {
            mState = ProbingState.FoundIt;
            break;
        }
        if (next == SMState.Start && mCodingSM.CurrentCharLen >= 2)
        {
            // Back in the Start state with a character length of two or more.
            mNumOfMBChar++;
        }
        pos++;
    }

    // Shortcut: while still undecided, a high enough confidence settles it.
    if (mState == ProbingState.Detecting && GetConfidence() > SHORTCUT_THRESHOLD)
    {
        mState = ProbingState.FoundIt;
    }

    return mState;
}
/// <summary>
/// Maps each byte to its order via the model, updates the character and
/// sequence counters, and applies the confidence shortcuts once enough
/// sequences have been seen.
/// </summary>
/// <param name="buffer">Buffer holding the bytes to examine.</param>
/// <param name="start">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine; reading stops at the end of the buffer.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="start"/> is negative.</exception>
public ProbingState HandleData(byte[] buffer, int start, int length)
{
    if (buffer == null)
    {
        throw new ArgumentNullException("buffer", Properties.Resources.NullBufferExceptionMessage);
    }

    if (start < 0)
    {
        throw new ArgumentException(Properties.Resources.NegativeStartIndexExceptionMessage, "start");
    }

    // An inactive prober has nothing left to do.
    if (!active)
    {
        return currentState;
    }

    // Otherwise keep counting, even if a decision was already made.
    int end = start + length;
    int pos = start;
    while (pos < buffer.Length && pos < end)
    {
        byte order = model.CharToOrderMap[buffer[pos]];

        if (order < SYMBOL_CAT_ORDER)
        {
            totalChar++;
        }

        if (order < SAMPLE_SIZE)
        {
            freqChar++;
            if (lastOrder < SAMPLE_SIZE)
            {
                totalSeqs++;

                // In reversed mode the two letters swap roles in the lookup.
                int cell = reversed
                    ? order * SAMPLE_SIZE + lastOrder
                    : lastOrder * SAMPLE_SIZE + order;
                ++seqCounters[model.PrecedenceMatrix[cell]];
            }
        }

        lastOrder = order;
        ++pos;
    }

    if (currentState == ProbingState.Detecting && totalSeqs > SB_ENOUGH_REL_THRESHOLD)
    {
        float confidence = Confidence;
        if (confidence > POSITIVE_SHORTCUT_THRESHOLD)
        {
            currentState = ProbingState.FoundIt;
        }
        else if (confidence < NEGATIVE_SHORTCUT_THRESHOLD)
        {
            currentState = ProbingState.NotMe;
        }
        // Anything in between leaves the state at Detecting.
    }

    return currentState;
}
/// <summary>
/// Feeds a block of bytes to the detector. The first block is checked for a
/// byte order mark; after that the bytes are classified as pure ASCII,
/// escape-sequence ASCII or high-byte data and passed to the matching probers.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="len">Number of bytes to examine.</param>
public virtual void Feed(byte[] buf, int offset, int len)
{
    if (done)
    {
        return;
    }

    if (len > 0)
    {
        gotData = true;
    }

    // If the data starts with a BOM, we know it is UTF.
    if (start)
    {
        start = false;
        if (len > 3)
        {
            // Read the signature at the requested offset, not at index 0.
            byte b0 = buf[offset];
            byte b1 = buf[offset + 1];
            byte b2 = buf[offset + 2];
            byte b3 = buf[offset + 3];

            switch (b0)
            {
                case 0xEF:
                    if (0xBB == b1 && 0xBF == b2)
                    {
                        detectedCharset = "UTF-8";
                    }
                    break;
                case 0xFE:
                    if (0xFF == b1 && 0x00 == b2 && 0x00 == b3)
                    {
                        // FE FF 00 00: UCS-4, unusual octet order BOM (3412)
                        detectedCharset = "X-ISO-10646-UCS-4-3412";
                    }
                    else if (0xFF == b1)
                    {
                        detectedCharset = "UTF-16BE";
                    }
                    break;
                case 0x00:
                    if (0x00 == b1 && 0xFE == b2 && 0xFF == b3)
                    {
                        detectedCharset = "UTF-32BE";
                    }
                    else if (0x00 == b1 && 0xFF == b2 && 0xFE == b3)
                    {
                        // 00 00 FF FE: UCS-4, unusual octet order BOM (2143)
                        detectedCharset = "X-ISO-10646-UCS-4-2143";
                    }
                    break;
                case 0xFF:
                    if (0xFE == b1 && 0x00 == b2 && 0x00 == b3)
                    {
                        detectedCharset = "UTF-32LE";
                    }
                    else if (0xFE == b1)
                    {
                        detectedCharset = "UTF-16LE";
                    }
                    break;
            }
        }

        if (detectedCharset != null)
        {
            done = true;
            return;
        }
    }

    // Scan exactly the range that is later handed to the probers.
    int end = offset + len;
    for (int i = offset; i < end; i++)
    {
        byte current = buf[i];

        // Other than 0xA0, if every character is ASCII, the page is ASCII.
        if ((current & 0x80) != 0 && current != 0xA0)
        {
            // We got a non-ASCII (high) byte.
            if (inputState != InputState.Highbyte)
            {
                inputState = InputState.Highbyte;

                // Drop the escape prober if it is active.
                escCharsetProber = null;

                // Start the multi-byte, single-byte and Latin-1 probers.
                if (charsetProbers[0] == null)
                {
                    charsetProbers[0] = new MBCSGroupProber();
                }
                if (charsetProbers[1] == null)
                {
                    charsetProbers[1] = new SBCSGroupProber();
                }
                if (charsetProbers[2] == null)
                {
                    charsetProbers[2] = new Latin1Prober();
                }
            }
        }
        else
        {
            // ESC is 0x1B; the previous constant 0x33 is the digit '3'.
            if (inputState == InputState.PureASCII
                && (current == 0x1B || (current == 0x7B && lastChar == 0x7E)))
            {
                // Found the escape character or the HZ sequence "~{".
                inputState = InputState.EscASCII;
            }
            lastChar = current;
        }
    }

    ProbingState st = ProbingState.NotMe;
    switch (inputState)
    {
        case InputState.EscASCII:
            if (escCharsetProber == null)
            {
                escCharsetProber = new EscCharsetProber();
            }
            st = escCharsetProber.HandleData(buf, offset, len);
            if (st == ProbingState.FoundIt)
            {
                done = true;
                detectedCharset = escCharsetProber.GetCharsetName();
            }
            break;

        case InputState.Highbyte:
            for (int i = 0; i < PROBERS_NUM; i++)
            {
                if (charsetProbers[i] != null)
                {
                    st = charsetProbers[i].HandleData(buf, offset, len);
#if DEBUG
                    charsetProbers[i].DumpStatus();
#endif
                    if (st == ProbingState.FoundIt)
                    {
                        done = true;
                        detectedCharset = charsetProbers[i].GetCharsetName();
                        return;
                    }
                }
            }
            break;

        default:
            // Pure ASCII: nothing to probe.
            break;
    }
}
/// <summary>
/// Returns the prober to its initial state: state machine reset, multi-byte
/// character count at zero, Detecting, and active.
/// </summary>
public override void Reset()
{
    this.mCodingSM.Reset();
    this.mNumOfMBChar = 0;
    this.mState = ProbingState.Detecting;
    this.active = true;
}
/// <summary>
/// Reads a block of bytes into the detector. The first non-empty block is
/// checked for a byte order mark; after that the bytes are classified as pure
/// ASCII, escape-sequence ASCII or high-byte data and passed to the matching
/// probers.
/// </summary>
/// <param name="input">Input buffer.</param>
/// <param name="offset">Offset of the first byte to examine in the buffer.</param>
/// <param name="length">Number of available bytes.</param>
public void Read(byte[] input, int offset, int length)
{
    if (this.DetectorState == DetectorState.Done)
    {
        return;
    }

    // If the data starts with a BOM, we know it is UTF.
    if (length > 0 && this.DetectorState == DetectorState.Start)
    {
        this.DetectorState = DetectorState.GotData;

        // NOTE(review): the helper is given the buffer without the offset —
        // confirm the first block is always passed with offset 0.
        this.DetectedCharset = this.DetectByteOrderMark(input);
        if (this.DetectedCharset != null)
        {
            this.DetectorState = DetectorState.Done;
            return;
        }
    }

    // Scan exactly the range that is later handed to the probers.
    int end = offset + length;
    for (int i = offset; i < end; i++)
    {
        byte current = input[i];

        // Other than 0xA0, if every character is ASCII, the page is ASCII.
        if ((current & 0x80) != 0 && current != 0xA0)
        {
            // We got a non-ASCII (high) byte.
            if (this.DetectedCharacters != DetectedCharacters.Highbyte)
            {
                this.DetectedCharacters = DetectedCharacters.Highbyte;

                // Drop the escape prober if it is active.
                this.EscCharsetProber = null;

                // Start the multi-byte, single-byte and Latin-1 probers.
                if (this.CharsetProbers[0] == null)
                {
                    this.CharsetProbers[0] = new MultiByteCharsetProbeSet();
                }
                if (this.CharsetProbers[1] == null)
                {
                    this.CharsetProbers[1] = new SingleByteCharsetProbeSet();
                }
                if (this.CharsetProbers[2] == null)
                {
                    this.CharsetProbers[2] = new Latin1Prober();
                }
            }
        }
        else
        {
            if (this.DetectedCharacters == DetectedCharacters.PureASCII
                && (current == 0x1B || (current == 0x7B && this.LastChar == 0x7E)))
            {
                // Found the escape character or the HZ sequence "~{".
                this.DetectedCharacters = DetectedCharacters.EscASCII;
            }
            this.LastChar = current;
        }
    }

    ProbingState st = ProbingState.NegativeDetection;
    switch (this.DetectedCharacters)
    {
        case DetectedCharacters.EscASCII:
            if (this.EscCharsetProber == null)
            {
                this.EscCharsetProber = new EscCharsetProbeSet();
            }
            st = this.EscCharsetProber.HandleData(input, offset, length);
            if (st == ProbingState.Detected)
            {
                this.DetectorState = DetectorState.Done;
                this.DetectedCharset = this.EscCharsetProber.GetCharsetName();
            }
            break;

        case DetectedCharacters.Highbyte:
            for (int i = 0; i < ProbersNum; i++)
            {
                if (this.CharsetProbers[i] != null)
                {
                    st = this.CharsetProbers[i].HandleData(input, offset, length);
#if DEBUG
                    this.CharsetProbers[i].DumpStatus();
#endif
                    if (st == ProbingState.Detected)
                    {
                        this.DetectorState = DetectorState.Done;
                        this.DetectedCharset = this.CharsetProbers[i].GetCharsetName();
                        return;
                    }
                }
            }
            break;
    }
}
/// <summary>
/// Returns the prober to its initial state: DETECTING, last order 255, and
/// all sequence and character counters at zero.
/// </summary>
public override void reset()
{
    this.state = ProbingState.DETECTING;
    this.lastOrder = 255;

    for (int category = 0; category != NUMBER_OF_SEQ_CAT && category < NUMBER_OF_SEQ_CAT; ++category)
    {
        this.seqCounters[category] = 0;
    }

    this.totalSeqs = 0;
    this.totalChar = 0;
    this.freqChar = 0;
}
/// <summary>
/// Returns the prober to its initial state: state machine and both analysers
/// reset, state set to Detecting.
/// </summary>
public override void Reset()
{
    this.mCodingSM.Reset();
    this.mState = ProbingState.Detecting;
    this.mContextAnalyser.Reset();
    this.mDistributionAnalyser.Reset();
}
/// <summary>
/// Filters the input with FilterWithoutEnglishLetters and hands the result to
/// every active child prober.
/// </summary>
/// <param name="buffer">Buffer holding the bytes to examine.</param>
/// <param name="length">Number of bytes the caller considers valid (not used by the filter call here).</param>
/// <returns>The group's probing state after the children have run.</returns>
public override ProbingState HandleData(byte[] buffer, int length)
{
    // The same filter is applied for all children, regardless of each prober's
    // KeepEnglishLetters setting: as of now no prober in this group recognizes
    // a language written with English characters.
    byte[] filtered = new byte[buffer.Length];
    int filteredLength = FilterWithoutEnglishLetters(buffer, filtered);
    if (filteredLength == 0)
    {
        // Nothing survived the filter; the state stays as it is.
        return mState;
    }

    foreach (AbstractCSProber candidate in mProbers)
    {
        if (candidate.IsActive)
        {
            ProbingState result = candidate.HandleData(filtered, filteredLength);
            if (result == ProbingState.FoundIt)
            {
                mBestGuess = candidate;
                mState = ProbingState.FoundIt;
                break;
            }

            if (result == ProbingState.NotMe)
            {
                // Deactivate the child; give up once none remain.
                candidate.IsActive = false;
                mActiveNum--;
                if (mActiveNum <= 0)
                {
                    mState = ProbingState.NotMe;
                    break;
                }
            }
        }
    }

    return mState;
}
/// <summary>
/// Filters the input down to high bytes (plus the single byte following each
/// high-byte run) and hands the result to every active child prober.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The group's probing state after the children have run.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    bool keepNext = true;
    byte[] filtered = new byte[length];
    int filteredCount = 0;
    int end = offset + length;

    int pos = offset;
    while (pos < end)
    {
        byte current = buf[pos];
        bool isHighByte = (current & 0x80) != 0;
        if (isHighByte || keepNext)
        {
            // A low byte is kept only directly after a high byte.
            filtered[filteredCount++] = current;
            keepNext = isHighByte;
        }
        ++pos;
    }

    for (int index = 0; index < this.probers.Length; ++index)
    {
        if (!this.isActive[index])
        {
            continue;
        }

        ProbingState result = this.probers[index].handleData(filtered, 0, filteredCount);
        if (result == ProbingState.FOUND_IT)
        {
            this.bestGuess = index;
            this.state = ProbingState.FOUND_IT;
            break;
        }

        if (result == ProbingState.NOT_ME)
        {
            // Deactivate the child; give up once none remain.
            this.isActive[index] = false;
            --this.activeNum;
            if (this.activeNum <= 0)
            {
                this.state = ProbingState.NOT_ME;
                break;
            }
        }
    }

    return this.state;
}
/// <summary>
/// Returns the prober to its initial state: state machine and distribution
/// analyser reset, state set to Detecting.
/// </summary>
public override void Reset()
{
    this.mCodingSM.Reset();
    this.mState = ProbingState.Detecting;
    this.mDistributionAnalyser.Reset();
}
/// <summary>
/// Returns the prober to its initial state: DETECTING, every coding state
/// machine reset and active again, and no detected charset.
/// </summary>
public override void reset()
{
    this.state = ProbingState.DETECTING;

    for (int machine = 0; machine < this.codingSM.Length; machine += 1)
    {
        CodingStateMachine current = this.codingSM[machine];
        current.reset();
    }

    // All machines count as active again.
    this.activeSM = this.codingSM.Length;
    this.detectedCharset = null;
}
/// <summary>
/// Returns the prober to its initial state: Detecting, last order 255, all
/// sequence and character counters at zero, and active.
/// </summary>
public override void Reset()
{
    mState = ProbingState.Detecting;
    mLastOrder = 255;

    int category = 0;
    while (category < NUMBER_OF_SEQ_CAT)
    {
        mSeqCounters[category] = 0;
        category++;
    }

    mTotalSeqs = 0;
    mTotalChar = 0;
    mFreqChar = 0;
    active = true;
}
/// <summary>
/// Maps each byte to its order via the model, updates the character and
/// sequence counters, and applies the confidence shortcuts once enough
/// sequences have been seen.
/// </summary>
/// <param name="aBuf">Buffer holding the bytes to examine.</param>
/// <param name="length">Maximum number of bytes to examine, capped at the buffer size.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState HandleData(byte[] aBuf, int length)
{
    int pos = 0;
    while (pos < aBuf.Length && pos < length)
    {
        byte order = mModel.charToOrderMap[aBuf[pos]];

        if (order < SYMBOL_CAT_ORDER)
        {
            mTotalChar++;
        }

        if (order < SAMPLE_SIZE)
        {
            mFreqChar++;
            if (mLastOrder < SAMPLE_SIZE)
            {
                mTotalSeqs++;
                if (mReversed)
                {
                    // Reversed mode swaps the two letters in the lookup.
                    ++(mSeqCounters[mModel.precedenceMatrix[order * SAMPLE_SIZE + mLastOrder]]);
                }
                else
                {
                    ++(mSeqCounters[mModel.precedenceMatrix[mLastOrder * SAMPLE_SIZE + order]]);
                }
            }
        }

        mLastOrder = order;
        pos++;
    }

    if (mState == ProbingState.Detecting && mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)
    {
        float confidence = GetConfidence();
        if (confidence > POSITIVE_SHORTCUT_THRESHOLD)
        {
            mState = ProbingState.FoundIt;
        }
        else if (confidence < NEGATIVE_SHORTCUT_THRESHOLD)
        {
            mState = ProbingState.NotMe;
        }
    }

    return mState;
}
/// <summary>
/// Returns the prober to its initial state: DETECTING, last character class
/// OTH, and all frequency counters at zero.
/// </summary>
public override void reset()
{
    this.state = ProbingState.DETECTING;
    this.lastCharClass = OTH;

    for (int slot = 0; slot < this.freqCounter.Length; slot += 1)
    {
        this.freqCounter[slot] = 0;
    }
}
/// <summary>
/// Creates the prober with its state set to Detecting.
/// </summary>
public HebrewCharSetProber()
{
    state = ProbingState.Detecting;
}
/// <summary>
/// Returns the prober to its initial state: state machine reset, multi-byte
/// character count at zero, state set to DETECTING.
/// </summary>
public override void reset()
{
    codingSM.reset();
    numOfMBChar = 0;
    state = ProbingState.DETECTING;
}
/// <summary>
/// Filters the input with filterWithoutEnglishLetters and hands the result to
/// every active child prober.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The group's probing state after the children have run.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    ByteBuffer filtered = filterWithoutEnglishLetters(buf, offset, length);

    // Only probe when the filter left something behind.
    if (filtered.Position != 0)
    {
        for (int index = 0; index < this.probers.Length; ++index)
        {
            if (!this.isActive[index])
            {
                continue;
            }

            // The byte array is requested anew for each active child.
            ProbingState result = this.probers[index].handleData(filtered.ToByteArray(), 0, filtered.Position);
            if (result == ProbingState.FOUND_IT)
            {
                this.bestGuess = index;
                this.state = ProbingState.FOUND_IT;
                break;
            }

            if (result == ProbingState.NOT_ME)
            {
                // Deactivate the child; give up once none remain.
                this.isActive[index] = false;
                --this.activeNum;
                if (this.activeNum <= 0)
                {
                    this.state = ProbingState.NOT_ME;
                    break;
                }
            }
        }
    }

    return this.state;
}
/// <summary>
/// Filters the input with FilterWithEnglishLetters, then scores each pair of
/// consecutive character classes against the Latin-1 class model.
/// </summary>
/// <param name="aBuf">Buffer holding the bytes to examine.</param>
/// <param name="length">Number of bytes the caller considers valid (not used by the filter call here).</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState HandleData(byte[] aBuf, int length)
{
    byte[] filtered = new byte[aBuf.Length];
    int filteredLength = FilterWithEnglishLetters(aBuf, filtered);

    int pos = 0;
    while (pos < filteredLength)
    {
        byte charClass = Latin1_CharToClass[filtered[pos]];
        byte freq = Latin1ClassModel[mLastCharClass * CLASS_NUM + charClass];
        if (freq == 0)
        {
            // The model yields zero for this class pair: rule this prober out.
            mState = ProbingState.NotMe;
            break;
        }

        mFreqCounter[freq]++;
        mLastCharClass = charClass;
        pos++;
    }

    return mState;
}
/// <summary>
/// Runs the given bytes through the coding state machine, feeds the
/// distribution analyser, and updates the probing state.
/// </summary>
/// <param name="aBuf">Buffer holding the bytes to examine.</param>
/// <param name="aLen">Maximum number of bytes to examine, capped at the buffer size; zero is allowed.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState HandleData(byte[] aBuf, int aLen)
{
    // Number of bytes that can actually be read from the buffer.
    int count = aBuf.Length < aLen ? aBuf.Length : aLen;

    for (int i = 0; i < count; i++)
    {
        SMState codingState = mCodingSM.NextState(aBuf[i]);
        if (codingState == SMState.Error)
        {
            mState = ProbingState.NotMe;
            break;
        }
        if (codingState == SMState.ItsMe)
        {
            mState = ProbingState.FoundIt;
            break;
        }
        if (codingState == SMState.Start)
        {
            int charLen = mCodingSM.CurrentCharLen;
            if (i == 0)
            {
                // The character may have started in the previous call: pair the
                // byte saved in mLastChar[0] with the first byte of this call.
                mLastChar[1] = aBuf[0];
                mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
            }
            // NOTE(review): characters completing at i > 0 are not passed to any
            // analyser, and the context analyser is never fed here although
            // GotEnoughData() is consulted below. The disabled calls were
            //   mContextAnalyser.HandleOneChar(mLastChar, charLen)       (i == 0)
            //   mContextAnalyser.HandleOneChar(aBuf+i-1, charLen)        (i > 0)
            //   mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen)   (i > 0)
            // TODO confirm whether they should be restored with an offset-taking overload.
        }
    }

    // Save the final byte for the next call. Skipped for empty input, where the
    // previous aBuf[aLen - 1] threw; capped so that aLen > aBuf.Length is safe.
    if (count > 0)
    {
        mLastChar[0] = aBuf[count - 1];
    }

    if (mState == ProbingState.Detecting)
    {
        if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
        {
            mState = ProbingState.FoundIt;
        }
    }

    return mState;
}
/// <summary>
/// Returns the prober to its initial state: Detecting, last character class
/// OTH, the first FREQ_CAT_NUM frequency counters at zero, and active.
/// </summary>
public override void Reset()
{
    mState = ProbingState.Detecting;
    mLastCharClass = OTH;

    for (int category = 0; category < FREQ_CAT_NUM; category += 1)
    {
        mFreqCounter[category] = 0;
    }

    active = true;
}
/// <summary>
/// Runs the given bytes through the coding state machine, counts multi-byte
/// characters and updates the probing state.
/// </summary>
/// <param name="buffer">Buffer holding the bytes to examine.</param>
/// <param name="length">Maximum number of bytes to examine, capped at the buffer size.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState HandleData(byte[] buffer, int length)
{
    for (int pos = 0; pos < buffer.Length && pos < length; pos++)
    {
        SMState next = mCodingSM.NextState(buffer[pos]);
        if (next == SMState.Error)
        {
            mState = ProbingState.NotMe;
            break;
        }
        else if (next == SMState.ItsMe)
        {
            mState = ProbingState.FoundIt;
            break;
        }
        else if (next == SMState.Start)
        {
            bool isMultiByte = mCodingSM.CurrentCharLen >= 2;
            if (isMultiByte)
            {
                mNumOfMBChar++;
            }
        }
    }

    // Shortcut: while still undecided, a high enough confidence settles it.
    bool undecided = mState == ProbingState.Detecting;
    if (undecided && GetConfidence() > SHORTCUT_THRESHOLD)
    {
        mState = ProbingState.FoundIt;
    }

    return mState;
}
/// <summary>
/// Filters the input with filterWithEnglishLetters, then scores each pair of
/// consecutive character classes against the Latin-1 class model.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    ByteBuffer filtered = filterWithEnglishLetters(buf, offset, length);
    byte[] data = filtered.ToByteArray();
    int count = filtered.Position;

    int pos = 0;
    while (pos < count)
    {
        int unsignedValue = data[pos] & 0xFF;
        byte charClass = latin1CharToClass[unsignedValue];
        byte freq = latin1ClassModel[this.lastCharClass * CLASS_NUM + charClass];
        if (freq == 0)
        {
            // The model yields zero for this class pair: rule this prober out.
            this.state = ProbingState.NOT_ME;
            break;
        }

        ++this.freqCounter[freq];
        this.lastCharClass = charClass;
        ++pos;
    }

    return this.state;
}
/// <summary>
/// Maps each byte to its order via the model, updates the character and
/// sequence counters, and applies the confidence shortcuts once enough
/// sequences have been seen.
/// </summary>
/// <param name="buf">Buffer holding the bytes to examine.</param>
/// <param name="offset">Index of the first byte to examine.</param>
/// <param name="length">Number of bytes to examine.</param>
/// <returns>The probing state after the bytes have been processed.</returns>
public override ProbingState handleData(byte[] buf, int offset, int length)
{
    int end = offset + length;
    for (int pos = offset; pos < end; ++pos)
    {
        short order = this.model.getOrder(buf[pos]);

        if (order < SYMBOL_CAT_ORDER)
        {
            ++this.totalChar;
        }

        bool inSample = order < SAMPLE_SIZE;
        if (inSample)
        {
            ++this.freqChar;
            if (this.lastOrder < SAMPLE_SIZE)
            {
                ++this.totalSeqs;
                if (this.reversed)
                {
                    // Reversed mode swaps the two letters in the lookup.
                    ++(this.seqCounters[this.model.getPrecedence(order * SAMPLE_SIZE + this.lastOrder)]);
                }
                else
                {
                    ++(this.seqCounters[this.model.getPrecedence(this.lastOrder * SAMPLE_SIZE + order)]);
                }
            }
        }

        this.lastOrder = order;
    }

    if (this.state == ProbingState.DETECTING && this.totalSeqs > SB_ENOUGH_REL_THRESHOLD)
    {
        float confidence = getConfidence();
        if (confidence > POSITIVE_SHORTCUT_THRESHOLD)
        {
            this.state = ProbingState.FOUND_IT;
        }
        else if (confidence < NEGATIVE_SHORTCUT_THRESHOLD)
        {
            this.state = ProbingState.NOT_ME;
        }
    }

    return this.state;
}
/// <summary>
/// Resets every child prober, marks each one active, recounts the active
/// probers, clears the best guess and sets the state to DETECTING.
/// </summary>
public override void reset()
{
    this.activeNum = 0;

    for (int index = 0; index < this.probers.Length; index += 1)
    {
        this.probers[index].reset();
        this.isActive[index] = true;
        this.activeNum = this.activeNum + 1;
    }

    // -1 marks "no best guess yet".
    this.bestGuess = -1;
    this.state = ProbingState.DETECTING;
}
/// <summary>
/// Resets every non-null child prober, marks it active, recounts the active
/// probers, clears the best guess and sets the state to Detecting.
/// </summary>
public override void Reset()
{
    mActiveNum = 0;

    foreach (AbstractCSProber candidate in mProbers)
    {
        if (candidate == null)
        {
            // Empty slots are skipped and do not count as active.
            continue;
        }

        candidate.Reset();
        candidate.IsActive = true;
        mActiveNum++;
    }

    mBestGuess = null;
    mState = ProbingState.Detecting;
}