/// <summary>
/// Encodes <paramref name="text"/> with every entry of <c>encodings</c> and feeds the resulting
/// bytes to <c>ProcessNGram</c> through <c>NGramHelper.ForEachNGram</c>, once with the argument 1
/// and once with the argument 2 (presumably the n-gram size: single bytes, then byte pairs —
/// confirm against <c>NGramHelper</c>).
/// </summary>
/// <param name="text">Text to encode and scan.</param>
internal void ProcessText(string text)
{
    foreach (var encoding in encodings)
    {
        using (var ms = new MemoryStream(encoding.GetBytes(text)))
        {
            // Published before the scan starts; presumably read by ProcessNGram to attribute
            // the n-grams to this encoding (verify against ProcessNGram).
            CurrentEncoding = encoding;

            NGramHelper.ForEachNGram(ms, 1, ProcessNGram);

            // The first pass may leave the stream positioned at its end. Rewind explicitly so
            // the second pass always sees the full byte sequence; this is a no-op if the helper
            // already seeks to the start on its own.
            ms.Position = 0;
            NGramHelper.ForEachNGram(ms, 2, ProcessNGram);
        }
    }
}
/// <summary>
/// Scores <paramref name="bytes"/> against every key listed in <c>P0</c> and returns the scores
/// normalised so that they sum to 1.
/// </summary>
/// <remarks>
/// Each key starts at the log-odds <c>ln(p / (1 - p))</c> of its <c>P0</c> value. For every n-gram
/// value reported by <c>NGramHelper.ForEachNGram</c> that is present in <c>Codes</c>, the log-odds
/// of the matching per-key values are added. The accumulated scores are then passed through a
/// logistic function and divided by their sum.
/// NOTE(review): the keys look like encoding identifiers (e.g. code pages) and <c>P0</c> like
/// prior probabilities — confirm against the code that fills <c>P0</c> and <c>Codes</c>.
/// </remarks>
/// <param name="bytes">Raw bytes to analyse.</param>
/// <returns>
/// A dictionary with one entry per key of <c>P0</c>; empty when <c>P0</c> has no entries.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
public Dictionary<int, double> GetMarks(byte[] bytes)
{
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes");
    }

    var ret = new Dictionary<int, double>();

    // Seed every candidate key with the log-odds of its P0 value.
    foreach (var pair in P0)
    {
        ret[pair.Key] = Math.Log(pair.Value / (1.0 - pair.Value));
    }

    // Sequential reader over the buffer; yields -1 once the data is exhausted.
    int pos = 0;
    Func<int> readByte = () => pos < bytes.Length ? bytes[pos++] : -1;

    NGramHelper.ForEachNGram(CodeSize, readByte, (size, value) =>
    {
        if (!Codes.ContainsKey(value))
        {
            return;
        }

        foreach (var pair in Codes[value])
        {
            // Only keys seeded from P0 take part in the scoring.
            double current;
            if (ret.TryGetValue(pair.Key, out current))
            {
                ret[pair.Key] = current + Math.Log(pair.Value / (1.0 - pair.Value));
            }
        }
    });

    var keys = ret.Keys.ToArray();
    if (keys.Length == 0)
    {
        // No candidates: there is no min/max to compute and nothing to normalise.
        return ret;
    }

    double minValue = ret[keys[0]];
    double maxValue = minValue;
    foreach (var key in keys)
    {
        var x = ret[key];
        if (minValue > x)
        {
            minValue = x;
        }
        if (maxValue < x)
        {
            maxValue = x;
        }
    }

    // When the scores spread over 100 or more, map them linearly onto [-5, 5] so the
    // logistic step below still produces distinguishable values.
    var spread = maxValue - minValue;
    if (spread >= 100)
    {
        var scale = 10.0 / spread;
        foreach (var key in keys)
        {
            ret[key] = (ret[key] - minValue) * scale - 5;
        }
    }

    var pSum = 0.0;
    foreach (var key in keys)
    {
        // Cap the exponent so Math.Exp cannot overflow to infinity (which would give NaN below).
        var x = Math.Exp(Math.Min(100.0, ret[key]));
        var p = x / (1.0 + x);
        pSum += p;
        ret[key] = p;
    }

    foreach (var key in keys)
    {
        ret[key] /= pSum;
    }

    return ret;
}