/// <summary>
/// Reads the packed data file, rebuilds the <c>PreFilter</c> and the <c>BlumFilter</c> from its
/// sections, classifies every labelled test word and prints how many were misclassified.
/// </summary>
/// <param name="path">Path of the packed data file; it is read whole and split with <c>DataPack.Unpack</c>.</param>
static void TestBlum(string path)
{
    var buf = File.ReadAllBytes(path);
    var tests = LoadTestFiles(spath_tests_folder);

    // Section 0 is the header. Unpack it once and read both integers from the same array.
    // NOTE(review): Main calls DataPack.Pack(maxStringLen, count, filterBufLen, ...); confirm that the
    // header really stores filterBufLen (and not the word count) at offset sizeof(int).
    byte[] header = DataPack.Unpack(buf, 0);
    int maxStringLen = BitConverter.ToInt32(header, 0);
    int filterBufLen = BitConverter.ToInt32(header, sizeof(int));

    // Section 1 is the raw filter array; sections 2-4 are ASCII string lists for the pre-filter.
    byte[] arr = DataPack.Unpack(buf, 1);
    var excs2 = SplitString(ASCIIEncoding.ASCII.GetString(DataPack.Unpack(buf, 2)));
    var ends = SplitString(ASCIIEncoding.ASCII.GetString(DataPack.Unpack(buf, 3)));
    var d2b = SplitString(ASCIIEncoding.ASCII.GetString(DataPack.Unpack(buf, 4)));

    var bf = new BlumFilter(filterBufLen);
    bf.SetArray(arr);
    var flt = new PreFilter(excs2, ends, d2b, maxStringLen);

    int errcount = 0;
    foreach (var t in tests)
    {
        // Invariant lower-casing: the current culture must not influence how test words are
        // normalised (e.g. Turkish casing would turn "I" into a dotless "ı").
        var s = t.Key.ToLowerInvariant().Trim();

        string sb;
        var sf = flt.WordStatus(s, out sb);

        bool isWord;
        if (sf == 0)
        {
            // The pre-filter rejects the word outright.
            isWord = false;
        }
        else if (sf == 1)
        {
            // The pre-filter accepts the word outright.
            isWord = true;
        }
        else
        {
            // Undecided: look up the base form produced by the pre-filter in the filter array.
            isWord = bf.Contains(sb);
        }

        // Both false positives and false negatives count as errors.
        if (isWord != t.Value)
        {
            errcount++;
        }
    }

    // Avoid printing NaN (0/0) when there are no test entries at all.
    float errorPercent = tests.Count == 0
        ? 0.0F
        : (float)errcount / (float)tests.Count * 100.0F;

    Console.WriteLine("Errors:{0} from {1}, {2}", errcount, tests.Count, errorPercent);
}
/// <summary>
/// Entry point of the build tool: prepares the word lists, derives the endings list, fills the
/// <c>BlumFilter</c>, packs everything into the data file, runs <c>TestBlum</c> on the result and
/// reports the package size.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Disabled one-off preparation of the test set:
    // loadTests(spath_tests_folder);
    // CombineTests(spath_tests_folder, spath_tests_2);

    // Build parameters.
    int filterBufLen = 506464;
    int maxStringLen = 12;
    int endingsCount = 23;
    double err = 0.04;

    // Preparation stages; each one communicates with the next through files.
    LoadAndOrderAndSave(spath0, spath2, spathe, maxStringLen);
    SaveAvailable2Beg(spath2, spath2e, spath3, err);
    TreeAnalyze(spath3, spath_ends, maxStringLen, filterBufLen, endingsCount);

    // Read back what the stages above produced.
    string[] sourceWords = File.ReadAllLines(spath3);
    List<string> exceptionList = File.ReadAllLines(spathe).ToList();
    List<string> endingList = File.ReadAllLines(spath_ends).ToList();
    List<string> prefixList = File.ReadAllLines(spath2e).ToList();
    Console.WriteLine("d2b:{0}", prefixList.Count);

    PreFilter preFilter = new PreFilter(exceptionList, endingList, prefixList, maxStringLen);

    // Used as a set: the keys are the distinct base forms, the values are irrelevant.
    Dictionary<string, bool> distinctBases = new Dictionary<string, bool>();
    Console.WriteLine("Words before filtering:{0}", sourceWords.Length);

    // Only words the pre-filter leaves undecided (status 2) go into the filter,
    // represented by the base form that the pre-filter hands back.
    for (int index = 0; index < sourceWords.Length; index++)
    {
        string baseForm;
        if (preFilter.WordStatus(sourceWords[index], out baseForm) != 2)
        {
            continue;
        }

        distinctBases[baseForm] = true;
    }

    List<string> filterWords = new List<string>(distinctBases.Keys);
    filterWords.Sort();
    Console.WriteLine("Words after {0} level filtering:{1}", endingList.Count, filterWords.Count);

    BlumFilter filter = new BlumFilter(filterBufLen);
    filter.AddRange(filterWords);

    // Replace any filter image left over from a previous run.
    if (File.Exists(spath_filter))
    {
        File.Delete(spath_filter);
    }

    File.WriteAllBytes(spath_filter, filter.GetArray());

    // Pack the header values and all data files into the single data file.
    DataPack.Pack(maxStringLen, filterWords.Count, filterBufLen, spath_data, spath_filter, spathe, spath_ends, spath2e);

    // Verify the packed file against the labelled test set.
    TestBlum(spath_data);

    // Report the total size and how far it is from the 64 KiB (64 * 1024 bytes) mark.
    var totalSize = CalcSources();
    Console.WriteLine("Packet size:{0} Kb, overflow:{1} bytes", totalSize / 1024.0, totalSize - 64 * 1024);

    // Keep the console window open until Enter is pressed.
    Console.ReadLine();
}
/// <summary>
/// Collects candidate word endings from the word list in <paramref name="path"/>, ranks them by
/// <c>Pair3.Error</c>, prunes endings that overlap with other candidates and writes the first
/// <paramref name="endsLimit"/> remaining endings to <paramref name="path2"/>.
/// </summary>
/// <param name="path">Input text file, one word per line.</param>
/// <param name="path2">Output text file for the selected endings; replaced if it already exists.</param>
/// <param name="maxStringLen">Passed to the <c>PreFilter</c> used to pre-select the input words.</param>
/// <param name="arrLen">Passed to every <c>Pair3</c> as its size limit.</param>
/// <param name="endsLimit">Maximum number of endings written to <paramref name="path2"/>.</param>
static void TreeAnalyze(string path, string path2, int maxStringLen, int arrLen, int endsLimit)
{
    // Pre-filter with empty exception, ending and prefix lists; only maxStringLen is configured.
    PreFilter bareFilter = new PreFilter(new List<string>(), new List<string>(), new List<string>(), maxStringLen);

    // Keep only the words for which the pre-filter reports status 2.
    List<string> words = new List<string>();
    string[] lines = File.ReadAllLines(path);
    for (int n = 0; n < lines.Length; n++)
    {
        string ignoredBase;
        if (bareFilter.WordStatus(lines[n], out ignoredBase) == 2)
        {
            words.Add(lines[n]);
        }
    }

    HashSet<string> knownWords = new HashSet<string>(words);
    int distinctWordCount = knownWords.Count;

    // *********************************************
    // Split every word at each interior position into stem + ending and gather statistics per ending.
    Dictionary<string, Pair3> endingStats = new Dictionary<string, Pair3>();
    foreach (string word in words)
    {
        for (int split = 1; split < word.Length - 1; split++)
        {
            string ending = word.Substring(split);
            string stem = word.Substring(0, split);
            bool stemIsWord = knownWords.Contains(stem);

            Pair3 stats;
            if (!endingStats.TryGetValue(ending, out stats))
            {
                if (!stemIsWord)
                {
                    // An ending is only registered once a stem that is itself a word has been seen.
                    continue;
                }

                stats = new Pair3(ending, 0, 0, arrLen, distinctWordCount);
                endingStats[ending] = stats;
            }

            if (stemIsWord)
            {
                stats.Correlations++;
            }

            // Counted for every occurrence from the moment the ending is registered.
            stats.Count++;
        }
    }

    Debug.WriteLine("Endings:{0}", endingStats.Count);

    // The same ordering (ascending Error) is applied before each pruning step and before output.
    Comparison<Pair3> byError = (left, right) => left.Error.CompareTo(right.Error);

    List<Pair3> ranked = endingStats.Values.ToList();
    ranked.Sort(byError);

    // First reduction, in ascending Error order: skip a candidate when one of its suffixes
    // (two characters or longer) has already been kept.
    Dictionary<string, Pair3> kept = new Dictionary<string, Pair3>();
    foreach (Pair3 candidate in ranked)
    {
        string text = candidate.Ends;
        bool suffixAlreadyKept = false;
        for (int start = 0; start < text.Length - 1 && !suffixAlreadyKept; start++)
        {
            suffixAlreadyKept = kept.ContainsKey(text.Substring(start));
        }

        if (!suffixAlreadyKept)
        {
            kept.Add(text, candidate);
        }
    }

    ranked = kept.Values.ToList();
    ranked.Sort(byError);
    Debug.WriteLine("Endings reduced:{0}", ranked.Count);

    // Second reduction, walking from the end of the sorted list to the start: each candidate
    // first removes any previously added entry that equals one of its suffixes (two characters
    // or longer), then is added itself.
    kept.Clear();
    for (int j = ranked.Count - 1; j >= 0; j--)
    {
        Pair3 candidate = ranked[j];
        string text = candidate.Ends;
        for (int start = 0; start < text.Length - 1; start++)
        {
            kept.Remove(text.Substring(start));
        }

        kept.Add(text, candidate);
    }

    ranked = kept.Values.ToList();
    ranked.Sort(byError);
    Debug.WriteLine("Endings reduced:{0}", ranked.Count);

    // Replace any previous output file, then write at most endsLimit endings.
    if (File.Exists(path2))
    {
        File.Delete(path2);
    }

    File.WriteAllLines(path2, ranked.Select(item => item.Ends).Take(endsLimit));
}