예제 #1
0
        /// <summary>
        /// Reads the packed data file at <paramref name="path"/>, rebuilds a <c>PreFilter</c> and a
        /// <c>BlumFilter</c> from its sections, classifies every loaded test word and prints how many
        /// classifications disagree with the expected value.
        /// </summary>
        /// <param name="path">Path of the packed data file to read.</param>
        static void TestBlum(string path)
        {
            var buf   = File.ReadAllBytes(path);
            var tests = LoadTestFiles(spath_tests_folder);

            // Section 0 carries the integer header; unpack it once and read every field from it.
            byte[] header       = DataPack.Unpack(buf, 0);
            int    maxStringLen = BitConverter.ToInt32(header, 0);
            // NOTE(review): this is the same offset the disabled "count" read used
            // (BitConverter.ToInt32(header, sizeof(int))). Confirm against the layout written by
            // DataPack.Pack that the filter length really lives at offset sizeof(int).
            int filterBufLen = BitConverter.ToInt32(header, sizeof(int));

            // Section 1: raw filter array. Sections 2-4: ASCII text lists fed to the pre-filter.
            byte[] arr = DataPack.Unpack(buf, 1);

            var excs2 = SplitString(ASCIIEncoding.ASCII.GetString(DataPack.Unpack(buf, 2)));
            var ends  = SplitString(ASCIIEncoding.ASCII.GetString(DataPack.Unpack(buf, 3)));
            var d2b   = SplitString(ASCIIEncoding.ASCII.GetString(DataPack.Unpack(buf, 4)));

            var bf = new BlumFilter(filterBufLen);
            bf.SetArray(arr);

            var flt = new PreFilter(excs2, ends, d2b, maxStringLen);

            int errcount = 0;

            foreach (var t in tests)
            {
                // Invariant lower-casing keeps the normalization independent of the current culture.
                var    word = t.Key.ToLowerInvariant().Trim();
                string wordBase;

                var  status = flt.WordStatus(word, out wordBase);
                bool isWord;

                if (status == 0)
                {
                    // Pre-filter rejects the word outright.
                    isWord = false;
                }
                else if (status == 1)
                {
                    // Pre-filter accepts the word outright.
                    isWord = true;
                }
                else
                {
                    // Undecided: ask the filter about the base string the pre-filter produced.
                    isWord = bf.Contains(wordBase);
                }

                // Both false positives and false negatives count as errors.
                if (isWord != t.Value)
                {
                    errcount++;
                }
            }

            // Report 0% for an empty test set instead of dividing zero by zero (which prints NaN).
            float errorPercent = tests.Count == 0
                                 ? 0.0F
                                 : (float)errcount / (float)(tests.Count) * 100.0F;

            Console.WriteLine("Errors:{0} from {1}, {2}",
                              errcount, tests.Count, errorPercent);
        }
예제 #2
0
        /// <summary>
        /// Program entry point. Runs the preparation steps (<c>LoadAndOrderAndSave</c>,
        /// <c>SaveAvailable2Beg</c>, <c>TreeAnalyze</c>), filters the resulting word list through a
        /// <c>PreFilter</c>, fills a <c>BlumFilter</c> with the surviving base strings, writes and packs
        /// the data, runs <c>TestBlum</c> on the packed file and prints size statistics.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // loadTests(spath_tests_folder);
            // CombineTests(spath_tests_folder, spath_tests_2);

            int    filterBufLen = 506464;
            int    maxStringLen = 12;
            int    endingsCount = 23;
            double err          = 0.04;

            // The result of this call was never read, so it is no longer stored in a local.
            LoadAndOrderAndSave(spath0, spath2, spathe, maxStringLen);

            SaveAvailable2Beg(spath2, spath2e, spath3, err);

            TreeAnalyze(spath3, spath_ends, maxStringLen, filterBufLen, endingsCount);

            var buf = File.ReadAllLines(spath3);

            var excs2 = File.ReadAllLines(spathe).ToList();
            var ends  = File.ReadAllLines(spath_ends).ToList();
            var d2b   = File.ReadAllLines(spath2e).ToList();

            Console.WriteLine("d2b:{0}", d2b.Count);

            var flt = new PreFilter(excs2, ends, d2b, maxStringLen);

            Console.WriteLine("Words before filtering:{0}", buf.Length);

            // Collect the distinct base strings of every word the pre-filter leaves undecided
            // (status 2); a set expresses the de-duplication directly.
            var distinctBases = new HashSet <string>();
            foreach (var word in buf)
            {
                string sbase;
                if (flt.WordStatus(word, out sbase) == 2)
                {
                    distinctBases.Add(sbase);
                }
            }

            List <string> buf2 = distinctBases.ToList();
            buf2.Sort();

            Console.WriteLine("Words after {0} level filtering:{1}", ends.Count, buf2.Count);

            var bf = new BlumFilter(filterBufLen);

            bf.AddRange(buf2);

            // File.WriteAllBytes overwrites an existing file, so no separate delete step is needed.
            File.WriteAllBytes(spath_filter, bf.GetArray());

            // pack data
            DataPack.Pack(maxStringLen, buf2.Count, filterBufLen, spath_data, spath_filter, spathe, spath_ends, spath2e);

            TestBlum(spath_data);
            var cs = CalcSources();

            // "overflow" is the amount by which the computed size exceeds 64 KiB.
            Console.WriteLine("Packet size:{0} Kb, overflow:{1} bytes", cs / 1024.0, cs - 64 * 1024);

            // Wait for Enter before the process exits.
            Console.ReadLine();
        }
예제 #3
0
        /// <summary>
        /// Reads the word list at <paramref name="path"/>, gathers statistics about word endings whose
        /// remaining prefix is itself a listed word, prunes endings that overlap by suffix, and writes
        /// the first <paramref name="endsLimit"/> endings (ordered by <c>Pair3.Error</c>) to
        /// <paramref name="path2"/>.
        /// </summary>
        /// <param name="path">Input file with one word per line.</param>
        /// <param name="path2">Output file that receives the selected endings, one per line.</param>
        /// <param name="maxStringLen">Maximum string length handed to the <c>PreFilter</c>.</param>
        /// <param name="arrLen">Size limit forwarded to every <c>Pair3</c> that is created.</param>
        /// <param name="endsLimit">Maximum number of endings written to the output file.</param>
        static void TreeAnalyze(string path, string path2, int maxStringLen, int arrLen, int endsLimit)
        {
            var endingStats = new Dictionary <string, Pair3>();
            var knownWords  = new HashSet <string>();

            // A pre-filter without any exception, ending or prefix lists.
            var emptyFilter = new PreFilter(new List <string>(), new List <string>(), new List <string>(), maxStringLen);
            var candidates  = new List <string>();

            // Keep only the words the pre-filter reports with status 2; remember them both in
            // file order (for iteration) and as a set (for prefix look-ups).
            foreach (var line in File.ReadAllLines(path))
            {
                string ignoredBase;
                if (emptyFilter.WordStatus(line, out ignoredBase) == 2)
                {
                    candidates.Add(line);
                    knownWords.Add(line);
                }
            }

            int wordCount = knownWords.Count;

            foreach (var word in candidates)
            {
                // Try every split that leaves at least one leading and two trailing characters.
                for (int cut = 1; cut < word.Length - 1; cut++)
                {
                    string head = word.Substring(0, cut);
                    string tail = word.Substring(cut);

                    Pair3 entry;

                    if (knownWords.Contains(head))
                    {
                        // The prefix is a listed word: make sure the ending is tracked and
                        // count the correlation.
                        if (!endingStats.TryGetValue(tail, out entry))
                        {
                            entry             = new Pair3(tail, 0, 0, arrLen, wordCount);
                            endingStats[tail] = entry;
                        }

                        entry.Correlations++;
                    }

                    // Occurrences are counted only for endings that are already tracked.
                    if (endingStats.TryGetValue(tail, out entry))
                    {
                        entry.Count++;
                    }
                }
            }

            Debug.WriteLine("Endings:{0}", endingStats.Count);

            // Ascending order by Error; shared by all three sorting steps.
            Comparison <Pair3> byError = (a, b) => a.Error.CompareTo(b.Error);

            var ranked = endingStats.Values.ToList();
            ranked.Sort(byError);

            // First reduction: walking in Error order, skip an ending when one of its suffixes
            // (of length two or more, itself included) has already been kept.
            var kept = new Dictionary <string, Pair3>();

            foreach (var candidate in ranked)
            {
                string ending  = candidate.Ends;
                bool   covered = false;

                for (int start = 0; !covered && start < ending.Length - 1; start++)
                {
                    covered = kept.ContainsKey(ending.Substring(start));
                }

                if (!covered)
                {
                    kept.Add(ending, candidate);
                }
            }

            ranked = kept.Values.ToList();
            ranked.Sort(byError);

            Debug.WriteLine("Endings reduced:{0}", ranked.Count);
            kept.Clear();

            // Second reduction: walk from the last element to the first; every ending evicts
            // its previously kept suffixes before being stored itself.
            for (int idx = ranked.Count - 1; idx >= 0; idx--)
            {
                Pair3  candidate = ranked[idx];
                string ending    = candidate.Ends;

                for (int start = 0; start < ending.Length - 1; start++)
                {
                    // Remove is a no-op when the key is absent.
                    kept.Remove(ending.Substring(start));
                }

                kept.Add(ending, candidate);
            }

            ranked = kept.Values.ToList();
            ranked.Sort(byError);

            Debug.WriteLine("Endings reduced:{0}", ranked.Count);

            if (File.Exists(path2))
            {
                File.Delete(path2);
            }

            var selected = ranked.Select(p => p.Ends).Take(endsLimit);

            File.WriteAllLines(path2, selected);
        }