예제 #1
0
        /// <summary>
        /// Queues the string <paramref name="s"/> in the shared <c>Buffer</c> together with a fresh
        /// <c>Ref&lt;int&gt;</c> placeholder and returns that placeholder.
        /// When the buffer holds exactly 5,000,000 entries after the addition, the buffered strings are
        /// pushed into <paramref name="sic"/> via <c>InsertPortionForce</c>.
        /// </summary>
        /// <param name="sic">Coding that receives the buffered strings when the threshold is hit.</param>
        /// <param name="s">String to queue; used as the buffer key.</param>
        /// <returns>The placeholder stored in the buffer under <paramref name="s"/>.</returns>
        public static Ref <int> Insert(this StringIntCoding sic, string s)
        {
            // Number of buffered entries at which a flush is triggered.
            const int flushThreshold = 5 * 1000 * 1000;

            var placeholder = new Ref<int>();
            Buffer.Add(s, placeholder);

            // NOTE(review): the flush fires only on exact equality and no clearing of Buffer is
            // visible here - confirm the buffer is reset elsewhere, otherwise it flushes only once.
            bool thresholdReached = Buffer.Count == flushThreshold;
            if (thresholdReached)
            {
                InsertPortionForce(sic);
            }

            return placeholder;
        }
예제 #2
0
        /// <summary>
        /// Sends every key currently held in <c>Buffer</c> to <c>sic.InsertPortion</c> and writes the
        /// value returned for each key into the <c>Ref</c> placeholder buffered under that key.
        /// </summary>
        /// <param name="sic">Coding whose <c>InsertPortion</c> is invoked with the buffered keys.</param>
        private static void InsertPortionForce(StringIntCoding sic)
        {
            // Snapshot the buffered keys into a plain array for InsertPortion.
            var pendingKeys = new string[Buffer.Count];
            Buffer.Keys.CopyTo(pendingKeys, 0);
            // Sorting of the keys is currently switched off: Array.Sort(pendingKeys);

            var assignedCodes = sic.InsertPortion(pendingKeys);

            // Publish the result for each key through the placeholder handed out earlier.
            foreach (var entry in Buffer)
            {
                var placeholder = entry.Value;
                placeholder.Value = assignedCodes[entry.Key];
            }
        }
예제 #3
0
        /// <summary>
        /// Creates a <c>StringIntCoding</c> over the relative <c>Databases</c> folder, prints "Start"
        /// and clears the coding.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void MainNew(string[] args)
        {
            string path = @"..\..\..\Databases\";

            StringIntCoding sic = new StringIntCoding(path);

            Console.WriteLine("Start");

            // The locals that were declared but never read (source path, timers, portion sizes,
            // hash set) have been removed; the observable behaviour is unchanged.
            sic.Clear();
        }
예제 #4
0
        /// <summary>
        /// Guid coding test: repeatedly generates a portion of random Guid strings, passes the portion
        /// to <c>StringIntCoding.InsertPortion</c> and prints how long each step took in milliseconds.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main11(string[] args)
        {
            string path = @"..\..\..\Databases\";

            StringIntCoding sic = new StringIntCoding(path);

            Console.WriteLine("Start");
            // Stopwatch is monotonic, unlike differences of DateTime.Now which follow wall-clock changes.
            var totalTimer = System.Diagnostics.Stopwatch.StartNew();
            var stepTimer  = new System.Diagnostics.Stopwatch();

            int portion_size = 5000000;
            int n_portions   = 20;

            sic.Clear();
            HashSet<string> hs = new HashSet<string>();

            for (int j = 0; j < n_portions; j++)
            {
                stepTimer.Restart();
                hs.Clear();
                for (int i = 0; i < portion_size; i++)
                {
                    string id = Guid.NewGuid().ToString();
                    hs.Add(id);
                }

                Console.WriteLine("Set ok. duration=" + stepTimer.ElapsedMilliseconds);
                stepTimer.Restart();

                // The reported insert duration includes copying the set into the array, as before.
                string[] arr = new string[hs.Count];
                hs.CopyTo(arr);
                sic.InsertPortion(arr);
                Console.WriteLine("InsertPortion ok. portion=" + j + " HashSet.Size=" + hs.Count + " duration=" + stepTimer.ElapsedMilliseconds);
            }
            Console.WriteLine("Total: {0}", totalTimer.ElapsedMilliseconds);
        }
예제 #5
0
        /// <summary>
        /// Builds a dictionary of 1000 random Guid strings mapped to the numbers 0..999, constructs a
        /// <c>StringIntRAMDIctionary</c> from it and checks that <c>GetCode</c> returns the stored number
        /// for every key. The method returns right after that check; the <c>StringIntCoding</c> code
        /// below the <c>return</c> is never executed.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        /// <exception cref="KeyNotFoundException">A looked-up code differs from the stored number.</exception>
        public static void Main(string[] args)
        {
            string path = @"..\..\..\Databases\";

            // Range elements equal their own positions, so each fresh Guid maps to its sequence number.
            var guidCodes = Enumerable.Range(0, 1000)
                .ToDictionary(number => Guid.NewGuid().ToString(), number => number);
            StringIntRAMDIctionary ramDictionary = new StringIntRAMDIctionary(path + "ram dictionary", guidCodes);

            foreach (var pair in guidCodes)
            {
                if (pair.Value == ramDictionary.GetCode(pair.Key))
                {
                    continue;
                }
                throw new KeyNotFoundException();
            }
            return;

            // Unreachable: the unconditional return above disables everything that follows.
            StringIntCoding sic = new StringIntCoding(path);

            Console.WriteLine("Start");
            DateTime tt0  = DateTime.Now;
            DateTime tt00 = tt0;

            int portion_size = 100;
            int n_portions   = 10;

            sic.Clear();
            // Enumerable.Range(-10, 0) has a count of 0, so this first portion is an empty array.
            string[] firstPortion  = Enumerable.Range(-10, 0).Select(i => i.ToString()).ToArray();
            sic.InsertPortion(firstPortion);
            string[] secondPortion = Enumerable.Range(0, 10).Select(i => i.ToString()).ToArray();
            sic.InsertPortion(secondPortion);
            sic.MakeIndexed();
            Console.WriteLine(sic.GetName(4));
            Console.WriteLine(sic.GetCode("0"));
        }
예제 #6
0
        /// <summary>
        /// Reads a tab-separated text file in up to 10 portions of at most 20,000,000 lines each,
        /// collects the distinct identifiers of every portion in a hash set, inserts them into a
        /// <c>StringIntCoding</c> and finally builds its indexes, printing timings in milliseconds.
        /// </summary>
        /// <remarks>
        /// Empty lines, lines starting with '@' and lines that do not split into exactly three
        /// tab-separated fields are skipped. The first field is always collected. The third field is
        /// collected (without its last character) only when it does not start with a quote, '&lt;', '-'
        /// or a digit and is not "true"/"false".
        /// </remarks>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main2(string[] args)
        {
            string path = @"..\..\..\Databases\";

            // Dispose the reader even when parsing or coding throws.
            using (System.IO.StreamReader sr = new System.IO.StreamReader(@"F:\FactographData\freebase-rdf-2013-02-10-00-00.nt2"))
            {
                StringIntCoding sic = new StringIntCoding(path);

                Console.WriteLine("Start");
                // Stopwatch is monotonic, unlike differences of DateTime.Now which follow wall-clock changes.
                var timer = new System.Diagnostics.Stopwatch();

                string line     = "";
                int    linecnt  = 0;
                int    nportion = 20000000;

                HashSet<string> hs = new HashSet<string>();

                for (int j = 0; j < 10; j++)
                {
                    timer.Restart();
                    hs.Clear();
                    for (int i = 0; i < nportion; i++)
                    {
                        line = sr.ReadLine(); linecnt++;
                        if (line == null)
                        {
                            break;
                        }
                        if (line.Length == 0 || line[0] == '@')
                        {
                            continue;
                        }
                        string[] parts = line.Split('\t');
                        if (parts.Length != 3)
                        {
                            continue;
                        }
                        hs.Add(parts[0]);

                        string third = parts[2];
                        // An empty third field used to crash on third[0]; there is nothing to collect from it.
                        if (third.Length == 0)
                        {
                            continue;
                        }
                        char fc = third[0];
                        if (fc != '\"' && fc != '<' && fc != '-' && !char.IsDigit(fc))
                        {
                            // Drop the last character of the field before using it as an identifier.
                            string ss = third.Substring(0, third.Length - 1);
                            if (ss != "true" && ss != "false")
                            {
                                hs.Add(ss);
                            }
                        }
                    }

                    // The reported duration covers reading the portion, the array copy and the insert.
                    string[] arr = new string[hs.Count];
                    hs.CopyTo(arr);
                    sic.InsertPortion(arr);
                    Console.WriteLine("InsertPortion ok. line=" + (linecnt / 1000000) + " HashSet.Size=" + hs.Count + " duration=" + timer.ElapsedMilliseconds);
                    if (line == null)
                    {
                        // End of file reached inside this portion.
                        break;
                    }
                    // Garbage collection between portions.
                    GC.Collect();
                }

                timer.Restart();
                sic.MakeIndexed();
                Console.WriteLine("Indexes ok. Count=" + sic.Count + " duration=" + timer.ElapsedMilliseconds);
            }
        }