/// <summary>
/// Queues a string for coding and returns the reference cell that receives its integer code
/// when the buffered portion is flushed by <c>InsertPortionForce</c>.
/// </summary>
/// <param name="sic">Coding table the buffered strings are eventually inserted into.</param>
/// <param name="s">String to be coded.</param>
/// <returns>
/// The reference cell associated with <paramref name="s"/>. Its <c>Value</c> is only assigned
/// once the buffer has been flushed.
/// </returns>
public static Ref<int> Insert(this StringIntCoding sic, string s)
{
    // Number of buffered strings at which the buffer is flushed into the coding table.
    const int bufferMax = 5 * 1000 * 1000;

    // A string that is already waiting in the buffer shares its existing cell instead of
    // being added a second time.
    // NOTE(review): assumes Buffer is a dictionary-like map (string -> Ref<int>) declared
    // elsewhere in this class, whose Add rejects duplicate keys - confirm.
    Ref<int> codeRef;
    if (Buffer.TryGetValue(s, out codeRef))
    {
        return codeRef;
    }

    codeRef = new Ref<int>();
    Buffer.Add(s, codeRef);

    // The flush is triggered only at the moment the count reaches the threshold exactly.
    if (Buffer.Count == bufferMax)
    {
        InsertPortionForce(sic);
    }

    return codeRef;
}
/// <summary>
/// Flushes the buffer: inserts all buffered strings into the coding table as one portion,
/// writes the resulting codes into the buffered reference cells, then empties the buffer.
/// </summary>
/// <param name="sic">Coding table that receives the buffered strings.</param>
private static void InsertPortionForce(StringIntCoding sic)
{
    // Snapshot the buffered strings; the portion is passed on unsorted.
    string[] arr = new string[Buffer.Count];
    Buffer.Keys.CopyTo(arr, 0);

    // The result is indexed by string and yields the code written into each cell.
    var values = sic.InsertPortion(arr);
    foreach (var keyValue in Buffer)
    {
        keyValue.Value.Value = values[keyValue.Key];
    }

    // Every cell now holds its code, so the entries are no longer needed. Without this the
    // buffer would keep growing and the "Count == bufferMax" trigger in Insert would fire
    // at most once.
    Buffer.Clear();
}
/// <summary>
/// Stub entry point: opens the coding table in the relative "Databases" directory,
/// prints "Start" and clears the table. No data is loaded.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
public static void MainNew(string[] args)
{
    string path = @"..\..\..\Databases\";
    StringIntCoding sic = new StringIntCoding(path);
    Console.WriteLine("Start");
    sic.Clear();
}
/// <summary>
/// Guid conversion test: clears the coding table, then repeatedly generates a portion of
/// fresh Guid strings, inserts it with <c>InsertPortion</c> and prints the elapsed
/// milliseconds of each step and of the whole run.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
public static void Main11(string[] args)
{
    const int guidsPerPortion = 5000000;
    const int portionCount = 20;

    string path = @"..\..\..\Databases\";
    StringIntCoding sic = new StringIntCoding(path);
    Console.WriteLine("Start");

    DateTime runStarted = DateTime.Now;
    sic.Clear();
    HashSet<string> hs = new HashSet<string>();

    int portion = 0;
    while (portion < portionCount)
    {
        // Step 1: fill the set with newly generated Guid strings.
        DateTime stepStarted = DateTime.Now;
        hs.Clear();
        for (int added = 0; added != guidsPerPortion; added++)
        {
            hs.Add(Guid.NewGuid().ToString());
        }
        long fillMs = (DateTime.Now - stepStarted).Ticks / TimeSpan.TicksPerMillisecond;
        Console.WriteLine("Set ok. duration=" + fillMs);

        // Step 2: hand the portion to the coding table (unsorted) and time the insertion only.
        string[] batch = hs.ToArray();
        stepStarted = DateTime.Now;
        sic.InsertPortion(batch);
        long insertMs = (DateTime.Now - stepStarted).Ticks / TimeSpan.TicksPerMillisecond;
        Console.WriteLine("InsertPortion ok. portion=" + portion + " HashSet.Size=" + hs.Count + " duration=" + insertMs);

        portion++;
    }

    long totalMs = (DateTime.Now - runStarted).Ticks / TimeSpan.TicksPerMillisecond;
    Console.WriteLine("Total: {0}", totalMs);
}
/// <summary>
/// Round-trip check for <c>StringIntRAMDIctionary</c>: builds a dictionary of 1000 fresh Guid
/// strings mapped to the codes 0..999, constructs the RAM dictionary from it and verifies
/// that <c>GetCode</c> returns the original code for every string.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
/// <exception cref="KeyNotFoundException">
/// Thrown when <c>GetCode</c> returns a code different from the one the string was stored with.
/// </exception>
public static void Main(string[] args)
{
    string path = @"..\..\..\Databases\";

    // Each Guid string is paired with its position in the generated sequence.
    var strings = Enumerable.Range(0, 1000)
        .ToDictionary(i => Guid.NewGuid().ToString(), i => i);

    StringIntRAMDIctionary ramDictionary = new StringIntRAMDIctionary(path + "ram dictionary", strings);
    foreach (var g_i in strings)
    {
        if (g_i.Value != ramDictionary.GetCode(g_i.Key))
        {
            throw new KeyNotFoundException("Code mismatch for key '" + g_i.Key + "': expected " + g_i.Value + ".");
        }
    }

    // The StringIntCoding experiment that used to follow an unconditional return here was
    // unreachable and has been removed.
}
/// <summary>
/// Loads identifiers from a tab-separated dump file into the coding table in up to 10 portions
/// of at most 20,000,000 input lines each, then builds the indexes and prints timings.
/// </summary>
/// <remarks>
/// Only lines with exactly three tab-separated fields are used; empty lines and lines starting
/// with '@' are skipped. The first field is always collected. The third field is collected
/// (without its last character) when it does not start with a quote, '&lt;', '-' or a digit
/// and is not "true"/"false".
/// </remarks>
/// <param name="args">Command-line arguments (unused).</param>
public static void Main2(string[] args)
{
    const int nportion = 20000000;
    string path = @"..\..\..\Databases\";

    // The reader is disposed even when reading or inserting throws.
    using (System.IO.StreamReader sr = new System.IO.StreamReader(@"F:\FactographData\freebase-rdf-2013-02-10-00-00.nt2"))
    {
        StringIntCoding sic = new StringIntCoding(path);
        Console.WriteLine("Start");

        DateTime tt0;
        string line = "";
        int linecnt = 0;
        HashSet<string> hs = new HashSet<string>();

        for (int j = 0; j < 10; j++)
        {
            hs.Clear();
            for (int i = 0; i < nportion; i++)
            {
                line = sr.ReadLine();
                linecnt++; // counted before the end-of-file check, as the reported figure always was
                if (line == null)
                {
                    break;
                }
                if (line.Length == 0 || line[0] == '@')
                {
                    continue;
                }

                string[] parts = line.Split('\t');
                if (parts.Length != 3)
                {
                    continue;
                }

                hs.Add(parts[0]);

                // An empty third field has no first character to classify.
                string third = parts[2];
                if (third.Length == 0)
                {
                    continue;
                }

                char fc = third[0];
                if (fc != '\"' && fc != '<' && fc != '-' && !char.IsDigit(fc))
                {
                    // Drop the last character of the field.
                    // NOTE(review): presumably a statement terminator - confirm against the data.
                    string ss = third.Substring(0, third.Length - 1);
                    if (ss != "true" && ss != "false")
                    {
                        hs.Add(ss);
                    }
                }
            }

            // The portion is passed on unsorted.
            string[] arr = new string[hs.Count];
            hs.CopyTo(arr);

            tt0 = DateTime.Now;
            sic.InsertPortion(arr);
            Console.WriteLine("InsertPortion ok. line=" + (linecnt / 1000000) + " HashSet.Size=" + hs.Count + " duration=" + (DateTime.Now - tt0).Ticks / 10000L);

            if (line == null)
            {
                break; // end of file reached inside this portion
            }

            // Garbage collection between portions.
            GC.Collect();
        }

        tt0 = DateTime.Now;
        sic.MakeIndexed();
        Console.WriteLine("Indexes ok. Count=" + sic.Count + " duration=" + (DateTime.Now - tt0).Ticks / 10000L);
    }
}