/// <summary>
/// Drops the property (value) part of the in-memory index on <paramref name="prop_name"/>.
/// Does nothing when no index is registered for the type/property pair.
/// </summary>
/// <remarks>
/// Runs under the table write lock. For every live snapshot of the column:
/// a snapshot whose index is <c>PropertyOnly</c> is unregistered completely (the property is removed
/// from <c>existing_indexes</c>, the index from <c>indexes</c> and the column from <c>latest_snapshots</c>);
/// any other snapshot is downgraded to <c>GroupOnly</c> and the Ids/IntValues/DoubleValues of its parts
/// are released, while the snapshot itself stays alive.
/// </remarks>
/// <param name="type_name">Table (type) name the index belongs to.</param>
/// <param name="prop_name">Column (property) name whose property index is deleted.</param>
public void DeletePropIndex(string type_name, string prop_name)
{
    bool is_indexed = existing_indexes.ContainsKey(type_name) && existing_indexes[type_name].Contains(prop_name);
    if (!is_indexed)
    {
        return;
    }

    lock (GetTableWriteLock(type_name))
    {
        string column_key = type_name + "|" + prop_name;
        var live_snapshots = snapshots_alive[column_key];
        var survivors = new List<Tuple<bool, string>>();

        foreach (var snapshot in live_snapshots)
        {
            IndexGeneric snapshot_index = indexes[snapshot.Item2];

            if (snapshot_index.IndexType != IndexType.PropertyOnly)
            {
                // The index also serves grouping: keep it, but release the property data.
                snapshot_index.IndexType = IndexType.GroupOnly;
                foreach (var part in snapshot_index.Parts)
                {
                    part.DoubleValues = null;
                    part.Ids = null;
                    part.IntValues = null;
                }
                survivors.Add(snapshot);
                continue;
            }

            // Pure property index: unregister it everywhere.
            existing_indexes[type_name].Remove(prop_name);
            IndexGeneric removed_index;
            indexes.TryRemove(snapshot.Item2, out removed_index);
            string removed_snapshot_id;
            latest_snapshots.TryRemove(column_key, out removed_snapshot_id);
        }

        if (survivors.Count > 0)
        {
            snapshots_alive[column_key] = survivors;
        }
        else
        {
            List<Tuple<bool, string>> removed_list;
            snapshots_alive.TryRemove(column_key, out removed_list);
        }
    }
}
/// <summary>
/// Builds the in-memory indexes needed to group table <paramref name="table_name"/> by column
/// <paramref name="group_name"/> and aggregate column <paramref name="value_name"/>.
/// All work happens under the table write lock and reads go through one database snapshot.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Validation: the value column must not be binary_ or string_, and the group column must be int_.
/// 2. "Id" index: if the table has no "Id" index yet, all ids are read and stored in parts of 1000 ids;
///    otherwise the ids are collected from the existing Id index whose snapshot id is read from the database.
/// 3. Group index: if <paramref name="group_name"/> has no index, a GroupOnly index is created. Every distinct
///    column value gets a dense number (0, 1, 2, ... in order of first appearance while walking the ids) in
///    GroupListMapping, and each part stores those numbers aligned with the parts of the Id index.
///    If an index already exists and is PropertyOnly, the mapping and GroupValues are derived from its
///    IntValues and the index type becomes Both.
/// 4. Value index: if <paramref name="value_name"/> has no index, a PropertyOnly index is created holding
///    int values, or double values for double_ and DateTime_ columns, aligned with the parts of the Id index.
/// Values are read with the list-based readers when the largest id is below 250000000 and with the
/// dictionary-based readers otherwise.
/// Every index created here is registered in existing_indexes, indexes, latest_snapshots, snapshots_alive
/// and last_cleanup, and its snapshot id is written to the database under the column's snapshot key.
/// A full garbage collection is forced once the lock has been released.
/// NOTE(review): when <paramref name="value_name"/> is already registered in existing_indexes (for example
/// as a GroupOnly index) no property values are built here - confirm that this is intended.
/// NOTE(review): the "ids == null" fallback in the value-index step looks unreachable because both branches
/// of the Id step assign ids - confirm before relying on it.
/// </remarks>
/// <param name="table_name">Name of the table (type) to index.</param>
/// <param name="group_name">Name of the int column used as the grouping key.</param>
/// <param name="value_name">Name of the column whose values will be aggregated.</param>
/// <exception cref="LinqDbException">
/// The value column is binary_ or string_, the group column is not int_, or the number of stored values of
/// a column differs from the number of ids ("gaps in data").
/// </exception>
public void CreateGroupByIndexString(string table_name, string group_name, string value_name) { var _write_lock = GetTableWriteLock(table_name); lock (_write_lock) { using (var snapshot = leveld_db.CreateSnapshot()) { var table_info = GetTableInfo(table_name); if (table_info.Columns[value_name] == LinqDbTypes.binary_ || table_info.Columns[value_name] == LinqDbTypes.string_) { throw new LinqDbException("Linqdb: Property type is not supported as memory index: " + value_name); } if (table_info.Columns[group_name] != LinqDbTypes.int_) { throw new LinqDbException("Linqdb: Property type is not supported as group by column: " + group_name); } string snapshot_id = Ldb.GetNewSpnapshotId(); var ro = new ReadOptions().SetSnapshot(snapshot); int total = GetTableRowCount(table_info, ro); List <int> ids = null; if (!existing_indexes.ContainsKey(table_name) || !existing_indexes[table_name].Contains("Id")) { IndexGeneric index = new IndexGeneric() { ColumnName = "Id", ColumnType = LinqDbTypes.int_, TypeName = table_name, Parts = new List <IndexPart>(), IndexType = IndexType.PropertyOnly }; ids = ReadAllIds(table_info, ro, total); int counter = 0; IndexPart cpart = null; foreach (var id in ids) { if (counter % 1000 == 0) { cpart = new IndexPart() { IntValues = new List <int>(1000) }; index.Parts.Add(cpart); } cpart.IntValues.Add(id); counter++; } if (!existing_indexes.ContainsKey(table_name)) { existing_indexes[table_name] = new HashSet <string>() { "Id" }; } else { existing_indexes[table_name].Add("Id"); } indexes[table_name + "|Id|" + snapshot_id] = index; latest_snapshots[table_name + "|Id"] = snapshot_id; snapshots_alive.TryAdd(table_name + "|Id", new List <Tuple <bool, string> >() { new Tuple <bool, string>(false, table_name + "|Id|" + snapshot_id) }); last_cleanup.TryAdd(table_name + "|Id", DateTime.Now); // var skey = MakeSnapshotKey(table_info.TableNumber, table_info.ColumnNumbers["Id"]); leveld_db.Put(skey, Encoding.UTF8.GetBytes(snapshot_id)); } else { ids = new List 
<int>(total); var skey = MakeSnapshotKey(table_info.TableNumber, table_info.ColumnNumbers["Id"]); var snapid = leveld_db.Get(skey, null, ro); var id_snapshot_id = Encoding.UTF8.GetString(snapid); var index = indexes[table_name + "|Id|" + id_snapshot_id]; for (int i = 0; i < index.Parts.Count(); i++) { ids.AddRange(index.Parts[i].IntValues); } } if (!existing_indexes[table_name].Contains(group_name)) { IndexGeneric index = new IndexGeneric() { ColumnName = group_name, ColumnType = LinqDbTypes.int_, TypeName = table_name, GroupListMapping = new ConcurrentDictionary <int, int>(), Parts = new List <IndexPart>(), IndexType = IndexType.GroupOnly }; int totalex = 0; if ((ids.Any() ? ids.Max() : 0) < 250000000) { var ivalues = ReadAllIntValuesList(index.ColumnName, table_info, ro, ids.Any() ? ids.Max() : 0, out totalex); if (totalex != ids.Count()) { throw new LinqDbException("Linqdb: column " + index.ColumnName + " has gaps in data. Prior to building an index gaps must be updated with values. (" + totalex + " != " + ids.Count() + ")"); } int map = 0; foreach (var id in ids) { if (!index.GroupListMapping.ContainsKey(ivalues[id])) { index.GroupListMapping[ivalues[id]] = map; map++; } } var latest_snapshot_id = latest_snapshots[table_name + "|Id"]; var id_index = indexes[table_name + "|Id|" + latest_snapshot_id]; for (int j = 0; j < id_index.Parts.Count(); j++) { var part = id_index.Parts[j]; var new_part = new IndexPart() { GroupValues = new List <int>(part.IntValues.Count()) }; index.Parts.Add(new_part); for (int k = 0; k < part.IntValues.Count(); k++) { var ival = ivalues[part.IntValues[k]]; var val = index.GroupListMapping[ival]; new_part.GroupValues.Add(val); } } } else { var ivalues = ReadAllIntValuesDic(index.ColumnName, table_info, ro, ids.Any() ? ids.Max() : 0, total, out totalex); if (totalex != ids.Count()) { throw new LinqDbException("Linqdb: column " + index.ColumnName + " has gaps in data. Prior to building an index gaps must be updated with values. 
(" + totalex + " != " + ids.Count() + ")"); } int map = 0; foreach (var id in ids) { if (!index.GroupListMapping.ContainsKey(ivalues[id])) { index.GroupListMapping[ivalues[id]] = map; map++; } } var latest_snapshot_id = latest_snapshots[table_name + "|Id"]; var id_index = indexes[table_name + "|Id|" + latest_snapshot_id]; for (int j = 0; j < id_index.Parts.Count(); j++) { var part = id_index.Parts[j]; var new_part = new IndexPart() { GroupValues = new List <int>(part.IntValues.Count()) }; index.Parts.Add(new_part); for (int k = 0; k < part.IntValues.Count(); k++) { var ival = ivalues[part.IntValues[k]]; var val = index.GroupListMapping[ival]; new_part.GroupValues.Add(val); } } } existing_indexes[table_name].Add(group_name); indexes[table_name + "|" + group_name + "|" + snapshot_id] = index; latest_snapshots[table_name + "|" + group_name] = snapshot_id; snapshots_alive.TryAdd(table_name + "|" + group_name, new List <Tuple <bool, string> >() { new Tuple <bool, string>(false, table_name + "|" + group_name + "|" + snapshot_id) }); last_cleanup.TryAdd(table_name + "|" + group_name, DateTime.Now); // var skey = MakeSnapshotKey(table_info.TableNumber, table_info.ColumnNumbers[group_name]); leveld_db.Put(skey, Encoding.UTF8.GetBytes(snapshot_id)); } else //could be property index, we need a group one { var latest_snapshot_id = latest_snapshots[table_name + "|" + group_name]; var index = indexes[table_name + "|" + group_name + "|" + latest_snapshot_id]; if (index.IndexType == IndexType.PropertyOnly) { index.GroupListMapping = new ConcurrentDictionary <int, int>(); int map = 0; for (int i = 0; i < index.Parts.Count(); i++) { int icount = index.Parts[i].IntValues.Count(); index.Parts[i].GroupValues = new List <int>(icount); var gv = index.Parts[i].GroupValues; for (int j = 0; j < icount; j++) { var val = index.Parts[i].IntValues[j]; var ival = (int)val; if (!index.GroupListMapping.ContainsKey(ival)) { index.GroupListMapping[ival] = map; map++; } 
gv.Add(index.GroupListMapping[ival]); } } index.IndexType = IndexType.Both; } } if (!existing_indexes[table_name].Contains(value_name)) { IndexGeneric index = new IndexGeneric() { ColumnName = value_name, TypeName = table_name, Parts = new List <IndexPart>(), IndexType = IndexType.PropertyOnly }; if (ids == null) { ids = ReadAllValues("Id", table_info, new OperResult() { All = true }, ro, new ReadByteCount() { read_size_bytes = -Int32.MaxValue }, total).Item1; } var latest_snapshot_id = latest_snapshots[table_name + "|Id"]; switch (table_info.Columns[value_name]) { case LinqDbTypes.int_: index.ColumnType = LinqDbTypes.int_; int totalex = 0; if ((ids.Any() ? ids.Max() : 0) < 250000000) { var ivalues = ReadAllIntValuesList(index.ColumnName, table_info, ro, ids.Any() ? ids.Max() : 0, out totalex); if (totalex != ids.Count()) { throw new LinqDbException("Linqdb: column " + index.ColumnName + " has gaps in data. Prior to building an index gaps must be updated with values. (" + totalex + " != " + ids.Count() + ")"); } var id_index = indexes[table_name + "|Id|" + latest_snapshot_id]; for (int j = 0; j < id_index.Parts.Count(); j++) { var part = id_index.Parts[j]; var new_part = new IndexPart() { IntValues = new List <int>(part.IntValues.Count()) }; index.Parts.Add(new_part); for (int k = 0; k < part.IntValues.Count(); k++) { var val = ivalues[(int)part.IntValues[k]]; new_part.IntValues.Add(val); } } } else { var ivalues = ReadAllIntValuesDic(index.ColumnName, table_info, ro, ids.Any() ? ids.Max() : 0, ids.Count(), out totalex); if (totalex != ids.Count()) { throw new LinqDbException("Linqdb: column " + index.ColumnName + " has gaps in data. Prior to building an index gaps must be updated with values. 
(" + totalex + " != " + ids.Count() + ")"); } var id_index = indexes[table_name + "|Id|" + latest_snapshot_id]; for (int j = 0; j < id_index.Parts.Count(); j++) { var part = id_index.Parts[j]; var new_part = new IndexPart() { IntValues = new List <int>(part.IntValues.Count()) }; index.Parts.Add(new_part); for (int k = 0; k < part.IntValues.Count(); k++) { var val = ivalues[(int)part.IntValues[k]]; new_part.IntValues.Add(val); } } } break; case LinqDbTypes.double_: case LinqDbTypes.DateTime_: index.ColumnType = LinqDbTypes.double_; int totalexd = 0; if ((ids.Any() ? ids.Max() : 0) < 250000000) { var dvalues = ReadAllDoubleValuesList(index.ColumnName, table_info, ro, ids.Any() ? ids.Max() : 0, out totalexd); if (totalexd != ids.Count()) { throw new LinqDbException("Linqdb: column " + index.ColumnName + " has gaps in data. Prior to building an index gaps must be updated with values. (" + totalexd + " != " + ids.Count() + ")"); } var id_index = indexes[table_name + "|Id|" + latest_snapshot_id]; for (int j = 0; j < id_index.Parts.Count(); j++) { var part = id_index.Parts[j]; var new_part = new IndexPart() { DoubleValues = new List <double>(part.IntValues.Count()) }; index.Parts.Add(new_part); for (int k = 0; k < part.IntValues.Count(); k++) { var val = dvalues[(int)part.IntValues[k]]; new_part.DoubleValues.Add(val); } } } else { var dvalues = ReadAllDoubleValuesDic(index.ColumnName, table_info, ro, ids.Any() ? ids.Max() : 0, ids.Count(), out totalexd); if (totalexd != ids.Count()) { throw new LinqDbException("Linqdb: column " + index.ColumnName + " has gaps in data. Prior to building an index gaps must be updated with values. 
(" + totalexd + " != " + ids.Count() + ")"); } var id_index = indexes[table_name + "|Id|" + latest_snapshot_id]; for (int j = 0; j < id_index.Parts.Count(); j++) { var part = id_index.Parts[j]; var new_part = new IndexPart() { DoubleValues = new List <double>(part.IntValues.Count()) }; index.Parts.Add(new_part); for (int k = 0; k < part.IntValues.Count(); k++) { var val = dvalues[(int)part.IntValues[k]]; new_part.DoubleValues.Add(val); } } } break; } existing_indexes[table_name].Add(value_name); indexes[table_name + "|" + value_name + "|" + snapshot_id] = index; latest_snapshots[table_name + "|" + value_name] = snapshot_id; snapshots_alive.TryAdd(table_name + "|" + value_name, new List <Tuple <bool, string> >() { new Tuple <bool, string>(false, table_name + "|" + value_name + "|" + snapshot_id) }); last_cleanup.TryAdd(table_name + "|" + value_name, DateTime.Now); // var skey = MakeSnapshotKey(table_info.TableNumber, table_info.ColumnNumbers[value_name]); leveld_db.Put(skey, Encoding.UTF8.GetBytes(snapshot_id)); } } } GC.Collect(); GC.WaitForPendingFinalizers(); }
/// <summary>
/// Collects the (id, value) pairs of an in-memory index for the requested ids.
/// </summary>
/// <remarks>
/// Two strategies are used, chosen by the largest requested id:
/// below 300000000 a direct lookup table indexed by id is built; otherwise a 30000000-slot
/// pre-filter (id modulo size) is combined with a binary search over the sorted ids.
/// Results are produced in index order (part by part), not in the order of <paramref name="ids"/>.
/// Ids are used directly as table positions, so they are assumed to be non-negative - TODO confirm.
/// </remarks>
/// <param name="ids">Ids to look up. The list itself is never modified.</param>
/// <param name="index">Value index; part i / position j is read alongside the same position of <paramref name="ids_index"/>.</param>
/// <param name="ids_index">Id index whose parts hold the row ids in IntValues.</param>
/// <param name="type"><c>int_</c> reads IntValues into Item1; any other type reads DoubleValues into Item2.</param>
/// <returns>
/// Item1: int matches (null for non-int types). Item2: double matches (null for int types).
/// Item3: the sorted id list when the binary-search strategy was used, otherwise null.
/// </returns>
Tuple<List<KeyValuePair<int, int>>, List<KeyValuePair<int, double>>, List<int>> MatchesFromIndex(List<int> ids, IndexGeneric index, IndexGeneric ids_index, LinqDbTypes type)
{
    bool want_int = type == LinqDbTypes.int_;
    List<KeyValuePair<int, int>> res_int = want_int ? new List<KeyValuePair<int, int>>(ids.Count) : null;
    List<KeyValuePair<int, double>> res_double = want_int ? null : new List<KeyValuePair<int, double>>(ids.Count);

    // Nothing requested: previously this fell through to a negative-capacity list allocation and threw.
    if (ids.Count == 0)
    {
        return new Tuple<List<KeyValuePair<int, int>>, List<KeyValuePair<int, double>>, List<int>>(res_int, res_double, null);
    }

    // One pass: largest id and whether the ids are already in ascending order.
    int max = Int32.MinValue;
    bool is_sorted = true;
    int previous = Int32.MinValue;
    foreach (var id in ids)
    {
        if (id > max)
        {
            max = id;
        }
        if (id < previous)
        {
            is_sorted = false;
        }
        previous = id;
    }

    const int max_size = 300000000;
    if (max < max_size)
    {
        // Direct lookup table; a bool[] starts zeroed, no element-by-element fill needed.
        var wanted = new bool[max + 1];
        foreach (var id in ids)
        {
            wanted[id] = true;
        }

        for (int i = 0; i < index.Parts.Count; i++)
        {
            var val_part = index.Parts[i];
            var iids = ids_index.Parts[i].IntValues;
            int icount = iids.Count;
            for (int j = 0; j < icount; j++)
            {
                int id = iids[j];
                if (id <= max && wanted[id])
                {
                    if (want_int)
                    {
                        res_int.Add(new KeyValuePair<int, int>(id, val_part.IntValues[j]));
                    }
                    else
                    {
                        res_double.Add(new KeyValuePair<int, double>(id, val_part.DoubleValues[j]));
                    }
                }
            }
        }
        return new Tuple<List<KeyValuePair<int, int>>, List<KeyValuePair<int, double>>, List<int>>(res_int, res_double, null);
    }

    // Ids too large for a direct table: cheap modulo pre-filter, then exact binary search.
    const int bloom_size = 30000000;
    var bloom_filter = new bool[bloom_size];
    foreach (var id in ids)
    {
        bloom_filter[id % bloom_size] = true;
    }
    if (!is_sorted)
    {
        // Sort a copy; the caller's list stays untouched.
        ids = ids.OrderBy(f => f).ToList();
    }

    for (int i = 0; i < ids_index.Parts.Count; i++)
    {
        var val_part = index.Parts[i];
        var iids = ids_index.Parts[i].IntValues;
        int icount = iids.Count;
        for (int j = 0; j < icount; j++)
        {
            int id = iids[j];
            if (bloom_filter[id % bloom_size] && ids.BinarySearch(id) >= 0)
            {
                if (want_int)
                {
                    res_int.Add(new KeyValuePair<int, int>(id, val_part.IntValues[j]));
                }
                else
                {
                    res_double.Add(new KeyValuePair<int, double>(id, val_part.DoubleValues[j]));
                }
            }
        }
    }
    return new Tuple<List<KeyValuePair<int, int>>, List<KeyValuePair<int, double>>, List<int>>(res_int, res_double, ids);
}
/// <summary>
/// Returns the (id, value) pairs of <paramref name="data"/> whose id is contained in <paramref name="res_ids"/>.
/// </summary>
/// <remarks>
/// Membership is tested with a lookup table covering the range [min id, max id] of <paramref name="res_ids"/>.
/// The range size is computed in 64-bit arithmetic so that a very wide id range is reported as
/// "id too large" instead of wrapping around and failing later with an unrelated allocation error.
/// Results are produced in index order (part by part).
/// </remarks>
/// <param name="ids">Id index; part i / position j holds the row id of the same position in <paramref name="data"/>.</param>
/// <param name="data">Value index to read (IntValues, or DoubleValues when <paramref name="is_double"/> is true).</param>
/// <param name="res_ids">Ids to keep.</param>
/// <param name="is_double">Selects which value list is read and which result list is filled.</param>
/// <returns>Item1: int matches (null when <paramref name="is_double"/>). Item2: double matches (null otherwise).</returns>
/// <exception cref="LinqDbException">The id range of <paramref name="res_ids"/> spans more than 300000000 values.</exception>
Tuple<List<Tuple<int, int>>, List<Tuple<int, double>>> EfficientContainsOneCall(IndexGeneric ids, IndexGeneric data, List<int> res_ids, bool is_double)
{
    List<Tuple<int, int>> res_int = is_double ? null : new List<Tuple<int, int>>();
    List<Tuple<int, double>> res_double = is_double ? new List<Tuple<int, double>>() : null;
    var res = new Tuple<List<Tuple<int, int>>, List<Tuple<int, double>>>(res_int, res_double);

    // No ids requested means no matches; handled explicitly rather than via arithmetic wrap-around.
    if (res_ids.Count == 0)
    {
        return res;
    }

    int min = res_ids[0];
    int max = res_ids[0];
    foreach (var id in res_ids)
    {
        if (id > max)
        {
            max = id;
        }
        if (id < min)
        {
            min = id;
        }
    }

    const int max_size = 300000000;
    long size = (long)max - min + 1;
    if (size > max_size)
    {
        throw new LinqDbException("Linqdb: id too large...");
    }

    // Lookup table over [min, max]; position = id - min.
    var wanted = new bool[(int)size];
    foreach (var id in res_ids)
    {
        wanted[id - min] = true;
    }

    for (int i = 0; i < data.Parts.Count; i++)
    {
        var part_ids = ids.Parts[i].IntValues;
        if (!is_double)
        {
            var values = data.Parts[i].IntValues;
            int icount = values.Count;
            for (int j = 0; j < icount; j++)
            {
                // 64-bit offset: an index id far outside the requested range must not wrap into it.
                long offset = (long)part_ids[j] - min;
                if (offset >= 0 && offset < size && wanted[(int)offset])
                {
                    res_int.Add(new Tuple<int, int>(part_ids[j], values[j]));
                }
            }
        }
        else
        {
            var values = data.Parts[i].DoubleValues;
            int icount = values.Count;
            for (int j = 0; j < icount; j++)
            {
                long offset = (long)part_ids[j] - min;
                if (offset >= 0 && offset < size && wanted[(int)offset])
                {
                    res_double.Add(new Tuple<int, double>(part_ids[j], values[j]));
                }
            }
        }
    }
    return res;
}
/// <summary>
/// Evaluates group-by aggregates from the in-memory indexes.
/// </summary>
/// <remarks>
/// Groups are identified by the dense numbers stored in the group index (the values of GroupListMapping);
/// the "Key" aggregate translates a group number back to the original column value.
/// When <c>fres.All</c> is false, the group index and every value index are first reduced to the rows in
/// <c>fres.ResIds</c> (single-part copies), so the shared indexes are never modified.
/// Aggregate names are matched by prefix in this order: Key, CountDistinct, Count, Sum, Max, Min, Average.
/// For value columns whose index type is neither int_ nor double_ nothing is appended for that aggregate.
/// NOTE: "Sum" over an int column still accumulates in int (callers receive a boxed int), so it can wrap
/// on overflow; "Average" accumulates in long and does not.
/// </remarks>
/// <param name="distinct_groups">Receives the group numbers that occur in the scanned rows. Entries already present are kept and also get a result row.</param>
/// <param name="fres">Filter result; <c>All</c> means every row, otherwise only rows with ids in <c>ResIds</c>.</param>
/// <param name="aggregates">Pairs of (aggregate function name, property name); the property may be empty for Key and Count.</param>
/// <param name="table_info">Table metadata (name, table number, column numbers).</param>
/// <param name="column_number">Column number of the grouping column, used to locate its snapshot id.</param>
/// <param name="column_name">Name of the grouping column.</param>
/// <param name="ro">Read options used for the snapshot id lookups.</param>
/// <param name="count">Not used by this implementation; kept so existing callers keep compiling.</param>
/// <returns>For every group number, the aggregate values in the order of <paramref name="aggregates"/>.</returns>
/// <exception cref="LinqDbException">A required index (group, Id or value) does not exist, or an aggregate function is not supported.</exception>
public Dictionary<int, List<object>> SelectGrouppedCommon(HashSet<int> distinct_groups, OperResult fres, List<Tuple<string, string>> aggregates, TableInfo table_info, short column_number, string column_name, ReadOptions ro, int count)
{
    var cdata = new Dictionary<int, List<object>>();
    string table_name = table_info.Name;
    string no_group_index = "Linqdb: group index on property " + column_name + " does not exist.";

    // --- Resolve the group index of the snapshot being read -------------------------------------------
    var skey = MakeSnapshotKey(table_info.TableNumber, column_number);
    var snapid = leveld_db.Get(skey, null, ro);
    if (snapid == null || !existing_indexes.ContainsKey(table_name) || !existing_indexes[table_name].Contains(column_name))
    {
        throw new LinqDbException(no_group_index);
    }
    IndexGeneric group_index;
    if (!indexes.TryGetValue(table_name + "|" + column_name + "|" + Encoding.UTF8.GetString(snapid), out group_index)
        || group_index.IndexType == IndexType.PropertyOnly)
    {
        throw new LinqDbException(no_group_index);
    }

    // --- Resolve the Id index (previously a missing entry surfaced as ArgumentNull/KeyNotFound) -------
    skey = MakeSnapshotKey(table_info.TableNumber, table_info.ColumnNumbers["Id"]);
    snapid = leveld_db.Get(skey, null, ro);
    IndexGeneric ids_index;
    if (snapid == null || !indexes.TryGetValue(table_name + "|Id|" + Encoding.UTF8.GetString(snapid), out ids_index))
    {
        throw new LinqDbException("Linqdb: index on property Id does not exist.");
    }

    // --- Restrict the group index to the filtered rows -----------------------------------------------
    if (!fres.All)
    {
        var filtered_groups = new IndexGeneric()
        {
            ColumnName = group_index.ColumnName,
            ColumnType = group_index.ColumnType,
            TypeName = group_index.TypeName,
            GroupListMapping = new ConcurrentDictionary<int, int>(group_index.GroupListMapping),
            Parts = new List<IndexPart>() { new IndexPart() },
            IndexType = group_index.IndexType
        };
        filtered_groups.Parts[0].GroupValues = EfficientContainsOneCall(ids_index, group_index, fres.ResIds, true, false).Item1;
        group_index = filtered_groups;
    }

    // --- Resolve (and, if needed, restrict) the value index of every aggregated property -------------
    var prop_indexes = new Dictionary<string, IndexGeneric>();
    foreach (var agr in aggregates)
    {
        var prop = agr.Item2;
        if (string.IsNullOrEmpty(prop) || prop_indexes.ContainsKey(prop))
        {
            continue;
        }

        string no_prop_index = "Linqdb: group index on property " + prop + " does not exist.";
        skey = MakeSnapshotKey(table_info.TableNumber, table_info.ColumnNumbers[prop]);
        snapid = leveld_db.Get(skey, null, ro);
        if (snapid == null || !existing_indexes.ContainsKey(table_name) || !existing_indexes[table_name].Contains(prop))
        {
            throw new LinqDbException(no_prop_index);
        }
        IndexGeneric prop_index;
        if (!indexes.TryGetValue(table_name + "|" + prop + "|" + Encoding.UTF8.GetString(snapid), out prop_index))
        {
            throw new LinqDbException(no_prop_index);
        }

        if (!fres.All)
        {
            var filtered_values = new IndexGeneric()
            {
                ColumnName = prop_index.ColumnName,
                ColumnType = prop_index.ColumnType,
                TypeName = prop_index.TypeName,
                Parts = new List<IndexPart>() { new IndexPart() },
            };
            switch (filtered_values.ColumnType)
            {
                case LinqDbTypes.int_:
                    filtered_values.Parts[0].IntValues = EfficientContainsOneCall(ids_index, prop_index, fres.ResIds, false, false).Item1;
                    break;
                case LinqDbTypes.double_:
                case LinqDbTypes.DateTime_:
                    filtered_values.Parts[0].DoubleValues = EfficientContainsOneCall(ids_index, prop_index, fres.ResIds, false, true).Item2;
                    break;
                default:
                    break;
            }
            prop_index = filtered_values;
        }
        prop_indexes[prop] = prop_index;
    }

    // --- Number of groups and reverse mapping (group number -> original value), built in one pass ----
    // An empty mapping (empty table) yields zero groups instead of a "sequence contains no elements" error.
    int group_count = 0;
    var group_keys = new Dictionary<int, int>();
    foreach (var pair in group_index.GroupListMapping)
    {
        group_keys[pair.Value] = pair.Key;
        if (pair.Value >= group_count)
        {
            group_count = pair.Value + 1;
        }
    }

    // --- Which groups actually occur in the scanned rows ----------------------------------------------
    var group_present = new bool[group_count];
    for (int i = 0; i < group_index.Parts.Count; i++)
    {
        var gv = group_index.Parts[i].GroupValues;
        int icount = gv.Count;
        for (int j = 0; j < icount; j++)
        {
            group_present[gv[j]] = true;
        }
    }
    for (int g = 0; g < group_count; g++)
    {
        if (group_present[g])
        {
            distinct_groups.Add(g);
        }
    }
    foreach (var group in distinct_groups)
    {
        cdata[group] = new List<object>();
    }

    // --- Aggregates -----------------------------------------------------------------------------------
    foreach (var agr in aggregates)
    {
        string func = agr.Item1;
        string prop = string.IsNullOrEmpty(agr.Item2) ? "" : agr.Item2;

        if (func.StartsWith("Key", StringComparison.Ordinal))
        {
            // Reverse dictionary lookup instead of scanning the whole mapping once per group.
            foreach (var gr in cdata)
            {
                int key;
                if (!group_keys.TryGetValue(gr.Key, out key))
                {
                    throw new InvalidOperationException("Linqdb: group " + gr.Key + " has no key mapping.");
                }
                gr.Value.Add(key);
            }
        }
        else if (func.StartsWith("CountDistinct", StringComparison.Ordinal))
        {
            // Per group: a HashSet holds the distinct values; a small "last value per slot" cache only
            // avoids repeated HashSet.Add calls for runs of equal values. It never affects the result.
            const int cache_size = 100000;
            var values_index = prop_indexes[prop];
            switch (values_index.ColumnType)
            {
                case LinqDbTypes.int_:
                {
                    var distinct = new HashSet<int>[group_count];
                    var slot_used = new bool[group_count][];
                    var slot_value = new int[group_count][];
                    for (int g = 0; g < group_count; g++)
                    {
                        distinct[g] = new HashSet<int>();
                        slot_used[g] = new bool[cache_size];
                        slot_value[g] = new int[cache_size];
                    }
                    for (int i = 0; i < group_index.Parts.Count; i++)
                    {
                        var gv = group_index.Parts[i].GroupValues;
                        var iv = values_index.Parts[i].IntValues;
                        int icount = gv.Count;
                        for (int j = 0; j < icount; j++)
                        {
                            int g = gv[j];
                            int val = iv[j];
                            // Clear the sign bit first: a plain "val % size" is negative for negative values.
                            int slot = (val & Int32.MaxValue) % cache_size;
                            if (!slot_used[g][slot] || slot_value[g][slot] != val)
                            {
                                distinct[g].Add(val);
                                slot_used[g][slot] = true;
                                slot_value[g][slot] = val;
                            }
                        }
                    }
                    foreach (var gr in cdata)
                    {
                        gr.Value.Add(distinct[gr.Key].Count);
                    }
                    break;
                }
                case LinqDbTypes.double_:
                {
                    var distinct = new HashSet<double>[group_count];
                    var slot_used = new bool[group_count][];
                    var slot_value = new double[group_count][];
                    for (int g = 0; g < group_count; g++)
                    {
                        distinct[g] = new HashSet<double>();
                        slot_used[g] = new bool[cache_size];
                        slot_value[g] = new double[cache_size];
                    }
                    for (int i = 0; i < group_index.Parts.Count; i++)
                    {
                        var gv = group_index.Parts[i].GroupValues;
                        var dv = values_index.Parts[i].DoubleValues;
                        int icount = gv.Count;
                        for (int j = 0; j < icount; j++)
                        {
                            int g = gv[j];
                            double val = dv[j];
                            // Math.Abs throws for a hash code of Int32.MinValue; masking the sign bit cannot.
                            int slot = (val.GetHashCode() & Int32.MaxValue) % cache_size;
                            if (!slot_used[g][slot] || slot_value[g][slot] != val)
                            {
                                distinct[g].Add(val);
                                slot_used[g][slot] = true;
                                slot_value[g][slot] = val;
                            }
                        }
                    }
                    foreach (var gr in cdata)
                    {
                        gr.Value.Add(distinct[gr.Key].Count);
                    }
                    break;
                }
                default:
                    break;
            }
        }
        else if (func.StartsWith("Count", StringComparison.Ordinal))
        {
            var counts = new int[group_count];
            for (int i = 0; i < group_index.Parts.Count; i++)
            {
                var gv = group_index.Parts[i].GroupValues;
                int icount = gv.Count;
                for (int j = 0; j < icount; j++)
                {
                    counts[gv[j]]++;
                }
            }
            foreach (var gr in cdata)
            {
                gr.Value.Add(counts[gr.Key]);
            }
        }
        else if (func.StartsWith("Sum", StringComparison.Ordinal))
        {
            var values_index = prop_indexes[prop];
            switch (values_index.ColumnType)
            {
                case LinqDbTypes.int_:
                {
                    var sums = new int[group_count];
                    for (int i = 0; i < group_index.Parts.Count; i++)
                    {
                        var gv = group_index.Parts[i].GroupValues;
                        var iv = values_index.Parts[i].IntValues;
                        int icount = gv.Count;
                        for (int j = 0; j < icount; j++)
                        {
                            sums[gv[j]] += iv[j];
                        }
                    }
                    foreach (var gr in cdata)
                    {
                        gr.Value.Add(sums[gr.Key]);
                    }
                    break;
                }
                case LinqDbTypes.double_:
                {
                    var sums = new double[group_count];
                    for (int i = 0; i < group_index.Parts.Count; i++)
                    {
                        var gv = group_index.Parts[i].GroupValues;
                        var dv = values_index.Parts[i].DoubleValues;
                        int icount = gv.Count;
                        for (int j = 0; j < icount; j++)
                        {
                            sums[gv[j]] += dv[j];
                        }
                    }
                    foreach (var gr in cdata)
                    {
                        gr.Value.Add(sums[gr.Key]);
                    }
                    break;
                }
                default:
                    break;
            }
        }
        else if (func.StartsWith("Max", StringComparison.Ordinal))
        {
            var values_index = prop_indexes[prop];
            switch (values_index.ColumnType)
            {
                case LinqDbTypes.int_:
                {
                    // null = no row seen for the group yet; it is boxed as null in the result.
                    var best = new int?[group_count];
                    for (int i = 0; i < group_index.Parts.Count; i++)
                    {
                        var gv = group_index.Parts[i].GroupValues;
                        var iv = values_index.Parts[i].IntValues;
                        int icount = gv.Count;
                        for (int j = 0; j < icount; j++)
                        {
                            int g = gv[j];
                            int val = iv[j];
                            if (!best[g].HasValue || best[g].Value < val)
                            {
                                best[g] = val;
                            }
                        }
                    }
                    foreach (var gr in cdata)
                    {
                        gr.Value.Add(best[gr.Key]);
                    }
                    break;
                }
                case LinqDbTypes.double_:
                {
                    var best = new double?[group_count];
                    for (int i = 0; i < group_index.Parts.Count; i++)
                    {
                        var gv = group_index.Parts[i].GroupValues;
                        var dv = values_index.Parts[i].DoubleValues;
                        int icount = gv.Count;
                        for (int j = 0; j < icount; j++)
                        {
                            int g = gv[j];
                            double val = dv[j];
                            if (!best[g].HasValue || best[g].Value < val)
                            {
                                best[g] = val;
                            }
                        }
                    }
                    foreach (var gr in cdata)
                    {
                        gr.Value.Add(best[gr.Key]);
                    }
                    break;
                }
                default:
                    break;
            }
        }
        else if (func.StartsWith("Min", StringComparison.Ordinal))
        {
            var values_index = prop_indexes[prop];
            switch (values_index.ColumnType)
            {
                case LinqDbTypes.int_:
                {
                    var best = new int?[group_count];
                    for (int i = 0; i < group_index.Parts.Count; i++)
                    {
                        var gv = group_index.Parts[i].GroupValues;
                        var iv = values_index.Parts[i].IntValues;
                        int icount = gv.Count;
                        for (int j = 0; j < icount; j++)
                        {
                            int g = gv[j];
                            int val = iv[j];
                            if (!best[g].HasValue || best[g].Value > val)
                            {
                                best[g] = val;
                            }
                        }
                    }
                    foreach (var gr in cdata)
                    {
                        gr.Value.Add(best[gr.Key]);
                    }
                    break;
                }
                case LinqDbTypes.double_:
                {
                    var best = new double?[group_count];
                    for (int i = 0; i < group_index.Parts.Count; i++)
                    {
                        var gv = group_index.Parts[i].GroupValues;
                        var dv = values_index.Parts[i].DoubleValues;
                        int icount = gv.Count;
                        for (int j = 0; j < icount; j++)
                        {
                            int g = gv[j];
                            double val = dv[j];
                            if (!best[g].HasValue || best[g].Value > val)
                            {
                                best[g] = val;
                            }
                        }
                    }
                    foreach (var gr in cdata)
                    {
                        gr.Value.Add(best[gr.Key]);
                    }
                    break;
                }
                default:
                    break;
            }
        }
        else if (func.StartsWith("Average", StringComparison.Ordinal))
        {
            var values_index = prop_indexes[prop];
            var row_counts = new int[group_count];
            switch (values_index.ColumnType)
            {
                case LinqDbTypes.int_:
                {
                    // 64-bit accumulator: an int sum wraps once a group's total passes Int32.MaxValue.
                    var sums = new long[group_count];
                    for (int i = 0; i < group_index.Parts.Count; i++)
                    {
                        var gv = group_index.Parts[i].GroupValues;
                        var iv = values_index.Parts[i].IntValues;
                        int icount = gv.Count;
                        for (int j = 0; j < icount; j++)
                        {
                            int g = gv[j];
                            sums[g] += iv[j];
                            row_counts[g]++;
                        }
                    }
                    foreach (var gr in cdata)
                    {
                        if (row_counts[gr.Key] != 0)
                        {
                            gr.Value.Add(sums[gr.Key] / (double)row_counts[gr.Key]);
                        }
                        else
                        {
                            // No rows: the sum is 0 and is reported as an int, as before.
                            gr.Value.Add((int)sums[gr.Key]);
                        }
                    }
                    break;
                }
                case LinqDbTypes.double_:
                {
                    var sums = new double[group_count];
                    for (int i = 0; i < group_index.Parts.Count; i++)
                    {
                        var gv = group_index.Parts[i].GroupValues;
                        var dv = values_index.Parts[i].DoubleValues;
                        int icount = gv.Count;
                        for (int j = 0; j < icount; j++)
                        {
                            int g = gv[j];
                            sums[g] += dv[j];
                            row_counts[g]++;
                        }
                    }
                    foreach (var gr in cdata)
                    {
                        if (row_counts[gr.Key] != 0)
                        {
                            gr.Value.Add(sums[gr.Key] / (double)row_counts[gr.Key]);
                        }
                        else
                        {
                            gr.Value.Add(sums[gr.Key]);
                        }
                    }
                    break;
                }
                default:
                    break;
            }
        }
        else
        {
            throw new LinqDbException("Linqdb: group aggregation function " + agr.Item1 + " is not supported.");
        }
    }

    return cdata;
}
/// <summary>
/// Builds the next in-memory snapshot of the index on <paramref name="prop"/> of
/// <paramref name="type"/> from the latest snapshot plus a batch of changes, registers it
/// under <paramref name="snapshot_id"/> and makes it the latest snapshot.
/// </summary>
/// <remarks>
/// Parts of the previous snapshot that are not touched by the batch are reused as-is by the
/// new snapshot; a part is copied only when one of its rows is changed or deleted, or when
/// it is the last part and new rows are appended to it.
/// Row ids are not kept on the parts between calls: they are borrowed from the latest "Id"
/// index (part i of the "Id" index supplies the ids of part i) and released before returning.
/// The group mapping dictionary is shared between the old and the new snapshot and is
/// extended in place when unseen group values appear.
/// Column types other than int, double and DateTime yield a snapshot with no parts.
/// NOTE(review): presumably the caller holds the table write lock and refreshes the "Id"
/// index after the other properties so that parts stay aligned - confirm against callers.
/// </remarks>
/// <param name="type">Table (type) name; first component of every index key.</param>
/// <param name="prop">Property (column) name whose index is being advanced.</param>
/// <param name="snapshot_id">Id under which the new snapshot is stored.</param>
/// <param name="index_new">Rows to append (parallel Ids / IntValues / DoubleValues lists); may be null.</param>
/// <param name="index_deleted">Ids of rows to drop; may be null.</param>
/// <param name="index_changed">Replacement values keyed by row id; may be null.</param>
public void MakeNewPropSnapshot(string type, string prop, string snapshot_id, IndexNewData index_new, IndexDeletedData index_deleted, IndexChangedData index_changed)
{
    // Maximum number of rows stored in a single index part.
    const int part_capacity = 1000;

    var latest_key = type + "|" + prop;
    var latest_snapshot_id = latest_snapshots[latest_key];
    var index = indexes[latest_key + "|" + latest_snapshot_id];
    var ids_index = indexes[type + "|Id|" + latest_snapshots[type + "|Id"]];
    var parts = new List<IndexPart>();
    var new_index = new IndexGeneric()
    {
        ColumnName = index.ColumnName,
        ColumnType = index.ColumnType,
        GroupListMapping = index.GroupListMapping,
        IndexType = index.IndexType,
        Parts = parts,
        TypeName = index.TypeName
    };

    bool has_changed_int = index_changed != null && index_changed.IntValues != null && index_changed.IntValues.Any();
    bool has_changed_double = index_changed != null && index_changed.DoubleValues != null && index_changed.DoubleValues.Any();
    bool has_deleted = index_deleted != null && index_deleted.Ids.Any();

    // Decide which per-row columns this index carries.
    bool track_groups = false;
    bool track_ints = false;
    bool track_doubles = false;
    bool supported = true;
    switch (index.ColumnType)
    {
        case LinqDbTypes.int_:
            // PropertyOnly -> ints; GroupOnly -> groups; anything else -> both.
            track_ints = index.IndexType != IndexType.GroupOnly;
            track_groups = index.IndexType != IndexType.PropertyOnly;
            break;
        case LinqDbTypes.double_:
        case LinqDbTypes.DateTime_:
            track_doubles = true;
            break;
        default:
            supported = false;
            break;
    }

    if (supported)
    {
        // Next free slot in the group mapping (only meaningful for group indexes).
        int next_group = 0;
        if (track_groups && index.GroupListMapping.Any())
        {
            next_group = index.GroupListMapping.Max(f => f.Value) + 1;
        }

        for (int i = 0; i < index.Parts.Count; i++)
        {
            var part = index.Parts[i];
            // Borrow the ids of the matching "Id" index part for the duration of this call.
            part.Ids = ids_index.Parts[i].IntValues;

            IndexPart copy = null;          // created lazily on the first changed row
            HashSet<int> removed = null;    // created lazily on the first deleted row
            int row_count = track_groups
                ? part.GroupValues.Count
                : (track_ints ? part.IntValues.Count : part.DoubleValues.Count);

            for (int j = 0; j < row_count; j++)
            {
                int cid = part.Ids[j];

                if (track_doubles)
                {
                    if (has_changed_double && index_changed.DoubleValues.ContainsKey(cid))
                    {
                        if (copy == null)
                        {
                            copy = CopyIndexPartColumns(part, track_groups, track_ints, track_doubles);
                        }
                        copy.DoubleValues[j] = index_changed.DoubleValues[cid];
                    }
                }
                else if (has_changed_int && index_changed.IntValues.ContainsKey(cid))
                {
                    if (copy == null)
                    {
                        copy = CopyIndexPartColumns(part, track_groups, track_ints, track_doubles);
                    }
                    int changed_value = index_changed.IntValues[cid];
                    if (track_groups)
                    {
                        copy.GroupValues[j] = ResolveGroupSlot(index, changed_value, ref next_group);
                    }
                    if (track_ints)
                    {
                        copy.IntValues[j] = changed_value;
                    }
                }

                if (has_deleted && index_deleted.Ids.Contains(cid))
                {
                    if (removed == null)
                    {
                        removed = new HashSet<int>();
                    }
                    removed.Add(cid);
                }
            }

            if (removed != null)
            {
                // Filter straight from the (possibly already copied) part into a fresh one;
                // the source part itself is never mutated apart from the borrowed Ids.
                var source = copy ?? part;
                var kept = CreateEmptyIndexPart(track_groups, track_ints, track_doubles);
                for (int z = 0; z < source.Ids.Count; z++)
                {
                    if (!removed.Contains(source.Ids[z]))
                    {
                        kept.Ids.Add(source.Ids[z]);
                        if (track_groups)
                        {
                            kept.GroupValues.Add(source.GroupValues[z]);
                        }
                        if (track_ints)
                        {
                            kept.IntValues.Add(source.IntValues[z]);
                        }
                        if (track_doubles)
                        {
                            kept.DoubleValues.Add(source.DoubleValues[z]);
                        }
                    }
                }
                copy = kept;
            }

            parts.Add(copy ?? part);
        }

        if (index_new != null)
        {
            // New rows go into a private copy of the last part (it may still be shared with
            // the previous snapshot), spilling into fresh parts once it is full.
            IndexPart last;
            if (parts.Count == 0)
            {
                last = CreateEmptyIndexPart(track_groups, track_ints, track_doubles);
                parts.Add(last);
            }
            else
            {
                last = CopyIndexPartColumns(parts[parts.Count - 1], track_groups, track_ints, track_doubles);
                parts[parts.Count - 1] = last;
            }

            int new_count = track_doubles ? index_new.DoubleValues.Count : index_new.IntValues.Count;
            for (int i = 0; i < new_count; i++)
            {
                if (last.Ids.Count == part_capacity)
                {
                    last = CreateEmptyIndexPart(track_groups, track_ints, track_doubles);
                    parts.Add(last);
                }
                if (track_groups)
                {
                    last.GroupValues.Add(ResolveGroupSlot(index, (int)index_new.IntValues[i], ref next_group));
                }
                if (track_ints)
                {
                    last.IntValues.Add(index_new.IntValues[i]);
                }
                if (track_doubles)
                {
                    last.DoubleValues.Add(index_new.DoubleValues[i]);
                }
                last.Ids.Add(index_new.Ids[i]);
            }
        }

        // Release the borrowed ids. This must happen only after new rows were appended,
        // because copying the last part above reads its Ids. Unchanged parts are shared
        // with the new snapshot, so their Ids are cleared there as well.
        for (int i = 0; i < index.Parts.Count; i++)
        {
            index.Parts[i].Ids = null;
        }
    }

    // Free old snapshots: when more than 30 seconds passed since the last cleanup, remove
    // the snapshots flagged on the previous cleanup and flag all the remaining ones.
    var new_key = latest_key + "|" + snapshot_id;
    if ((DateTime.Now - last_cleanup[latest_key]).TotalMilliseconds > 30000)
    {
        var alive = snapshots_alive[latest_key];
        var new_alive = new List<Tuple<bool, string>>(); // bool - scheduled for deletion
        foreach (var sn in alive)
        {
            if (!sn.Item1)
            {
                new_alive.Add(new Tuple<bool, string>(true, sn.Item2));
            }
            else
            {
                IndexGeneric val = null;
                indexes.TryRemove(sn.Item2, out val);
            }
        }
        new_alive.Add(new Tuple<bool, string>(false, new_key));
        snapshots_alive[latest_key] = new_alive;
        last_cleanup[latest_key] = DateTime.Now;
    }
    else
    {
        snapshots_alive[latest_key].Add(new Tuple<bool, string>(false, new_key));
    }

    // Publish the new snapshot and make it the latest one.
    indexes[new_key] = new_index;
    latest_snapshots[latest_key] = snapshot_id;
}

// Creates a part with empty Ids and empty lists for exactly the requested value columns.
private static IndexPart CreateEmptyIndexPart(bool with_groups, bool with_ints, bool with_doubles)
{
    var part = new IndexPart() { Ids = new List<int>() };
    if (with_groups)
    {
        part.GroupValues = new List<int>();
    }
    if (with_ints)
    {
        part.IntValues = new List<int>();
    }
    if (with_doubles)
    {
        part.DoubleValues = new List<double>();
    }
    return part;
}

// Creates a part holding copies of the source's Ids and of exactly the requested value columns.
private static IndexPart CopyIndexPartColumns(IndexPart source, bool with_groups, bool with_ints, bool with_doubles)
{
    var part = new IndexPart() { Ids = new List<int>(source.Ids) };
    if (with_groups)
    {
        part.GroupValues = new List<int>(source.GroupValues);
    }
    if (with_ints)
    {
        part.IntValues = new List<int>(source.IntValues);
    }
    if (with_doubles)
    {
        part.DoubleValues = new List<double>(source.DoubleValues);
    }
    return part;
}

// Returns the mapping slot of a group value, registering it under next_group (and advancing next_group) when unseen.
private static int ResolveGroupSlot(IndexGeneric index, int value, ref int next_group)
{
    if (!index.GroupListMapping.ContainsKey(value))
    {
        index.GroupListMapping[value] = next_group;
        next_group++;
    }
    return index.GroupListMapping[value];
}