/// <summary>
/// Creates the adapter and resets the shared (static) state it works with:
/// the insertion buffers, the index counters and the entity/literal lookup tables.
/// </summary>
public EntryRDB()
{
    // Reset and initialize the static variables.

    // Fresh, empty lookup tables (name -> index and index -> name) and the entity type table.
    iENTS = new Dictionary<string, int>();
    sENTS = new Dictionary<int, string>();
    iLITS = new Dictionary<string, int>();
    sLITS = new Dictionary<int, string>();
    typesOfEntities = new Dictionary<int, int>();

    // A negative counter means "not yet read from the database";
    // InitAdapterBuffers tests for exactly this (ENT_INDEX < 0 || LIT_INDEX < 0).
    ENT_INDEX = LIT_INDEX = -1;

    // Buffers and the table reference are dropped; InitAdapterBuffers assigns the four b_* buffers.
    b_entities = null;
    b_literals = null;
    b_dstatements = null;
    b_ostatements = null;
    table_ri = null;
}
/// <summary>
/// Reads the rows of rdf_dstatements whose dsubject is one of the given ids.
/// </summary>
/// <param name="subjects">Ids compared against the dsubject column.</param>
/// <param name="runcommand">Command handed to RunQuery for every generated SELECT.</param>
/// <returns>The statements collected from all executed queries (an empty list when nothing matched).</returns>
private IEnumerable<RDFDStatement> GetDStatementsForSubjects(IEnumerable<int> subjects, DbCommand runcommand)
{
    List<RDFDStatement> dstatements = new List<RDFDStatement>();
    // The ids are pushed through a buffer created with size 200; presumably the handler below
    // receives them in groups of up to 200 -- confirm against BufferredProcessing.
    sema2012m.BufferredProcessing<int> buff = new sema2012m.BufferredProcessing<int>(200, subj_set =>
    {
        string[] conditions = subj_set.Select(su => "dsubject=" + su).ToArray();
        // An empty group would produce "... WHERE " with no condition (and the former
        // seedless Aggregate threw InvalidOperationException on it), so skip it.
        if (conditions.Length == 0)
        {
            return;
        }
        string sql = "SELECT dsubject,dpredicate,data FROM rdf_dstatements WHERE "
            + string.Join(" OR ", conditions);
        dstatements.AddRange(RunQuery(sql, runcommand).Select(r => new RDFDStatement()
        {
            dsubject = (int)r[0],
            dpredicate = (int)r[1],
            data = (int)r[2],
        }));
    });
    foreach (var subj in subjects)
    {
        buff.Add(subj);
    }
    // Process whatever is still sitting in the buffer.
    buff.Flush();
    return dstatements;
}
// NOTE(review): the comment that preceded this method originally read: "Opening bracket of the
// input transaction. Returns a DbCommand with a DbTransaction if the DBMS supports one."
// That text does not describe this method (it reads like a description of RunStart) -- confirm and move it.
/// <summary>
/// Prepares the adapter for loading data: reads the next free entity and literal indexes from the
/// database when they are not known yet, then creates the buffers (b_entities, b_literals,
/// b_dstatements, b_ostatements, b_triplets) whose handlers write the buffered items to the database.
/// </summary>
/// <exception cref="Exception">
/// Thrown when MAX(entityid) or MAX(literalid) yields no value, and (as "Error 2938"/"Error 2939")
/// when writing entities or literals fails.
/// </exception>
protected override void InitAdapterBuffers()
{
    // Negative counters mean the indexes have not been read from the database yet.
    if (ENT_INDEX < 0 || LIT_INDEX < 0)
    {
        using (DbCommand sqlcommand = connection.CreateCommand())
        {
            connection.Open();
            try
            {
                sqlcommand.CommandText = "SELECT MAX(entityid) FROM rdf_entities;";
                object maxEntityId = sqlcommand.ExecuteScalar();
                // MAX() over an empty table comes back as DBNull rather than null;
                // both mean that no maximum index exists.
                if (maxEntityId == null || maxEntityId is DBNull)
                {
                    throw new Exception("Не найден максимальный индекс сущностей");
                }
                // Convert.ToInt32 also accepts providers that return the maximum as a 64-bit value.
                ENT_INDEX = Convert.ToInt32(maxEntityId) + 1;

                sqlcommand.CommandText = "SELECT MAX(literalid) FROM rdf_literals;";
                object maxLiteralId = sqlcommand.ExecuteScalar();
                if (maxLiteralId == null || maxLiteralId is DBNull)
                {
                    throw new Exception("Не найден максимальный индекс литералов");
                }
                LIT_INDEX = Convert.ToInt32(maxLiteralId) + 1;
            }
            finally
            {
                // Close the connection on every path, including the failures above.
                connection.Close();
            }
        }
    }

    // Buffer size.
    int bufferportion = 60000;
    // Number of rows written by a single INSERT statement.
    int portion = 20;

    // Doubles single quotes so that a value cannot terminate the SQL string literal it is placed in.
    // A null value becomes an empty string, exactly as plain string concatenation would render it.
    Func<object, string> sqlEscape = value => string.Concat(value).Replace("'", "''");

    // Writes the pre-formatted "(...)" rows into the table, `portion` rows per INSERT statement.
    Action<DbCommand, string, IEnumerable<string>> insertInPortions = (command, table, rows) =>
    {
        var batches = rows.Select((row, i) => new { row, i }).GroupBy(x => x.i / portion, x => x.row);
        foreach (var batch in batches)
        {
            command.CommandText = "INSERT INTO " + table + " VALUES " + string.Join(",", batch.ToArray()) + ";";
            command.ExecuteNonQuery();
        }
    };

    b_entities = new sema2012m.BufferredProcessing<RDFEntity>(bufferportion, flow =>
    {
        DbCommand runcommand = RunStart();
        try
        {
            insertInPortions(runcommand, "rdf_entities",
                flow.Select(ent => "(" + ent.entityid + ",'" + sqlEscape(ent.entityvalue) + "')"));
            RunStop(runcommand);
        }
        catch (Exception ex)
        {
            RunCancel(runcommand);
            // Same type and message as before; the cause is now preserved as InnerException.
            throw new Exception("Error 2938", ex);
        }
    });

    b_literals = new sema2012m.BufferredProcessing<RDFLiteral>(bufferportion, flow =>
    {
        DbCommand runcommand = RunStart();
        try
        {
            // NOTE(review): single quotes inside literal values are replaced by double quotes.
            // This alters the stored text; it is kept unchanged so new rows match what was stored before.
            insertInPortions(runcommand, "rdf_literals",
                flow.Select(lit => "(" + lit.literalid + ",'" + lit.literalvalue.Replace('\'', '"') + "',"
                    + (lit.literallang == null ? "NULL" : "'" + sqlEscape(lit.literallang) + "'") + ")"));
            RunStop(runcommand);
        }
        catch (Exception ex)
        {
            RunCancel(runcommand);
            throw new Exception("Error 2939", ex);
        }
    });

    b_dstatements = new sema2012m.BufferredProcessing<RDFDStatement>(bufferportion, flow =>
    {
        DbCommand runcommand = RunStart();
        try
        {
            insertInPortions(runcommand, "rdf_dstatements",
                flow.Select(st => "(" + st.dsubject + "," + st.dpredicate + "," + st.data + ")"));
            RunStop(runcommand);
        }
        catch (Exception)
        {
            // Cancel like the entity/literal handlers do, but let the original exception through.
            RunCancel(runcommand);
            throw;
        }
    });

    b_ostatements = new sema2012m.BufferredProcessing<RDFOStatement>(bufferportion, flow =>
    {
        DbCommand runcommand = RunStart();
        try
        {
            insertInPortions(runcommand, "rdf_ostatements",
                flow.Select(st => "(" + st.osubject + "," + st.opredicate + "," + st.oobj + ")"));
            RunStop(runcommand);
        }
        catch (Exception)
        {
            RunCancel(runcommand);
            throw;
        }
    });

    if (use_entity_dic)
    {
        // Entity indexes are resolved through GetSetEntityIndex; the handler itself issues no SQL.
        b_triplets = new BufferredProcessing<Triplet>(10, flow =>
        {
            foreach (var tri in flow)
            {
                int i_s = GetSetEntityIndex(tri.s);
                int i_p = GetSetEntityIndex(tri.p);
                if (tri is OProp)
                {
                    int i_o = GetSetEntityIndex(((OProp)tri).o);
                    b_ostatements.Add(new RDFOStatement() { osubject = i_s, opredicate = i_p, oobj = i_o });
                }
                else
                {
                    // Every data property gets a new literal row with the next literal index.
                    int i_lit = LIT_INDEX;
                    LIT_INDEX++;
                    DProp dp = (DProp)tri;
                    var rdflit = new RDFLiteral() { literalid = i_lit, literalvalue = dp.d };
                    if (dp.lang != null)
                    {
                        rdflit.literallang = dp.lang;
                    }
                    b_literals.Add(rdflit);
                    b_dstatements.Add(new RDFDStatement() { dsubject = i_s, dpredicate = i_p, data = i_lit });
                }
            }
        });
    }
    else
    {
        int tbuffervolume = 20000;
        int tbufferportion = 200;
        // Entity indexes are looked up in (and missing ones inserted into) the database,
        // tbufferportion triplets at a time.
        b_triplets = new BufferredProcessing<Triplet>(tbuffervolume, flow =>
        {
            var query = flow.Select((ent, i) => new { e = ent, i = i })
                .GroupBy(ei => ei.i / tbufferportion, ei => ei.e);
            // Deferred buffering: the rows are handed to the other buffers only after RunStop.
            List<RDFLiteral> l_list = new List<RDFLiteral>();
            List<RDFOStatement> o_list = new List<RDFOStatement>();
            List<RDFDStatement> d_list = new List<RDFDStatement>();
            DbCommand runcommand = RunStart();
            try
            {
                foreach (var q in query)
                {
                    var triplets = q.ToArray();
                    // All distinct entity names used by this group: subjects, predicates and object-property objects.
                    var entities = triplets.Select(tr => tr.s)
                        .Concat(triplets.Select(tr => tr.p))
                        .Concat(triplets.OfType<OProp>().Select(op => op.o))
                        .Distinct().OrderBy(x => x).ToArray();
                    Dictionary<string, int> dic = EntityIndexes(entities, runcommand)
                        .ToDictionary(pair => pair.Key, pair => pair.Value);
                    // Materialized before the loop because the loop adds to dic.
                    var unindexedentities = entities.Where(en => !dic.ContainsKey(en)).ToArray();
                    StringBuilder sb = new StringBuilder();
                    foreach (var uie in unindexedentities)
                    {
                        if (sb.Length > 0)
                        {
                            sb.Append(',');
                        }
                        sb.Append("(" + ENT_INDEX + ",'" + sqlEscape(uie) + "')");
                        dic.Add(uie, ENT_INDEX);
                        ENT_INDEX++;
                    }
                    if (sb.Length > 0)
                    {
                        runcommand.CommandText = "INSERT INTO rdf_entities VALUES " + sb.ToString() + ";";
                        runcommand.ExecuteNonQuery();
                    }
                    foreach (var tr in triplets)
                    {
                        if (tr is OProp)
                        {
                            o_list.Add(new RDFOStatement() { osubject = dic[tr.s], opredicate = dic[tr.p], oobj = dic[((OProp)tr).o] });
                        }
                        else
                        {
                            DProp dp = (DProp)tr;
                            l_list.Add(new RDFLiteral() { literalid = LIT_INDEX, literalvalue = dp.d, literallang = dp.lang });
                            d_list.Add(new RDFDStatement() { dsubject = dic[tr.s], dpredicate = dic[tr.p], data = LIT_INDEX });
                            LIT_INDEX++;
                        }
                    }
                }
                RunStop(runcommand);
            }
            catch (Exception)
            {
                RunCancel(runcommand);
                throw;
            }
            foreach (var v in l_list)
            {
                b_literals.Add(v);
            }
            foreach (var v in o_list)
            {
                b_ostatements.Add(v);
            }
            foreach (var v in d_list)
            {
                b_dstatements.Add(v);
            }
        });
    }
}
/// <summary>
/// Reads the rows of rdf_ostatements whose oobj is one of the given ids.
/// </summary>
/// <param name="objs">Ids compared against the oobj column.</param>
/// <param name="runcommand">Command handed to RunQuery for every generated SELECT.</param>
/// <returns>The statements collected from all executed queries (an empty list when nothing matched).</returns>
public IEnumerable<RDFOStatement> GetOStatementsForObjs(IEnumerable<int> objs, DbCommand runcommand)
{
    List<RDFOStatement> ostatements = new List<RDFOStatement>();
    // The ids are pushed through a buffer created with size 200; presumably the handler below
    // receives them in groups of up to 200 -- confirm against BufferredProcessing.
    sema2012m.BufferredProcessing<int> buff = new sema2012m.BufferredProcessing<int>(200, obj_set =>
    {
        string[] conditions = obj_set.Select(ob => "oobj=" + ob).ToArray();
        // An empty group would produce "... WHERE " with no condition (and the former
        // seedless Aggregate threw InvalidOperationException on it), so skip it.
        if (conditions.Length == 0)
        {
            return;
        }
        string sql = "SELECT osubject,opredicate,oobj FROM rdf_ostatements WHERE "
            + string.Join(" OR ", conditions);
        ostatements.AddRange(RunQuery(sql, runcommand).Select(r => new RDFOStatement()
        {
            osubject = (int)r[0],
            opredicate = (int)r[1],
            oobj = (int)r[2],
        }));
    });
    foreach (var obj in objs)
    {
        buff.Add(obj);
    }
    // Process whatever is still sitting in the buffer.
    buff.Flush();
    return ostatements;
}
/// <summary>
/// Reads the rows of rdf_ostatements whose osubject is one of the given ids, using Queries.RunQuery.
/// </summary>
/// <param name="subjects">Ids compared against the osubject column.</param>
/// <returns>The statements collected from all executed queries (an empty list when nothing matched).</returns>
private static IEnumerable<RDFOStatement> GetOStatementsForSubjects(IEnumerable<int> subjects)
{
    List<RDFOStatement> ostatements = new List<RDFOStatement>();
    // The ids are pushed through a buffer created with size 200; presumably the handler below
    // receives them in groups of up to 200 -- confirm against BufferredProcessing.
    sema2012m.BufferredProcessing<int> buff = new sema2012m.BufferredProcessing<int>(200, subj_set =>
    {
        string[] conditions = subj_set.Select(su => "osubject=" + su).ToArray();
        // An empty group would produce "... WHERE " with no condition (and the former
        // seedless Aggregate threw InvalidOperationException on it), so skip it.
        if (conditions.Length == 0)
        {
            return;
        }
        string sql = "SELECT osubject,opredicate,oobj FROM rdf_ostatements WHERE "
            + string.Join(" OR ", conditions);
        ostatements.AddRange(Queries.RunQuery(sql).Select(r => new RDFOStatement()
        {
            osubject = (int)r[0],
            opredicate = (int)r[1],
            oobj = (int)r[2],
        }));
    });
    foreach (var subj in subjects)
    {
        buff.Add(subj);
    }
    // Process whatever is still sitting in the buffer.
    buff.Flush();
    return ostatements;
}
/// <summary>
/// Creates the four insertion buffers (b_entities, b_literals, b_dstatements, b_ostatements).
/// Each buffer handler turns the items it receives into one SQL script of the form
/// "BEGIN;INSERT ...;INSERT ...;COMMIT;" and passes it to Queries.NonExecuteQuery.
/// </summary>
/// <remarks>
/// bufferportion and portion are not declared in this method; they are presumably members of the
/// enclosing class (buffer size and rows per INSERT statement) -- confirm.
/// </remarks>
public void InitAdapterBuffers()
{
    // Builds the whole script in a single StringBuilder (instead of repeated string
    // concatenation) and executes it. A new INSERT statement is started every `portion` rows;
    // with no rows at all the script is just "BEGIN;COMMIT;".
    Action<string, IEnumerable<string>> insertRows = (table, rows) =>
    {
        StringBuilder command = new StringBuilder("BEGIN;");
        int count = 0;
        foreach (string row in rows)
        {
            if (count % portion == 0)
            {
                if (count > 0)
                {
                    command.Append(';');
                }
                command.Append("INSERT INTO " + table + " VALUES ");
            }
            else
            {
                command.Append(',');
            }
            command.Append(row);
            count++;
        }
        if (count > 0)
        {
            command.Append(';');
        }
        command.Append("COMMIT;");
        Queries.NonExecuteQuery(command.ToString());
    };

    b_entities = new sema2012m.BufferredProcessing<RDFEntity>(bufferportion, flow =>
        insertRows("rdf_entities", flow.Select(ent =>
            // Single quotes are doubled so the value cannot terminate its SQL string literal.
            "(" + ent.entityid + "," + ent.entitytype + ",'" + string.Concat(ent.entityvalue).Replace("'", "''") + "')")));

    b_literals = new sema2012m.BufferredProcessing<RDFLiteral>(bufferportion, flow =>
        insertRows("rdf_literals", flow.Select(lit =>
            // NOTE(review): single quotes inside literal values are replaced by double quotes;
            // kept unchanged so new rows match what was stored before.
            "(" + lit.literalid + ",'" + lit.literalvalue.Replace('\'', '"') + "',"
                + (lit.literallang == null ? "NULL" : "'" + lit.literallang + "'") + ")")));

    b_dstatements = new sema2012m.BufferredProcessing<RDFDStatement>(bufferportion, flow =>
        insertRows("rdf_dstatements", flow.Select(st =>
            "(" + st.dsubject + "," + st.dpredicate + "," + st.data + ")")));

    b_ostatements = new sema2012m.BufferredProcessing<RDFOStatement>(bufferportion, flow =>
        insertRows("rdf_ostatements", flow.Select(st =>
            "(" + st.osubject + "," + st.opredicate + "," + st.oobj + ")")));
}