// Creates the b_entities buffer. Its handler receives the buffered strings,
// splits them into groups of `portion` items and sends each group to the
// engine as one "SPARQL INSERT INTO GRAPH" command between
// engine.RunStart() and engine.RunStop().
public void InitBuffer()
{
    // buffer size
    int bufferportion = 1000;
    // number of buffered items sent with a single INSERT command
    int portion = 40;
    b_entities = new BufferredProcessing<string>(bufferportion, flow =>
    {
        // Pair every item with its position so that items can be grouped into fixed-size portions.
        var query = flow.Select((ent, i) => new { e = ent, i = i }).GroupBy(ei => ei.i / portion, ei => ei);
        VirtuosoCommand trcommand = engine.RunStart();
        foreach (var q in query)
        {
            // Each buffered string becomes one statement terminated by " . ".
            // A group is never empty, so Join yields the same text the pairwise concatenation did.
            string data = string.Join(" ", q.Select(ei => ei.e + " . ").ToArray());
            trcommand.CommandText = "SPARQL INSERT INTO GRAPH <" + graph + "> {" + data + "}\n";
            try
            {
                trcommand.ExecuteNonQuery();
            }
            catch (Exception ex)
            {
                // Loading stays best-effort (the remaining portions are still sent),
                // but a failed portion is reported instead of being dropped silently.
                System.Diagnostics.Trace.TraceError("SPARQL insert failed: " + ex.Message + "\nSPARQL data=" + data);
            }
        }
        engine.RunStop(trcommand);
    });
}
// Builds an engine for the given provider id on top of the supplied connection.
// All buffers start out unset, both index counters start at -1 and all lookup
// dictionaries start out empty.
public EngineRDB(string providerId, DbConnection conn)
{
    this.providerId = providerId;

    // connection is now static
    connection = conn;

    // No buffers exist yet.
    b_entities = null;
    b_literals = null;
    b_dstatements = null;
    b_ostatements = null;

    // A negative value marks an index counter as "not initialised".
    ENT_INDEX = -1;
    LIT_INDEX = -1;

    // Name -> id maps.
    iENTS = new Dictionary<string, int>();
    iLITS = new Dictionary<string, int>();

    // Id -> name maps.
    sENTS = new Dictionary<int, string>();
    sLITS = new Dictionary<int, string>();

    typesOfEntities = new Dictionary<int, int>();
}
// Opening bracket of the input transaction. Returns a DbCommand with a DbTransaction
// if the DBMS supports transactions.
// NOTE(review): the sentence above reads like a description of RunStart rather than
// of this method - confirm and move it if so.
//
// Initialises ENT_INDEX / LIT_INDEX from the database when either is still negative,
// then creates the buffers b_entities, b_literals, b_dstatements, b_ostatements and
// b_triplets. Every buffer handler writes its items with multi-row INSERT commands
// executed between RunStart() and RunStop(); on failure RunCancel() is called.
//
// Throws Exception when the maximum entity or literal id cannot be determined
// (including the case where the table is empty and MAX(...) yields NULL).
protected override void InitAdapterBuffers()
{
    if (ENT_INDEX < 0 || LIT_INDEX < 0)
    {
        using (DbCommand sqlcommand = connection.CreateCommand())
        {
            connection.Open();
            try
            {
                sqlcommand.CommandText = "SELECT MAX(entityid) FROM rdf_entities;";
                object oind = sqlcommand.ExecuteScalar();
                // MAX over an empty table comes back as DBNull, not null.
                if (oind == null || oind is DBNull)
                {
                    throw new Exception("Не найден максимальный индекс сущностей");
                }
                // Convert instead of unboxing: some providers return long/decimal for MAX().
                ENT_INDEX = Convert.ToInt32(oind, System.Globalization.CultureInfo.InvariantCulture) + 1;

                sqlcommand.CommandText = "SELECT MAX(literalid) FROM rdf_literals;";
                object oind2 = sqlcommand.ExecuteScalar();
                if (oind2 == null || oind2 is DBNull)
                {
                    throw new Exception("Не найден максимальный индекс литералов");
                }
                LIT_INDEX = Convert.ToInt32(oind2, System.Globalization.CultureInfo.InvariantCulture) + 1;
            }
            finally
            {
                // Close the connection on every path, including the failure paths above.
                connection.Close();
            }
        }
    }

    // buffer size
    int bufferportion = 60000;
    // number of rows sent with a single INSERT command
    int portion = 20;

    // Wraps a value in single quotes, doubling embedded apostrophes so that the
    // value cannot terminate the SQL string literal early. null becomes ''.
    Func<string, string> sqlQuote = text => "'" + (text ?? "").Replace("'", "''") + "'";

    b_entities = new sema2012m.BufferredProcessing<RDFEntity>(bufferportion, flow =>
    {
        var query = flow.Select((ent, i) => new { e = ent, i = i }).GroupBy(ei => ei.i / portion, ei => ei);
        DbCommand runcommand = RunStart();
        try
        {
            foreach (var q in query)
            {
                string rows = string.Join(",", q.Select(ei => "(" + ei.e.entityid + "," + sqlQuote(ei.e.entityvalue) + ")").ToArray());
                runcommand.CommandText = "INSERT INTO rdf_entities VALUES " + rows + ";";
                runcommand.ExecuteNonQuery();
            }
            RunStop(runcommand);
        }
        catch (Exception ex)
        {
            RunCancel(runcommand);
            // Same exception type and message as before; the cause is now attached.
            throw new Exception("Error 2938", ex);
        }
    });

    b_literals = new sema2012m.BufferredProcessing<RDFLiteral>(bufferportion, flow =>
    {
        var query = flow.Select((ent, i) => new { e = ent, i = i }).GroupBy(ei => ei.i / portion, ei => ei);
        DbCommand runcommand = RunStart();
        try
        {
            foreach (var q in query)
            {
                // Apostrophes inside literal values are replaced by double quotes (existing, lossy convention).
                string rows = string.Join(",", q.Select(ei =>
                    "(" + ei.e.literalid
                    + ",'" + ei.e.literalvalue.Replace('\'', '"') + "',"
                    + (ei.e.literallang == null ? "NULL" : sqlQuote(ei.e.literallang))
                    + ")").ToArray());
                runcommand.CommandText = "INSERT INTO rdf_literals VALUES " + rows + ";";
                runcommand.ExecuteNonQuery();
            }
            RunStop(runcommand);
        }
        catch (Exception ex)
        {
            RunCancel(runcommand);
            throw new Exception("Error 2939", ex);
        }
    });

    b_dstatements = new sema2012m.BufferredProcessing<RDFDStatement>(bufferportion, flow =>
    {
        var query = flow.Select((ent, i) => new { e = ent, i = i }).GroupBy(ei => ei.i / portion, ei => ei);
        DbCommand runcommand = RunStart();
        try
        {
            foreach (var q in query)
            {
                string rows = string.Join(",", q.Select(ei => "(" + ei.e.dsubject + "," + ei.e.dpredicate + "," + ei.e.data + ")").ToArray());
                runcommand.CommandText = "INSERT INTO rdf_dstatements VALUES " + rows + ";";
                runcommand.ExecuteNonQuery();
            }
            RunStop(runcommand);
        }
        catch (Exception)
        {
            // Cancel like the entity/literal handlers do, then let the original exception propagate.
            RunCancel(runcommand);
            throw;
        }
    });

    b_ostatements = new sema2012m.BufferredProcessing<RDFOStatement>(bufferportion, flow =>
    {
        var query = flow.Select((ent, i) => new { e = ent, i = i }).GroupBy(ei => ei.i / portion, ei => ei);
        DbCommand runcommand = RunStart();
        try
        {
            foreach (var q in query)
            {
                string rows = string.Join(",", q.Select(ei => "(" + ei.e.osubject + "," + ei.e.opredicate + "," + ei.e.oobj + ")").ToArray());
                runcommand.CommandText = "INSERT INTO rdf_ostatements VALUES " + rows + ";";
                runcommand.ExecuteNonQuery();
            }
            RunStop(runcommand);
        }
        catch (Exception)
        {
            RunCancel(runcommand);
            throw;
        }
    });

    if (use_entity_dic)
    {
        // Entity ids are resolved through GetSetEntityIndex; statements and
        // literals are handed straight to the other buffers.
        b_triplets = new BufferredProcessing<Triplet>(10, flow =>
        {
            foreach (var tri in flow)
            {
                int i_s = GetSetEntityIndex(tri.s);
                int i_p = GetSetEntityIndex(tri.p);
                if (tri is OProp)
                {
                    int i_o = GetSetEntityIndex(((OProp)tri).o);
                    b_ostatements.Add(new RDFOStatement() { osubject = i_s, opredicate = i_p, oobj = i_o });
                }
                else
                {
                    int i_lit = LIT_INDEX;
                    LIT_INDEX++;
                    DProp dp = (DProp)tri;
                    var rdflit = new RDFLiteral() { literalid = i_lit, literalvalue = dp.d };
                    if (dp.lang != null)
                    {
                        rdflit.literallang = dp.lang;
                    }
                    b_literals.Add(rdflit);
                    b_dstatements.Add(new RDFDStatement() { dsubject = i_s, dpredicate = i_p, data = i_lit });
                }
            }
        });
    }
    else
    {
        int tbuffervolume = 20000;
        int tbufferportion = 200;
        b_triplets = new BufferredProcessing<Triplet>(tbuffervolume, flow =>
        {
            var query = flow.Select((ent, i) => new { e = ent, i = i }).GroupBy(ei => ei.i / tbufferportion, ei => ei);
            // Deferred buffering: rows are collected here and handed to the other
            // buffers only after this handler has called RunStop.
            List<RDFLiteral> l_list = new List<RDFLiteral>();
            List<RDFOStatement> o_list = new List<RDFOStatement>();
            List<RDFDStatement> d_list = new List<RDFDStatement>();
            DbCommand runcommand = RunStart();
            try
            {
                foreach (var q in query)
                {
                    var triplets = q.Select(ei => ei.e).ToArray();
                    // Every distinct entity name used by this portion, in sorted order
                    // (the order determines which new id each name receives).
                    var entities = triplets.Select(tr => tr.s)
                        .Concat(triplets.Select(tr => tr.p))
                        .Concat(triplets.Where(tr => tr is OProp).Cast<OProp>().Select(op => op.o))
                        .Distinct().OrderBy(x => x).ToArray();
                    Dictionary<string, int> dic = EntityIndexes(entities, runcommand)
                        .ToDictionary(pair => pair.Key, pair => pair.Value);
                    // Materialised before dic is extended below; dictionary lookup
                    // replaces the former linear scan per entity.
                    var unindexedentities = entities.Where(en => !dic.ContainsKey(en)).ToArray();
                    StringBuilder sb = new StringBuilder();
                    bool notfirst = false;
                    foreach (var uie in unindexedentities)
                    {
                        if (notfirst)
                        {
                            sb.Append(',');
                        }
                        notfirst = true;
                        sb.Append("(" + ENT_INDEX + "," + sqlQuote(uie) + ")");
                        dic.Add(uie, ENT_INDEX);
                        ENT_INDEX++;
                    }
                    if (sb.Length > 0)
                    {
                        runcommand.CommandText = "INSERT INTO rdf_entities VALUES " + sb.ToString() + ";";
                        runcommand.ExecuteNonQuery();
                    }
                    foreach (var tr in triplets)
                    {
                        if (tr is OProp)
                        {
                            o_list.Add(new RDFOStatement() { osubject = dic[tr.s], opredicate = dic[tr.p], oobj = dic[((OProp)tr).o] });
                        }
                        else
                        {
                            DProp dp = (DProp)tr;
                            l_list.Add(new RDFLiteral() { literalid = LIT_INDEX, literalvalue = dp.d, literallang = dp.lang });
                            d_list.Add(new RDFDStatement() { dsubject = dic[tr.s], dpredicate = dic[tr.p], data = LIT_INDEX });
                            LIT_INDEX++;
                        }
                    }
                }
                RunStop(runcommand);
            }
            catch (Exception)
            {
                RunCancel(runcommand);
                throw;
            }
            foreach (var v in l_list)
            {
                b_literals.Add(v);
            }
            foreach (var v in o_list)
            {
                b_ostatements.Add(v);
            }
            foreach (var v in d_list)
            {
                b_dstatements.Add(v);
            }
        });
    }
}
// Loads the rdf_literals rows whose literalid is among `ids`.
// Ids are pushed through a BufferredProcessing<int> created with the value 200;
// each batch handed to the handler is turned into one SELECT whose WHERE clause
// is an OR-chain of "literalid=<id>" conditions.
// Returns the collected rows; literallang is null where the column is DBNull.
public static IEnumerable<RDFLiteral> GetLiteralsForIds(IEnumerable<int> ids)
{
    List<RDFLiteral> literals = new List<RDFLiteral>();
    BufferredProcessing<int> buff = new BufferredProcessing<int>(200, id_set =>
    {
        string[] conditions = id_set.Select(id => "literalid=" + id).ToArray();
        // Nothing to query for an empty batch (e.g. if Flush hands over an empty buffer);
        // building the WHERE clause from zero conditions would fail.
        if (conditions.Length == 0)
        {
            return;
        }
        string sql = "SELECT literalid,literalvalue,literallang FROM rdf_literals WHERE "
            + string.Join(" OR ", conditions);
        literals.AddRange(RelationalDb.RunQuery(sql).Select(r => new RDFLiteral()
        {
            literalid = (int)r[0],
            literalvalue = (string)r[1],
            literallang = (r[2] is DBNull) ? null : (string)r[2]
        }));
    });
    foreach (var id in ids)
    {
        buff.Add(id);
    }
    buff.Flush();
    return literals;
}
// Loads the rdf_dstatements rows whose dsubject is among `subjects`.
// Subjects are pushed through a BufferredProcessing<int> created with the value 200;
// each batch handed to the handler is turned into one SELECT whose WHERE clause
// is an OR-chain of "dsubject=<id>" conditions.
// Returns the collected rows.
public static IEnumerable<RDFDStatement> GetDStatementsForSubjects(IEnumerable<int> subjects)
{
    List<RDFDStatement> dstatements = new List<RDFDStatement>();
    BufferredProcessing<int> buff = new BufferredProcessing<int>(200, subj_set =>
    {
        string[] conditions = subj_set.Select(su => "dsubject=" + su).ToArray();
        // Nothing to query for an empty batch (e.g. if Flush hands over an empty buffer);
        // building the WHERE clause from zero conditions would fail.
        if (conditions.Length == 0)
        {
            return;
        }
        string sql = "SELECT dsubject,dpredicate,data FROM rdf_dstatements WHERE "
            + string.Join(" OR ", conditions);
        dstatements.AddRange(RelationalDb.RunQuery(sql).Select(r => new RDFDStatement()
        {
            dsubject = (int)r[0],
            dpredicate = (int)r[1],
            data = (int)r[2],
        }));
    });
    foreach (var subj in subjects)
    {
        buff.Add(subj);
    }
    buff.Flush();
    return dstatements;
}
// Loads the rdf_ostatements rows whose oobj is among `objs`.
// Object ids are pushed through a BufferredProcessing<int> created with the value 200;
// each batch handed to the handler is turned into one SELECT whose WHERE clause
// is an OR-chain of "oobj=<id>" conditions.
// Returns the collected rows.
public static IEnumerable<RDFOStatement> GetOStatementsForObjs(IEnumerable<int> objs)
{
    List<RDFOStatement> ostatements = new List<RDFOStatement>();
    BufferredProcessing<int> buff = new BufferredProcessing<int>(200, obj_set =>
    {
        string[] conditions = obj_set.Select(su => "oobj=" + su).ToArray();
        // Nothing to query for an empty batch (e.g. if Flush hands over an empty buffer);
        // building the WHERE clause from zero conditions would fail.
        if (conditions.Length == 0)
        {
            return;
        }
        string sql = "SELECT osubject,opredicate,oobj FROM rdf_ostatements WHERE "
            + string.Join(" OR ", conditions);
        ostatements.AddRange(RelationalDb.RunQuery(sql).Select(r => new RDFOStatement()
        {
            osubject = (int)r[0],
            opredicate = (int)r[1],
            oobj = (int)r[2],
        }));
    });
    foreach (var obj in objs)
    {
        buff.Add(obj);
    }
    buff.Flush();
    return ostatements;
}
// Sends every triplet produced by lfd.ScanDatabases() to the store.
// Triplets go through a buffer created with the value 200; each batch handed to
// the handler becomes a single "SPARQL INSERT INTO GRAPH" command run via RunCommand.
public static void FillDatabaseFromTripletFlowVirtuoso(EntitiesAndTripletFlow lfd)
{
    int bufferportion = 200;
    BufferredProcessing<Triplet> b_entities = new BufferredProcessing<Triplet>(bufferportion, flow =>
    {
        string statements = flow
            .Select(triplet =>
            {
                string objectPart;
                if (triplet is OProp)
                {
                    // Object property: the object is another entity.
                    objectPart = "<" + ((OProp)triplet).o + ">";
                }
                else
                {
                    // Data property: double quotes, backslashes and line feeds
                    // are replaced so the value fits into a quoted literal.
                    DProp dataProp = (DProp)triplet;
                    string text = dataProp.d.Replace('\"', '\'').Replace('\\', '/').Replace('\n', ' ');
                    // NOTE(review): the "@lang" suffix ends up inside the closing quote,
                    // i.e. as part of the literal text - confirm this is intended.
                    string langSuffix = dataProp.lang != null ? "@" + dataProp.lang : "";
                    objectPart = "\"" + text + langSuffix + "\"";
                }
                return "<" + triplet.s + "> <" + triplet.p + "> " + objectPart + ".";
            })
            .Aggregate((sum, s) => sum + " " + s);
        string command = "SPARQL INSERT INTO GRAPH <" + graph + "> {" + statements + "}\n";
        RunCommand(command);
    });

    // Scan the triplet flow and insert the information into the database.
    foreach (var tr in lfd.ScanDatabases())
    {
        b_entities.Add(tr);
    }
    b_entities.Flush();
}
// Scans the triplets produced by lfd.ScanDatabases() and writes them into the
// tables rdf_entities, rdf_literals, rdf_dstatements and rdf_ostatements.
// Rows are collected in four buffers (each created with the value 200); every
// batch handed to a buffer handler becomes one multi-row INSERT run via RunCommand.
// An entity or literal row is queued only when lfd.TryGetEntity / lfd.TryGetLiteral
// returns true; the statement row is queued for every triplet.
public static void FillDatabaseFromTripletFlow(EntitiesAndTripletFlow lfd)
{
    int bufferportion = 200;

    BufferredProcessing<RDFEntity> b_entities = new BufferredProcessing<RDFEntity>(bufferportion, flow =>
    {
        // Apostrophes in entity values are doubled so they cannot end the SQL string literal.
        string[] rows = flow.Select(ent =>
            "(" + ent.entityid + "," + ent.entitytype + ",'" + (ent.entityvalue ?? "").Replace("'", "''") + "')").ToArray();
        if (rows.Length == 0)
        {
            return; // an INSERT without rows would be invalid
        }
        RunCommand("INSERT INTO rdf_entities VALUES " + string.Join(",", rows));
    });

    BufferredProcessing<RDFLiteral> b_literals = new BufferredProcessing<RDFLiteral>(bufferportion, flow =>
    {
        // Apostrophes inside literal values are replaced by double quotes (existing, lossy convention).
        string[] rows = flow.Select(lit =>
            "(" + lit.literalid
            + ",N'" + lit.literalvalue.Replace('\'', '"') + "',"
            + (lit.literallang == null ? "NULL" : "'" + lit.literallang + "'")
            + ")").ToArray();
        if (rows.Length == 0)
        {
            return;
        }
        RunCommand("INSERT INTO rdf_literals VALUES " + string.Join(",", rows));
    });

    BufferredProcessing<RDFDStatement> b_dstatements = new BufferredProcessing<RDFDStatement>(bufferportion, flow =>
    {
        string[] rows = flow.Select(dst => "(" + dst.dsubject + "," + dst.dpredicate + "," + dst.data + ")").ToArray();
        if (rows.Length == 0)
        {
            return;
        }
        RunCommand("INSERT INTO rdf_dstatements VALUES " + string.Join(",", rows));
    });

    BufferredProcessing<RDFOStatement> b_ostatements = new BufferredProcessing<RDFOStatement>(bufferportion, flow =>
    {
        string[] rows = flow.Select(ost => "(" + ost.osubject + "," + ost.opredicate + "," + ost.oobj + ")").ToArray();
        if (rows.Length == 0)
        {
            return;
        }
        RunCommand("INSERT INTO rdf_ostatements VALUES " + string.Join(",", rows));
    });

    // Resolves the id of a subject/object entity. When TryGetEntity reports true,
    // the entity row is queued, preceded by the row of its type when that lookup
    // also reports true. The type id stays -1 when the entity has no type.
    System.Func<string, int> registerEntity = name =>
    {
        int id;
        if (lfd.TryGetEntity(name, out id))
        {
            int typeId = -1;
            string typeName = lfd.GetTypeOfEntity(name);
            if (typeName != null && lfd.TryGetEntity(typeName, out typeId))
            {
                b_entities.Add(new RDFEntity() { entityid = typeId, entityvalue = typeName });
            }
            b_entities.Add(new RDFEntity() { entityid = id, entitytype = typeId, entityvalue = name });
        }
        return id;
    };

    // Resolves the id of a predicate; when TryGetEntity reports true its row is
    // queued with the given entitytype marker (-1 for object properties,
    // -2 for data properties).
    System.Func<string, int, int> registerPredicate = (name, marker) =>
    {
        int id;
        if (lfd.TryGetEntity(name, out id))
        {
            b_entities.Add(new RDFEntity() { entityid = id, entitytype = marker, entityvalue = name });
        }
        return id;
    };

    // Now scan the triplet flow and insert the information into the database.
    foreach (var tr in lfd.ScanDatabases())
    {
        if (tr is OProp)
        {
            OProp op = (OProp)tr;
            int subjectId = registerEntity(op.s);
            int predicateId = registerPredicate(op.p, -1);
            int objectId = registerEntity(op.o);
            b_ostatements.Add(new RDFOStatement() { osubject = subjectId, opredicate = predicateId, oobj = objectId });
        }
        else
        {
            DProp dp = (DProp)tr;
            int literalId;
            if (lfd.TryGetLiteral(dp.d, out literalId, dp.lang))
            {
                b_literals.Add(new RDFLiteral() { literalid = literalId, literalvalue = dp.d, literallang = dp.lang });
            }
            int subjectId = registerEntity(dp.s);
            int predicateId = registerPredicate(dp.p, -2);
            b_dstatements.Add(new RDFDStatement() { dsubject = subjectId, dpredicate = predicateId, data = literalId });
        }
    }

    b_entities.Flush();
    b_literals.Flush();
    b_dstatements.Flush();
    b_ostatements.Flush();
}
// Four actions of the input transaction. The transaction starts with the first action,
// is followed by any number of second actions and/or IncludeXElement calls, and ends with
// the third. The first action produces a DbCommand when a DbConnection is used, or
// something else for another DBMS. The command has to be passed along through the
// following actions in order to keep the transaction alive. FlushAdapterBuffers flushes
// the buffers and closes the transaction.
// For DBMSs where splitting the database down to triplets is not worthwhile,
// IncludeXElement is overridden (without using FillDatabaseFromTripletFlow).
protected override void InitAdapterBuffers()
{
    // buffer size
    int bufferportion = 10000;
    // number of triplets sent with a single INSERT command
    int portion = 20;

    b_entities = new BufferredProcessing<Triplet>(bufferportion, flow =>
    {
        // Number the triplets and cut the batch into groups of `portion` items.
        var chunks = flow
            .Select((ent, i) => new { e = ent, i = i })
            .GroupBy(ei => ei.i / portion, ei => ei);

        VirtuosoCommand command = RunStart();
        foreach (var chunk in chunks)
        {
            string data = chunk
                .Select(item =>
                {
                    Triplet triplet = item.e;
                    string head = "<" + CorrectEntity(triplet.s) + "> <" + CorrectEntity(triplet.p) + "> ";
                    if (triplet is sema2012m.OProp)
                    {
                        return head + "<" + CorrectEntity(((sema2012m.OProp)triplet).o) + "> .";
                    }
                    // Data property: double quotes, backslashes and line feeds
                    // are replaced so the value fits into a quoted literal.
                    DProp dataProp = (DProp)triplet;
                    string text = dataProp.d.Replace('\"', '\'').Replace('\\', '/').Replace('\n', ' ');
                    // NOTE(review): the "@lang" suffix ends up inside the closing quote - confirm this is intended.
                    string langSuffix = dataProp.lang != null ? "@" + dataProp.lang : "";
                    return head + "\"" + text + langSuffix + "\" .";
                })
                .Aggregate((sum, s) => sum + " " + s);

            command.CommandText = "SPARQL INSERT INTO GRAPH <" + graph + "> {" + data + "}\n";
            try
            {
                command.ExecuteNonQuery();
                Nreceived += chunk.Count();
            }
            catch (Exception ex)
            {
                // A failed portion is logged (when a log is attached) and loading continues.
                if (dynamicLog != null)
                {
                    dynamicLog.AppendLine("Err during data loading: " + ex.Message + "\nSPARQL data=" + data);
                }
            }
        }
        RunStop(command);
    });
}