Beispiel #1
0
        /// <summary>
        /// Creates the <c>b_entities</c> buffer. When the buffer hands over its accumulated
        /// strings, they are split into portions and each portion is sent to the engine as one
        /// <c>SPARQL INSERT INTO GRAPH</c> statement.
        /// </summary>
        public void InitBuffer()
        {
            // buffer size (first constructor argument of BufferredProcessing)
            int bufferportion = 1000;
            // number of items packed into a single INSERT statement
            int portion = 40;

            b_entities = new BufferredProcessing <string>(bufferportion, flow =>
            {
                // Items with the same (index / portion) end up in the same group, i.e. one statement.
                var query = flow.Select((ent, i) => new { e = ent, i = i }).GroupBy(ei => ei.i / portion, ei => ei.e);

                VirtuosoCommand trcommand = engine.RunStart();
                foreach (var q in query)
                {
                    // Groups are never empty, so this yields exactly what the former
                    // Aggregate((sum, s) => sum + " " + s) produced, without quadratic concatenation.
                    string data = string.Join(" ", q.Select(e => e + " . ").ToArray());
                    trcommand.CommandText = "SPARQL INSERT INTO GRAPH <" + graph + "> {" + data + "}\n";
                    try
                    {
                        trcommand.ExecuteNonQuery();
                    }
                    catch (Exception ex)
                    {
                        // Loading stays best-effort (a failed portion does not stop the remaining ones),
                        // but the failure is no longer swallowed silently.
                        // The single-argument overload is used on purpose: data contains braces
                        // and must not be interpreted as a format string.
                        System.Diagnostics.Trace.TraceError("Err during data loading: " + ex.Message + "\nSPARQL data=" + data);
                    }
                }
                engine.RunStop(trcommand);
            });
        }
Beispiel #2
0
 /// <summary>
 /// Creates the engine for the given provider and connection and resets all
 /// buffers, index counters and lookup dictionaries to their initial state.
 /// </summary>
 /// <param name="providerId">Identifier of the database provider.</param>
 /// <param name="conn">Database connection used by the engine.</param>
 public EngineRDB(string providerId, DbConnection conn)
 {
     connection = conn; // connection is now static
     this.providerId = providerId;

     // No buffer exists yet.
     b_ostatements = null;
     b_dstatements = null;
     b_literals = null;
     b_entities = null;

     // Both index counters start at -1.
     LIT_INDEX = -1;
     ENT_INDEX = -1;

     // Empty lookup tables: name -> index, index -> name, and entity types.
     sLITS = new Dictionary<int, string>();
     sENTS = new Dictionary<int, string>();
     iLITS = new Dictionary<string, int>();
     iENTS = new Dictionary<string, int>();
     typesOfEntities = new Dictionary<int, int>();
 }
Beispiel #3
0
        // Открывающая скобка транзакции ввода. Выдает DbCommand c транзакцией DbTransaction, если она поддерживается данной СУБД
        /// <summary>
        /// Prepares everything needed for loading data into the relational store:
        /// reads the next free entity/literal indexes from the database when they are still
        /// negative, then creates the buffers for entities, literals, data statements,
        /// object statements and triplets.
        /// </summary>
        /// <exception cref="Exception">
        /// Thrown when the maximum entity or literal index cannot be determined
        /// (including the case where MAX(...) yields a database NULL).
        /// </exception>
        protected override void InitAdapterBuffers()
        {
            if (ENT_INDEX < 0 || LIT_INDEX < 0)
            {
                connection.Open();
                try
                {
                    using (DbCommand sqlcommand = connection.CreateCommand())
                    {
                        // ExecuteScalar returns DBNull (not null) when MAX(...) is NULL, e.g. for an empty
                        // table; both cases are reported with the descriptive error instead of an
                        // InvalidCastException. Convert.ToInt32 also accepts providers that return Int64.
                        sqlcommand.CommandText = "SELECT MAX(entityid) FROM rdf_entities;";
                        object oind = sqlcommand.ExecuteScalar();
                        if (oind == null || oind is DBNull) throw new Exception("Не найден максимальный индекс сущностей");
                        ENT_INDEX = Convert.ToInt32(oind) + 1;

                        sqlcommand.CommandText = "SELECT MAX(literalid) FROM rdf_literals;";
                        object oind2 = sqlcommand.ExecuteScalar();
                        if (oind2 == null || oind2 is DBNull) throw new Exception("Не найден максимальный индекс литералов");
                        LIT_INDEX = Convert.ToInt32(oind2) + 1;
                    }
                }
                finally
                {
                    // Close the connection on every path, including failures.
                    connection.Close();
                }
            }

            // buffer size (first constructor argument of BufferredProcessing)
            int bufferportion = 60000;
            // number of rows packed into a single INSERT statement
            int portion = 20;

            // NOTE(review): entityvalue and literallang are concatenated into the SQL text unescaped;
            // a value containing a single quote breaks the statement. Consider escaping or parameters.
            b_entities = new sema2012m.BufferredProcessing<RDFEntity>(bufferportion, flow =>
            {
                InsertRowsInPortions<RDFEntity>(flow, portion, "rdf_entities",
                    e => "(" + e.entityid + ",'" + e.entityvalue + "')",
                    "Error 2938");
            });
            b_literals = new sema2012m.BufferredProcessing<RDFLiteral>(bufferportion, flow =>
            {
                // Single quotes inside the literal text are replaced by double quotes.
                InsertRowsInPortions<RDFLiteral>(flow, portion, "rdf_literals",
                    l => "(" + l.literalid + ",'" + l.literalvalue.Replace('\'', '"') + "'," + (l.literallang == null ? "NULL" : "'" + l.literallang + "'") + ")",
                    "Error 2939");
            });
            b_dstatements = new sema2012m.BufferredProcessing<RDFDStatement>(bufferportion, flow =>
            {
                InsertRowsInPortions<RDFDStatement>(flow, portion, "rdf_dstatements",
                    d => "(" + d.dsubject + "," + d.dpredicate + "," + d.data + ")",
                    null);
            });
            b_ostatements = new sema2012m.BufferredProcessing<RDFOStatement>(bufferportion, flow =>
            {
                InsertRowsInPortions<RDFOStatement>(flow, portion, "rdf_ostatements",
                    o => "(" + o.osubject + "," + o.opredicate + "," + o.oobj + ")",
                    null);
            });

            // Triplet buffer, variant 1: entity indexes are resolved one by one via GetSetEntityIndex.
            if (use_entity_dic) b_triplets = new BufferredProcessing<Triplet>(10, flow =>
            {
                foreach (var tri in flow)
                {
                    int i_s = GetSetEntityIndex(tri.s);
                    int i_p = GetSetEntityIndex(tri.p);
                    OProp op = tri as OProp;
                    if (op != null)
                    {
                        int i_o = GetSetEntityIndex(op.o);
                        b_ostatements.Add(new RDFOStatement() { osubject = i_s, opredicate = i_p, oobj = i_o });
                    }
                    else
                    {
                        // Every data property gets a fresh literal index.
                        int i_lit = LIT_INDEX;
                        LIT_INDEX++;
                        DProp dp = (DProp)tri;
                        var rdflit = new RDFLiteral() { literalid = i_lit, literalvalue = dp.d };
                        if (dp.lang != null) rdflit.literallang = dp.lang;
                        b_literals.Add(rdflit);
                        b_dstatements.Add(new RDFDStatement() { dsubject = i_s, dpredicate = i_p, data = i_lit });
                    }
                }
            });

            // Triplet buffer, variant 2: entity indexes are resolved per portion with EntityIndexes,
            // and entities that have no index yet are inserted in one statement per portion.
            int tbuffervolume = 20000;
            int tbufferportion = 200;
            if (!use_entity_dic) b_triplets = new BufferredProcessing<Triplet>(tbuffervolume, flow =>
            {
                var query = flow.Select((ent, i) => new { e = ent, i = i }).GroupBy(ei => ei.i / tbufferportion, ei => ei.e);
                // Deferred buffering: results are collected here and pushed to the buffers
                // only after the entity transaction has been closed successfully.
                List<RDFLiteral> l_list = new List<RDFLiteral>();
                List<RDFOStatement> o_list = new List<RDFOStatement>();
                List<RDFDStatement> d_list = new List<RDFDStatement>();

                DbCommand runcommand = RunStart();
                try
                {
                    foreach (var q in query)
                    {
                        var triplets = q.ToArray();
                        var entities = triplets.Select(tr => tr.s)
                            .Concat(triplets.Select(tr => tr.p))
                            .Concat(triplets.OfType<OProp>().Select(op => op.o))
                            .Distinct().OrderBy(x => x).ToArray();

                        Dictionary<string, int> dic = EntityIndexes(entities, runcommand)
                            .ToDictionary(pair => pair.Key, pair => pair.Value);
                        // Materialized before dic is extended below; dictionary lookup replaces
                        // the former linear scan over the index list for every entity.
                        var unindexedentities = entities.Where(en => !dic.ContainsKey(en)).ToArray();

                        StringBuilder sb = new StringBuilder();
                        foreach (var uie in unindexedentities)
                        {
                            if (sb.Length > 0) sb.Append(',');
                            sb.Append("(" + ENT_INDEX + ",'" + uie + "')");
                            dic.Add(uie, ENT_INDEX);
                            ENT_INDEX++;
                        }
                        if (sb.Length > 0)
                        {
                            runcommand.CommandText = "INSERT INTO rdf_entities VALUES " + sb.ToString() + ";";
                            runcommand.ExecuteNonQuery();
                        }

                        foreach (var tr in triplets)
                        {
                            OProp op = tr as OProp;
                            if (op != null)
                            {
                                o_list.Add(new RDFOStatement() { osubject = dic[tr.s], opredicate = dic[tr.p], oobj = dic[op.o] });
                            }
                            else
                            {
                                DProp dp = (DProp)tr;
                                l_list.Add(new RDFLiteral() { literalid = LIT_INDEX, literalvalue = dp.d, literallang = dp.lang });
                                d_list.Add(new RDFDStatement() { dsubject = dic[tr.s], dpredicate = dic[tr.p], data = LIT_INDEX });
                                LIT_INDEX++;
                            }
                        }
                    }
                    RunStop(runcommand);
                }
                catch (Exception)
                {
                    // Same failure handling as the entity/literal buffers: cancel, then let the
                    // original exception propagate unchanged.
                    RunCancel(runcommand);
                    throw;
                }
                foreach (var v in l_list) b_literals.Add(v);
                foreach (var v in o_list) b_ostatements.Add(v);
                foreach (var v in d_list) b_dstatements.Add(v);
            });
        }

        /// <summary>
        /// Inserts <paramref name="rows"/> into <paramref name="table"/> between RunStart and RunStop,
        /// issuing one multi-row INSERT statement per <paramref name="portion"/> rows.
        /// On any failure RunCancel is called before the exception leaves this method.
        /// </summary>
        /// <param name="rows">Rows to insert.</param>
        /// <param name="portion">Maximum number of rows per INSERT statement.</param>
        /// <param name="table">Name of the target table.</param>
        /// <param name="formatRow">Formats one row as a parenthesized SQL value tuple.</param>
        /// <param name="errorMessage">
        /// When not null, failures are rethrown as <see cref="Exception"/> with this message and the
        /// original exception as inner exception; when null, the original exception is rethrown.
        /// </param>
        private void InsertRowsInPortions<T>(IEnumerable<T> rows, int portion, string table, Func<T, string> formatRow, string errorMessage)
        {
            // Rows with the same (index / portion) form one statement; groups are never empty.
            var portions = rows.Select((row, i) => new { row = row, i = i }).GroupBy(ri => ri.i / portion, ri => ri.row);
            DbCommand runcommand = RunStart();
            try
            {
                foreach (var part in portions)
                {
                    runcommand.CommandText = "INSERT INTO " + table + " VALUES " + string.Join(",", part.Select(formatRow).ToArray()) + ";";
                    runcommand.ExecuteNonQuery();
                }
                RunStop(runcommand);
            }
            catch (Exception ex)
            {
                RunCancel(runcommand);
                if (errorMessage == null) throw;
                throw new Exception(errorMessage, ex);
            }
        }
Beispiel #4
0
 /// <summary>
 /// Loads the rows of rdf_literals whose literalid is one of <paramref name="ids"/>.
 /// The ids are pushed through a buffer created with size 200, and one SELECT is issued
 /// for every batch the buffer hands over.
 /// </summary>
 /// <param name="ids">Literal ids to look up.</param>
 /// <returns>The literals found, in the order the queries returned them.</returns>
 public static IEnumerable<RDFLiteral> GetLiteralsForIds(IEnumerable<int> ids)
 {
     List<RDFLiteral> literals = new List<RDFLiteral>();
     BufferredProcessing<int> buff = new BufferredProcessing<int>(200, id_set =>
     {
         string[] conditions = id_set.Select(id => "literalid=" + id).ToArray();
         // An empty batch would produce an invalid "WHERE" clause (the former seedless
         // Aggregate threw InvalidOperationException here), so there is nothing to query.
         if (conditions.Length == 0) return;
         string sql = "SELECT literalid,literalvalue,literallang FROM rdf_literals WHERE " +
             string.Join(" OR ", conditions);
         literals.AddRange(RelationalDb.RunQuery(sql).Select(r =>
             new RDFLiteral()
             {
                 literalid = (int)r[0],
                 literalvalue = (string)r[1],
                 // a database NULL language becomes a null reference
                 literallang = (r[2] is DBNull) ? null : (string)r[2]
             }));
     });
     foreach (var id in ids) buff.Add(id);
     // Process whatever is still left in the buffer.
     buff.Flush();
     return literals;
 }
Beispiel #5
0
 /// <summary>
 /// Loads the rows of rdf_dstatements whose dsubject is one of <paramref name="subjects"/>.
 /// The subjects are pushed through a buffer created with size 200, and one SELECT is issued
 /// for every batch the buffer hands over.
 /// </summary>
 /// <param name="subjects">Subject ids to look up.</param>
 /// <returns>The data statements found, in the order the queries returned them.</returns>
 public static IEnumerable<RDFDStatement> GetDStatementsForSubjects(IEnumerable<int> subjects)
 {
     List<RDFDStatement> dstatements = new List<RDFDStatement>();
     BufferredProcessing<int> buff = new BufferredProcessing<int>(200, subj_set =>
     {
         string[] conditions = subj_set.Select(su => "dsubject=" + su).ToArray();
         // An empty batch would produce an invalid "WHERE" clause (the former seedless
         // Aggregate threw InvalidOperationException here), so there is nothing to query.
         if (conditions.Length == 0) return;
         string sql = "SELECT dsubject,dpredicate,data FROM rdf_dstatements WHERE " +
             string.Join(" OR ", conditions);
         dstatements.AddRange(RelationalDb.RunQuery(sql).Select(r =>
         new RDFDStatement()
         {
             dsubject = (int)r[0],
             dpredicate = (int)r[1],
             data = (int)r[2],
         }));
     });
     foreach (var subj in subjects) buff.Add(subj);
     // Process whatever is still left in the buffer.
     buff.Flush();
     return dstatements;
 }
Beispiel #6
0
 /// <summary>
 /// Loads the rows of rdf_ostatements whose oobj is one of <paramref name="objs"/>.
 /// The object ids are pushed through a buffer created with size 200, and one SELECT is issued
 /// for every batch the buffer hands over.
 /// </summary>
 /// <param name="objs">Object ids to look up.</param>
 /// <returns>The object statements found, in the order the queries returned them.</returns>
 public static IEnumerable<RDFOStatement> GetOStatementsForObjs(IEnumerable<int> objs)
 {
     List<RDFOStatement> ostatements = new List<RDFOStatement>();
     BufferredProcessing<int> buff = new BufferredProcessing<int>(200, obj_set =>
     {
         string[] conditions = obj_set.Select(ob => "oobj=" + ob).ToArray();
         // An empty batch would produce an invalid "WHERE" clause (the former seedless
         // Aggregate threw InvalidOperationException here), so there is nothing to query.
         if (conditions.Length == 0) return;
         string sql = "SELECT osubject,opredicate,oobj FROM rdf_ostatements WHERE " +
             string.Join(" OR ", conditions);
         ostatements.AddRange(RelationalDb.RunQuery(sql).Select(r =>
         new RDFOStatement()
         {
             osubject = (int)r[0],
             opredicate = (int)r[1],
             oobj = (int)r[2],
         }));
     });
     foreach (var obj in objs) buff.Add(obj);
     // Process whatever is still left in the buffer.
     buff.Flush();
     return ostatements;
 }
Beispiel #7
0
 /// <summary>
 /// Scans all triplets of <paramref name="lfd"/> and inserts them into the graph with
 /// <c>SPARQL INSERT INTO GRAPH</c> statements, one statement per batch handed over by a
 /// buffer created with size 200.
 /// </summary>
 /// <param name="lfd">Source of the triplet flow.</param>
 public static void FillDatabaseFromTripletFlowVirtuoso(EntitiesAndTripletFlow lfd)
 {
     int bufferportion = 200;
     BufferredProcessing<Triplet> b_entities = new BufferredProcessing<Triplet>(bufferportion, flow =>
     {
         string[] statements = flow.Select(ent =>
         {
             string subjectAndPredicate = "<" + ent.s + "> <" + ent.p + "> ";
             OProp op = ent as OProp;
             if (op != null) return subjectAndPredicate + "<" + op.o + ">.";

             DProp dp = (DProp)ent;
             // Characters that would break the quoted literal are replaced, not escaped.
             string text = dp.d.Replace('\"', '\'').Replace('\\', '/').Replace('\n', ' ');
             // The language tag belongs after the closing quote ("text"@en); inside the quotes
             // it would be stored as part of the literal text. An empty tag is omitted.
             string langTag = string.IsNullOrEmpty(dp.lang) ? "" : "@" + dp.lang;
             return subjectAndPredicate + "\"" + text + "\"" + langTag + ".";
         }).ToArray();
         // Nothing to insert for an empty batch (the former seedless Aggregate threw here).
         if (statements.Length == 0) return;
         string command = "SPARQL INSERT INTO GRAPH <" + graph + "> {" + string.Join(" ", statements) + "}\n";
         RunCommand(command);
     });
     // Scan the triplet flow and insert the information into the database.
     foreach (var tr in lfd.ScanDatabases())
     {
         b_entities.Add(tr);
     }
     // Process whatever is still left in the buffer.
     b_entities.Flush();
 }
Beispiel #8
0
 /// <summary>
 /// Scans all triplets of <paramref name="lfd"/> and writes entities, literals, data statements
 /// and object statements into the relational tables. Rows are collected in four buffers
 /// (created with size 200); each batch a buffer hands over becomes one multi-row INSERT.
 /// </summary>
 /// <param name="lfd">Source of the triplet flow and of the entity/literal indexes.</param>
 public static void FillDatabaseFromTripletFlow(EntitiesAndTripletFlow lfd)
 {
     const int batchSize = 200;

     var entityBuffer = new BufferredProcessing<RDFEntity>(batchSize, batch =>
     {
         var rows = batch.Select(e => "(" + e.entityid + "," + e.entitytype + ",'" + e.entityvalue + "')");
         RunCommand("INSERT INTO rdf_entities VALUES " + rows.Aggregate((acc, row) => acc + "," + row));
     });
     var literalBuffer = new BufferredProcessing<RDFLiteral>(batchSize, batch =>
     {
         // Single quotes inside the literal text are replaced by double quotes.
         var rows = batch.Select(l => "(" + l.literalid + ",N'" + l.literalvalue.Replace('\'', '"') + "'," + (l.literallang == null ? "NULL" : "'" + l.literallang + "'") + ")");
         RunCommand("INSERT INTO rdf_literals VALUES " + rows.Aggregate((acc, row) => acc + "," + row));
     });
     var dstatementBuffer = new BufferredProcessing<RDFDStatement>(batchSize, batch =>
     {
         var rows = batch.Select(d => "(" + d.dsubject + "," + d.dpredicate + "," + d.data + ")");
         RunCommand("INSERT INTO rdf_dstatements VALUES " + rows.Aggregate((acc, row) => acc + "," + row));
     });
     var ostatementBuffer = new BufferredProcessing<RDFOStatement>(batchSize, batch =>
     {
         var rows = batch.Select(o => "(" + o.osubject + "," + o.opredicate + "," + o.oobj + ")");
         RunCommand("INSERT INTO rdf_ostatements VALUES " + rows.Aggregate((acc, row) => acc + "," + row));
     });

     // Queues an entity together with its type: when the type name is known and TryGetEntity
     // succeeds for it, the type entity is queued first; the entity itself is always queued.
     Action<string, int> queueTypedEntity = (name, id) =>
     {
         int typeId = -1;
         string typeName = lfd.GetTypeOfEntity(name);
         if (typeName != null && lfd.TryGetEntity(typeName, out typeId))
         {
             entityBuffer.Add(new RDFEntity() { entityid = typeId, entityvalue = typeName });
         }
         entityBuffer.Add(new RDFEntity() { entityid = id, entitytype = typeId, entityvalue = name });
     };

     // Now scan the triplet flow and insert the information into the database.
     foreach (var triplet in lfd.ScanDatabases())
     {
         int subjectId;
         int predicateId;
         OProp objectProp = triplet as OProp;
         if (objectProp != null)
         {
             if (lfd.TryGetEntity(objectProp.s, out subjectId))
             {
                 queueTypedEntity(objectProp.s, subjectId);
             }
             if (lfd.TryGetEntity(objectProp.p, out predicateId))
             {
                 // object-property predicates are stored with entitytype -1
                 entityBuffer.Add(new RDFEntity() { entityid = predicateId, entitytype = -1, entityvalue = objectProp.p });
             }
             int objectId;
             if (lfd.TryGetEntity(objectProp.o, out objectId))
             {
                 queueTypedEntity(objectProp.o, objectId);
             }
             ostatementBuffer.Add(new RDFOStatement() { osubject = subjectId, opredicate = predicateId, oobj = objectId });
         }
         else
         {
             DProp dataProp = (DProp)triplet;
             int literalId;
             if (lfd.TryGetLiteral(dataProp.d, out literalId, dataProp.lang))
             {
                 literalBuffer.Add(new RDFLiteral() { literalid = literalId, literalvalue = dataProp.d, literallang = dataProp.lang });
             }
             if (lfd.TryGetEntity(dataProp.s, out subjectId))
             {
                 queueTypedEntity(dataProp.s, subjectId);
             }
             if (lfd.TryGetEntity(dataProp.p, out predicateId))
             {
                 // data-property predicates are stored with entitytype -2
                 entityBuffer.Add(new RDFEntity() { entityid = predicateId, entitytype = -2, entityvalue = dataProp.p });
             }
             dstatementBuffer.Add(new RDFDStatement() { dsubject = subjectId, dpredicate = predicateId, data = literalId });
         }
     }

     // Process whatever is still left in the buffers.
     entityBuffer.Flush();
     literalBuffer.Flush();
     dstatementBuffer.Flush();
     ostatementBuffer.Flush();
 }
 // Четыре действия транзакции ввода. Транзакция начинается с первого действия, потом произвольное количество вторых
 // и/или IncludeXElement и заканчивается третьим. Первое действие генерирует DbCommand для случая использования DbConnection
 // или что-то другое для другой СУБД. Команда должна "протягиваться" далее по действиям для поддержания транзакции. FlushAdapterBuffers сбрасывает буферы и закрывает транзакцию
 // Для СУБД, где невыгодно размельчать БД до триплетов, IncludeXElement переопределяется (без использования FillDatabaseFromTripletFlow)
 /// <summary>
 /// Creates the <c>b_entities</c> triplet buffer. When the buffer hands over its accumulated
 /// triplets, they are split into portions and each portion is sent as one
 /// <c>SPARQL INSERT INTO GRAPH</c> statement between RunStart and RunStop.
 /// A failing portion is logged to <c>dynamicLog</c> (when set) and does not stop the others.
 /// </summary>
 protected override void InitAdapterBuffers()
 {
     // buffer size (first constructor argument of BufferredProcessing)
     int bufferportion = 10000;
     // number of triplets packed into a single INSERT statement
     int portion = 20;
     b_entities = new BufferredProcessing<Triplet>(bufferportion, flow =>
     {
         // Triplets with the same (index / portion) form one statement; groups are never empty.
         var query = flow.Select((ent, i) => new { e = ent, i = i }).GroupBy(ei => ei.i / portion, ei => ei.e);
         VirtuosoCommand trcommand = RunStart();
         foreach (var q in query)
         {
             string data = string.Join(" ", q.Select(tr =>
             {
                 string subjectAndPredicate = "<" + CorrectEntity(tr.s) + "> <" + CorrectEntity(tr.p) + "> ";
                 sema2012m.OProp op = tr as sema2012m.OProp;
                 if (op != null) return subjectAndPredicate + "<" + CorrectEntity(op.o) + "> .";

                 DProp dp = (DProp)tr;
                 // Characters that would break the quoted literal are replaced, not escaped.
                 string text = dp.d.Replace('\"', '\'').Replace('\\', '/').Replace('\n', ' ');
                 // The language tag belongs after the closing quote ("text"@en); inside the quotes
                 // it would be stored as part of the literal text. An empty tag is omitted.
                 string langTag = string.IsNullOrEmpty(dp.lang) ? "" : "@" + dp.lang;
                 return subjectAndPredicate + "\"" + text + "\"" + langTag + " .";
             }).ToArray());
             trcommand.CommandText = "SPARQL INSERT INTO GRAPH <" + graph + "> {" + data + "}\n";
             try
             {
                 trcommand.ExecuteNonQuery();
                 // Count only triplets whose statement was executed successfully.
                 Nreceived += q.Count();
             }
             catch (Exception ex)
             {
                 if (dynamicLog != null) dynamicLog.AppendLine("Err during data loading: " + ex.Message + "\nSPARQL data=" + data);
             }
         }
         RunStop(trcommand);
     });
 }