Example #1
0
        // We would like to be able to parse quickly. With ANTLR there is supposed to be a way to parse faster if we aren't building a tree.
        // Here is a starting point for exploring that.
        /// <summary>
        /// Parses an N-Triples stream one <c>triple</c> rule at a time without building a parse tree,
        /// writing any recognition error attached to a triple context to the console.
        /// </summary>
        /// <param name="data">Stream containing the N-Triples text to parse.</param>
        /// <returns>
        /// The start index of the first token of the last <c>triple</c> context that was parsed
        /// (whether or not it carried an error), or 0 when the input held no triples.
        /// </returns>
        /// <remarks>
        /// The body contains no awaits, so all parsing runs synchronously before the returned task completes.
        /// </remarks>
        private async Task <int> ParseNTStreamNoTree(Stream data)
        {
            var parser = new NTRIPLESParser(ParserUtils.makeNTRIPLESStream(data));

            // Only the context's start token and exception are read below, never its children,
            // so tree construction is switched off -- that is the point of this "no tree" variant.
            parser.BuildParseTree = false;
            parser.AddErrorListener(new ErrorListener());

            int lastValidPosition = 0;

            // Test for EOF before each rule invocation so an empty input never calls triple().
            while (parser.CurrentToken.Type != TokenConstants.Eof)
            {
                NTRIPLESParser.TripleContext cc = parser.triple();

                var error = cc.exception;
                if (error != null)
                {
                    // The offending token is read null-safely so logging can never throw.
                    var offending = error.OffendingToken;
                    Console.WriteLine(error.Message);
                    Console.WriteLine(
                        $"found {offending?.Text} at Line {offending?.Line} offset at {offending?.StartIndex}");
                }

                lastValidPosition = cc.Start.StartIndex;
            }

            return lastValidPosition;
        }
Example #2
0
        // Import NTriples into the database
        /// <summary>
        /// Parses an N-Triples stream one <c>triple</c> rule at a time, converts every error-free triple
        /// into a node via <c>NtriplesListener.ContextToNode</c> and hands it to <c>_db.AddNoQueue</c>.
        /// Triples that carry a recognition error are logged to the console and skipped.
        /// </summary>
        /// <param name="data">Stream containing the N-Triples text to import.</param>
        /// <returns>A task that completes once the last submitted add operation has completed.</returns>
        /// <remarks>
        /// At most one add operation is in flight: parsing and building of the next node overlaps with the
        /// previous add, which is awaited before its pooled buffers are returned and the next add is started.
        /// </remarks>
        private async Task ParseNTriplesStream(Stream data)
        {
            var parser = new NTRIPLESParser(ParserUtils.makeNTRIPLESStream(data));

            // The triple context is handed to ContextToNode below, so tree building stays enabled here.
            parser.BuildParseTree = true;
            parser.AddErrorListener(new ErrorListener());

            // Running totals used to rent buffers of roughly the average size seen so far.
            // They are long so that very large imports cannot overflow into a negative rent size.
            long count   = 1;
            long idSize  = 128;
            long mapSize = 128;

            // default(ValueTask) is an already-completed operation.
            ValueTask previousOperation = default;
            byte[]    pendingIdBuffer   = null;
            byte[]    pendingMapBuffer  = null;

            // Test for EOF before each rule invocation so an empty input never calls triple().
            while (parser.CurrentToken.Type != TokenConstants.Eof)
            {
                NTRIPLESParser.TripleContext cc = parser.triple();

                var error = cc.exception;
                if (error != null)
                {
                    var offending = error.OffendingToken;
                    Console.WriteLine(error.Message);
                    Console.WriteLine(
                        $"found {offending?.Text} at Line {offending?.Line} offset at {offending?.StartIndex}");
                    continue;
                }

                var idBuffer  = ArrayPool<byte>.Shared.Rent((int)(idSize / count));
                var mapBuffer = ArrayPool<byte>.Shared.Rent((int)(mapSize / count));

                var builder    = new FlatBufferBuilder(new ByteBuffer(idBuffer));
                var mapBuilder = new FlatBufferBuilder(new ByteBuffer(mapBuffer));
                var node       = NtriplesListener.ContextToNode(cc, builder, mapBuilder);

                count++;
                idSize += node.Id.ByteBuffer.Length;
                // Track the map builder's buffer separately from the id buffer.
                // NOTE(review): assumes ContextToNode writes the map payload through mapBuilder -- confirm.
                mapSize += mapBuilder.DataBuffer.Length;

                // Always await (never just poll IsCompleted) so a faulted add surfaces its exception.
                // Each ValueTask is awaited exactly once because it is replaced right after.
                await previousOperation;
                ReturnPendingBuffers(pendingIdBuffer, pendingMapBuffer);

                previousOperation = _db.AddNoQueue(node);
                pendingIdBuffer   = idBuffer;
                pendingMapBuffer  = mapBuffer;
            }

            // Drain the final add before reporting completion, then release its buffers.
            await previousOperation;
            ReturnPendingBuffers(pendingIdBuffer, pendingMapBuffer);

            // Gives the buffers of a finished add operation back to the shared pool; no-op when none are pending.
            static void ReturnPendingBuffers(byte[] idBuffer, byte[] mapBuffer)
            {
                if (idBuffer == null)
                {
                    return;
                }

                ArrayPool<byte>.Shared.Return(idBuffer, false);
                ArrayPool<byte>.Shared.Return(mapBuffer, false);
            }
        }