コード例 #1
0
        /// <summary>
        /// Builds a <see cref="Document"/> from a <see cref="LightDocument"/>.
        /// Author, time, id and title are copied; every sentence becomes a
        /// <see cref="SentenceItem"/> and every word is created through
        /// <paramref name="factory"/> and wrapped by <c>WordExFactory.Construct</c>.
        /// </summary>
        /// <param name="document">Light document to convert.</param>
        /// <param name="factory">Factory used to create a word from its text and tag.</param>
        /// <returns>The newly constructed document.</returns>
        public static Document Construct(this LightDocument document, IWordFactory factory)
        {
            var result = new Document(document.Text);

            result.Author       = document.Author;
            result.DocumentTime = document.DocumentTime;
            result.Id           = document.Id;
            // The title has to land on the result; the previous code assigned
            // document.Title to itself, so the title was silently dropped.
            result.Title        = document.Title;

            foreach (var sentence in document.Sentences)
            {
                var resultSentence = new SentenceItem(sentence.Text);
                result.Add(resultSentence);

                // A sentence without a words array still produces an (empty) sentence item.
                if (sentence.Words == null)
                {
                    continue;
                }

                for (var i = 0; i < sentence.Words.Length; i++)
                {
                    var word     = sentence.Words[i];
                    var wordItem = factory.CreateWord(word.Text, word.Tag);
                    // The word index is the position of the word inside its sentence.
                    wordItem.WordIndex = i;
                    WordEx wordData = WordExFactory.Construct(wordItem);
                    wordData.Phrase = word.Phrase;
                    resultSentence.Add(wordData);
                }
            }

            return result;
        }
コード例 #2
0
        /// <summary>
        /// Extracts a document from the request text with a <see cref="SimpleWordsExtraction"/>
        /// built on a tokenizer obtained from <c>sentenceTokenizer.Create(true, false)</c>,
        /// and returns its light representation.
        /// </summary>
        /// <param name="request">Request whose <c>Document.Text</c> is processed.</param>
        /// <returns>The light document produced by <c>GetLight()</c>.</returns>
        protected override LightDocument ActualProcess(ParseRequest request)
        {
            var extraction = new SimpleWordsExtraction(sentenceTokenizer.Create(true, false));
            return extraction.GetDocument(request.Document.Text).GetLight();
        }
コード例 #3
0
        /// <summary>
        /// Stores a JSON clone of <paramref name="document"/> in the memory cache under two
        /// keys: the value of <c>GetId()</c> and the value of <c>GetTextId()</c>.
        /// Entries use a sliding expiration of one minute.
        /// </summary>
        /// <param name="document">Document to cache; the caller's instance itself is not stored.</param>
        /// <returns>A completed task whose result is always <c>true</c>.</returns>
        public Task<bool> Save(LightDocument document)
        {
            var expiration = new MemoryCacheEntryOptions
            {
                SlidingExpiration = TimeSpan.FromMinutes(1)
            };

            // Cache a clone, so the stored entry is a different instance than the caller's document.
            LightDocument snapshot = document.CloneJson();

            cache.Set(snapshot.GetId(), snapshot, expiration);
            cache.Set(snapshot.GetTextId(), snapshot, expiration);

            return Task.FromResult(true);
        }
コード例 #4
0
        /// <summary>
        /// Processes a parse request: trims the text, makes sure the document has an id,
        /// returns a clone of the cached result when one exists, and otherwise runs
        /// <c>ActualProcess</c> (or builds an empty document for empty text) and saves
        /// the outcome to the cache.
        /// </summary>
        /// <param name="request">Request to process. Its <c>Document.Text</c> is replaced by the
        /// trimmed text and its <c>Document.Id</c> is generated when blank.</param>
        /// <returns>The processed document carrying the id, time, author and title of the request;
        /// on a cache hit only the id is taken from the request.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="request"/> or its <c>Document</c> is <c>null</c>.
        /// </exception>
        public async Task<LightDocument> Process(ParseRequest request)
        {
            if (request?.Document == null)
            {
                throw new ArgumentNullException(nameof(request));
            }

            using (new PerformanceTrace(item => log.LogDebug(item), "Process"))
            {
                // A missing text is handled like an empty one instead of failing with a
                // NullReferenceException on Trim(); it then follows the empty-document path below.
                var text = (request.Document.Text ?? string.Empty).Trim();
                request.Document.Text = text;

                if (string.IsNullOrWhiteSpace(request.Document.Id))
                {
                    var tag = request.Document.Id = Guid.NewGuid().ToString();
                    log.LogDebug("Key not found on document. generating: {0}...", tag);
                }

                LightDocument document = await cache.GetCached(request.Document).ConfigureAwait(false);

                if (document != null)
                {
                    log.LogDebug("Cache HIT");
                    // Hand out a clone carrying the id of the current request.
                    document    = document.CloneJson();
                    document.Id = request.Document.Id;
                    return document;
                }

                if (!string.IsNullOrEmpty(text))
                {
                    // The synchronous ActualProcess is pushed to the thread pool.
                    document = await Task.Run(() => ActualProcess(request)).ConfigureAwait(false);
                }
                else
                {
                    document           = new LightDocument();
                    document.Sentences = Array.Empty<LightSentence>();
                    log.LogInformation("Empty document detected");
                }

                document.Id           = request.Document.Id;
                document.DocumentTime = request.Document.DocumentTime;
                document.Author       = request.Document.Author;
                document.Title        = request.Document.Title;

                // The document is returned whether or not the save reports success,
                // so the boolean result is intentionally not inspected.
                await cache.Save(document).ConfigureAwait(false);
                return document;
            }
        }
コード例 #5
0
        /// <summary>
        /// Builds a <see cref="Document"/> from a <see cref="LightDocument"/>.
        /// Author, time, id and title are copied; every sentence becomes a
        /// <see cref="SentenceItem"/> and every word is created through
        /// <paramref name="factory"/>, wrapped by <c>WordExFactory.Construct</c> and
        /// given its phrase and entity information.
        /// </summary>
        /// <param name="document">Light document to convert.</param>
        /// <param name="factory">Factory used to create a word from its text and tag.</param>
        /// <returns>The newly constructed document.</returns>
        public static Document Construct(this LightDocument document, IWordFactory factory)
        {
            var result = new Document(document.Text);

            result.Author       = document.Author;
            result.DocumentTime = document.DocumentTime;
            result.Id           = document.Id;
            // The title has to land on the result; the previous code assigned
            // document.Title to itself, so the title was silently dropped.
            result.Title        = document.Title;

            foreach (var sentence in document.Sentences)
            {
                var resultSentence = new SentenceItem(sentence.Text);
                result.Add(resultSentence, false);

                // A sentence without a words array still produces an (empty) sentence item.
                if (sentence.Words == null)
                {
                    continue;
                }

                for (var i = 0; i < sentence.Words.Length; i++)
                {
                    var word     = sentence.Words[i];
                    var wordItem = factory.CreateWord(word.Text, word.Tag);
                    // The word index is the position of the word inside its sentence.
                    wordItem.WordIndex = i;
                    WordEx wordData = WordExFactory.Construct(wordItem);
                    wordData.Phrase = word.Phrase;

                    if (string.IsNullOrEmpty(word.Entity))
                    {
                        wordData.EntityType = NamedEntities.None;
                    }
                    else if (entityCache.TryGetValue(word.Entity, out var entity))
                    {
                        // Entity name found in the cache: use the mapped entity type.
                        wordData.EntityType = entity;
                    }
                    else
                    {
                        // Entity name not in the cache: keep the raw name as a custom entity.
                        wordData.CustomEntity = word.Entity;
                    }

                    resultSentence.Add(wordData);
                }
            }

            return result;
        }
コード例 #6
0
        /// <summary>
        /// Delegates the request to the current splitter, creating one through
        /// <c>factory</c> when none exists, and disposes the splitter once the
        /// processed counter reaches <c>maxProcessing</c>.
        /// </summary>
        /// <remarks>
        /// The null check and the replacement of the <c>splitter</c> field are not
        /// guarded by a lock inside this method.
        /// </remarks>
        /// <param name="request">Request forwarded to the splitter.</param>
        /// <returns>The document produced by the splitter.</returns>
        public async Task<LightDocument> Process(ParseRequest request)
        {
            if (splitter == null)
            {
                log.LogInformation("Constructing NEW {0} splitter...", id);
                // A fresh splitter starts counting from zero again.
                Interlocked.Exchange(ref current, 0);
                splitter = factory();
            }

            LightDocument processed = await splitter.Process(request).ConfigureAwait(false);

            var handled = Interlocked.Increment(ref current);
            if (handled < maxProcessing)
            {
                return processed;
            }

            // Limit reached: drop the splitter so that the next call constructs a new one.
            splitter.Dispose();
            splitter = null;
            return processed;
        }
コード例 #7
0
        /// <summary>
        /// Saves the document to the local store first and then adds it as a record through
        /// <c>manager.Client</c>, keyed by the document id and indexed under "Index:All",
        /// the <c>GetId()</c> value and the <c>GetTextId()</c> value.
        /// </summary>
        /// <param name="document">Document to save; must have a non-empty <c>Id</c>.</param>
        /// <returns><c>true</c> once both saves have completed.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="document"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">The document id is <c>null</c> or empty.</exception>
        public async Task<bool> Save(LightDocument document)
        {
            if (document is null)
            {
                throw new ArgumentNullException(nameof(document));
            }

            if (string.IsNullOrEmpty(document.Id))
            {
                throw new ArgumentException("Value cannot be null or empty id.", nameof(document.Id));
            }

            await local.Save(document).ConfigureAwait(false);

            var recordKey = new RepositoryKey(this, new ObjectKey(document.Id));

            var allIndex = new IndexKey(this, "Index:All", false);
            recordKey.AddIndex(allIndex);

            var idIndex = new IndexKey(this, $"Index:{document.GetId()}", true);
            recordKey.AddIndex(idIndex);

            var textIndex = new IndexKey(this, $"Index:{document.GetTextId()}", true);
            recordKey.AddIndex(textIndex);

            await manager.Client.AddRecord(recordKey, document).ConfigureAwait(false);

            return true;
        }
コード例 #8
0
        /// <summary>
        /// Verifies the cache round trip: a null argument throws, an unsaved document is
        /// not found, and a saved document is returned as a different instance, including
        /// after the id of the lookup document has been changed.
        /// </summary>
        public async Task Test()
        {
            Assert.Throws<ArgumentNullException>(() => instance.GetCached(null));

            var source = new LightDocument
            {
                Id   = "Test",
                Text = "Test"
            };

            // Nothing has been saved yet, so the lookup must come back empty.
            var cached = await instance.GetCached(source).ConfigureAwait(false);
            Assert.IsNull(cached);

            await instance.Save(source).ConfigureAwait(false);

            // After saving, the lookup returns a separate instance with the same text.
            cached = await instance.GetCached(source).ConfigureAwait(false);
            Assert.AreNotSame(source, cached);
            Assert.AreEqual("Test", cached.Text);

            // With a different id but unchanged text the document is still expected to be found.
            source.Id = "2";
            cached = await instance.GetCached(source).ConfigureAwait(false);
            Assert.AreNotSame(source, cached);
            Assert.AreEqual("Test", cached.Text);
        }
コード例 #9
0
        /// <summary>
        /// Splits the request text into sentences, repairs each one, runs it through
        /// <c>ProcessSentence</c> and returns a light document holding the non-null results.
        /// </summary>
        /// <param name="request">Request whose <c>Document.Text</c> is processed.</param>
        /// <returns>A document with the original text and the processed sentences.</returns>
        protected override LightDocument ActualProcess(ParseRequest request)
        {
            // NOT Thread Safe
            var parts = sentenceSplitter.Split(request.Document.Text).ToArray();

            var output = new LightDocument();
            output.Text = request.Document.Text;

            // Sized for the case where every sentence yields a result; trimmed below otherwise.
            var collected = new LightSentence[parts.Length];
            var count     = 0;

            for (var position = 0; position < parts.Length; position++)
            {
                var original = parts[position];
                var repaired = repairHandler.Repair(original);
                if (original != repaired)
                {
                    log.LogTrace("Sentence repaired!");
                }

                var processed = ProcessSentence(repaired);
                if (processed == null)
                {
                    // Sentences that produce no result are left out.
                    continue;
                }

                collected[count++] = processed;
            }

            if (count < collected.Length)
            {
                Array.Resize(ref collected, count);
            }

            output.Sentences = collected;
            return output;
        }
コード例 #10
0
 /// <summary>
 /// Assigns a fresh <see cref="LightDocument"/> to the <c>document</c> field.
 /// </summary>
 public void Setup() => document = new LightDocument();
コード例 #11
0
        /// <summary>
        /// Splits the request text into sentences, repairs, tokenizes, tags and chunks each
        /// one, and converts the outcome into a light document. Every word carries its tag
        /// and, when a chunk covers its position, the type of that chunk as its phrase.
        /// </summary>
        /// <param name="request">Request whose <c>Document.Text</c> is processed.</param>
        /// <returns>A document with the original text and one entry per kept sentence;
        /// the sentences array never contains <c>null</c> entries.</returns>
        protected override LightDocument ActualProcess(ParseRequest request)
        {
            var sentences        = sentenceSplitter.Split(request.Document.Text).ToArray();
            var sentenceDataList = new List<SentenceData>(sentences.Length);

            foreach (var sentence in sentences)
            {
                var text = repairHandler.Repair(sentence);
                if (sentence != text)
                {
                    log.LogDebug("Sentence repaired!");
                }

                var sentenceData = new SentenceData {
                    Text = text
                };
                sentenceData.Tokens = tokenizer.Tokenize(sentenceData.Text);
                if (sentenceData.Tokens.Length <= 0)
                {
                    // Sentences without tokens are dropped before tagging.
                    continue;
                }

                sentenceData.Tags   = posTagger.Tag(sentenceData.Tokens);
                sentenceData.Chunks = chunker.ChunkAsSpans(sentenceData.Tokens, sentenceData.Tags).ToArray();
                sentenceDataList.Add(sentenceData);
            }

            // Collected in a list so that a skipped sentence cannot leave a null hole
            // in the resulting array (the previous pre-sized array did).
            var lightSentences = new List<LightSentence>(sentenceDataList.Count);
            foreach (SentenceData sentenceData in sentenceDataList)
            {
                if (string.IsNullOrWhiteSpace(sentenceData.Text))
                {
                    continue;
                }

                var currentSentence = new LightSentence();
                currentSentence.Text = sentenceData.Text;

                // Map every token index in [Start, End) of a chunk to that chunk;
                // a later chunk overwrites an earlier one at the same index.
                var chunks = new Dictionary<int, Span>();
                foreach (Span chunk in sentenceData.Chunks)
                {
                    for (var i = chunk.Start; i < chunk.End; i++)
                    {
                        chunks[i] = chunk;
                    }
                }

                currentSentence.Words = new LightWord[sentenceData.Tokens.Length];
                for (var i = 0; i < sentenceData.Tokens.Length; i++)
                {
                    var wordData = new LightWord();
                    wordData.Tag             = sentenceData.Tags[i];
                    wordData.Text            = sentenceData.Tokens[i];
                    currentSentence.Words[i] = wordData;

                    if (chunks.TryGetValue(i, out Span chunk))
                    {
                        wordData.Phrase = chunk.Type;
                    }
                }

                lightSentences.Add(currentSentence);
            }

            var document = new LightDocument();

            document.Text      = request.Document.Text;
            document.Sentences = lightSentences.ToArray();

            return document;
        }