/// <summary>
/// Runs a "not only ... but also" sentence through <c>handler.Repair</c> and then
/// <c>repairHandler.Repair</c>, and asserts that the chained result is the simplified
/// "... and ..." sentence.
/// </summary>
/// <remarks>
/// The check is repeated 1000 times; presumably to confirm the handlers give the same
/// answer on repeated calls (e.g. no stale internal state) - TODO confirm intent.
/// </remarks>
public void RepairNotOnly()
{
    const string input = "We not only brought this book but also liked it";
    const string expected = "We brought this book and liked it";

    var remaining = 1000;
    while (remaining > 0)
    {
        // First pass through the plain handler, second pass through the repair handler.
        var firstPass = handler.Repair(input);
        var actual = repairHandler.Repair(firstPass);

        Assert.AreEqual(expected, actual);
        remaining--;
    }
}
/// <summary>
/// Splits the request's document text into sentences, repairs each one, converts it with
/// <c>ProcessSentence</c>, and returns a <see cref="LightDocument"/> holding the original
/// text and every sentence for which <c>ProcessSentence</c> returned a non-null result.
/// </summary>
/// <param name="request">Parse request whose <c>Document.Text</c> is processed.</param>
/// <returns>
/// A document whose <c>Sentences</c> array contains only successfully processed sentences,
/// in their original order.
/// </returns>
protected override LightDocument ActualProcess(ParseRequest request)
{
    // NOT thread safe (original author's note).
    var sourceText = request.Document.Text;
    var rawSentences = sentenceSplitter.Split(sourceText).ToArray();

    var document = new LightDocument
    {
        Text = sourceText,
        Sentences = new LightSentence[rawSentences.Length]
    };

    // Fill the pre-sized array from the front; 'kept' counts the slots actually used.
    var slots = document.Sentences;
    var kept = 0;
    for (var position = 0; position < rawSentences.Length; position++)
    {
        var raw = rawSentences[position];
        var repaired = repairHandler.Repair(raw);
        if (raw != repaired)
        {
            log.LogTrace("Sentence repaired!");
        }

        var processed = ProcessSentence(repaired);
        if (processed == null)
        {
            // Nothing usable came back for this sentence - leave no slot for it.
            continue;
        }

        slots[kept] = processed;
        kept++;
    }

    // Some sentences were dropped: shrink the array so it carries no trailing nulls.
    if (kept < slots.Length)
    {
        Array.Resize(ref slots, kept);
        document.Sentences = slots;
    }

    return document;
}
/// <summary>
/// Splits the request's document text into sentences, repairs each one, then tokenizes,
/// POS-tags and chunks it, and returns a <see cref="LightDocument"/> containing one
/// <see cref="LightSentence"/> per usable sentence.
/// </summary>
/// <remarks>
/// A sentence is left out of the result when the tokenizer yields no tokens for it or when
/// its repaired text is null/whitespace. The returned <c>Sentences</c> array is sized to the
/// sentences actually produced, so it never contains null entries.
/// </remarks>
/// <param name="request">Parse request whose <c>Document.Text</c> is processed.</param>
/// <returns>The document with its original text and the processed sentences in order.</returns>
protected override LightDocument ActualProcess(ParseRequest request)
{
    var sentences = sentenceSplitter.Split(request.Document.Text).ToArray();

    // Phase 1: run the NLP pipeline (tokenize -> tag -> chunk) over every repaired sentence.
    var sentenceDataList = new List<SentenceData>(sentences.Length);
    foreach (var sentence in sentences)
    {
        var text = repairHandler.Repair(sentence);
        if (sentence != text)
        {
            log.LogDebug("Sentence repaired!");
        }

        var sentenceData = new SentenceData { Text = text };
        sentenceData.Tokens = tokenizer.Tokenize(sentenceData.Text);
        if (sentenceData.Tokens.Length == 0)
        {
            // Nothing to tag or chunk.
            continue;
        }

        sentenceData.Tags = posTagger.Tag(sentenceData.Tokens);
        sentenceData.Chunks = chunker.ChunkAsSpans(sentenceData.Tokens, sentenceData.Tags).ToArray();
        sentenceDataList.Add(sentenceData);
    }

    // Phase 2: convert pipeline output into the light-weight document model.
    // Results are collected in a list so that skipped sentences do not leave
    // null holes in the final array.
    var lightSentences = new List<LightSentence>(sentenceDataList.Count);
    foreach (SentenceData data in sentenceDataList)
    {
        if (string.IsNullOrWhiteSpace(data.Text))
        {
            continue;
        }

        // Map every token index covered by a chunk to that chunk.
        // If chunks overlap, the later chunk wins for the shared indices.
        var chunkByTokenIndex = new Dictionary<int, Span>();
        foreach (Span span in data.Chunks)
        {
            for (var i = span.Start; i < span.End; i++)
            {
                chunkByTokenIndex[i] = span;
            }
        }

        var words = new LightWord[data.Tokens.Length];
        for (var i = 0; i < data.Tokens.Length; i++)
        {
            var word = new LightWord();
            word.Tag = data.Tags[i];
            word.Text = data.Tokens[i];

            // Tokens outside every chunk keep the default Phrase value.
            if (chunkByTokenIndex.TryGetValue(i, out Span chunk))
            {
                word.Phrase = chunk.Type;
            }

            words[i] = word;
        }

        var currentSentence = new LightSentence();
        currentSentence.Text = data.Text;
        currentSentence.Words = words;
        lightSentences.Add(currentSentence);
    }

    var document = new LightDocument();
    document.Text = request.Document.Text;
    document.Sentences = lightSentences.ToArray();
    return document;
}