/// <summary>
/// Converts the specified tokens back into text using the tokenizer registered for the language.
/// </summary>
/// <param name="tokens">The tokens.</param>
/// <param name="language">The language.</param>
/// <returns>
/// The resulting text, or an empty string when no tokenizer is registered for
/// <paramref name="language"/> or when <paramref name="tokens"/> is null or empty.
/// </returns>
public string Detokenize(Token[] tokens, TokenizerLanguage language)
{
    // One TryGetValue replaces the ContainsKey + indexer pair (a double lookup).
    // Null/empty input short-circuits so the language tokenizer never receives a null array.
    if (!Languages.TryGetValue(language, out var tokenizer) || tokens is null || tokens.Length == 0)
    {
        return string.Empty;
    }

    return tokenizer.Detokenize(tokens);
}
/// <summary>
/// Turns the supplied tokens back into a single string.
/// </summary>
/// <param name="tokens">The tokens.</param>
/// <param name="language">The language.</param>
/// <returns>
/// The resulting text; an empty string if the language has no registered tokenizer
/// or if there are no tokens.
/// </returns>
public string Detokenize(Token[] tokens, TokenizerLanguage language)
{
    // The lookup runs first; "tokens?.Length > 0" is false for both null and empty arrays.
    if (Languages.TryGetValue(language, out var languageTokenizer) && tokens?.Length > 0)
    {
        return languageTokenizer.Detokenize(tokens);
    }

    return string.Empty;
}
/// <summary>
/// Splits the specified text into tokens using the tokenizer registered for the language.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="language">The language.</param>
/// <returns>
/// The tokens found; an empty array if the language has no registered tokenizer.
/// </returns>
public Token[] Tokenize(string text, TokenizerLanguage language)
{
    if (Languages.TryGetValue(language, out var languageTokenizer))
    {
        // Null text is treated as an empty character sequence.
        var characters = text is null ? Array.Empty<char>() : text.ToCharArray();
        return languageTokenizer.Tokenize(new TokenizableStream<char>(characters));
    }

    return Array.Empty<Token>();
}
/// <summary>
/// Tokenizes the specified text using the tokenizer registered for the language.
/// </summary>
/// <param name="text">The text. A null value is treated as empty text.</param>
/// <param name="language">The language.</param>
/// <returns>
/// The tokens found, or an empty array when no tokenizer is registered for
/// <paramref name="language"/>.
/// </returns>
public Token[] Tokenize(string text, TokenizerLanguage language)
{
    // One TryGetValue replaces the ContainsKey + indexer pair (a double lookup).
    if (!Languages.TryGetValue(language, out var tokenizer))
    {
        return Array.Empty<Token>();
    }

    // Fall back to an empty buffer so the stream is never constructed over a null array.
    var stream = new TokenizableStream<char>(text?.ToCharArray() ?? Array.Empty<char>());
    return tokenizer.Tokenize(stream);
}
/// <summary>
/// Creates a new <see cref="Document"/> from already-computed sentences and tokens,
/// storing the services passed in and assigning the document a fresh identifier.
/// </summary>
/// <param name="sentences">The sentences.</param>
/// <param name="tokens">The tokens.</param>
/// <param name="originalText">The original text.</param>
/// <param name="featureExtractor">The feature extractor.</param>
/// <param name="textSummarizer">The text summarizer.</param>
/// <param name="tokenizer">The tokenizer.</param>
/// <param name="tokenizerLanguage">The tokenizer language.</param>
public Document(
    Sentence[] sentences,
    Token[] tokens,
    string originalText,
    IFeatureExtractor featureExtractor,
    ITextSummarizer textSummarizer,
    ITokenizer tokenizer,
    TokenizerLanguage tokenizerLanguage)
{
    // Each instance gets its own newly generated identifier.
    ID = Guid.NewGuid();

    // Content of the document.
    Sentences = sentences;
    Tokens = tokens;
    OriginalText = originalText;

    // Services and settings supplied by the caller.
    FeatureExtractor = featureExtractor;
    TextSummarizer = textSummarizer;
    Tokenizer = tokenizer;
    TokenizerLanguage = tokenizerLanguage;
}
/// <summary>
/// Detokenizes each sentence and joins the results with a single space.
/// </summary>
/// <param name="sentences">The sentences.</param>
/// <param name="language">The language.</param>
/// <returns>
/// The resulting text, or an empty string when no tokenizer is registered for
/// <paramref name="language"/> or when <paramref name="sentences"/> is null or empty.
/// </returns>
public string Detokenize(Sentence[] sentences, TokenizerLanguage language)
{
    if (!Languages.TryGetValue(language, out var tokenizer) || sentences is null || sentences.Length == 0)
    {
        return string.Empty;
    }

    var builder = ObjectPool.Get();
    try
    {
        // The first sentence is appended without a separator; the rest are space-prefixed.
        builder.Append(tokenizer.Detokenize(sentences[0].Tokens));
        for (var index = 1; index < sentences.Length; ++index)
        {
            builder.Append(" ").Append(tokenizer.Detokenize(sentences[index].Tokens));
        }

        return builder.ToString();
    }
    finally
    {
        // Hand the builder back to the pool even if detokenizing a sentence throws.
        ObjectPool.Return(builder);
    }
}
/// <summary>
/// Assigns the tokenizer language on this pipeline.
/// </summary>
/// <param name="tokenizer">The tokenizer language to use.</param>
/// <returns>This.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="tokenizer"/> is null.
/// </exception>
public Pipeline With(TokenizerLanguage tokenizer)
{
    if (tokenizer is null)
    {
        throw new ArgumentNullException(nameof(tokenizer));
    }

    TokenizerLanguage = tokenizer;
    return this;
}