Exemplo n.º 1
0
 /// <summary>
 /// Converts the specified tokens back into text using the tokenizer registered for the language.
 /// </summary>
 /// <param name="tokens">The tokens to turn back into text.</param>
 /// <param name="language">The language whose tokenizer should be used.</param>
 /// <returns>
 /// The text produced by the language's tokenizer, or an empty string when no tokenizer is
 /// registered for <paramref name="language"/> or when <paramref name="tokens"/> is null or empty.
 /// </returns>
 public string Detokenize(Token[] tokens, TokenizerLanguage language)
 {
     // One TryGetValue lookup replaces the ContainsKey + indexer pair (two lookups).
     // The lookup is evaluated first so an invalid key still surfaces the same way as before.
     if (!Languages.TryGetValue(language, out var languageTokenizer) || tokens is null || tokens.Length == 0)
     {
         return(string.Empty);
     }
     return(languageTokenizer.Detokenize(tokens));
 }
Exemplo n.º 2
0
 /// <summary>
 /// Turns a token array back into text with the tokenizer registered for the given language.
 /// </summary>
 /// <param name="tokens">The tokens to turn back into text.</param>
 /// <param name="language">The language used to look up the tokenizer.</param>
 /// <returns>
 /// The tokenizer's output, or an empty string when the language has no registered tokenizer
 /// or <paramref name="tokens"/> is null or empty.
 /// </returns>
 public string Detokenize(Token[] tokens, TokenizerLanguage language)
 {
     // The lookup stays first in the condition so evaluation order matches the guard it replaces.
     if (Languages.TryGetValue(language, out var languageTokenizer) && tokens?.Length > 0)
     {
         return languageTokenizer.Detokenize(tokens);
     }

     return string.Empty;
 }
Exemplo n.º 3
0
        /// <summary>
        /// Splits the given text into tokens using the tokenizer registered for the language.
        /// </summary>
        /// <param name="text">The text to tokenize; null is treated as an empty character sequence.</param>
        /// <param name="language">The language used to look up the tokenizer.</param>
        /// <returns>
        /// The tokens produced by the tokenizer, or an empty array when no tokenizer is registered
        /// for <paramref name="language"/>.
        /// </returns>
        public Token[] Tokenize(string text, TokenizerLanguage language)
        {
            if (Languages.TryGetValue(language, out var languageTokenizer))
            {
                // Null text becomes an empty character array before it is wrapped in the stream.
                char[] characters = text is null ? Array.Empty<char>() : text.ToCharArray();
                return languageTokenizer.Tokenize(new TokenizableStream<char>(characters));
            }

            return Array.Empty<Token>();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Splits the given text into tokens using the tokenizer registered for the language.
        /// </summary>
        /// <param name="text">The text to tokenize; null is treated as an empty character sequence.</param>
        /// <param name="language">The language used to look up the tokenizer.</param>
        /// <returns>
        /// The tokens produced by the tokenizer, or an empty array when no tokenizer is registered
        /// for <paramref name="language"/>.
        /// </returns>
        public Token[] Tokenize(string text, TokenizerLanguage language)
        {
            // One TryGetValue lookup replaces the ContainsKey + indexer pair (two lookups).
            if (!Languages.TryGetValue(language, out var languageTokenizer))
            {
                return(Array.Empty <Token>());
            }

            // Never hand a null array to the stream: a null text is tokenized as empty input.
            var Stream = new TokenizableStream <char>(text?.ToCharArray() ?? Array.Empty <char>());

            return(languageTokenizer.Tokenize(Stream));
        }
Exemplo n.º 5
0
 /// <summary>
 /// Creates a <see cref="Document"/> from the supplied parts and assigns it a freshly
 /// generated <see cref="Guid"/> as its ID.
 /// </summary>
 /// <param name="sentences">The sentences stored on the document.</param>
 /// <param name="tokens">The tokens stored on the document.</param>
 /// <param name="originalText">The original text stored on the document.</param>
 /// <param name="featureExtractor">The feature extractor stored on the document.</param>
 /// <param name="textSummarizer">The text summarizer stored on the document.</param>
 /// <param name="tokenizer">The tokenizer stored on the document.</param>
 /// <param name="tokenizerLanguage">The tokenizer language stored on the document.</param>
 public Document(
     Sentence[] sentences,
     Token[] tokens,
     string originalText,
     IFeatureExtractor featureExtractor,
     ITextSummarizer textSummarizer,
     ITokenizer tokenizer,
     TokenizerLanguage tokenizerLanguage)
 {
     // Every document gets its own identifier; none of the arguments are validated here.
     ID = Guid.NewGuid();

     // Assignments follow the parameter order.
     Sentences = sentences;
     Tokens = tokens;
     OriginalText = originalText;
     FeatureExtractor = featureExtractor;
     TextSummarizer = textSummarizer;
     Tokenizer = tokenizer;
     TokenizerLanguage = tokenizerLanguage;
 }
Exemplo n.º 6
0
        /// <summary>
        /// Converts the specified sentences back into a single text, detokenizing each sentence's
        /// tokens with the tokenizer registered for the language and joining the results with a
        /// single space.
        /// </summary>
        /// <param name="sentences">The sentences whose tokens are turned back into text.</param>
        /// <param name="language">The language used to look up the tokenizer.</param>
        /// <returns>
        /// The joined text, or an empty string when no tokenizer is registered for
        /// <paramref name="language"/> or <paramref name="sentences"/> is null or empty.
        /// </returns>
        public string Detokenize(Sentence[] sentences, TokenizerLanguage language)
        {
            if (!Languages.TryGetValue(language, out var Tokenizer) || sentences is null || sentences.Length == 0)
            {
                return(string.Empty);
            }
            var Builder = ObjectPool.Get();

            // try/finally guarantees the pooled builder goes back to the pool even when
            // detokenizing a sentence throws; previously it was only returned on success.
            try
            {
                Builder.Append(Tokenizer.Detokenize(sentences[0].Tokens));
                for (int x = 1; x < sentences.Length; ++x)
                {
                    Builder.Append(" ").Append(Tokenizer.Detokenize(sentences[x].Tokens));
                }

                // The string is materialized before the finally block hands the builder back.
                return(Builder.ToString());
            }
            finally
            {
                ObjectPool.Return(Builder);
            }
        }
Exemplo n.º 7
0
 /// <summary>
 /// Stores the given tokenizer language on this pipeline.
 /// </summary>
 /// <param name="tokenizer">The tokenizer language to use.</param>
 /// <returns>This pipeline instance.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="tokenizer"/> is null.</exception>
 public Pipeline With(TokenizerLanguage tokenizer)
 {
     if (tokenizer is null)
     {
         throw new ArgumentNullException(nameof(tokenizer));
     }

     TokenizerLanguage = tokenizer;
     return this;
 }