C# (CSharp) JapaneseTokenizerFactory.Createの例

プログラミング言語: C# (CSharp)

メソッド/関数: Create

hotexamples.comのコード掲載数: 2

C# (CSharp) JapaneseTokenizerFactory.Create - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC# (CSharp)のJapaneseTokenizerFactory.Createの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

Create(2)

よく使われるメソッド

Create (2)

コード例 #1

ファイルを表示

ファイル: TokenizerDemo.cs プロジェクト: bluemarsh/tandoku

        /// <summary>
        /// Tokenizes text piped in on standard input and writes a table to the console with one
        /// row per token (term, start/end offset, position increment/length, base form,
        /// part of speech, inflection form/type, pronunciation/reading). After each row, the
        /// glosses of any dictionary entries found for the term are printed on indented lines.
        /// </summary>
        /// <remarks>
        /// When standard input is not redirected, a message is printed and nothing is tokenized.
        /// </remarks>
        internal static void Dump()
        {
            if (!Console.IsInputRedirected)
            {
                Console.WriteLine("tandoku: expecting input");
                return;
            }

            using (new Utf8EncodingOverride())
            {
                // Ignore input encoding from console, use UTF-8 by default
                using var r = new StreamReader(Console.OpenStandardInput());

                // TODO: try out different modes (normal vs search)

                var factory = new JapaneseTokenizerFactory(new Dictionary<string, string>());

                // The tokenizer is a disposable token stream: scope it so it is released even
                // when IncrementToken or a dictionary lookup throws part-way through the input.
                using var tokenizer = factory.Create(r);

                var termAttr       = tokenizer.GetAttribute<ICharTermAttribute>();
                var offsetAttr     = tokenizer.GetAttribute<IOffsetAttribute>();
                var posIncrAttr    = tokenizer.GetAttribute<IPositionIncrementAttribute>();
                var posLenAttr     = tokenizer.GetAttribute<IPositionLengthAttribute>();
                var baseFormAttr   = tokenizer.GetAttribute<IBaseFormAttribute>();
                var inflectionAttr = tokenizer.GetAttribute<IInflectionAttribute>();
                var posAttr        = tokenizer.GetAttribute<IPartOfSpeechAttribute>();
                var readingAttr    = tokenizer.GetAttribute<IReadingAttribute>();

                tokenizer.Reset();

                // Header row; column widths mirror the per-token rows written below.
                Console.Write($"{"Num",-3}  {"Term",-10}");
                Console.Write($" {"St",-2}/{"En",-2}");
                Console.Write($" {"Ic",-2} {"Ln",-2}");
                Console.Write($" {"Base",-10}");
                Console.Write($" {"POS",-18}");
                Console.Write($" {"InflForm",-8} / {"InflType",-16}");
                Console.Write($" {"Pronunctn",-10} / {"Reading",-10}");
                Console.WriteLine();

                int i = 0;
                while (tokenizer.IncrementToken())
                {
                    Console.Write($"{i++,3}: {Align(termAttr, 10)}");
                    Console.Write($" {offsetAttr.StartOffset,2}/{offsetAttr.EndOffset,2}");
                    Console.Write($" {posIncrAttr.PositionIncrement,2} {posLenAttr.PositionLength,2}");
                    Console.Write($" {Align(baseFormAttr.GetBaseForm(), 10)}");
                    Console.Write($" {Align(posAttr.GetPartOfSpeech(), 18)}");
                    Console.Write($" {Align(inflectionAttr.GetInflectionForm(), 8)} / {Align(inflectionAttr.GetInflectionType(), 16)}");
                    Console.Write($" {Align(readingAttr.GetPronunciation(), 10)} / {Align(readingAttr.GetReading(), 10)}");
                    Console.WriteLine();

                    // Materialize the term once; it is used for both lookups below.
                    var term = termAttr.ToString();

                    // Try the kanji lookup first and fall back to the reading lookup only if it fails.
                    if (DictionaryLookupDemo.TryLookupKanji(term, out var entries) ||
                        DictionaryLookupDemo.TryLookupReading(term, out entries))
                    {
                        foreach (var entry in entries)
                        {
                            Console.WriteLine($"    {string.Join("; ", entry.Glosses)}");
                        }
                    }
                }

                // Signal end-of-stream after the last token has been consumed.
                tokenizer.End();
            }
        }

コード例 #2

ファイルを表示

ファイル: Tokenizer.cs プロジェクト: bluemarsh/tandoku

        /// <summary>
        /// Tokenizes <paramref name="text"/> and yields one <see cref="Token"/> per token produced
        /// by the tokenizer, in stream order.
        /// </summary>
        /// <param name="text">The text to tokenize.</param>
        /// <returns>
        /// A lazily evaluated sequence of tokens. <c>Ordinal</c> starts at 1 and increases by one
        /// per token; the remaining properties are copied from the tokenizer's attributes.
        /// </returns>
        /// <remarks>
        /// This is an iterator: no work happens until the result is enumerated. The tokenizer is
        /// released when enumeration completes or when the enumerator is disposed early
        /// (for example by <c>break</c> in a <c>foreach</c>).
        /// </remarks>
        public IEnumerable<Token> Tokenize(string text)
        {
            // TODO: optimize this to reuse tokenizer
            // Scoping the tokenizer in a using statement makes the compiler-generated enumerator
            // dispose it even if the caller abandons the sequence before the last token.
            using (var tokenizer = _tokenizerFactory.Create(new StringReader(text)))
            {
                var termAttr       = tokenizer.GetAttribute<ICharTermAttribute>();
                var offsetAttr     = tokenizer.GetAttribute<IOffsetAttribute>();
                var posIncrAttr    = tokenizer.GetAttribute<IPositionIncrementAttribute>();
                var posLenAttr     = tokenizer.GetAttribute<IPositionLengthAttribute>();
                var baseFormAttr   = tokenizer.GetAttribute<IBaseFormAttribute>();
                var inflectionAttr = tokenizer.GetAttribute<IInflectionAttribute>();
                var posAttr        = tokenizer.GetAttribute<IPartOfSpeechAttribute>();
                var readingAttr    = tokenizer.GetAttribute<IReadingAttribute>();

                tokenizer.Reset();
                long ordinal = 0;

                while (tokenizer.IncrementToken())
                {
                    // Snapshot the attribute values into a new Token for each yielded item.
                    yield return new Token
                    {
                        Ordinal = ++ordinal,
                        Term = termAttr.ToString(),
                        StartOffset = offsetAttr.StartOffset,
                        EndOffset = offsetAttr.EndOffset,
                        PositionIncrement = posIncrAttr.PositionIncrement,
                        PositionLength = posLenAttr.PositionLength,
                        BaseForm = baseFormAttr.GetBaseForm(),
                        PartOfSpeech = posAttr.GetPartOfSpeech(),
                        InflectionForm = inflectionAttr.GetInflectionForm(),
                        InflectionType = inflectionAttr.GetInflectionType(),
                        Pronunciation = readingAttr.GetPronunciation(),
                        Reading = readingAttr.GetReading(),
                    };
                }

                // Signal end-of-stream after the last token has been consumed.
                tokenizer.End();
            }
        }