Beispiel #1
0
        /// <summary>
        /// Initializes this tokenizer from the supplied builder: asks the builder to load its
        /// dictionaries, copies the builder's dictionaries and settings into this instance,
        /// creates the Viterbi builder, searcher and formatter, and finally initializes the dictionary map.
        /// </summary>
        /// <typeparam name="V">Concrete builder type.</typeparam>
        /// <typeparam name="K">Tokenizer type the builder is parameterized with.</typeparam>
        /// <param name="builder">Builder supplying the dictionaries, connection costs and options.</param>
        protected void Configure <V, K>(V builder) where V : Builder <K> where K : TokenizerBase <T>
        {
            // Load first; the builder properties read below are presumably populated by this call.
            builder.LoadDictionaries();

            tokenFactory = builder.TokenFactory;

            // Dictionaries taken over from the builder.
            tokenInfoDictionary = builder.TokenInfoDictionary;
            unknownDictionary = builder.UnknownDictionary;
            userDictionary = builder.UserDictionary;
            insertedDictionary = builder.InsertedDictionary;

            // Viterbi helpers are constructed from the dictionaries captured above.
            viterbiBuilder = new ViterbiBuilder(builder.Fst, tokenInfoDictionary, unknownDictionary, userDictionary, builder.Mode);
            viterbiSearcher = new ViterbiSearcher(builder.Mode, builder.ConnectionCosts, unknownDictionary, builder.Penalties);
            viterbiFormatter = new ViterbiFormatter(builder.ConnectionCosts);

            split = builder.Split;

            InitDictionaryMap();
        }
Beispiel #2
0
        /// <summary>
        /// Builds the Viterbi lattice for the given text and writes its formatted representation to a stream.
        /// <para>
        /// The output is written in <a href="https://en.wikipedia.org/wiki/DOT_(graph_description_language)">DOT</a> format,
        /// encoded as UTF-8. The stream is left open when this method returns.
        /// </para>
        /// <para>This method is not thread safe.</para>
        /// </summary>
        /// <param name="output">output stream to write to</param>
        /// <param name="text">text to create lattice for</param>
        public void DebugLattice(Stream output, string text)
        {
            var viterbiLattice = ViterbiBuilder.Build(text);

            // leaveOpen: true - disposing the writer flushes it but does not close the caller's stream.
            using (var dotWriter = new StreamWriter(output, Encoding.UTF8, bufferSize: 1024, leaveOpen: true))
            {
                var dot = ViterbiFormatter.Format(viterbiLattice);
                dotWriter.Write(dot);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Tokenizes the given text and writes both the Viterbi lattice and the best path found through it to a stream.
        /// <para>
        /// The output is written in <a href="https://en.wikipedia.org/wiki/DOT_(graph_description_language)">DOT</a> format,
        /// encoded as UTF-8. The stream is left open when this method returns.
        /// </para>
        /// <para>This method is not thread safe.</para>
        /// </summary>
        /// <param name="output">output stream to write to</param>
        /// <param name="text">text to tokenize</param>
        public void DebugTokenize(Stream output, string text)
        {
            var viterbiLattice = ViterbiBuilder.Build(text);
            var viterbiPath = ViterbiSearcher.Search(viterbiLattice);

            // leaveOpen: true - disposing the writer flushes it but does not close the caller's stream.
            using (var dotWriter = new StreamWriter(output, Encoding.UTF8, bufferSize: 1024, leaveOpen: true))
            {
                var dot = ViterbiFormatter.Format(viterbiLattice, viterbiPath);
                dotWriter.Write(dot);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Initializes this tokenizer from the supplied builder: asks the builder to load its
        /// dictionaries, copies the builder's dictionaries and settings into this instance,
        /// creates the Viterbi builder, searcher and formatter, and finally initializes the dictionary map.
        /// </summary>
        /// <param name="builder">Builder supplying the dictionaries, connection costs and options.</param>
        protected void Configure(BuilderBase builder)
        {
            // Load first; the builder properties read below are presumably populated by this call.
            builder.LoadDictionaries();

            TokenFactory = builder.TokenFactory;

            // Dictionaries taken over from the builder.
            TokenInfoDictionary = builder.TokenInfoDictionary;
            UnknownDictionary = builder.UnknownDictionary;
            UserDictionary = builder.UserDictionary;
            InsertedDictionary = builder.InsertedDictionary;

            // Viterbi helpers are constructed from the dictionaries captured above.
            ViterbiBuilder = new ViterbiBuilder(
                builder.DoubleArrayTrie,
                TokenInfoDictionary,
                UnknownDictionary,
                UserDictionary,
                builder.Mode);

            ViterbiSearcher = new ViterbiSearcher(
                builder.Mode,
                builder.ConnectionCosts,
                UnknownDictionary,
                builder.Penalties);

            ViterbiFormatter = new ViterbiFormatter(builder.ConnectionCosts);

            Split = builder.Split;

            InitDictionaryMap();
        }
Beispiel #5
0
        /// <summary>
        /// Disposes the Viterbi builder, searcher and formatter and the token info dictionary
        /// (each only if currently set), then clears the corresponding fields.
        /// </summary>
        public void Dispose()
        {
            // Release every held component first; fields that are already null are skipped.
            viterbiBuilder?.Dispose();
            viterbiSearcher?.Dispose();
            viterbiFormatter?.Dispose();
            tokenInfoDictionary?.Dispose();

            // Drop the references only after all components were disposed,
            // so a repeated call finds nothing left to dispose.
            viterbiBuilder = null;
            viterbiSearcher = null;
            viterbiFormatter = null;
            tokenInfoDictionary = null;
        }