Exemple #1
0
        /**
         * Builds a Viterbi lattice for one sentence, searches it for the best path and
         * converts every node on that path, except the BOS/EOS markers, into a token.
         *
         * @param offset  offset of the sentence in the original input text; added to each node's start index
         * @param text  sentence to tokenize
         * @return list of tokens, in the order the nodes appear on the best path
         */
        private List<T> CreateTokenList(int offset, string text)
        {
            var tokens = new List<T>();

            foreach (ViterbiNode pathNode in viterbiSearcher.Search(viterbiBuilder.Build(text)))
            {
                int id = pathNode.WordId;

                // BOS/EOS show up as KNOWN nodes carrying the word id -1; they are not real tokens.
                bool isBoundaryMarker = pathNode.Type == ViterbiNode.NodeType.KNOWN && id == -1;
                if (!isBoundaryMarker)
                {
                    tokens.Add((T)tokenFactory.CreateToken(
                        id,
                        pathNode.Surface,
                        pathNode.Type,
                        offset + pathNode.StartIndex,
                        dictionaryMap[pathNode.Type]));
                }
            }

            return tokens;
        }
Exemple #2
0
        /**
         * Tokenize input sentence, returning several candidate paths instead of only the best one.
         * Up to maxCount different paths of cost at most OPT + costSlack are returned, ordered in
         * ascending order by cost, where OPT is the cost of the optimal solution.
         *
         * @param text  sentence to tokenize
         * @param maxCount  maximum number of paths
         * @param costSlack  maximum cost slack of a path
         * @return instance of MultiSearchResult containing the tokenizations
         */
        private MultiSearchResult CreateMultiSearchResult(string text, int maxCount, int costSlack)
        {
            // Build the lattice for the sentence and hand it straight to the multi-path search.
            return viterbiSearcher.SearchMultiple(viterbiBuilder.Build(text), maxCount, costSlack);
        }
Exemple #3
0
        /**
         * Writes the Viterbi lattice for the provided text to an output stream
         * <p>
         * The output is written in <a href="https://en.wikipedia.org/wiki/DOT_(graph_description_language)">DOT</a> format,
         * encoded as UTF-8 without a byte order mark.
         * <p>
         * The stream is flushed but left open; closing it remains the caller's responsibility.
         * <p>
         * This method is not thread safe
         *
         * @param outputStream  output stream to write to
         * @param text  text to create lattice for
         * @throws System.ArgumentNullException if outputStream is null
         * @throws System.IO.IOException if an error occurs when writing the lattice
         */
        public void DebugLattice(Stream outputStream, string text)
        {
            if (outputStream == null)
            {
                throw new System.ArgumentNullException(nameof(outputStream));
            }

            ViterbiLattice lattice = viterbiBuilder.Build(text);

            // Write the encoded bytes directly to the caller's stream. Wrapping the stream in a
            // StreamWriter that is never written to would only close the caller's stream on
            // dispose and could emit a UTF-8 preamble after the payload.
            byte[] bytes = Encoding.UTF8.GetBytes(viterbiFormatter.Format(lattice));
            outputStream.Write(bytes, 0, bytes.Length);
            outputStream.Flush();
        }