Beispiel #1
0
        private CompiledNode compileNode(UnCompiledNode <T> nodeIn, int tailLength)
        {
            long node;
            long bytesPosStart = bytes.getPosition();

            //TODO: deduphash
            node = fst.addNode(this, nodeIn);
            Debug.Assert(node != -2);

            long bytesPosEnd = bytes.getPosition();

            if (bytesPosEnd != bytesPosStart)
            {
                // The FST added a new node:
                Debug.Assert(bytesPosEnd > bytesPosStart);
                lastFrozenNode = node;
            }

            nodeIn.clear();

            CompiledNode fn = new CompiledNode();

            fn.node = node;
            return(fn);
        }
Beispiel #2
0
        private void freezeTail(int prefixLenPlus1)
        {
            int downTo = Math.Max(1, prefixLenPlus1);

            for (int idx = lastInput.length; idx >= downTo; idx--)
            {
                bool doPrune   = false;
                bool doCompile = false;

                UnCompiledNode <T> node   = frontier[idx];
                UnCompiledNode <T> parent = frontier[idx - 1];

                if (node.inputCount < minSuffixCount1)
                {
                    doPrune   = true;
                    doCompile = true;
                }
                else if (idx > prefixLenPlus1)
                {
                    // prune if parent's inputCount is less than suffixMinCount2
                    if (parent.inputCount < minSuffixCount2 || (minSuffixCount2 == 1 && parent.inputCount == 1 && idx > 1))
                    {
                        // my parent, about to be compiled, doesn't make the cut, so
                        // I'm definitely pruned

                        // if minSuffixCount2 is 1, we keep only up
                        // until the 'distinguished edge', ie we keep only the
                        // 'divergent' part of the FST. if my parent, about to be
                        // compiled, has inputCount 1 then we are already past the
                        // distinguished edge.  NOTE: this only works if
                        // the FST outputs are not "compressible" (simple
                        // ords ARE compressible).
                        doPrune = true;
                    }
                    else
                    {
                        // my parent, about to be compiled, does make the cut, so
                        // I'm definitely not pruned
                        doPrune = false;
                    }
                    doCompile = true;
                }
                else
                {
                    // if pruning is disabled (count is 0) we can always
                    // compile current node
                    doCompile = minSuffixCount2 == 0;
                }

                if (node.inputCount < minSuffixCount2 || (minSuffixCount2 == 1 && node.inputCount == 1 && idx > 1))
                {
                    // drop all arcs
                    for (int arcIdx = 0; arcIdx < node.numArcs; arcIdx++)
                    {
                        UnCompiledNode <T> target = (UnCompiledNode <T>)node.arcs[arcIdx].target;
                        target.clear();
                    }
                    node.numArcs = 0;
                }

                if (doPrune)
                {
                    // this node doesn't make it -- deref it
                    node.clear();
                    parent.deleteLast(lastInput.ints[idx - 1], node);
                }
                else
                {
                    if (minSuffixCount2 != 0)
                    {
                        //TODO: minSuffixCount2 is always 0 for now.
                        //compileAllTargets(node, lastInput.length()-idx);
                    }
                    T nextFinalOutput = node.output;

                    // We "fake" the node as being final if it has no
                    // outgoing arcs; in theory we could leave it
                    // as non-final (the FST can represent this), but
                    // FSTEnum, Util, etc., have trouble w/ non-final
                    // dead-end states:
                    bool isFinal = node.isFinal || node.numArcs == 0;

                    if (doCompile)
                    {
                        // this node makes it and we now compile it.  first,
                        // compile any targets that were previously
                        // undecided:
                        parent.replaceLast(lastInput.ints[idx - 1],
                                           compileNode(node, 1 + lastInput.length - idx),
                                           nextFinalOutput,
                                           isFinal);
                    }
                    else
                    {
                        // replaceLast just to install
                        // nextFinalOutput/isFinal onto the arc
                        parent.replaceLast(lastInput.ints[idx - 1],
                                           node,
                                           nextFinalOutput,
                                           isFinal);
                        // this node will stay in play for now, since we are
                        // undecided on whether to prune it.  later, it
                        // will be either compiled or pruned, so we must
                        // allocate a new node:
                        frontier[idx] = new UnCompiledNode <T>(this, idx);
                    }
                }
            }
        }