Beispiel #1
0
        /// <summary>
        /// Instantiates an FST/FSA builder with all the possible tuning and construction
        /// tweaks. Read parameter documentation carefully.
        /// </summary>
        /// <param name="inputType">
        ///    The input type (transition labels). Can be anything from <see cref="Lucene.Net.Util.Fst.FST.INPUT_TYPE"/>
        ///    enumeration. Shorter types will consume less memory. Strings (character sequences) are
        ///    represented as <see cref="Lucene.Net.Util.Fst.FST.INPUT_TYPE.BYTE4"/> (full unicode codepoints).
        /// </param>
        /// <param name="minSuffixCount1">
        ///    If pruning the input graph during construction, this threshold is used for telling
        ///    if a node is kept or pruned. If transition_count(node) &gt;= minSuffixCount1, the node
        ///    is kept.
        /// </param>
        /// <param name="minSuffixCount2">
        ///    (Note: only Mike McCandless knows what this one is really doing...)
        ///    This constructor only stores the value on the builder; it does not interpret it.
        /// </param>
        /// <param name="doShareSuffix">
        ///    If <c>true</c>, the shared suffixes will be compacted into unique paths.
        ///    This requires an additional RAM-intensive hash map for lookups in memory. Setting this parameter to
        ///    <c>false</c> creates a single suffix path for all input sequences. This will result in a larger
        ///    FST, but requires substantially less memory and CPU during building.
        /// </param>
        /// <param name="doShareNonSingletonNodes">
        ///    Only used if <paramref name="doShareSuffix"/> is <c>true</c>.  Set this to
        ///    <c>true</c> to ensure FST is fully minimal, at cost of more
        ///    CPU and more RAM during building.
        /// </param>
        /// <param name="shareMaxTailLength">
        ///    Only used if <paramref name="doShareSuffix"/> is <c>true</c>.  Set this to
        ///    <see cref="int.MaxValue"/> to ensure FST is fully minimal, at cost of more
        ///    CPU and more RAM during building.
        /// </param>
        /// <param name="outputs"> The output type for each input sequence. Applies only if building an FST. For
        ///    FSA, use <see cref="NoOutputs.Singleton"/> and <see cref="NoOutputs.NoOutput"/> as the
        ///    singleton output object. Must not be <c>null</c>.
        /// </param>
        /// <param name="freezeTail">
        ///    Stored on the builder unchanged; this constructor neither invokes nor validates it.
        ///    NOTE(review): presumably an optional hook that customizes how the tail is frozen, with
        ///    <c>null</c> selecting the default behavior -- confirm against the code that reads the field.
        /// </param>
        /// <param name="doPackFST"> Pass <c>true</c> to create a packed FST.
        /// </param>
        /// <param name="acceptableOverheadRatio"> How to trade speed for space when building the FST. This option
        ///    is only relevant when <paramref name="doPackFST"/> is <c>true</c>.
        ///    See <see cref="PackedInt32s.GetMutable(int, int, float)"/>.
        /// </param>
        /// <param name="allowArrayArcs"> Pass <c>false</c> to disable the array arc optimization
        ///    while building the FST; this will make the resulting
        ///    FST smaller but slower to traverse.
        /// </param>
        /// <param name="bytesPageBits"> How many bits wide to make each
        ///    <see cref="T:byte[]"/> block in the <see cref="BytesStore"/>; if you know the FST
        ///    will be large then make this larger.  For example 15
        ///    bits = 32768 byte pages. </param>
        /// <exception cref="System.ArgumentNullException"><paramref name="outputs"/> is <c>null</c>.</exception>
        public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, bool doShareSuffix,
                       bool doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
                       FreezeTail<T> freezeTail, bool doPackFST, float acceptableOverheadRatio, bool allowArrayArcs,
                       int bytesPageBits)
        {
            // outputs is dereferenced below (outputs.NoOutput). Fail fast with a descriptive
            // exception rather than a NullReferenceException on a half-initialized builder.
            if (outputs == null)
            {
                throw new System.ArgumentNullException(nameof(outputs));
            }

            // Number of UnCompiledNode<T> slots the frontier is created with.
            const int initialFrontierSize = 10;

            this.minSuffixCount1          = minSuffixCount1;
            this.minSuffixCount2          = minSuffixCount2;
            this.freezeTail               = freezeTail;
            this.doShareNonSingletonNodes = doShareNonSingletonNodes;
            this.shareMaxTailLength       = shareMaxTailLength;
            this.doPackFST                = doPackFST;
            this.acceptableOverheadRatio  = acceptableOverheadRatio;

            fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs, bytesPageBits);

            // The node hash used for suffix de-duplication is only created when suffix
            // sharing is requested; otherwise the field is explicitly left null.
            if (doShareSuffix)
            {
                dedupHash = new NodeHash<T>(fst, fst.bytes.GetReverseReader(false));
            }
            else
            {
                dedupHash = null;
            }

            NO_OUTPUT = outputs.NoOutput;

            // Pre-populate every frontier slot with an uncompiled node that is bound to
            // this builder and constructed with its own index.
            frontier = new UnCompiledNode<T>[initialFrontierSize];
            for (int idx = 0; idx < frontier.Length; idx++)
            {
                frontier[idx] = new UnCompiledNode<T>(this, idx);
            }
        }