/// <summary>
/// Creates an FST/FSA builder, exposing every available tuning and construction
/// option. Read the parameter documentation carefully before choosing values.
/// </summary>
/// <param name="inputType">
/// The input type (transition labels); any value of the
/// <see cref="Lucene.Net.Util.Fst.FST.INPUT_TYPE"/> enumeration. Shorter types consume
/// less memory. Strings (character sequences) are represented as
/// <see cref="Lucene.Net.Util.Fst.FST.INPUT_TYPE.BYTE4"/> (full unicode codepoints).
/// </param>
/// <param name="minSuffixCount1">
/// Threshold used when pruning the input graph during construction to decide whether
/// a node is kept or pruned: a node is kept if transition_count(node) &gt;= minSuffixCount1.
/// </param>
/// <param name="minSuffixCount2">
/// (Note carried over from the original documentation: only Mike McCandless knows
/// what this one is really doing...)
/// </param>
/// <param name="doShareSuffix">
/// If <c>true</c>, shared suffixes are compacted into unique paths. This requires an
/// additional RAM-intensive hash map for lookups in memory. Passing <c>false</c>
/// creates a single suffix path for all input sequences; this results in a larger
/// FST, but requires substantially less memory and CPU during building.
/// </param>
/// <param name="doShareNonSingletonNodes">
/// Only used if <paramref name="doShareSuffix"/> is <c>true</c>. Set this to
/// <c>true</c> to ensure the FST is fully minimal, at the cost of more CPU and more
/// RAM during building.
/// </param>
/// <param name="shareMaxTailLength">
/// Only used if <paramref name="doShareSuffix"/> is <c>true</c>. Set this to
/// <see cref="int.MaxValue"/> to ensure the FST is fully minimal, at the cost of more
/// CPU and more RAM during building.
/// </param>
/// <param name="outputs">
/// The output type for each input sequence. Applies only if building an FST. For an
/// FSA, use <see cref="NoOutputs.Singleton"/> and <see cref="NoOutputs.NoOutput"/> as
/// the singleton output object.
/// </param>
/// <param name="freezeTail">
/// Stored on the builder unchanged; this constructor does not otherwise use it.
/// NOTE(review): presumably a hook that customizes how the tail is frozen - confirm
/// against the code that reads the field.
/// </param>
/// <param name="doPackFST">
/// Pass <c>true</c> to create a packed FST.
/// </param>
/// <param name="acceptableOverheadRatio">
/// How to trade speed for space when building the FST. This option is only relevant
/// when <paramref name="doPackFST"/> is <c>true</c>.
/// <see cref="PackedInt32s.GetMutable(int, int, float)"/>
/// </param>
/// <param name="allowArrayArcs">
/// Pass <c>false</c> to disable the array arc optimization while building the FST;
/// this will make the resulting FST smaller but slower to traverse.
/// </param>
/// <param name="bytesPageBits">
/// How many bits wide to make each <see cref="T:byte[]"/> block in the
/// <see cref="BytesStore"/>; if you know the FST will be large then make this larger.
/// For example 15 bits = 32768 byte pages.
/// </param>
public Builder(
    FST.INPUT_TYPE inputType,
    int minSuffixCount1,
    int minSuffixCount2,
    bool doShareSuffix,
    bool doShareNonSingletonNodes,
    int shareMaxTailLength,
    Outputs<T> outputs,
    FreezeTail<T> freezeTail,
    bool doPackFST,
    float acceptableOverheadRatio,
    bool allowArrayArcs,
    int bytesPageBits)
{
    // Pruning thresholds.
    this.minSuffixCount1 = minSuffixCount1;
    this.minSuffixCount2 = minSuffixCount2;

    // Suffix-sharing options.
    this.doShareNonSingletonNodes = doShareNonSingletonNodes;
    this.shareMaxTailLength = shareMaxTailLength;

    // Packing options; the same two values are also handed to the FST below.
    this.doPackFST = doPackFST;
    this.acceptableOverheadRatio = acceptableOverheadRatio;

    this.freezeTail = freezeTail;

    // The FST has to exist first: the dedup hash is constructed from it and from a
    // reverse reader over its byte store.
    fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs, bytesPageBits);

    // Without suffix sharing there is no dedup hash at all.
    dedupHash = doShareSuffix
        ? new NodeHash<T>(fst, fst.bytes.GetReverseReader(false))
        : null;

    // Set before the frontier nodes are created, matching the original statement order.
    NO_OUTPUT = outputs.NoOutput;

    // Ten frontier slots, each filled with a node built from this builder and the
    // slot's own index.
    frontier = new UnCompiledNode<T>[10];
    for (int slot = 0; slot < frontier.Length; slot++)
    {
        frontier[slot] = new UnCompiledNode<T>(this, slot);
    }
}