/// <summary>
/// Compares the arcs of an uncompiled builder node with the arcs of the node stored at
/// <paramref name="address"/>, which are read one at a time into the shared scratch arc.
/// </summary>
/// <param name="node"> the uncompiled node whose arcs are compared </param>
/// <param name="address"> address of the stored node to compare against </param>
/// <returns>
/// <c>true</c> only if every arc matches (label, output, target, next final output, final flag)
/// and the stored node ends on the same arc index as <paramref name="node"/>.
/// </returns>
private bool NodesEqual(Builder.UnCompiledNode<T> node, long address)
{
    fst.ReadFirstRealTargetArc(address, scratchArc, input);

    // When the stored node reports a non-zero BytesPerArc its arc count is available
    // immediately, so differing counts settle the answer without scanning.
    if (scratchArc.BytesPerArc != 0 && node.NumArcs != scratchArc.NumArcs)
    {
        return false;
    }

    for (int i = 0; i < node.NumArcs; i++)
    {
        Builder.Arc<T> candidate = node.Arcs[i];

        bool sameArc = candidate.Label == scratchArc.Label
            && candidate.Output.Equals(scratchArc.Output)
            && ((Builder.CompiledNode)candidate.Target).Node == scratchArc.Target
            && candidate.NextFinalOutput.Equals(scratchArc.NextFinalOutput)
            && candidate.IsFinal == scratchArc.IsFinal;
        if (!sameArc)
        {
            return false;
        }

        if (scratchArc.IsLast)
        {
            // The stored node is exhausted; equal only if the builder node is exhausted too.
            return i == node.NumArcs - 1;
        }

        fst.ReadNextRealArc(scratchArc, input);
    }

    // The builder node ran out of arcs before the stored node did (or had none at all).
    return false;
}
/// <summary>
/// Traverses the arcs of <paramref name="fst"/> in first-in/first-out order, expanding the
/// target node of each dequeued arc at most once. The traversal produces no output.
/// </summary>
/// <param name="fst"> the FST to traverse </param>
internal static void Walk<T>(FST<T> fst) // LUCENENET NOTE: Not referenced
{
    // Queue<T> gives O(1) dequeue; removing index 0 from a List<T> shifts every remaining element.
    var queue = new Queue<FST.Arc<T>>();
    FST.BytesReader reader = fst.GetBytesReader();
    FST.Arc<T> startArc = fst.GetFirstArc(new FST.Arc<T>());
    queue.Enqueue(startArc);
    BitArray seen = new BitArray(queue.Count);

    while (queue.Count > 0)
    {
        FST.Arc<T> arc = queue.Dequeue();
        long node = arc.Target;
        //System.out.println(arc);
        if (FST<T>.TargetHasArcs(arc) && !seen.SafeGet((int)node))
        {
            seen.SafeSet((int)node, true);

            // The dequeued arc is reused as the cursor over the target node's arcs;
            // a copy of each arc is what gets enqueued.
            fst.ReadFirstRealTargetArc(node, arc, reader);
            while (true)
            {
                queue.Enqueue((new FST.Arc<T>()).CopyFrom(arc));
                if (arc.IsLast)
                {
                    break;
                }

                fst.ReadNextRealArc(arc, reader);
            }
        }
    }
}
// Use the builder to create:
private NormalizeCharMap(FST<CharsRef> map)
{
    this.map = map;
    if (map == null)
    {
        // No FST, so there are no root arcs to cache.
        return;
    }

    try
    {
        // Pre-cache root arcs:
        var rootArc = new FST.Arc<CharsRef>();
        FST.BytesReader fstReader = map.BytesReader;
        map.GetFirstArc(rootArc);
        if (!FST<CharsRef>.TargetHasArcs(rootArc))
        {
            return;
        }

        map.ReadFirstRealTargetArc(rootArc.Target, rootArc, fstReader);
        for (; ; )
        {
            Debug.Assert(rootArc.Label != FST.END_LABEL);

            // Each root arc is stored as its own copy, keyed by its label as a char.
            char key = (char)rootArc.Label;
            cachedRootArcs[key] = (new FST.Arc<CharsRef>()).CopyFrom(rootArc);

            if (rootArc.IsLast)
            {
                break;
            }

            map.ReadNextRealArc(rootArc, fstReader);
        }
        //System.out.println("cached " + cachedRootArcs.size() + " root arcs");
    }
    catch (IOException ioe)
    {
        // Bogus FST IOExceptions!! (will never happen)
        throw new Exception("Should never happen", ioe);
    }
}
/// <summary>
/// Traverses the arcs of <paramref name="fst"/> in first-in/first-out order, expanding the
/// target node of each dequeued arc at most once. The traversal produces no output.
/// </summary>
/// <param name="fst"> the FST to traverse </param>
internal static void Walk<T>(FST<T> fst)
{
    // Queue<T>.Dequeue returns the head element; List<T>.Remove(0) neither returns the
    // removed element nor accepts an index.
    var queue = new Queue<FST.Arc<T>>();

    // Node addresses are tracked as longs in a set: BitArray has no parameterless
    // constructor, throws for an index beyond its length, and would need an int cast.
    var seen = new HashSet<long>();

    var reader = fst.BytesReader;
    var startArc = fst.GetFirstArc(new FST.Arc<T>());
    queue.Enqueue(startArc);

    while (queue.Count > 0)
    {
        FST.Arc<T> arc = queue.Dequeue();
        long node = arc.Target;
        //System.out.println(arc);

        // HashSet<T>.Add returns false for a node that was already recorded, and it is only
        // evaluated when the target actually has arcs.
        if (FST.TargetHasArcs(arc) && seen.Add(node))
        {
            // The dequeued arc is reused as the cursor over the target node's arcs;
            // a copy of each arc is what gets enqueued.
            fst.ReadFirstRealTargetArc(node, arc, reader);
            while (true)
            {
                queue.Enqueue((new FST.Arc<T>()).CopyFrom(arc));
                if (arc.Last)
                {
                    break;
                }

                fst.ReadNextRealArc(arc, reader);
            }
        }
    }
}
// Use the builder to create:
private NormalizeCharMap(FST<CharsRef> map)
{
    this.map = map;
    if (map == null)
    {
        // No FST, so there are no root arcs to cache.
        return;
    }

    try
    {
        // Pre-cache root arcs:
        var rootArc = new FST.Arc<CharsRef>();
        FST.BytesReader fstReader = map.BytesReader;
        map.GetFirstArc(rootArc);
        if (!FST<CharsRef>.TargetHasArcs(rootArc))
        {
            return;
        }

        map.ReadFirstRealTargetArc(rootArc.Target, rootArc, fstReader);
        for (; ; )
        {
            Debug.Assert(rootArc.Label != FST<CharsRef>.END_LABEL); // LUCENENET TODO END_LABEL shouldn't be under generic?

            // Each root arc is stored as its own copy, keyed by its label as a char.
            char key = (char)rootArc.Label;
            cachedRootArcs[key] = (new FST.Arc<CharsRef>()).CopyFrom(rootArc);

            if (rootArc.Last)
            {
                break;
            }

            map.ReadNextRealArc(rootArc, fstReader);
        }
        //System.out.println("cached " + cachedRootArcs.size() + " root arcs");
    }
    catch (IOException ioe)
    {
        // Bogus FST IOExceptions!! (will never happen)
        throw new Exception("Should never happen", ioe);
    }
}
/// <summary>
/// Loads <paramref name="frame"/> with the first arc of the node targeted by
/// <paramref name="top"/>'s arc and steps the automaton state by that arc's label.
/// </summary>
/// <returns>
/// <c>null</c> when <c>CanGrow(top)</c> is false; the result of <c>LoadNextFrame</c> when
/// the automaton step yields -1; otherwise <paramref name="frame"/> itself.
/// </returns>
private Frame LoadExpandFrame(Frame top, Frame frame)
{
    if (!CanGrow(top))
    {
        return null;
    }

    frame.fstArc = fst.ReadFirstRealTargetArc(top.fstArc.Target, frame.fstArc, fstReader);
    frame.fsaState = fsa.Step(top.fsaState, frame.fstArc.Label);
    //if (TEST) System.out.println(" loadExpand frame="+frame);

    // A step result of -1 hands the frame over to LoadNextFrame instead of returning it.
    return frame.fsaState == -1 ? LoadNextFrame(top, frame) : frame;
}
/// <summary>
/// Traverses the arcs of <paramref name="fst"/> in first-in/first-out order, expanding the
/// target node of each dequeued arc at most once. The traversal produces no output.
/// </summary>
/// <param name="fst"> the FST to traverse </param>
private static void Walk<T>(FST<T> fst) // LUCENENET NOTE: Not referenced anywhere
{
    // Queue<T> gives O(1) dequeue; removing index 0 from a List<T> shifts every remaining element.
    var queue = new Queue<FST.Arc<T>>();

    // Java version was BitSet().
    // NOTE(review): an earlier comment here said a size of zero was passed because no
    // parameterless constructor existed, yet the call below takes no argument - confirm
    // which BitSet type this resolves to.
    var seen = new BitSet();
    var reader = fst.GetBytesReader();
    var startArc = fst.GetFirstArc(new FST.Arc<T>());
    queue.Enqueue(startArc);

    while (queue.Count > 0)
    {
        var arc = queue.Dequeue();
        long node = arc.Target;
        //System.out.println(arc);
        if (FST<T>.TargetHasArcs(arc) && !seen.Get((int)node))
        {
            seen.Set((int)node);

            // The dequeued arc is reused as the cursor over the target node's arcs;
            // a copy of each arc is what gets enqueued.
            fst.ReadFirstRealTargetArc(node, arc, reader);
            while (true)
            {
                queue.Enqueue((new FST.Arc<T>()).CopyFrom(arc));
                if (arc.IsLast)
                {
                    break;
                }

                fst.ReadNextRealArc(arc, reader);
            }
        }
    }
}
// Use the builder to create:
private NormalizeCharMap(FST<CharsRef> map)
{
    this.map = map;
    if (map == null)
    {
        // No FST, so there are no root arcs to cache.
        return;
    }

    try
    {
        // Pre-cache root arcs:
        var rootArc = new FST.Arc<CharsRef>();
        FST.BytesReader fstReader = map.GetBytesReader();
        map.GetFirstArc(rootArc);
        if (!FST<CharsRef>.TargetHasArcs(rootArc))
        {
            return;
        }

        map.ReadFirstRealTargetArc(rootArc.Target, rootArc, fstReader);
        for (; ; )
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(rootArc.Label != FST.END_LABEL);
            }

            // Each root arc is stored as its own copy, keyed by its label as a char.
            char key = (char)rootArc.Label;
            cachedRootArcs[key] = (new FST.Arc<CharsRef>()).CopyFrom(rootArc);

            if (rootArc.IsLast)
            {
                break;
            }

            map.ReadNextRealArc(rootArc, fstReader);
        }
        //System.out.println("cached " + cachedRootArcs.size() + " root arcs");
    }
    catch (Exception ioe) when (ioe.IsIOException())
    {
        // Bogus FST IOExceptions!! (will never happen)
        throw RuntimeException.Create("Should never happen", ioe);
    }
}
/// <summary>
/// Dumps an <see cref="FST{T}"/> to a GraphViz's <c>dot</c> language description
/// for visualization. Example of use:
///
/// <code>
/// using (TextWriter sw = new StreamWriter("out.dot"))
/// {
///     Util.ToDot(fst, sw, true, true);
/// }
/// </code>
///
/// and then, from command line:
///
/// <code>
/// dot -Tpng -o out.png out.dot
/// </code>
///
/// <para/>
/// Note: larger FSTs (a few thousand nodes) won't even
/// render, don't bother. If the FST is > 2.1 GB in size
/// then this method will throw strange exceptions.
/// <para/>
/// Node ids are written using the invariant culture so that the emitted text does not
/// depend on the current thread's culture.
/// <para/>
/// See also <a href="http://www.graphviz.org/">http://www.graphviz.org/</a>.
/// </summary>
/// <param name="fst"> The FST to dump. </param>
/// <param name="out"> The writer receiving the <c>dot</c> text; it is flushed before returning. </param>
/// <param name="sameRank">
/// If <c>true</c>, the resulting <c>dot</c> file will try
/// to order states in layers of breadth-first traversal. This may
/// mess up arcs, but makes the output FST's structure a bit clearer.
/// </param>
/// <param name="labelStates">
/// If <c>true</c> states will have labels equal to their offsets in their
/// binary format. Expands the graph considerably.
/// </param>
public static void ToDot<T>(FST<T> fst, TextWriter @out, bool sameRank, bool labelStates)
{
    const string expandedNodeColor = "blue";

    // Shape for states.
    const string stateShape = "circle";
    const string finalStateShape = "doublecircle";

    // Numbers are formatted with the invariant culture: a culture-specific negative sign
    // would make an arc into state -1 disagree with the literal "-1" sink node emitted below.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // this is the start arc in the automaton (from the epsilon state to the first state
    // with outgoing transitions.
    FST.Arc<T> startArc = fst.GetFirstArc(new FST.Arc<T>());

    // A queue of transitions to consider for the next level.
    IList<FST.Arc<T>> thisLevelQueue = new List<FST.Arc<T>>();

    // A queue of transitions to consider when processing the next level.
    IList<FST.Arc<T>> nextLevelQueue = new List<FST.Arc<T>>();
    nextLevelQueue.Add(startArc);
    //System.out.println("toDot: startArc: " + startArc);

    // A list of states on the same level (for ranking).
    IList<int?> sameLevelStates = new List<int?>();

    // A bitset of already seen states (target offset).
    BitArray seen = new BitArray(32);
    seen.SafeSet((int)startArc.Target, true);

    // Emit DOT prologue.
    @out.Write("digraph FST {\n");
    @out.Write(" rankdir = LR; splines=true; concentrate=true; ordering=out; ranksep=2.5; \n");

    if (!labelStates)
    {
        @out.Write(" node [shape=circle, width=.2, height=.2, style=filled]\n");
    }

    EmitDotState(@out, "initial", "point", "white", "");

    T NO_OUTPUT = fst.Outputs.NoOutput;
    var r = fst.GetBytesReader();

    // final FST.Arc<T> scratchArc = new FST.Arc<>();

    // Emit the state the start arc points to.
    string startColor = fst.IsExpandedTarget(startArc, r) ? expandedNodeColor : null;

    bool startIsFinal;
    T startFinalOutput;
    if (startArc.IsFinal)
    {
        startIsFinal = true;
        startFinalOutput = startArc.NextFinalOutput.Equals(NO_OUTPUT) ? default(T) : startArc.NextFinalOutput;
    }
    else
    {
        startIsFinal = false;
        startFinalOutput = default(T);
    }

    EmitDotState(
        @out,
        Convert.ToString(startArc.Target, invariant),
        startIsFinal ? finalStateShape : stateShape,
        startColor,
        startFinalOutput == null ? "" : fst.Outputs.OutputToString(startFinalOutput));

    @out.Write(" initial -> " + Convert.ToString(startArc.Target, invariant) + "\n");

    int level = 0;

    while (nextLevelQueue.Count > 0)
    {
        // we could double buffer here, but it doesn't matter probably.
        //System.out.println("next level=" + level);
        thisLevelQueue.AddRange(nextLevelQueue);
        nextLevelQueue.Clear();

        level++;
        @out.Write("\n // Transitions and states at level: " + Convert.ToString(level, invariant) + "\n");
        while (thisLevelQueue.Count > 0)
        {
            // Arcs are taken from the end of the list.
            FST.Arc<T> arc = thisLevelQueue[thisLevelQueue.Count - 1];
            thisLevelQueue.RemoveAt(thisLevelQueue.Count - 1);
            //System.out.println(" pop: " + arc);
            if (FST<T>.TargetHasArcs(arc))
            {
                // scan all target arcs
                //System.out.println(" readFirstTarget...");

                // Remember the source node: 'arc' is overwritten below as the arc cursor.
                long node = arc.Target;

                fst.ReadFirstRealTargetArc(arc.Target, arc, r);

                //System.out.println(" firstTarget: " + arc);

                while (true)
                {
                    //System.out.println(" cycle arc=" + arc);
                    // Emit the unseen state and add it to the queue for the next level.
                    if (arc.Target >= 0 && !seen.SafeGet((int)arc.Target))
                    {
                        /*
                         * boolean isFinal = false;
                         * T finalOutput = null;
                         * fst.readFirstTargetArc(arc, scratchArc);
                         * if (scratchArc.isFinal() && fst.targetHasArcs(scratchArc)) {
                         *   // target is final
                         *   isFinal = true;
                         *   finalOutput = scratchArc.output == NO_OUTPUT ? null : scratchArc.output;
                         *   System.out.println("dot hit final label=" + (char) scratchArc.label);
                         * }
                         */
                        string stateColor = fst.IsExpandedTarget(arc, r) ? expandedNodeColor : null;

                        string finalOutput;
                        if (arc.NextFinalOutput != null && !arc.NextFinalOutput.Equals(NO_OUTPUT))
                        {
                            finalOutput = fst.Outputs.OutputToString(arc.NextFinalOutput);
                        }
                        else
                        {
                            finalOutput = "";
                        }

                        EmitDotState(@out, Convert.ToString(arc.Target, invariant), stateShape, stateColor, finalOutput);
                        // To see the node address, use this instead:
                        //emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, String.valueOf(arc.target));
                        seen.SafeSet((int)arc.Target, true);
                        nextLevelQueue.Add((new FST.Arc<T>()).CopyFrom(arc));
                        sameLevelStates.Add((int)arc.Target);
                    }

                    string outs;
                    if (!arc.Output.Equals(NO_OUTPUT))
                    {
                        outs = "/" + fst.Outputs.OutputToString(arc.Output);
                    }
                    else
                    {
                        outs = "";
                    }

                    if (!FST<T>.TargetHasArcs(arc) && arc.IsFinal && !arc.NextFinalOutput.Equals(NO_OUTPUT))
                    {
                        // Tricky special case: sometimes, due to
                        // pruning, the builder can [sillily] produce
                        // an FST with an arc into the final end state
                        // (-1) but also with a next final output; in
                        // this case we pull that output up onto this
                        // arc
                        outs = outs + "/[" + fst.Outputs.OutputToString(arc.NextFinalOutput) + "]";
                    }

                    string arcColor = arc.Flag(FST.BIT_TARGET_NEXT) ? "red" : "black";

                    Debug.Assert(arc.Label != FST.END_LABEL);
                    @out.Write(" " + Convert.ToString(node, invariant) + " -> " + Convert.ToString(arc.Target, invariant)
                        + " [label=\"" + PrintableLabel(arc.Label) + outs + "\""
                        + (arc.IsFinal ? " style=\"bold\"" : "")
                        + " color=\"" + arcColor + "\"]\n");

                    // Break the loop if we're on the last arc of this state.
                    if (arc.IsLast)
                    {
                        //System.out.println(" break");
                        break;
                    }

                    fst.ReadNextRealArc(arc, r);
                }
            }
        }

        // Emit state ranking information.
        if (sameRank && sameLevelStates.Count > 1)
        {
            @out.Write(" {rank=same; ");
            foreach (int state in sameLevelStates)
            {
                @out.Write(Convert.ToString(state, invariant) + "; ");
            }
            @out.Write(" }\n");
        }

        sameLevelStates.Clear();
    }

    // Emit terminating state (always there anyway).
    @out.Write(" -1 [style=filled, color=black, shape=doublecircle, label=\"\"]\n\n");
    @out.Write(" {rank=sink; -1 }\n");

    @out.Write("}\n");
    @out.Flush();
}
/// <summary>
/// Expert: like <see cref="Util.GetByOutput(FST{long?}, long)"/> except reusing
/// <see cref="FST.BytesReader"/>, initial and scratch Arc, and result.
/// </summary>
/// <param name="fst"> the FST to search </param>
/// <param name="targetOutput"> the accumulated output to look for </param>
/// <param name="in"> reader used for all arc reads </param>
/// <param name="arc"> the arc to start from; it is overwritten while descending </param>
/// <param name="scratchArc"> reusable arc that holds the previously scanned arc during linear scans </param>
/// <param name="result"> receives the labels along the matching path </param>
/// <returns>
/// <paramref name="result"/> with its length set when a final arc sums exactly to
/// <paramref name="targetOutput"/>; otherwise <c>null</c>.
/// </returns>
public static Int32sRef GetByOutput(FST<long?> fst, long targetOutput, FST.BytesReader @in, FST.Arc<long?> arc, FST.Arc<long?> scratchArc, Int32sRef result)
{
    long pathOutput = arc.Output.Value;
    int labelCount = 0;

    //System.out.println("reverseLookup output=" + targetOutput);

    for (; ; )
    {
        //System.out.println("loop: output=" + output + " upto=" + upto + " arc=" + arc);
        if (arc.IsFinal)
        {
            long finalOutput = pathOutput + arc.NextFinalOutput.Value;
            //System.out.println(" isFinal finalOutput=" + finalOutput);
            if (finalOutput == targetOutput)
            {
                //System.out.println(" found!");
                result.Length = labelCount;
                return result;
            }

            if (finalOutput > targetOutput)
            {
                //System.out.println(" not found!");
                return null;
            }
        }

        if (!FST<long?>.TargetHasArcs(arc))
        {
            //System.out.println(" no target arcs; not found!");
            return null;
        }

        //System.out.println(" targetHasArcs");
        if (result.Int32s.Length == labelCount)
        {
            result.Grow(1 + labelCount);
        }

        fst.ReadFirstRealTargetArc(arc.Target, arc, @in);

        if (arc.BytesPerArc != 0)
        {
            // Non-zero BytesPerArc: binary search over the arcs by their accumulated output.
            int low = 0;
            int high = arc.NumArcs - 1;
            int mid = 0;
            //System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + " output=" + output);
            bool exact = false;
            while (low <= high)
            {
                mid = (int)((uint)(low + high) >> 1);
                @in.Position = arc.PosArcsStart;
                @in.SkipBytes(arc.BytesPerArc * mid);
                var flags = (sbyte)@in.ReadByte();
                fst.ReadLabel(@in);

                long minArcOutput = pathOutput;
                if ((flags & FST.BIT_ARC_HAS_OUTPUT) != 0)
                {
                    minArcOutput += fst.Outputs.Read(@in).Value;
                }

                if (minArcOutput == targetOutput)
                {
                    exact = true;
                    break;
                }

                if (minArcOutput < targetOutput)
                {
                    low = mid + 1;
                }
                else
                {
                    high = mid - 1;
                }
            }

            if (high == -1)
            {
                return null;
            }

            // ArcIdx is set to one before the wanted arc because ReadNextRealArc is called next.
            arc.ArcIdx = exact ? mid - 1 : low - 2;

            fst.ReadNextRealArc(arc, @in);
            result.Int32s[labelCount++] = arc.Label;
            pathOutput += arc.Output.Value;
        }
        else
        {
            // Linear scan, remembering the previous arc in scratchArc.
            FST.Arc<long?> prevArc = null;

            while (true)
            {
                //System.out.println(" cycle label=" + arc.label + " output=" + arc.output);

                // this is the min output we'd hit if we follow
                // this arc:
                long minArcOutput = pathOutput + arc.Output.Value;

                if (minArcOutput > targetOutput)
                {
                    if (prevArc == null)
                    {
                        // Output doesn't exist
                        return null;
                    }

                    // Recurse on previous arc:
                    arc.CopyFrom(prevArc);
                    result.Int32s[labelCount++] = arc.Label;
                    pathOutput += arc.Output.Value;
                    //System.out.println(" recurse prev label=" + (char) arc.label + " output=" + output);
                    break;
                }

                if (minArcOutput == targetOutput || arc.IsLast)
                {
                    // Recurse on this arc (exact match, or nothing further to scan):
                    //System.out.println(" match! break");
                    pathOutput = minArcOutput;
                    result.Int32s[labelCount++] = arc.Label;
                    break;
                }

                // Read next arc in this node:
                prevArc = scratchArc;
                prevArc.CopyFrom(arc);
                //System.out.println(" after copy label=" + (char) prevArc.label + " vs " + (char) arc.label);
                fst.ReadNextRealArc(arc, @in);
            }
        }
    }
}
// Uncomment for debugging:

/*
 * public static <T> void dotToFile(FST<T> fst, String filePath) throws IOException {
 *   Writer w = new OutputStreamWriter(new FileOutputStream(filePath));
 *   toDot(fst, w, true, true);
 *   w.Dispose();
 * }
 */

/// <summary>
/// Reads the first arc whose label is greater than or equal to the given label into the
/// provided arc in place and returns it iff found, otherwise returns <c>null</c>.
/// </summary>
/// <param name="label"> the label to ceil on </param>
/// <param name="fst"> the fst to operate on </param>
/// <param name="follow"> the arc to follow reading the label from </param>
/// <param name="arc"> the arc to read into in place </param>
/// <param name="in"> the fst's <see cref="FST.BytesReader"/> </param>
public static FST.Arc<T> ReadCeilArc<T>(int label, FST<T> fst, FST.Arc<T> follow, FST.Arc<T> arc, FST.BytesReader @in)
{
    // TODO maybe this is a useful in the FST class - we could simplify some other code like FSTEnum?
    if (label == FST.END_LABEL)
    {
        if (!follow.IsFinal)
        {
            return null;
        }

        if (follow.Target <= 0)
        {
            arc.Flags = (sbyte)FST.BIT_LAST_ARC;
        }
        else
        {
            arc.Flags = 0;
            // NOTE: nextArc is a node (not an address!) in this case:
            arc.NextArc = follow.Target;
            arc.Node = follow.Target;
        }

        arc.Output = follow.NextFinalOutput;
        arc.Label = FST.END_LABEL;
        return arc;
    }

    if (!FST<T>.TargetHasArcs(follow))
    {
        return null;
    }

    fst.ReadFirstTargetArc(follow, arc, @in);
    if (arc.BytesPerArc != 0 && arc.Label != FST.END_LABEL)
    {
        // Arcs are fixed array -- use binary search to find
        // the target.
        int low = arc.ArcIdx;
        int high = arc.NumArcs - 1;
        // System.out.println("do arc array low=" + low + " high=" + high +
        // " targetLabel=" + targetLabel);
        while (low <= high)
        {
            int mid = (int)((uint)(low + high) >> 1);
            @in.Position = arc.PosArcsStart;
            @in.SkipBytes(arc.BytesPerArc * mid + 1);
            int midLabel = fst.ReadLabel(@in);
            int cmp = midLabel - label;
            // System.out.println(" cycle low=" + low + " high=" + high + " mid=" +
            // mid + " midLabel=" + midLabel + " cmp=" + cmp);
            if (cmp == 0)
            {
                // Exact hit: position one before it and let ReadNextRealArc load it.
                arc.ArcIdx = mid - 1;
                return fst.ReadNextRealArc(arc, @in);
            }

            if (cmp < 0)
            {
                low = mid + 1;
            }
            else
            {
                high = mid - 1;
            }
        }

        if (low == arc.NumArcs)
        {
            // DEAD END!
            return null;
        }

        arc.ArcIdx = (low > high ? high : low);
        return fst.ReadNextRealArc(arc, @in);
    }

    // Linear scan
    fst.ReadFirstRealTargetArc(follow.Target, arc, @in);

    while (true)
    {
        // System.out.println(" non-bs cycle");
        // TODO: we should fix this code to not have to create
        // object for the output of every arc we scan... only
        // for the matching arc, if found
        if (arc.Label >= label)
        {
            // System.out.println(" found!");
            return arc;
        }

        if (arc.IsLast)
        {
            return null;
        }

        fst.ReadNextRealArc(arc, @in);
    }
}