/// <summary>
/// Offers every arc leaving <paramref name="node"/> to the queue as a starting path.
/// The 'finished' arc (label <c>FST.END_LABEL</c>) is only offered when
/// <paramref name="allowEmptyString"/> is <c>true</c>.
/// </summary>
/// <param name="node"> the arc whose target's outgoing arcs are enumerated </param>
/// <param name="startOutput"> the output the new paths start with </param>
/// <param name="allowEmptyString"> whether the end-label arc may be offered as well </param>
/// <param name="input"> the input sequence handed to the created path </param>
public virtual void AddStartPaths(FST.Arc<T> node, T startOutput, bool allowEmptyString, Int32sRef input)
{
    // NO_OUTPUT must be a singleton, so swap an equal value for the canonical instance.
    T noOutput = fst.Outputs.NoOutput;
    if (startOutput.Equals(noOutput))
    {
        startOutput = noOutput;
    }

    FSTPath<T> seed = new FSTPath<T>(startOutput, node, input);
    fst.ReadFirstTargetArc(node, seed.Arc, bytesReader);

    // Bootstrap: walk all sibling arcs, offering each one in turn.
    for (;;)
    {
        bool skipEndArc = !allowEmptyString && seed.Arc.Label == FST.END_LABEL;
        if (!skipEndArc)
        {
            AddIfCompetitive(seed);
        }

        if (seed.Arc.IsLast)
        {
            return;
        }

        fst.ReadNextArc(seed.Arc, bytesReader);
    }
}
/// <summary>
/// Walks the cached root arcs, beginning at <paramref name="rootArcIndex"/>, and
/// returns the first one under which <paramref name="utf8"/> is an exact match.
/// </summary>
/// <param name="rootArcIndex">
/// Index into <see cref="rootArcs"/> of the first root arc to try.
/// </param>
/// <param name="utf8">
/// The sequence of utf8 bytes to follow.
/// </param>
/// <returns> The label of the matching root arc (the bucket number), or
/// <c>-1</c> if no match was found. </returns>
private int GetExactMatchStartingFromRootArc(int rootArcIndex, BytesRef utf8)
{
    try
    {
        // A single scratch arc is reused for every root arc we try.
        FST.Arc<object> reusable = new FST.Arc<object>();
        FST.BytesReader reader = automaton.BytesReader;

        for (; rootArcIndex < rootArcs.Length; rootArcIndex++)
        {
            FST.Arc<object> candidate = rootArcs[rootArcIndex];
            FST.Arc<object> cursor = reusable.CopyFrom(candidate);

            // Descend into the automaton using the key as prefix.
            if (!DescendWithPrefix(cursor, utf8))
            {
                continue;
            }

            // An exact match requires the end label right after the prefix.
            automaton.ReadFirstTargetArc(cursor, cursor, reader);
            if (cursor.Label == Lucene.Net.Util.Fst.FST.END_LABEL)
            {
                // Normalize prefix-encoded weight.
                return candidate.Label;
            }
        }
    }
    catch (IOException e)
    {
        // Should never happen, but anyway.
        throw new Exception(e.Message, e);
    }

    // No match.
    return -1;
}
/// <summary>
/// Copies every arc leaving the root node of <paramref name="automaton"/> into an
/// array, in the reverse of the order they are read, so that completions with the
/// highest weights come first.
/// </summary>
/// <param name="automaton"> the automaton whose root arcs are cached </param>
/// <returns> the root arcs, last-read arc first </returns>
private static FST.Arc<object>[] CacheRootArcs(FST<object> automaton)
{
    try
    {
        List<FST.Arc<object>> collected = new List<FST.Arc<object>>();
        FST.Arc<object> cursor = automaton.GetFirstArc(new FST.Arc<object>());
        FST.BytesReader reader = automaton.BytesReader;

        // The cursor is mutated in place while scanning, so store a copy of each arc.
        automaton.ReadFirstTargetArc(cursor, cursor, reader);
        collected.Add(new FST.Arc<object>().CopyFrom(cursor));
        while (!cursor.IsLast)
        {
            automaton.ReadNextArc(cursor, reader);
            collected.Add(new FST.Arc<object>().CopyFrom(cursor));
        }

        // we want highest weights first.
        collected.Reverse();
        return collected.ToArray();
    }
    catch (IOException e)
    {
        throw new Exception(e.Message, e);
    }
}
/// <summary>
/// Copies every arc leaving the root node of <paramref name="automaton"/> into an
/// array, in the reverse of the order they are read, so that completions with the
/// highest weights come first.
/// </summary>
/// <param name="automaton"> the automaton whose root arcs are cached </param>
/// <returns> the root arcs, last-read arc first </returns>
private static FST.Arc<object>[] CacheRootArcs(FST<object> automaton)
{
    try
    {
        // LUCENENET specific: Using a stack rather than List, as we want the results in reverse
        Stack<FST.Arc<object>> reversed = new Stack<FST.Arc<object>>();
        FST.Arc<object> cursor = automaton.GetFirstArc(new FST.Arc<object>());
        FST.BytesReader reader = automaton.GetBytesReader();

        // The cursor is mutated in place while scanning, so push a copy of each arc.
        automaton.ReadFirstTargetArc(cursor, cursor, reader);
        reversed.Push(new FST.Arc<object>().CopyFrom(cursor));
        while (!cursor.IsLast)
        {
            automaton.ReadNextArc(cursor, reader);
            reversed.Push(new FST.Arc<object>().CopyFrom(cursor));
        }

        // we want highest weights first; Stack.ToArray yields last-pushed first.
        return reversed.ToArray();
    }
    catch (Exception e) when (e.IsIOException())
    {
        throw RuntimeException.Create(e);
    }
}
/// <summary>
/// Returns the first exact match by traversing root arcs, starting from the
/// arc <paramref name="rootArcIndex"/>.
/// </summary>
/// <param name="rootArcIndex">
/// The first root arc index in <see cref="rootArcs"/> to consider when
/// matching.
/// </param>
/// <param name="utf8">
/// The sequence of utf8 bytes to follow.
/// </param>
/// <returns> Returns the bucket number of the match (the label of the matching
/// root arc) or <c>-1</c> if no match was found. </returns>
/// <exception cref="Exception"> Wraps any <see cref="IOException"/> raised while
/// reading the automaton. </exception>
private int GetExactMatchStartingFromRootArc(int rootArcIndex, BytesRef utf8)
{
    try
    {
        // One scratch arc is reused for every root arc that is tried.
        FST.Arc<object> scratch = new FST.Arc<object>();
        FST.BytesReader fstReader = automaton.BytesReader;
        for (; rootArcIndex < rootArcs.Length; rootArcIndex++)
        {
            FST.Arc<object> rootArc = rootArcs[rootArcIndex];
            FST.Arc<object> arc = scratch.CopyFrom(rootArc);

            // Descend into the automaton using the key as prefix.
            if (descendWithPrefix(arc, utf8))
            {
                // An exact match requires the end label right after the prefix.
                automaton.ReadFirstTargetArc(arc, arc, fstReader);
                if (arc.Label == FST.END_LABEL)
                {
                    // Normalize prefix-encoded weight.
                    return rootArc.Label;
                }
            }
        }
    }
    catch (IOException e)
    {
        // Should never happen, but anyway. System.Exception has no constructor that
        // accepts only an inner exception, so pass the message along explicitly.
        throw new Exception(e.Message, e);
    }

    // No match.
    return -1;
}
/// <summary>
/// Walks <paramref name="fst"/> from its first arc, choosing a random outgoing arc
/// at every step until an arc labelled <c>END_LABEL</c> is chosen. The labels of the
/// chosen arcs are written into <paramref name="in"/> (which is reset first).
/// </summary>
/// <param name="fst"> the FST to walk </param>
/// <param name="in"> receives the labels of the chosen arcs, excluding the end label </param>
/// <returns> the outputs of all chosen arcs combined via <c>fst.Outputs.Add</c> </returns>
private T RandomAcceptedWord(FST<T> fst, IntsRef @in)
{
    FST.Arc<T> current = fst.GetFirstArc(new FST.Arc<T>());
    IList<FST.Arc<T>> candidates = new List<FST.Arc<T>>();
    @in.Length = 0;
    @in.Offset = 0;
    T noOutput = fst.Outputs.NoOutput;
    T accumulated = noOutput;
    FST.BytesReader reader = fst.BytesReader;

    for (;;)
    {
        // read all arcs: 'current' is mutated in place, so keep a copy of each sibling
        fst.ReadFirstTargetArc(current, current, reader);
        for (;;)
        {
            candidates.Add(new FST.Arc<T>().CopyFrom(current));
            if (current.Last)
            {
                break;
            }
            fst.ReadNextArc(current, reader);
        }

        // pick one
        current = candidates[Random.Next(candidates.Count)];
        candidates.Clear();

        // accumulate output
        accumulated = fst.Outputs.Add(accumulated, current.Output);

        // the end label terminates the word and is not appended
        if (current.Label == FST<T>.END_LABEL)
        {
            return accumulated;
        }

        // append label, growing the buffer first when it is full
        if (@in.Ints.Length == @in.Length)
        {
            @in.Grow(1 + @in.Length);
        }
        @in.Ints[@in.Length++] = current.Label;
    }
}
// Uncomment for debugging:

/*
 * public static <T> void dotToFile(FST<T> fst, String filePath) throws IOException {
 * Writer w = new OutputStreamWriter(new FileOutputStream(filePath));
 * toDot(fst, w, true, true);
 * w.Dispose();
 * }
 */

/// <summary>
/// Reads the first arc whose label is greater than or equal to the given label into
/// the provided arc in place and returns it iff found, otherwise returns <c>null</c>.
/// </summary>
/// <param name="label"> the label to ceil on </param>
/// <param name="fst"> the fst to operate on </param>
/// <param name="follow"> the arc to follow reading the label from </param>
/// <param name="arc"> the arc to read into in place </param>
/// <param name="in"> the fst's <see cref="FST.BytesReader"/> </param>
/// <returns> <paramref name="arc"/> (or the arc returned by the fst's real-arc
/// reader) when a ceiling arc exists, otherwise <c>null</c> </returns>
public static FST.Arc<T> ReadCeilArc<T>(int label, FST<T> fst, FST.Arc<T> follow, FST.Arc<T> arc, FST.BytesReader @in)
{
    // TODO maybe this is a useful in the FST class - we could simplify some other code like FSTEnum?
    if (label == FST.END_LABEL)
    {
        // The end label can only be matched when the followed arc is final.
        if (!follow.IsFinal)
        {
            return null;
        }

        if (follow.Target > 0)
        {
            arc.Flags = 0;
            // NOTE: nextArc is a node (not an address!) in this case:
            arc.NextArc = follow.Target;
            arc.Node = follow.Target;
        }
        else
        {
            arc.Flags = (sbyte)FST.BIT_LAST_ARC;
        }

        arc.Output = follow.NextFinalOutput;
        arc.Label = FST.END_LABEL;
        return arc;
    }

    if (!FST<T>.TargetHasArcs(follow))
    {
        return null;
    }

    fst.ReadFirstTargetArc(follow, arc, @in);

    if (arc.BytesPerArc != 0 && arc.Label != FST.END_LABEL)
    {
        // Arcs are fixed array -- use binary search to find
        // the target.
        int lo = arc.ArcIdx;
        int hi = arc.NumArcs - 1;

        while (lo <= hi)
        {
            // Unsigned shift keeps the midpoint valid even if lo + hi overflows.
            int probe = (int)((uint)(lo + hi) >> 1);

            // Position the reader on the label of arc 'probe'
            // (the extra byte skipped is presumably the arc's flags byte).
            @in.Position = arc.PosArcsStart;
            @in.SkipBytes(arc.BytesPerArc * probe + 1);
            int probeLabel = fst.ReadLabel(@in);
            int delta = probeLabel - label;

            if (delta == 0)
            {
                // Exact hit: step back one so that reading the "next" arc yields 'probe'.
                arc.ArcIdx = probe - 1;
                return fst.ReadNextRealArc(arc, @in);
            }

            if (delta < 0)
            {
                lo = probe + 1;
            }
            else
            {
                hi = probe - 1;
            }
        }

        if (lo == arc.NumArcs)
        {
            // DEAD END! every label is smaller than the requested one
            return null;
        }

        // No exact hit: continue from the smaller of the two bounds.
        arc.ArcIdx = (hi < lo ? hi : lo);
        return fst.ReadNextRealArc(arc, @in);
    }

    // Linear scan
    fst.ReadFirstRealTargetArc(follow.Target, arc, @in);

    // TODO: we should fix this code to not have to create
    // object for the output of every arc we scan... only
    // for the matching arc, if found
    while (arc.Label < label)
    {
        if (arc.IsLast)
        {
            return null;
        }
        fst.ReadNextRealArc(arc, @in);
    }

    return arc;
}
/// <summary>
/// Rewinds the enum state so that it covers only the prefix shared between the
/// current term and the target term.
/// </summary>
protected void RewindPrefix()
{
    if (m_upto == 0)
    {
        // Nothing has been read yet: load the first arc and start at position 1.
        m_upto = 1;
        m_fst.ReadFirstTargetArc(GetArc(0), GetArc(1), m_fstReader);
        return;
    }

    int previousUpto = m_upto;

    // Advance over positions where the current and target labels agree.
    for (m_upto = 1; m_upto < previousUpto && m_upto <= m_targetLength + 1; m_upto++)
    {
        int diff = CurrentLabel - TargetLabel;
        if (diff == 0)
        {
            continue;
        }

        if (diff > 0)
        {
            // seek backwards -- reset this arc to the first arc
            FST.Arc<T> resetArc = GetArc(m_upto);
            m_fst.ReadFirstTargetArc(GetArc(m_upto - 1), resetArc, m_fstReader);
        }

        // diff < 0 means seek forward; either way the shared prefix ends here.
        break;
    }
}