// Uncomment for debugging:
//
// public static <T> void dotToFile(FST<T> fst, String filePath) throws IOException {
//   Writer w = new OutputStreamWriter(new FileOutputStream(filePath));
//   toDot(fst, w, true, true);
//   w.Dispose();
// }

/// <summary>
/// Reads the first arc leaving <paramref name="follow"/> whose label is greater than or
/// equal to <paramref name="label"/> into <paramref name="arc"/> (in place) and returns it
/// if found; otherwise returns <c>null</c>.
/// </summary>
/// <remarks>
/// When the first target arc reports a non-zero <c>BytesPerArc</c> (and is not the
/// end-label arc) the arcs are binary searched by label; otherwise they are scanned
/// one by one starting from the first real target arc.
/// </remarks>
/// <param name="label"> the label to ceil on </param>
/// <param name="fst"> the fst to operate on </param>
/// <param name="follow"> the arc to follow reading the label from </param>
/// <param name="arc"> the arc to read into in place </param>
/// <param name="in"> the fst's <see cref="FST.BytesReader"/> </param>
/// <returns> the arc that was read, or <c>null</c> when no arc satisfies the request </returns>
public static FST.Arc<T> ReadCeilArc<T>(int label, FST<T> fst, FST.Arc<T> follow, FST.Arc<T> arc, FST.BytesReader @in)
{
    // TODO maybe this is a useful in the FST class - we could simplify some other code like FSTEnum?
    if (label == FST.END_LABEL)
    {
        // The end label can only be answered when the followed arc is final.
        if (!follow.IsFinal)
        {
            return null;
        }

        if (follow.Target > 0)
        {
            arc.Flags = 0;
            // NOTE: nextArc is a node (not an address!) in this case:
            arc.NextArc = follow.Target;
            arc.Node = follow.Target;
        }
        else
        {
            arc.Flags = (sbyte)FST.BIT_LAST_ARC;
        }

        arc.Output = follow.NextFinalOutput;
        arc.Label = FST.END_LABEL;
        return arc;
    }

    if (!FST<T>.TargetHasArcs(follow))
    {
        return null;
    }

    fst.ReadFirstTargetArc(follow, arc, @in);

    bool arcsAreFixedArray = arc.BytesPerArc != 0 && arc.Label != FST.END_LABEL;
    if (arcsAreFixedArray)
    {
        // Arcs are fixed array -- use binary search to find the target.
        int lo = arc.ArcIdx;
        int hi = arc.NumArcs - 1;

        while (lo <= hi)
        {
            int probe = (int)((uint)(lo + hi) >> 1);

            // Jump to the probed arc's slot; the extra byte skipped precedes the label.
            @in.Position = arc.PosArcsStart;
            @in.SkipBytes(arc.BytesPerArc * probe + 1);

            int probeLabel = fst.ReadLabel(@in);
            int delta = probeLabel - label;

            if (delta == 0)
            {
                // Exact hit: step back one so that reading the "next" arc loads the probed arc.
                arc.ArcIdx = probe - 1;
                return fst.ReadNextRealArc(arc, @in);
            }

            if (delta < 0)
            {
                lo = probe + 1;
            }
            else
            {
                hi = probe - 1;
            }
        }

        if (lo == arc.NumArcs)
        {
            // DEAD END! Every label in the array is smaller than the requested one.
            return null;
        }

        arc.ArcIdx = lo <= hi ? lo : hi;
        return fst.ReadNextRealArc(arc, @in);
    }

    // Linear scan
    fst.ReadFirstRealTargetArc(follow.Target, arc, @in);

    // TODO: we should fix this code to not have to create
    // object for the output of every arc we scan... only
    // for the matching arc, if found
    while (arc.Label < label)
    {
        if (arc.IsLast)
        {
            return null;
        }

        fst.ReadNextRealArc(arc, @in);
    }

    return arc;
}
/// <summary>
/// Expert: like <see cref="Util.GetByOutput(FST{long?}, long)"/> except reusing
/// <see cref="FST.BytesReader"/>, initial and scratch Arc, and result.
/// </summary>
/// <remarks>
/// NOTE(review): the early exits below give up as soon as an accumulated output exceeds
/// <paramref name="targetOutput"/>, which presumably requires outputs that only grow along
/// a path and across sibling arcs -- confirm against the callers.
/// </remarks>
/// <param name="fst"> the fst to walk </param>
/// <param name="targetOutput"> the accumulated output to look for </param>
/// <param name="in"> reader used for all arc reads </param>
/// <param name="arc"> the arc to start from; it is overwritten while walking </param>
/// <param name="scratchArc"> scratch arc used to remember the previously scanned arc </param>
/// <param name="result"> receives the labels of the path; its length is set on success </param>
/// <returns> <paramref name="result"/> when a final state with the target output is reached, otherwise <c>null</c> </returns>
public static Int32sRef GetByOutput(FST<long?> fst, long targetOutput, FST.BytesReader @in, FST.Arc<long?> arc, FST.Arc<long?> scratchArc, Int32sRef result)
{
    long accumulated = arc.Output.Value;
    int length = 0;

    for (;;)
    {
        if (arc.IsFinal)
        {
            long finalOutput = accumulated + arc.NextFinalOutput.Value;
            if (finalOutput == targetOutput)
            {
                result.Length = length;
                return result;
            }

            if (finalOutput > targetOutput)
            {
                // Overshot the target: not found.
                return null;
            }
        }

        if (!FST<long?>.TargetHasArcs(arc))
        {
            // No target arcs; not found.
            return null;
        }

        // Make room for the label appended in this round.
        if (result.Int32s.Length == length)
        {
            result.Grow(1 + length);
        }

        fst.ReadFirstRealTargetArc(arc.Target, arc, @in);

        if (arc.BytesPerArc != 0)
        {
            // Fixed-size arcs: binary search on the output reachable through each arc.
            int lo = 0;
            int hi = arc.NumArcs - 1;
            int probe = 0;
            bool exact = false;

            while (lo <= hi)
            {
                probe = (int)((uint)(lo + hi) >> 1);
                @in.Position = arc.PosArcsStart;
                @in.SkipBytes(arc.BytesPerArc * probe);

                sbyte flags = (sbyte)@in.ReadByte();
                fst.ReadLabel(@in); // the label is read only to advance past it

                long minArcOutput = accumulated;
                if ((flags & FST.BIT_ARC_HAS_OUTPUT) != 0)
                {
                    minArcOutput = accumulated + fst.Outputs.Read(@in).Value;
                }

                if (minArcOutput == targetOutput)
                {
                    exact = true;
                    break;
                }

                if (minArcOutput < targetOutput)
                {
                    lo = probe + 1;
                }
                else
                {
                    hi = probe - 1;
                }
            }

            if (hi == -1)
            {
                // Even the first arc overshoots the target.
                return null;
            }

            // Position one before the wanted arc so the "next" read loads it:
            // the exact hit, or otherwise the arc at index lo - 1.
            arc.ArcIdx = exact ? probe - 1 : lo - 2;

            fst.ReadNextRealArc(arc, @in);
            result.Int32s[length] = arc.Label;
            length++;
            accumulated += arc.Output.Value;
        }
        else
        {
            // Arcs are not fixed-size: scan them in order, remembering the previous one.
            FST.Arc<long?> prevArc = null;

            for (;;)
            {
                // this is the min output we'd hit if we follow this arc:
                long minArcOutput = accumulated + arc.Output.Value;

                if (minArcOutput > targetOutput)
                {
                    if (prevArc == null)
                    {
                        // Output doesn't exist
                        return null;
                    }

                    // Recurse on previous arc:
                    arc.CopyFrom(prevArc);
                    result.Int32s[length] = arc.Label;
                    length++;
                    accumulated += arc.Output.Value;
                    break;
                }

                if (minArcOutput == targetOutput || arc.IsLast)
                {
                    // Recurse on this arc (exact match, or nothing further to scan):
                    accumulated = minArcOutput;
                    result.Int32s[length] = arc.Label;
                    length++;
                    break;
                }

                // Read next arc in this node, keeping a copy of the current one:
                prevArc = scratchArc;
                prevArc.CopyFrom(arc);
                fst.ReadNextRealArc(arc, @in);
            }
        }
    }
}
// TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
// SEEK_END)?  saves the eq check above?

/// <summary>
/// Seeks to smallest term that's &gt;= target.
/// </summary>
/// <remarks>
/// Starts from the prefix shared with the current term, then follows the target one label
/// at a time. At each node the arcs are either binary searched (non-zero
/// <c>BytesPerArc</c>) or scanned linearly. When a node has no arc at or after the target
/// label, the method backs up to the nearest earlier arc that is not the last of its node,
/// advances it and pushes first from there; if it backs up all the way to position 0 it
/// simply returns.
/// </remarks>
protected virtual void DoSeekCeil()
{
    // TODO: possibly caller could/should provide common
    // prefix length?  ie this work may be redundant if
    // caller is in fact intersecting against its own
    // automaton

    // Save time by starting at the end of the shared prefix
    // b/w our current term & the target:
    RewindPrefix();

    FST.Arc<T> arc = GetArc(m_upto);
    int targetLabel = TargetLabel;

    // Now scan forward, matching the new suffix of the target
    for (;;)
    {
        // NOTE(review): -1 is presumably the same sentinel as FST.END_LABEL -- confirm.
        if (arc.BytesPerArc == 0 || arc.Label == -1)
        {
            // Arcs are not array'd -- must do linear scan:
            if (arc.Label == targetLabel)
            {
                // recurse
                m_output[m_upto] = m_fst.Outputs.Add(m_output[m_upto - 1], arc.Output);
                if (targetLabel == FST.END_LABEL)
                {
                    return;
                }

                CurrentLabel = arc.Label;
                Incr();
                arc = m_fst.ReadFirstTargetArc(arc, GetArc(m_upto), m_fstReader);
                targetLabel = TargetLabel;
            }
            else if (arc.Label > targetLabel)
            {
                PushFirst();
                return;
            }
            else if (arc.IsLast)
            {
                // Dead end (target is after the last arc);
                // rollback to last fork then push
                m_upto--;
                while (m_upto != 0)
                {
                    FST.Arc<T> forkArc = GetArc(m_upto);
                    if (!forkArc.IsLast)
                    {
                        m_fst.ReadNextArc(forkArc, m_fstReader);
                        PushFirst();
                        return;
                    }

                    m_upto--;
                }

                return;
            }
            else
            {
                // keep scanning
                m_fst.ReadNextArc(arc, m_fstReader);
            }

            continue;
        }

        // Arcs are fixed array -- use binary search to find
        // the target.
        FST.BytesReader arcReader = m_fst.GetBytesReader();
        int lo = arc.ArcIdx;
        int hi = arc.NumArcs - 1;
        int probe = 0;
        bool matched = false;

        while (lo <= hi)
        {
            probe = (int)((uint)(lo + hi) >> 1);
            arcReader.Position = arc.PosArcsStart;
            arcReader.SkipBytes(arc.BytesPerArc * probe + 1);

            int probeLabel = m_fst.ReadLabel(arcReader);
            int delta = probeLabel - targetLabel;

            if (delta == 0)
            {
                matched = true;
                break;
            }

            if (delta < 0)
            {
                lo = probe + 1;
            }
            else
            {
                hi = probe - 1;
            }
        }

        // NOTE: this code is dup'd w/ the linear-scan code above:
        if (matched)
        {
            // Match: step back one so the "next" read loads the matching arc.
            arc.ArcIdx = probe - 1;
            m_fst.ReadNextRealArc(arc, arcReader);
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(arc.ArcIdx == probe);
                Debugging.Assert(arc.Label == targetLabel, "arc.label={0} vs targetLabel={1} mid={2}", arc.Label, targetLabel, probe);
            }

            m_output[m_upto] = m_fst.Outputs.Add(m_output[m_upto - 1], arc.Output);
            if (targetLabel == FST.END_LABEL)
            {
                return;
            }

            CurrentLabel = arc.Label;
            Incr();
            arc = m_fst.ReadFirstTargetArc(arc, GetArc(m_upto), m_fstReader);
            targetLabel = TargetLabel;
            continue;
        }

        if (lo == arc.NumArcs)
        {
            // Dead end: load the final arc of this node before backing up.
            arc.ArcIdx = arc.NumArcs - 2;
            m_fst.ReadNextRealArc(arc, arcReader);
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(arc.IsLast);
            }

            // Dead end (target is after the last arc);
            // rollback to last fork then push
            m_upto--;
            while (m_upto != 0)
            {
                FST.Arc<T> forkArc = GetArc(m_upto);
                if (!forkArc.IsLast)
                {
                    m_fst.ReadNextArc(forkArc, m_fstReader);
                    PushFirst();
                    return;
                }

                m_upto--;
            }

            return;
        }

        // No exact match, but some arc lies after the target: load it and push first.
        arc.ArcIdx = (lo > hi ? lo : hi) - 1;
        m_fst.ReadNextRealArc(arc, arcReader);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(arc.Label > targetLabel);
        }

        PushFirst();
        return;
    }
}