/// <summary>
/// Repeatedly takes the first path from <c>Queue</c> and follows it to its
/// "zero output completion" (always taking the first arc whose output
/// compares equal to <c>NoOutput</c>) until an <c>END_LABEL</c> arc is
/// reached, collecting up to <c>TopN</c> results.
/// </summary>
/// <returns>
/// A <c>TopResults</c> built from the collected results; its first
/// constructor argument is <c>rejectCount + TopN &lt;= MaxQueueDepth</c>,
/// where <c>rejectCount</c> counts completions refused by <c>AcceptResult</c>.
/// </returns>
public virtual TopResults<T> Search()
{
    IList<Result<T>> results = new List<Result<T>>();

    var fstReader = Fst.BytesReader;
    T NO_OUTPUT = Fst.Outputs.NoOutput;

    // TODO: we could enable FST to sorting arcs by weight
    // as it freezes... can easily do this on first pass
    // (w/o requiring rewrite)

    // TODO: maybe we should make an FST.INPUT_TYPE.BYTE0.5!?
    // (nibbles)

    int rejectCount = 0;

    // For each top N path:
    while (results.Count < TopN)
    {
        if (Queue == null)
        {
            // Ran out of paths
            break;
        }

        // Take the top path AND remove it from the queue, since we are
        // now going to pursue (and mutate) it. FirstOrDefault yields null
        // on an empty queue instead of throwing, so the check below is
        // reachable.
        FSTPath<T> path = Queue.FirstOrDefault();

        if (path == null)
        {
            // There were less than topN paths available:
            break;
        }

        // Must happen before path.Arc / path.Input / path.Cost are touched;
        // otherwise the same (now modified) path would still sit in the queue.
        // NOTE(review): assumes Queue exposes an ICollection<T>-style Remove.
        Queue.Remove(path);

        if (path.Arc.Label == FST<T>.END_LABEL)
        {
            // Empty string!
            path.Input.Length--;
            results.Add(new Result<T>(path.Input, path.Cost));
            continue;
        }

        if (results.Count == TopN - 1 && MaxQueueDepth == TopN)
        {
            // Last path -- don't bother w/ queue anymore:
            Queue = null;
        }

        // We take path and find its "0 output completion",
        // ie, just keep traversing the first arc with
        // NO_OUTPUT that we can find, since this must lead
        // to the minimum path that completes from
        // path.arc.

        // For each input letter:
        while (true)
        {
            Fst.ReadFirstTargetArc(path.Arc, path.Arc, fstReader);

            // For each arc leaving this node:
            bool foundZero = false;
            while (true)
            {
                // tricky: instead of comparing output == 0, we must
                // express it via the comparator compare(output, 0) == 0
                if (Comparator.Compare(NO_OUTPUT, path.Arc.Output) == 0)
                {
                    if (Queue == null)
                    {
                        // No queue to feed: stop on the first zero-output arc,
                        // leaving path.Arc positioned on it.
                        foundZero = true;
                        break;
                    }
                    else if (!foundZero)
                    {
                        // Remember the first zero-output arc; the scan goes on
                        // so the remaining arcs can be offered to the queue.
                        ScratchArc.CopyFrom(path.Arc);
                        foundZero = true;
                    }
                    else
                    {
                        AddIfCompetitive(path);
                    }
                }
                else if (Queue != null)
                {
                    AddIfCompetitive(path);
                }

                if (path.Arc.Last)
                {
                    break;
                }
                Fst.ReadNextArc(path.Arc, fstReader);
            }

            Debug.Assert(foundZero);

            if (Queue != null)
            {
                // TODO: maybe we can save this copyFrom if we
                // are more clever above... eg on finding the
                // first NO_OUTPUT arc we'd switch to using
                // scratchArc
                path.Arc.CopyFrom(ScratchArc);
            }

            if (path.Arc.Label == FST<T>.END_LABEL)
            {
                // Add final output:
                T finalOutput = Fst.Outputs.Add(path.Cost, path.Arc.Output);
                if (AcceptResult(path.Input, finalOutput))
                {
                    results.Add(new Result<T>(path.Input, finalOutput));
                }
                else
                {
                    rejectCount++;
                }
                break;
            }
            else
            {
                // Append this arc's label to the input and fold its output
                // into the running cost, then continue from its target.
                path.Input.Grow(1 + path.Input.Length);
                path.Input.Ints[path.Input.Length] = path.Arc.Label;
                path.Input.Length++;
                path.Cost = Fst.Outputs.Add(path.Cost, path.Arc.Output);
            }
        }
    }

    return new TopResults<T>(rejectCount + TopN <= MaxQueueDepth, results);
}
/// <summary>
/// Expert: like <c>Util.GetByOutput(FST, long)</c> except reusing the
/// BytesReader, the initial and scratch arcs, and the result.
/// <para/>
/// Walks the FST starting at <paramref name="arc"/>, summing arc outputs,
/// and looks for a final arc whose accumulated output (including its
/// <c>NextFinalOutput</c>) equals <paramref name="targetOutput"/>.
/// NOTE(review): the early <c>null</c> returns once an accumulated output
/// exceeds the target presume outputs grow along the arcs of a node; confirm
/// against how the FST was built.
/// </summary>
/// <param name="fst">FST to read arcs, labels and outputs from.</param>
/// <param name="targetOutput">Accumulated output to look for.</param>
/// <param name="in">Reader used for all arc reads; it is repositioned during the search.</param>
/// <param name="arc">Starting arc; overwritten in place as the walk proceeds.</param>
/// <param name="scratchArc">Scratch arc used to hold the previously visited arc while scanning a node's arcs one by one.</param>
/// <param name="result">Receives the labels of the followed arcs; its <c>Length</c> is set on success.</param>
/// <returns>
/// <paramref name="result"/> when a match is found; otherwise <c>null</c>.
/// </returns>
public static IntsRef GetByOutput(FST<long?> fst, long targetOutput, FST<long?>.BytesReader @in, FST<long?>.Arc<long?> arc, FST<long?>.Arc<long?> scratchArc, IntsRef result)
{
    long accumulated = arc.Output.Value;
    int length = 0;

    for (;;)
    {
        if (arc.Final)
        {
            long total = accumulated + arc.NextFinalOutput.Value;
            if (total == targetOutput)
            {
                result.Length = length;
                return result;
            }
            if (total > targetOutput)
            {
                return null;
            }
        }

        if (!FST<long?>.TargetHasArcs(arc))
        {
            // Nowhere left to go and no match so far.
            return null;
        }

        if (result.Ints.Length == length)
        {
            result.Grow(1 + length);
        }

        fst.ReadFirstRealTargetArc(arc.Target, arc, @in);

        if (arc.BytesPerArc != 0)
        {
            // Arcs have a fixed byte width here: binary search over them by
            // the accumulated output each arc would give.
            int lo = 0;
            int hi = arc.NumArcs - 1;
            int probe = 0;
            bool hitExact = false;

            while (lo <= hi)
            {
                probe = (int)((uint)(lo + hi) >> 1);
                @in.Position = arc.PosArcsStart;
                @in.SkipBytes(arc.BytesPerArc * probe);
                var flags = (sbyte)@in.ReadByte();
                fst.ReadLabel(@in);

                long candidate = accumulated;
                if ((flags & FST<long>.BIT_ARC_HAS_OUTPUT) != 0)
                {
                    candidate += fst.Outputs.Read(@in).Value;
                }

                if (candidate == targetOutput)
                {
                    hitExact = true;
                    break;
                }

                if (candidate < targetOutput)
                {
                    lo = probe + 1;
                }
                else
                {
                    hi = probe - 1;
                }
            }

            if (hi == -1)
            {
                return null;
            }

            // Set ArcIdx to one before the arc to follow, then let
            // ReadNextRealArc load that arc.
            arc.ArcIdx = hitExact ? probe - 1 : lo - 2;

            fst.ReadNextRealArc(arc, @in);
            result.Ints[length++] = arc.Label;
            accumulated += arc.Output.Value;
        }
        else
        {
            // Scan the node's arcs one at a time.
            FST<long?>.Arc<long?> previous = null;

            for (;;)
            {
                // The accumulated output we would have after following this arc.
                long candidate = accumulated + arc.Output.Value;

                if (candidate > targetOutput)
                {
                    if (previous == null)
                    {
                        // Output doesn't exist
                        return null;
                    }

                    // Overshot: step back and follow the previous arc.
                    arc.CopyFrom(previous);
                    result.Ints[length++] = arc.Label;
                    accumulated += arc.Output.Value;
                    break;
                }

                if (candidate == targetOutput || arc.Last)
                {
                    // Exact hit, or no further arcs to try: follow this arc.
                    accumulated = candidate;
                    result.Ints[length++] = arc.Label;
                    break;
                }

                // Remember the current arc, then read the next arc in this node.
                previous = scratchArc;
                previous.CopyFrom(arc);
                fst.ReadNextRealArc(arc, @in);
            }
        }
    }
}