예제 #1
0
            public virtual TopResults <T> Search()
            {
                IList <Result <T> > results = new List <Result <T> >();

                //System.out.println("search topN=" + topN);

                var fstReader = Fst.BytesReader;
                T   NO_OUTPUT = Fst.Outputs.NoOutput;

                // TODO: we could enable FST to sorting arcs by weight
                // as it freezes... can easily do this on first pass
                // (w/o requiring rewrite)

                // TODO: maybe we should make an FST.INPUT_TYPE.BYTE0.5!?
                // (nibbles)
                int rejectCount = 0;

                // For each top N path:
                while (results.Count < TopN)
                {
                    //System.out.println("\nfind next path: queue.size=" + queue.size());

                    FSTPath <T> path;

                    if (Queue == null)
                    {
                        // Ran out of paths
                        //System.out.println("  break queue=null");
                        break;
                    }

                    // Remove top path since we are now going to
                    // pursue it:
                    path = Queue.First();

                    if (path == null)
                    {
                        // There were less than topN paths available:
                        //System.out.println("  break no more paths");
                        break;
                    }

                    if (path.Arc.Label == FST <T> .END_LABEL)
                    {
                        //System.out.println("    empty string!  cost=" + path.cost);
                        // Empty string!
                        path.Input.Length--;
                        results.Add(new Result <T>(path.Input, path.Cost));
                        continue;
                    }

                    if (results.Count == TopN - 1 && MaxQueueDepth == TopN)
                    {
                        // Last path -- don't bother w/ queue anymore:
                        Queue = null;
                    }

                    //System.out.println("  path: " + path);

                    // We take path and find its "0 output completion",
                    // ie, just keep traversing the first arc with
                    // NO_OUTPUT that we can find, since this must lead
                    // to the minimum path that completes from
                    // path.arc.

                    // For each input letter:
                    while (true)
                    {
                        //System.out.println("\n    cycle path: " + path);
                        Fst.ReadFirstTargetArc(path.Arc, path.Arc, fstReader);

                        // For each arc leaving this node:
                        bool foundZero = false;
                        while (true)
                        {
                            //System.out.println("      arc=" + (char) path.arc.label + " cost=" + path.arc.output);
                            // tricky: instead of comparing output == 0, we must
                            // express it via the comparator compare(output, 0) == 0
                            if (Comparator.Compare(NO_OUTPUT, path.Arc.Output) == 0)
                            {
                                if (Queue == null)
                                {
                                    foundZero = true;
                                    break;
                                }
                                else if (!foundZero)
                                {
                                    ScratchArc.CopyFrom(path.Arc);
                                    foundZero = true;
                                }
                                else
                                {
                                    AddIfCompetitive(path);
                                }
                            }
                            else if (Queue != null)
                            {
                                AddIfCompetitive(path);
                            }
                            if (path.Arc.Last)
                            {
                                break;
                            }
                            Fst.ReadNextArc(path.Arc, fstReader);
                        }

                        Debug.Assert(foundZero);

                        if (Queue != null)
                        {
                            // TODO: maybe we can save this copyFrom if we
                            // are more clever above... eg on finding the
                            // first NO_OUTPUT arc we'd switch to using
                            // scratchArc
                            path.Arc.CopyFrom(ScratchArc);
                        }

                        if (path.Arc.Label == FST <T> .END_LABEL)
                        {
                            // Add final output:
                            //System.out.println("    done!: " + path);
                            T finalOutput = Fst.Outputs.Add(path.Cost, path.Arc.Output);
                            if (AcceptResult(path.Input, finalOutput))
                            {
                                //System.out.println("    add result: " + path);
                                results.Add(new Result <T>(path.Input, finalOutput));
                            }
                            else
                            {
                                rejectCount++;
                            }
                            break;
                        }
                        else
                        {
                            path.Input.Grow(1 + path.Input.Length);
                            path.Input.Ints[path.Input.Length] = path.Arc.Label;
                            path.Input.Length++;
                            path.Cost = Fst.Outputs.Add(path.Cost, path.Arc.Output);
                        }
                    }
                }
                return(new TopResults <T>(rejectCount + TopN <= MaxQueueDepth, results));
            }
예제 #2
0
        /// <summary>
        /// Expert: like <seealso cref="Util#getByOutput(FST, long)"/> except reusing
        /// BytesReader, initial and scratch Arc, and result.
        /// </summary>
        public static IntsRef GetByOutput(FST <long?> fst, long targetOutput, FST <long?> .BytesReader @in, FST <long?> .Arc <long?> arc, FST <long?> .Arc <long?> scratchArc, IntsRef result)
        {
            long output = arc.Output.Value;
            int  upto   = 0;

            //System.out.println("reverseLookup output=" + targetOutput);

            while (true)
            {
                //System.out.println("loop: output=" + output + " upto=" + upto + " arc=" + arc);
                if (arc.Final)
                {
                    long finalOutput = output + arc.NextFinalOutput.Value;
                    //System.out.println("  isFinal finalOutput=" + finalOutput);
                    if (finalOutput == targetOutput)
                    {
                        result.Length = upto;
                        //System.out.println("    found!");
                        return(result);
                    }
                    else if (finalOutput > targetOutput)
                    {
                        //System.out.println("    not found!");
                        return(null);
                    }
                }

                if (FST <long?> .TargetHasArcs(arc))
                {
                    //System.out.println("  targetHasArcs");
                    if (result.Ints.Length == upto)
                    {
                        result.Grow(1 + upto);
                    }

                    fst.ReadFirstRealTargetArc(arc.Target, arc, @in);

                    if (arc.BytesPerArc != 0)
                    {
                        int low  = 0;
                        int high = arc.NumArcs - 1;
                        int mid  = 0;
                        //System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + " output=" + output);
                        bool exact = false;
                        while (low <= high)
                        {
                            mid          = (int)((uint)(low + high) >> 1);
                            @in.Position = arc.PosArcsStart;
                            @in.SkipBytes(arc.BytesPerArc * mid);
                            var flags = (sbyte)@in.ReadByte();
                            fst.ReadLabel(@in);
                            long minArcOutput;
                            if ((flags & FST <long> .BIT_ARC_HAS_OUTPUT) != 0)
                            {
                                long arcOutput = fst.Outputs.Read(@in).Value;
                                minArcOutput = output + arcOutput;
                            }
                            else
                            {
                                minArcOutput = output;
                            }
                            if (minArcOutput == targetOutput)
                            {
                                exact = true;
                                break;
                            }
                            else if (minArcOutput < targetOutput)
                            {
                                low = mid + 1;
                            }
                            else
                            {
                                high = mid - 1;
                            }
                        }

                        if (high == -1)
                        {
                            return(null);
                        }
                        else if (exact)
                        {
                            arc.ArcIdx = mid - 1;
                        }
                        else
                        {
                            arc.ArcIdx = low - 2;
                        }

                        fst.ReadNextRealArc(arc, @in);
                        result.Ints[upto++] = arc.Label;
                        output += arc.Output.Value;
                    }
                    else
                    {
                        FST <long?> .Arc <long?> prevArc = null;

                        while (true)
                        {
                            //System.out.println("    cycle label=" + arc.label + " output=" + arc.output);

                            // this is the min output we'd hit if we follow
                            // this arc:
                            long minArcOutput = output + arc.Output.Value;

                            if (minArcOutput == targetOutput)
                            {
                                // Recurse on this arc:
                                //System.out.println("  match!  break");
                                output = minArcOutput;
                                result.Ints[upto++] = arc.Label;
                                break;
                            }
                            else if (minArcOutput > targetOutput)
                            {
                                if (prevArc == null)
                                {
                                    // Output doesn't exist
                                    return(null);
                                }
                                else
                                {
                                    // Recurse on previous arc:
                                    arc.CopyFrom(prevArc);
                                    result.Ints[upto++] = arc.Label;
                                    output += arc.Output.Value;
                                    //System.out.println("    recurse prev label=" + (char) arc.label + " output=" + output);
                                    break;
                                }
                            }
                            else if (arc.Last)
                            {
                                // Recurse on this arc:
                                output = minArcOutput;
                                //System.out.println("    recurse last label=" + (char) arc.label + " output=" + output);
                                result.Ints[upto++] = arc.Label;
                                break;
                            }
                            else
                            {
                                // Read next arc in this node:
                                prevArc = scratchArc;
                                prevArc.CopyFrom(arc);
                                //System.out.println("      after copy label=" + (char) prevArc.label + " vs " + (char) arc.label);
                                fst.ReadNextRealArc(arc, @in);
                            }
                        }
                    }
                }
                else
                {
                    //System.out.println("  no target arcs; not found!");
                    return(null);
                }
            }
        }