Esempio n. 1
0
        // TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
        // SEEK_END)?  saves the eq check above?
        /// <summary>
        /// Seeks to largest term that's <= target. </summary>
        protected internal virtual void DoSeekFloor()
        {
            // TODO: possibly caller could/should provide common
            // prefix length?  ie this work may be redundant if
            // caller is in fact intersecting against its own
            // automaton
            //System.out.println("FE: seek floor upto=" + upto);

            // Save CPU by starting at the end of the shared prefix
            // b/w our current term & the target:
            RewindPrefix();

            //System.out.println("FE: after rewind upto=" + upto);

            FST <T> .Arc <T> arc = GetArc(Upto);
            int targetLabel      = TargetLabel;

            //System.out.println("FE: init targetLabel=" + targetLabel);

            // Now scan forward, matching the new suffix of the target
            while (true)
            {
                //System.out.println("  cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast() + " bba=" + arc.bytesPerArc);

                if (arc.BytesPerArc != 0 && arc.Label != FST <T> .END_LABEL)
                {
                    // Arcs are fixed array -- use binary search to find
                    // the target.

                    FST <T> .BytesReader @in = Fst.BytesReader;
                    int low  = arc.ArcIdx;
                    int high = arc.NumArcs - 1;
                    int mid  = 0;
                    //System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel);
                    bool found = false;
                    while (low <= high)
                    {
                        mid          = (int)((uint)(low + high) >> 1);
                        @in.Position = arc.PosArcsStart;
                        @in.SkipBytes(arc.BytesPerArc * mid + 1);
                        int midLabel = Fst.ReadLabel(@in);
                        int cmp      = midLabel - targetLabel;
                        //System.out.println("  cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
                        if (cmp < 0)
                        {
                            low = mid + 1;
                        }
                        else if (cmp > 0)
                        {
                            high = mid - 1;
                        }
                        else
                        {
                            found = true;
                            break;
                        }
                    }

                    // NOTE: this code is dup'd w/ the code below (in
                    // the outer else clause):
                    if (found)
                    {
                        // Match -- recurse
                        //System.out.println("  match!  arcIdx=" + mid);
                        arc.ArcIdx = mid - 1;
                        Fst.ReadNextRealArc(arc, @in);
                        Debug.Assert(arc.ArcIdx == mid);
                        Debug.Assert(arc.Label == targetLabel, "arc.label=" + arc.Label + " vs targetLabel=" + targetLabel + " mid=" + mid);
                        Output[Upto] = Fst.Outputs.Add(Output[Upto - 1], arc.Output);
                        if (targetLabel == FST <T> .END_LABEL)
                        {
                            return;
                        }
                        CurrentLabel = arc.Label;
                        Incr();
                        arc         = Fst.ReadFirstTargetArc(arc, GetArc(Upto), FstReader);
                        targetLabel = TargetLabel;
                        continue;
                    }
                    else if (high == -1)
                    {
                        //System.out.println("  before first");
                        // Very first arc is after our target
                        // TODO: if each arc could somehow read the arc just
                        // before, we can save this re-scan.  The ceil case
                        // doesn't need this because it reads the next arc
                        // instead:
                        while (true)
                        {
                            // First, walk backwards until we find a first arc
                            // that's before our target label:
                            Fst.ReadFirstTargetArc(GetArc(Upto - 1), arc, FstReader);
                            if (arc.Label < targetLabel)
                            {
                                // Then, scan forwards to the arc just before
                                // the targetLabel:
                                while (!arc.Last && Fst.ReadNextArcLabel(arc, @in) < targetLabel)
                                {
                                    Fst.ReadNextArc(arc, FstReader);
                                }
                                PushLast();
                                return;
                            }
                            Upto--;
                            if (Upto == 0)
                            {
                                return;
                            }
                            targetLabel = TargetLabel;
                            arc         = GetArc(Upto);
                        }
                    }
                    else
                    {
                        // There is a floor arc:
                        arc.ArcIdx = (low > high ? high : low) - 1;
                        //System.out.println(" hasFloor arcIdx=" + (arc.arcIdx+1));
                        Fst.ReadNextRealArc(arc, @in);
                        Debug.Assert(arc.Last || Fst.ReadNextArcLabel(arc, @in) > targetLabel);
                        Debug.Assert(arc.Label < targetLabel, "arc.label=" + arc.Label + " vs targetLabel=" + targetLabel);
                        PushLast();
                        return;
                    }
                }
                else
                {
                    if (arc.Label == targetLabel)
                    {
                        // Match -- recurse
                        Output[Upto] = Fst.Outputs.Add(Output[Upto - 1], arc.Output);
                        if (targetLabel == FST <T> .END_LABEL)
                        {
                            return;
                        }
                        CurrentLabel = arc.Label;
                        Incr();
                        arc         = Fst.ReadFirstTargetArc(arc, GetArc(Upto), FstReader);
                        targetLabel = TargetLabel;
                    }
                    else if (arc.Label > targetLabel)
                    {
                        // TODO: if each arc could somehow read the arc just
                        // before, we can save this re-scan.  The ceil case
                        // doesn't need this because it reads the next arc
                        // instead:
                        while (true)
                        {
                            // First, walk backwards until we find a first arc
                            // that's before our target label:
                            Fst.ReadFirstTargetArc(GetArc(Upto - 1), arc, FstReader);
                            if (arc.Label < targetLabel)
                            {
                                // Then, scan forwards to the arc just before
                                // the targetLabel:
                                while (!arc.Last && Fst.ReadNextArcLabel(arc, FstReader) < targetLabel)
                                {
                                    Fst.ReadNextArc(arc, FstReader);
                                }
                                PushLast();
                                return;
                            }
                            Upto--;
                            if (Upto == 0)
                            {
                                return;
                            }
                            targetLabel = TargetLabel;
                            arc         = GetArc(Upto);
                        }
                    }
                    else if (!arc.Last)
                    {
                        //System.out.println("  check next label=" + fst.readNextArcLabel(arc) + " (" + (char) fst.readNextArcLabel(arc) + ")");
                        if (Fst.ReadNextArcLabel(arc, FstReader) > targetLabel)
                        {
                            PushLast();
                            return;
                        }
                        else
                        {
                            // keep scanning
                            Fst.ReadNextArc(arc, FstReader);
                        }
                    }
                    else
                    {
                        PushLast();
                        return;
                    }
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Expert: like <seealso cref="Util#getByOutput(FST, long)"/> except reusing
        /// BytesReader, initial and scratch Arc, and result.
        /// </summary>
        public static IntsRef GetByOutput(FST <long?> fst, long targetOutput, FST <long?> .BytesReader @in, FST <long?> .Arc <long?> arc, FST <long?> .Arc <long?> scratchArc, IntsRef result)
        {
            long output = arc.Output.Value;
            int  upto   = 0;

            //System.out.println("reverseLookup output=" + targetOutput);

            while (true)
            {
                //System.out.println("loop: output=" + output + " upto=" + upto + " arc=" + arc);
                if (arc.Final)
                {
                    long finalOutput = output + arc.NextFinalOutput.Value;
                    //System.out.println("  isFinal finalOutput=" + finalOutput);
                    if (finalOutput == targetOutput)
                    {
                        result.Length = upto;
                        //System.out.println("    found!");
                        return(result);
                    }
                    else if (finalOutput > targetOutput)
                    {
                        //System.out.println("    not found!");
                        return(null);
                    }
                }

                if (FST <long?> .TargetHasArcs(arc))
                {
                    //System.out.println("  targetHasArcs");
                    if (result.Ints.Length == upto)
                    {
                        result.Grow(1 + upto);
                    }

                    fst.ReadFirstRealTargetArc(arc.Target, arc, @in);

                    if (arc.BytesPerArc != 0)
                    {
                        int low  = 0;
                        int high = arc.NumArcs - 1;
                        int mid  = 0;
                        //System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + " output=" + output);
                        bool exact = false;
                        while (low <= high)
                        {
                            mid          = (int)((uint)(low + high) >> 1);
                            @in.Position = arc.PosArcsStart;
                            @in.SkipBytes(arc.BytesPerArc * mid);
                            var flags = (sbyte)@in.ReadByte();
                            fst.ReadLabel(@in);
                            long minArcOutput;
                            if ((flags & FST <long> .BIT_ARC_HAS_OUTPUT) != 0)
                            {
                                long arcOutput = fst.Outputs.Read(@in).Value;
                                minArcOutput = output + arcOutput;
                            }
                            else
                            {
                                minArcOutput = output;
                            }
                            if (minArcOutput == targetOutput)
                            {
                                exact = true;
                                break;
                            }
                            else if (minArcOutput < targetOutput)
                            {
                                low = mid + 1;
                            }
                            else
                            {
                                high = mid - 1;
                            }
                        }

                        if (high == -1)
                        {
                            return(null);
                        }
                        else if (exact)
                        {
                            arc.ArcIdx = mid - 1;
                        }
                        else
                        {
                            arc.ArcIdx = low - 2;
                        }

                        fst.ReadNextRealArc(arc, @in);
                        result.Ints[upto++] = arc.Label;
                        output += arc.Output.Value;
                    }
                    else
                    {
                        FST <long?> .Arc <long?> prevArc = null;

                        while (true)
                        {
                            //System.out.println("    cycle label=" + arc.label + " output=" + arc.output);

                            // this is the min output we'd hit if we follow
                            // this arc:
                            long minArcOutput = output + arc.Output.Value;

                            if (minArcOutput == targetOutput)
                            {
                                // Recurse on this arc:
                                //System.out.println("  match!  break");
                                output = minArcOutput;
                                result.Ints[upto++] = arc.Label;
                                break;
                            }
                            else if (minArcOutput > targetOutput)
                            {
                                if (prevArc == null)
                                {
                                    // Output doesn't exist
                                    return(null);
                                }
                                else
                                {
                                    // Recurse on previous arc:
                                    arc.CopyFrom(prevArc);
                                    result.Ints[upto++] = arc.Label;
                                    output += arc.Output.Value;
                                    //System.out.println("    recurse prev label=" + (char) arc.label + " output=" + output);
                                    break;
                                }
                            }
                            else if (arc.Last)
                            {
                                // Recurse on this arc:
                                output = minArcOutput;
                                //System.out.println("    recurse last label=" + (char) arc.label + " output=" + output);
                                result.Ints[upto++] = arc.Label;
                                break;
                            }
                            else
                            {
                                // Read next arc in this node:
                                prevArc = scratchArc;
                                prevArc.CopyFrom(arc);
                                //System.out.println("      after copy label=" + (char) prevArc.label + " vs " + (char) arc.label);
                                fst.ReadNextRealArc(arc, @in);
                            }
                        }
                    }
                }
                else
                {
                    //System.out.println("  no target arcs; not found!");
                    return(null);
                }
            }
        }
Esempio n. 3
0
        // TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
        // SEEK_END)?  saves the eq check above?

        /// <summary>
        /// Seeks to smallest term that's >= target. </summary>
        protected internal virtual void DoSeekCeil()
        {
            //System.out.println("    advance len=" + target.length + " curlen=" + current.length);

            // TODO: possibly caller could/should provide common
            // prefix length?  ie this work may be redundant if
            // caller is in fact intersecting against its own
            // automaton

            //System.out.println("FE.seekCeil upto=" + upto);

            // Save time by starting at the end of the shared prefix
            // b/w our current term & the target:
            RewindPrefix();
            //System.out.println("  after rewind upto=" + upto);

            FST <T> .Arc <T> arc = GetArc(Upto);
            int targetLabel      = TargetLabel;

            //System.out.println("  init targetLabel=" + targetLabel);

            // Now scan forward, matching the new suffix of the target
            while (true)
            {
                //System.out.println("  cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") vs targetLabel=" + targetLabel);

                if (arc.BytesPerArc != 0 && arc.Label != -1)
                {
                    // Arcs are fixed array -- use binary search to find
                    // the target.

                    FST <T> .BytesReader @in = Fst.BytesReader;
                    int low  = arc.ArcIdx;
                    int high = arc.NumArcs - 1;
                    int mid  = 0;
                    //System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel);
                    bool found = false;
                    while (low <= high)
                    {
                        mid          = (int)((uint)(low + high) >> 1);
                        @in.Position = arc.PosArcsStart;
                        @in.SkipBytes(arc.BytesPerArc * mid + 1);
                        int midLabel = Fst.ReadLabel(@in);
                        int cmp      = midLabel - targetLabel;
                        //System.out.println("  cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
                        if (cmp < 0)
                        {
                            low = mid + 1;
                        }
                        else if (cmp > 0)
                        {
                            high = mid - 1;
                        }
                        else
                        {
                            found = true;
                            break;
                        }
                    }

                    // NOTE: this code is dup'd w/ the code below (in
                    // the outer else clause):
                    if (found)
                    {
                        // Match
                        arc.ArcIdx = mid - 1;
                        Fst.ReadNextRealArc(arc, @in);
                        Debug.Assert(arc.ArcIdx == mid);
                        Debug.Assert(arc.Label == targetLabel, "arc.label=" + arc.Label + " vs targetLabel=" + targetLabel + " mid=" + mid);
                        Output[Upto] = Fst.Outputs.Add(Output[Upto - 1], arc.Output);
                        if (targetLabel == FST <T> .END_LABEL)
                        {
                            return;
                        }
                        CurrentLabel = arc.Label;
                        Incr();
                        arc         = Fst.ReadFirstTargetArc(arc, GetArc(Upto), FstReader);
                        targetLabel = TargetLabel;
                        continue;
                    }
                    else if (low == arc.NumArcs)
                    {
                        // Dead end
                        arc.ArcIdx = arc.NumArcs - 2;
                        Fst.ReadNextRealArc(arc, @in);
                        Debug.Assert(arc.Last);
                        // Dead end (target is after the last arc);
                        // rollback to last fork then push
                        Upto--;
                        while (true)
                        {
                            if (Upto == 0)
                            {
                                return;
                            }
                            FST <T> .Arc <T> prevArc = GetArc(Upto);
                            //System.out.println("  rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
                            if (!prevArc.Last)
                            {
                                Fst.ReadNextArc(prevArc, FstReader);
                                PushFirst();
                                return;
                            }
                            Upto--;
                        }
                    }
                    else
                    {
                        arc.ArcIdx = (low > high ? low : high) - 1;
                        Fst.ReadNextRealArc(arc, @in);
                        Debug.Assert(arc.Label > targetLabel);
                        PushFirst();
                        return;
                    }
                }
                else
                {
                    // Arcs are not array'd -- must do linear scan:
                    if (arc.Label == targetLabel)
                    {
                        // recurse
                        Output[Upto] = Fst.Outputs.Add(Output[Upto - 1], arc.Output);
                        if (targetLabel == FST <T> .END_LABEL)
                        {
                            return;
                        }
                        CurrentLabel = arc.Label;
                        Incr();
                        arc         = Fst.ReadFirstTargetArc(arc, GetArc(Upto), FstReader);
                        targetLabel = TargetLabel;
                    }
                    else if (arc.Label > targetLabel)
                    {
                        PushFirst();
                        return;
                    }
                    else if (arc.Last)
                    {
                        // Dead end (target is after the last arc);
                        // rollback to last fork then push
                        Upto--;
                        while (true)
                        {
                            if (Upto == 0)
                            {
                                return;
                            }
                            FST <T> .Arc <T> prevArc = GetArc(Upto);
                            //System.out.println("  rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
                            if (!prevArc.Last)
                            {
                                Fst.ReadNextArc(prevArc, FstReader);
                                PushFirst();
                                return;
                            }
                            Upto--;
                        }
                    }
                    else
                    {
                        // keep scanning
                        //System.out.println("    next scan");
                        Fst.ReadNextArc(arc, FstReader);
                    }
                }
            }
        }
Esempio n. 4
0
        // Uncomment for debugging:

        /*
         * public static <T> void dotToFile(FST<T> fst, String filePath) throws IOException {
         * Writer w = new OutputStreamWriter(new FileOutputStream(filePath));
         * toDot(fst, w, true, true);
         * w.close();
         * }
         */

        /// <summary>
        /// Reads the first arc greater or equal that the given label into the provided
        /// arc in place and returns it iff found, otherwise return <code>null</code>.
        /// </summary>
        /// <param name="label"> the label to ceil on </param>
        /// <param name="fst"> the fst to operate on </param>
        /// <param name="follow"> the arc to follow reading the label from </param>
        /// <param name="arc"> the arc to read into in place </param>
        /// <param name="in"> the fst's <seealso cref="BytesReader"/> </param>
        public static FST <T> .Arc <T> readCeilArc <T>(int label, FST <T> fst, FST <T> .Arc <T> follow, FST <T> .Arc <T> arc, FST <T> .BytesReader @in)
        {
            // TODO maybe this is a useful in the FST class - we could simplify some other code like FSTEnum?
            if (label == FST <T> .END_LABEL)
            {
                if (follow.Final)
                {
                    if (follow.Target <= 0)
                    {
                        arc.Flags = (sbyte)FST <T> .BIT_LAST_ARC;
                    }
                    else
                    {
                        arc.Flags = 0;
                        // NOTE: nextArc is a node (not an address!) in this case:
                        arc.NextArc = follow.Target;
                        arc.Node    = follow.Target;
                    }
                    arc.Output = follow.NextFinalOutput;
                    arc.Label  = FST <T> .END_LABEL;
                    return(arc);
                }
                else
                {
                    return(null);
                }
            }

            if (!FST <T> .TargetHasArcs(follow))
            {
                return(null);
            }
            fst.ReadFirstTargetArc(follow, arc, @in);
            if (arc.BytesPerArc != 0 && arc.Label != FST <T> .END_LABEL)
            {
                // Arcs are fixed array -- use binary search to find
                // the target.

                int low  = arc.ArcIdx;
                int high = arc.NumArcs - 1;
                int mid  = 0;
                // System.out.println("do arc array low=" + low + " high=" + high +
                // " targetLabel=" + targetLabel);
                while (low <= high)
                {
                    mid          = (int)((uint)(low + high) >> 1);
                    @in.Position = arc.PosArcsStart;
                    @in.SkipBytes(arc.BytesPerArc * mid + 1);
                    int midLabel = fst.ReadLabel(@in);
                    int cmp      = midLabel - label;
                    // System.out.println("  cycle low=" + low + " high=" + high + " mid=" +
                    // mid + " midLabel=" + midLabel + " cmp=" + cmp);
                    if (cmp < 0)
                    {
                        low = mid + 1;
                    }
                    else if (cmp > 0)
                    {
                        high = mid - 1;
                    }
                    else
                    {
                        arc.ArcIdx = mid - 1;
                        return(fst.ReadNextRealArc(arc, @in));
                    }
                }
                if (low == arc.NumArcs)
                {
                    // DEAD END!
                    return(null);
                }

                arc.ArcIdx = (low > high ? high : low);
                return(fst.ReadNextRealArc(arc, @in));
            }

            // Linear scan
            fst.ReadFirstRealTargetArc(follow.Target, arc, @in);

            while (true)
            {
                // System.out.println("  non-bs cycle");
                // TODO: we should fix this code to not have to create
                // object for the output of every arc we scan... only
                // for the matching arc, if found
                if (arc.Label >= label)
                {
                    // System.out.println("    found!");
                    return(arc);
                }
                else if (arc.Last)
                {
                    return(null);
                }
                else
                {
                    fst.ReadNextRealArc(arc, @in);
                }
            }
        }