コード例 #1
0
        /// <summary>
        /// Snapshots every arc leaving the root node of <paramref name="automaton"/>
        /// and returns the copies in the reverse of the order in which they were read.
        /// The original author's intent is that completions with the highest weights
        /// come first; that relies on the arc ordering inside the FST.
        /// </summary>
        /// <param name="automaton">Automaton whose root arcs are cached.</param>
        /// <returns>Copies of the root arcs, last-read arc first.</returns>
        /// <exception cref="Exception">Wraps an <see cref="IOException"/> raised while reading the automaton.</exception>
        private static FST.Arc<object>[] CacheRootArcs(FST<object> automaton)
        {
            try
            {
                List<FST.Arc<object>> snapshots = new List<FST.Arc<object>>();
                FST.Arc<object> cursor = automaton.GetFirstArc(new FST.Arc<object>());
                FST.BytesReader reader = automaton.BytesReader;

                // The cursor is reused for every read, so each arc is copied before moving on.
                automaton.ReadFirstTargetArc(cursor, cursor, reader);
                snapshots.Add(new FST.Arc<object>().CopyFrom(cursor));
                while (!cursor.IsLast)
                {
                    automaton.ReadNextArc(cursor, reader);
                    snapshots.Add(new FST.Arc<object>().CopyFrom(cursor));
                }

                // Arcs were collected first-to-last; callers want them last-to-first.
                snapshots.Reverse();
                return snapshots.ToArray();
            }
            catch (IOException e)
            {
                throw new Exception(e.Message, e);
            }
        }
コード例 #2
0
        /// <summary>
        /// Traverses <paramref name="fst"/> breadth-first starting from its first arc,
        /// expanding each target node that has outgoing arcs at most once. The walk has
        /// no visitor hook and returns nothing; it only exercises the arc-reading calls.
        /// </summary>
        /// <typeparam name="T">Output type of the FST.</typeparam>
        /// <param name="fst">The FST to traverse.</param>
        internal static void Walk<T>(FST<T> fst) // LUCENENET NOTE: Not referenced
        {
            // FIFO work list. Queue<T>.Dequeue is O(1), whereas removing index 0 from a
            // List<T> shifts every remaining element on each iteration.
            Queue<FST.Arc<T>> queue = new Queue<FST.Arc<T>>();

            FST.BytesReader reader = fst.GetBytesReader();
            FST.Arc<T> startArc = fst.GetFirstArc(new FST.Arc<T>());
            queue.Enqueue(startArc);

            // Marks nodes that were already expanded. SafeGet/SafeSet are defined
            // elsewhere; presumably they tolerate indexes past the current length - TODO confirm.
            BitArray seen = new BitArray(queue.Count);

            while (queue.Count > 0)
            {
                FST.Arc<T> arc = queue.Dequeue();

                long node = arc.Target;
                if (FST<T>.TargetHasArcs(arc) && !seen.SafeGet((int)node))
                {
                    seen.SafeSet((int)node, true);
                    fst.ReadFirstRealTargetArc(node, arc, reader);
                    while (true)
                    {
                        // Enqueue a copy: 'arc' is overwritten by the next read.
                        queue.Enqueue(new FST.Arc<T>().CopyFrom(arc));
                        if (arc.IsLast)
                        {
                            break;
                        }
                        fst.ReadNextRealArc(arc, reader);
                    }
                }
            }
        }
コード例 #3
0
        // runs the term, returning the output, or null if term
        // isn't accepted.  if prefixLength is non-null it must be
        // length 1 int array; prefixLength[0] is set to the length
        // of the term prefix that matches
        private static T Run(FST <T> fst, Int32sRef term, int[] prefixLength) // LUCENENET: CA1822: Mark members as static
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(prefixLength == null || prefixLength.Length == 1);
            }
            FST.Arc <T> arc       = fst.GetFirstArc(new FST.Arc <T>());
            T           NO_OUTPUT = fst.Outputs.NoOutput;
            T           output    = NO_OUTPUT;

            FST.BytesReader fstReader = fst.GetBytesReader();

            for (int i = 0; i <= term.Length; i++)
            {
                int label;
                if (i == term.Length)
                {
                    label = FST.END_LABEL;
                }
                else
                {
                    label = term.Int32s[term.Offset + i];
                }
                // System.out.println("   loop i=" + i + " label=" + label + " output=" + fst.Outputs.outputToString(output) + " curArc: target=" + arc.target + " isFinal?=" + arc.isFinal());
                if (fst.FindTargetArc(label, arc, arc, fstReader) == null)
                {
                    // System.out.println("    not found");
                    if (prefixLength != null)
                    {
                        prefixLength[0] = i;
                        return(output);
                    }
                    else
                    {
                        return(default);
コード例 #4
0
ファイル: FSTCompletion.cs プロジェクト: ywscr/lucenenet
        /// <summary>
        /// Copies every arc leaving the root node of <paramref name="automaton"/> and
        /// returns the copies with the last-read arc first. The original author's
        /// intent is that completions with the highest weights come first.
        /// </summary>
        /// <param name="automaton">Automaton whose root arcs are cached.</param>
        /// <returns>Copies of the root arcs in reverse read order.</returns>
        private static FST.Arc<object>[] CacheRootArcs(FST<object> automaton)
        {
            try
            {
                // LUCENENET specific: a stack hands the arcs back in reverse push order,
                // which is exactly the order we want to return.
                Stack<FST.Arc<object>> reversed = new Stack<FST.Arc<object>>();
                FST.Arc<object> cursor = automaton.GetFirstArc(new FST.Arc<object>());
                FST.BytesReader reader = automaton.GetBytesReader();

                // Start on the first root arc, advance after each copy, stop after the last arc.
                for (automaton.ReadFirstTargetArc(cursor, cursor, reader); ; automaton.ReadNextArc(cursor, reader))
                {
                    reversed.Push(new FST.Arc<object>().CopyFrom(cursor));
                    if (cursor.IsLast)
                    {
                        break;
                    }
                }

                // we want highest weights first.
                return reversed.ToArray();
            }
            catch (Exception e) when (e.IsIOException())
            {
                throw RuntimeException.Create(e);
            }
        }
コード例 #5
0
 // Use the builder to create.
 // Stores the map and, when it is non-null, caches a private copy of each arc
 // leaving the root node, keyed by the arc label converted to a char.
 private NormalizeCharMap(FST<CharsRef> map)
 {
     this.map = map;
     if (map == null)
     {
         // Nothing to pre-cache.
         return;
     }

     try
     {
         // Pre-cache root arcs:
         var cursor = new FST.Arc<CharsRef>();
         FST.BytesReader reader = map.BytesReader;
         map.GetFirstArc(cursor);
         if (!FST<CharsRef>.TargetHasArcs(cursor))
         {
             return;
         }

         // Start on the first root arc, advance after each cached copy, stop after the last arc.
         for (map.ReadFirstRealTargetArc(cursor.Target, cursor, reader); ; map.ReadNextRealArc(cursor, reader))
         {
             Debug.Assert(cursor.Label != FST.END_LABEL);
             char key = (char)cursor.Label;
             cachedRootArcs[key] = new FST.Arc<CharsRef>().CopyFrom(cursor);
             if (cursor.IsLast)
             {
                 break;
             }
         }
     }
     catch (IOException ioe)
     {
         // Bogus FST IOExceptions!!  (will never happen)
         throw new Exception("Should never happen", ioe);
     }
 }
コード例 #6
0
        /// <summary>
        /// Follows <paramref name="input"/> through <paramref name="fst"/> one label at a
        /// time, accumulating arc outputs along the way.
        /// </summary>
        /// <param name="fst">The FST to query.</param>
        /// <param name="input">Labels to look up; read from <c>Offset</c> for <c>Length</c> entries.</param>
        /// <returns>
        /// The accumulated output combined with the final arc's <c>NextFinalOutput</c>, or
        /// <c>default(T)</c> when a label has no matching arc or the last arc is not final.
        /// </returns>
        public static T Get<T>(FST<T> fst, IntsRef input)
        {
            // TODO: would be nice not to alloc this on every lookup
            var arc = fst.GetFirstArc(new FST<T>.Arc<T>());
            var reader = fst.BytesReader;

            // Running total of the outputs seen so far.
            T accumulated = fst.Outputs.NoOutput;

            int consumed = 0;
            while (consumed < input.Length)
            {
                int label = input.Ints[input.Offset + consumed];
                if (fst.FindTargetArc(label, arc, arc, reader) == null)
                {
                    return default(T);
                }
                accumulated = fst.Outputs.Add(accumulated, arc.Output);
                consumed++;
            }

            return arc.Final
                ? fst.Outputs.Add(accumulated, arc.NextFinalOutput)
                : default(T);
        }
コード例 #7
0
        /// <summary>
        /// Walks the bytes of <paramref name="scratch"/> through the FST, summing the
        /// output of each matched arc. On success <paramref name="arc"/> is left on the
        /// arc matching the last byte.
        /// </summary>
        /// <param name="scratch">Bytes to follow; read from <c>Offset</c> for <c>Length</c> bytes.</param>
        /// <param name="arc">Scratch arc that is overwritten during the walk.</param>
        /// <returns>The summed output, or <c>null</c> when some byte has no matching arc.</returns>
        private long? LookupPrefix(BytesRef scratch, FST.Arc<long?> arc) //Bogus
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(0 == (long)fst.Outputs.NoOutput);
            }

            long total = 0;
            var reader = fst.GetBytesReader();

            fst.GetFirstArc(arc);

            byte[] bytes = scratch.Bytes;
            int start = scratch.Offset;
            int limit = start + scratch.Length;

            for (int index = start; index < limit; index++)
            {
                // Labels are the unsigned value of each byte.
                int label = bytes[index] & 0xff;
                if (fst.FindTargetArc(label, arc, arc, reader) == null)
                {
                    return null;
                }
                total += (long)arc.Output;
            }

            return total;
        }
コード例 #8
0
ファイル: NormalizeCharMap.cs プロジェクト: vicancy/lucenenet
 // Use the builder to create.
 // Stores the map and, when it is non-null, caches a private copy of each arc
 // leaving the root node, keyed by the arc label converted to a char.
 private NormalizeCharMap(FST<CharsRef> map)
 {
     this.map = map;
     if (map == null)
     {
         // Nothing to pre-cache.
         return;
     }

     try
     {
         // Pre-cache root arcs:
         var cursor = new FST.Arc<CharsRef>();
         FST.BytesReader reader = map.BytesReader;
         map.GetFirstArc(cursor);
         if (!FST<CharsRef>.TargetHasArcs(cursor))
         {
             return;
         }

         // Start on the first root arc, advance after each cached copy, stop after the last arc.
         for (map.ReadFirstRealTargetArc(cursor.Target, cursor, reader); ; map.ReadNextRealArc(cursor, reader))
         {
             Debug.Assert(cursor.Label != FST<CharsRef>.END_LABEL); // LUCENENET TODO END_LABEL shouldn't be under generic?
             char key = (char)cursor.Label;
             cachedRootArcs[key] = new FST.Arc<CharsRef>().CopyFrom(cursor);
             if (cursor.Last)
             {
                 break;
             }
         }
     }
     catch (IOException ioe)
     {
         // Bogus FST IOExceptions!!  (will never happen)
         throw new Exception("Should never happen", ioe);
     }
 }
コード例 #9
0
ファイル: Util.cs プロジェクト: zhuthree/lucenenet
        // TODO: maybe a CharsRef version for BYTE2

        /// <summary>
        /// Follows the bytes of <paramref name="input"/> through <paramref name="fst"/>,
        /// accumulating arc outputs along the way. Each byte is used as an unsigned label.
        /// </summary>
        /// <param name="fst">The FST to query; asserted to have input type BYTE1.</param>
        /// <param name="input">Bytes to look up; read from <c>Offset</c> for <c>Length</c> bytes.</param>
        /// <returns>
        /// The accumulated output combined with the final arc's <c>NextFinalOutput</c>, or
        /// <c>default(T)</c> when a byte has no matching arc or the last arc is not final.
        /// </returns>
        public static T Get<T>(FST<T> fst, BytesRef input)
        {
            Debug.Assert(fst.InputType == FST.INPUT_TYPE.BYTE1);

            var reader = fst.GetBytesReader();

            // TODO: would be nice not to alloc this on every lookup
            var arc = fst.GetFirstArc(new FST.Arc<T>());

            // Running total of the outputs seen so far.
            T accumulated = fst.Outputs.NoOutput;

            int consumed = 0;
            while (consumed < input.Length)
            {
                int label = input.Bytes[consumed + input.Offset] & 0xFF;
                if (fst.FindTargetArc(label, arc, arc, reader) == null)
                {
                    return default(T);
                }
                accumulated = fst.Outputs.Add(accumulated, arc.Output);
                consumed++;
            }

            return arc.IsFinal
                ? fst.Outputs.Add(accumulated, arc.NextFinalOutput)
                : default(T);
        }
コード例 #10
0
        /// <summary>
        /// Traverses <paramref name="fst"/> breadth-first starting from its first arc,
        /// expanding each target node that has outgoing arcs at most once. The walk has
        /// no visitor hook and returns nothing; it only exercises the arc-reading calls.
        /// </summary>
        /// <typeparam name="T">Output type of the FST.</typeparam>
        /// <param name="fst">The FST to traverse.</param>
        internal static void Walk<T>(FST<T> fst)
        {
            // FIFO work list. The Java original used queue.remove(0) (remove by index);
            // List<T>.Remove(T) in .NET removes by value, so a real queue is used instead.
            var queue = new Queue<FST.Arc<T>>();

            // Nodes already expanded. A set of long keys needs no up-front sizing and
            // avoids truncating node addresses to int.
            var seen = new HashSet<long>();
            var reader = fst.BytesReader;
            var startArc = fst.GetFirstArc(new FST.Arc<T>());

            queue.Enqueue(startArc);
            while (queue.Count > 0)
            {
                FST.Arc<T> arc = queue.Dequeue();
                long node = arc.Target;

                // HashSet<T>.Add returns false when the node was already recorded.
                if (FST.TargetHasArcs(arc) && seen.Add(node))
                {
                    fst.ReadFirstRealTargetArc(node, arc, reader);
                    while (true)
                    {
                        // Enqueue a copy: 'arc' is overwritten by the next read.
                        queue.Enqueue(new FST.Arc<T>().CopyFrom(arc));
                        if (arc.Last)
                        {
                            break;
                        }
                        fst.ReadNextRealArc(arc, reader);
                    }
                }
            }
        }
コード例 #11
0
 /// <summary>
 /// Initializes the enumerator over <paramref name="fst"/>: stores the FST and a
 /// bytes reader for it, caches the FST's "no output" value, loads the FST's
 /// first arc into the arc returned by <c>GetArc(0)</c>, and sets
 /// <c>Output[0]</c> to that "no output" value.
 /// <para/>
 /// NOTE(review): the comment previously attached here described a <c>doFloor</c>
 /// flag ("doFloor controls the behavior of advance: if it's true, advance
 /// positions to the biggest term before target"). This constructor takes no such
 /// parameter; presumably that text belongs to a member outside this view.
 /// </summary>
 /// <param name="fst">The FST to enumerate.</param>
 protected internal FSTEnum(FST <T> fst)
 {
     this.Fst  = fst;
     FstReader = fst.BytesReader;
     NO_OUTPUT = fst.Outputs.NoOutput;
     // Slot 0 holds the FST's first arc and the empty output.
     fst.GetFirstArc(GetArc(0));
     Output[0] = NO_OUTPUT;
 }
コード例 #12
0
 // Builds a lookup table of arcs leaving the FST's first arc for the labels
 // 0x3040 through cacheCeiling (inclusive). Slot i corresponds to label
 // 0x3040 + i; slots for labels that have no arc stay null.
 private FST.Arc<long?>[] CacheRootArcs()
 {
     const int FirstCachedLabel = 0x3040;

     FST.Arc<long?>[] table = new FST.Arc<long?>[1 + (cacheCeiling - FirstCachedLabel)];
     FST.Arc<long?> root = new FST.Arc<long?>();
     fst.GetFirstArc(root);
     FST.Arc<long?> probe = new FST.Arc<long?>();
     FST.BytesReader reader = fst.GetBytesReader();

     // TODO: jump to 3040, readNextRealArc to ceiling? (just be careful we don't add bugs)
     for (int slot = 0; slot < table.Length; slot++)
     {
         if (fst.FindTargetArc(FirstCachedLabel + slot, root, probe, reader) == null)
         {
             continue;
         }

         // 'probe' is reused for the next lookup, so store a copy.
         table[slot] = new FST.Arc<long?>().CopyFrom(probe);
     }

     return table;
 }
コード例 #13
0
 /// <summary>
 /// Initializes the enumerator over <paramref name="fst"/>: stores the FST and a
 /// bytes reader for it, caches the FST's "no output" value, loads the FST's
 /// first arc into the arc returned by <c>GetArc(0)</c>, and sets
 /// <c>m_output[0]</c> to that "no output" value.
 /// <para/>
 /// NOTE(review): the comment previously attached here described a <c>doFloor</c>
 /// flag ("doFloor controls the behavior of advance: if it's true, advance
 /// positions to the biggest term before target"). This constructor takes no such
 /// parameter; presumably that text belongs to a member outside this view.
 /// </summary>
 /// <param name="fst">The FST to enumerate.</param>
 protected FSTEnum(FST <T> fst)
 {
     this.m_fst  = fst;
     m_fstReader = fst.GetBytesReader();
     NO_OUTPUT   = fst.Outputs.NoOutput;
     // Slot 0 holds the FST's first arc and the empty output.
     fst.GetFirstArc(GetArc(0));
     m_output[0] = NO_OUTPUT;
 }
コード例 #14
0
ファイル: Util.cs プロジェクト: zhuthree/lucenenet
        /// <summary>
        /// Reverse lookup (lookup by output instead of by input), for the special
        /// case where the FST's outputs are strictly ascending. Locates the
        /// input/output pair whose output equals <paramref name="targetOutput"/>
        /// and returns <c>null</c> if no such output exists.
        ///
        /// <para/>NOTE: this only works with <see cref="T:FST{long?}"/>, and only
        /// when the outputs ascend in the same order as the inputs. Simple
        /// ordinals (0, 1, 2, ...) or file offsets (when appending to a file)
        /// fit this requirement.
        /// </summary>
        /// <param name="fst">The FST to search.</param>
        /// <param name="targetOutput">The output value to locate.</param>
        /// <returns>The result of the six-argument overload, called with freshly allocated scratch objects.</returns>
        public static Int32sRef GetByOutput(FST<long?> fst, long targetOutput)
        {
            var reader = fst.GetBytesReader();

            // TODO: would be nice not to alloc this on every lookup
            FST.Arc<long?> rootArc = fst.GetFirstArc(new FST.Arc<long?>());

            // The scratch arc and the result buffer are created fresh for each call.
            return GetByOutput(
                fst,
                targetOutput,
                reader,
                rootArc,
                new FST.Arc<long?>(),
                new Int32sRef());
        }
コード例 #15
0
            /// <summary>
            /// Positions this enum on the term with ordinal <paramref name="ord"/>: rewinds
            /// the bytes reader, reloads the FST's first arc, asks <c>GetByOutput</c> for the
            /// input whose output is <paramref name="ord"/>, converts that input into
            /// <c>scratchBytes</c>, and finally seeks the wrapped enum to those bytes.
            /// </summary>
            /// <param name="ord">Ordinal (FST output) of the term to seek to.</param>
            public override void SeekExact(long ord)
            {
                // TODO: would be better to make this simpler and faster.
                // but we don't want to introduce a bug that corrupts our enum state!
                bytesReader.Position = 0;
                fst.GetFirstArc(firstArc);
                // NOTE(review): if GetByOutput can return null for an unknown ord, the
                // dereference below throws NullReferenceException - confirm callers only
                // pass valid ordinals.
                Int32sRef output = Lucene.Net.Util.Fst.Util.GetByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);

                // Give scratchBytes a fresh, empty buffer sized to the looked-up input.
                scratchBytes.Bytes  = new byte[output.Length];
                scratchBytes.Offset = 0;
                scratchBytes.Length = 0;
                Lucene.Net.Util.Fst.Util.ToBytesRef(output, scratchBytes);
                // TODO: we could do this lazily, better to try to push into FSTEnum though?
                @in.SeekExact(scratchBytes);
            }
コード例 #16
0
        /// <summary>
        /// Looks up the output for this input, or <c>null</c> if the
        /// input is not accepted.
        /// </summary>
        public static T Get <T>(FST <T> fst, Int32sRef input) where T : class // LUCENENET specific - added class constraint, since we compare reference equality
        {
            // TODO: would be nice not to alloc this on every lookup
            var arc = fst.GetFirstArc(new FST.Arc <T>());

            var fstReader = fst.GetBytesReader();

            // Accumulate output as we go
            T output = fst.Outputs.NoOutput;

            for (int i = 0; i < input.Length; i++)
            {
                if (fst.FindTargetArc(input.Int32s[input.Offset + i], arc, arc, fstReader) is null)
                {
                    return(default);
コード例 #17
0
        /// <summary>
        /// Builds a random word accepted by <paramref name="fst"/>: starting at the first
        /// arc, repeatedly reads all arcs of the current target, picks one at random, and
        /// appends its label to <paramref name="in"/> until the END_LABEL arc is picked.
        /// </summary>
        /// <param name="fst">The FST to sample from.</param>
        /// <param name="in">Receives the labels of the chosen word; reset to empty first.</param>
        /// <returns>The output accumulated along the chosen path.</returns>
        private T RandomAcceptedWord(FST<T> fst, IntsRef @in)
        {
            FST.Arc<T> current = fst.GetFirstArc(new FST.Arc<T>());

            IList<FST.Arc<T>> candidates = new List<FST.Arc<T>>();

            @in.Length = 0;
            @in.Offset = 0;
            T output = fst.Outputs.NoOutput;

            FST.BytesReader reader = fst.BytesReader;

            while (true)
            {
                // Gather a copy of every arc leaving the current target.
                fst.ReadFirstTargetArc(current, current, reader);
                while (true)
                {
                    candidates.Add(new FST.Arc<T>().CopyFrom(current));
                    if (current.Last)
                    {
                        break;
                    }
                    fst.ReadNextArc(current, reader);
                }

                // Choose one of them and continue from there.
                current = candidates[Random.Next(candidates.Count)];
                candidates.Clear();

                // Fold the chosen arc's output into the running output.
                output = fst.Outputs.Add(output, current.Output);

                // END_LABEL marks the end of an accepted word.
                if (current.Label == FST<T>.END_LABEL)
                {
                    return output;
                }

                // Append the label, growing the buffer when it is full.
                if (@in.Ints.Length == @in.Length)
                {
                    @in.Grow(1 + @in.Length);
                }
                @in.Ints[@in.Length++] = current.Label;
            }
        }
コード例 #18
0
 /// <summary>
 /// Fills <paramref name="result"/> with the bytes of the term whose ordinal is
 /// <paramref name="ord"/>: rewinds the reader, reloads the FST's first arc, asks
 /// <c>GetByOutput</c> for the input whose output is <paramref name="ord"/>, and
 /// converts that input into <paramref name="result"/>.
 /// </summary>
 /// <param name="ord">Ordinal (FST output) to look up.</param>
 /// <param name="result">Receives a newly allocated byte buffer holding the term.</param>
 public override void LookupOrd(int ord, BytesRef result)
 {
     try
     {
         // Shared scratch state (reader, firstArc, scratchArc, scratchInts) is reset and reused per call.
         @in.Position = 0;
         fst.GetFirstArc(firstArc);
         // NOTE(review): if GetByOutput can return null for an unknown ord, the
         // dereference below throws NullReferenceException - confirm.
         Int32sRef output = Lucene.Net.Util.Fst.Util.GetByOutput(fst, ord, @in, firstArc, scratchArc, scratchInts);
         // Hand the caller a fresh, empty buffer sized to the looked-up input.
         result.Bytes  = new byte[output.Length];
         result.Offset = 0;
         result.Length = 0;
         Util.ToBytesRef(output, result);
     }
     catch (Exception bogus) when(bogus.IsIOException())
     {
         // I/O failures are rethrown wrapped by RuntimeException.Create.
         throw RuntimeException.Create(bogus);
     }
 }
コード例 #19
0
 /// <summary>
 /// Fills <paramref name="result"/> with the bytes of the term whose ordinal is
 /// <paramref name="ord"/>: rewinds the reader, reloads the FST's first arc, asks
 /// <c>GetByOutput</c> for the input whose output is <paramref name="ord"/>, and
 /// converts that input into <paramref name="result"/>.
 /// </summary>
 /// <param name="ord">Ordinal (FST output) to look up.</param>
 /// <param name="result">Receives a newly allocated byte buffer holding the term.</param>
 public override void LookupOrd(long ord, BytesRef result)
 {
     try
     {
         // Shared scratch state (reader, firstArc, scratchArc, scratchInts) is reset and reused per call.
         @in.Position = 0;
         fst.GetFirstArc(firstArc);
         // NOTE(review): if GetByOutput can return null for an unknown ord, the
         // dereference below throws NullReferenceException - confirm.
         Int32sRef output = Lucene.Net.Util.Fst.Util.GetByOutput(fst, ord, @in, firstArc, scratchArc, scratchInts);
         // Hand the caller a fresh, empty buffer sized to the looked-up input.
         result.Bytes  = new byte[output.Length];
         result.Offset = 0;
         result.Length = 0;
         Lucene.Net.Util.Fst.Util.ToBytesRef(output, result);
     }
     catch (IOException bogus)
     {
         // Wraps the IOException; note the wrapper's message is the full ToString() of the original.
         throw new Exception(bogus.ToString(), bogus);
     }
 }
コード例 #20
0
 /// <summary>
 /// Fills <paramref name="result"/> with the bytes of the term whose ordinal is
 /// <paramref name="ord"/>: rewinds the reader, reloads the FST's first arc, asks
 /// <c>GetByOutput</c> for the input whose output is <paramref name="ord"/>, and
 /// converts that input into <paramref name="result"/>.
 /// </summary>
 /// <param name="ord">Ordinal (FST output) to look up.</param>
 /// <param name="result">Receives a newly allocated byte buffer holding the term.</param>
 /// <exception cref="System.IO.IOException">Propagated unchanged from the FST read calls.</exception>
 public override void LookupOrd(int ord, BytesRef result)
 {
     try
     {
         // Shared scratch state (reader, FirstArc, ScratchArc, ScratchInts) is reset and reused per call.
         @in.Position = 0;
         Fst.GetFirstArc(FirstArc);
         IntsRef output = Lucene.Net.Util.Fst.Util.GetByOutput(Fst, ord, @in, FirstArc, ScratchArc, ScratchInts);

         // Hand the caller a fresh, empty buffer sized to the looked-up input.
         result.Bytes = new byte[output.Length];
         result.Offset = 0;
         result.Length = 0;
         Util.ToBytesRef(output, result);
     }
     catch (System.IO.IOException)
     {
         // Rethrow with 'throw;' so the original stack trace is preserved;
         // 'throw bogus;' would reset it to this frame.
         throw;
     }
 }
コード例 #21
0
ファイル: Dictionary.cs プロジェクト: wwb/lucenenet
        // TODO: this is pretty stupid, considering how the stemming algorithm works
        // we can speed it up to be significantly faster!
        /// <summary>
        /// Looks up <paramref name="length"/> chars of <paramref name="word"/>, starting at
        /// <paramref name="offset"/>, in <paramref name="fst"/>. The word is fed one code
        /// point at a time, followed by END_LABEL.
        /// </summary>
        /// <returns>
        /// The accumulated output, or <c>null</c> when <paramref name="fst"/> is null or
        /// any code point (or the END_LABEL) has no matching arc.
        /// </returns>
        /// <exception cref="Exception">Wraps an <see cref="IOException"/> raised while reading the FST.</exception>
        internal virtual IntsRef Lookup(FST<IntsRef> fst, char[] word, int offset, int length)
        {
            if (fst == null)
            {
                return null;
            }

            FST.BytesReader reader = fst.BytesReader;
            FST.Arc<IntsRef> arc = fst.GetFirstArc(new FST.Arc<IntsRef>());

            // Running total of the outputs seen so far.
            IntsRef noOutput = fst.Outputs.NoOutput;
            IntsRef accumulated = noOutput;

            int limit = offset + length;

            try
            {
                int index = offset;
                while (index < limit)
                {
                    int codePoint = Character.CodePointAt(word, index, limit);
                    if (fst.FindTargetArc(codePoint, arc, arc, reader) == null)
                    {
                        return null;
                    }
                    if (arc.Output != noOutput)
                    {
                        accumulated = fst.Outputs.Add(accumulated, arc.Output);
                    }

                    // Step over one or two chars, depending on the code point.
                    index += Character.CharCount(codePoint);
                }

                // The word is only accepted when an END_LABEL arc follows the last code point.
                if (fst.FindTargetArc(FST.END_LABEL, arc, arc, reader) == null)
                {
                    return null;
                }

                return arc.Output != noOutput
                    ? fst.Outputs.Add(accumulated, arc.Output)
                    : accumulated;
            }
            catch (IOException bogus)
            {
                throw new Exception(bogus.Message, bogus);
            }
        }
コード例 #22
0
 /// <summary>
 /// Fills <paramref name="result"/> with the bytes of the term whose ordinal is
 /// <paramref name="ord"/>: rewinds the reader, reloads the FST's first arc, asks
 /// <c>GetByOutput</c> for the input whose output is <paramref name="ord"/>, and
 /// converts that input into <paramref name="result"/>.
 /// </summary>
 /// <param name="ord">Ordinal (FST output) to look up.</param>
 /// <param name="result">Receives a newly allocated byte buffer holding the term.</param>
 public override void LookupOrd(int ord, BytesRef result)
 {
     try
     {
         // Shared scratch state (reader, firstArc, scratchArc, scratchInts) is reset and reused per call.
         @in.Position = 0;
         fst.GetFirstArc(firstArc);
         // NOTE(review): if GetByOutput can return null for an unknown ord, the
         // dereference below throws NullReferenceException - confirm.
         IntsRef output = Util.GetByOutput(fst, ord, @in, firstArc, scratchArc, scratchInts);
         // Hand the caller a fresh, empty buffer sized to the looked-up input.
         result.Bytes  = new byte[output.Length];
         result.Offset = 0;
         result.Length = 0;
         Util.ToBytesRef(output, result);
     }
     catch (IOException bogus)
     {
         // Wraps the IOException in a general Exception carrying the same message.
         throw new Exception(bogus.Message, bogus);
     }
 }
コード例 #23
0
        // Runs the term through the FST (followed by END_LABEL) and returns the
        // accumulated output.
        //
        // When prefixLength is null, a term that is not accepted yields default(T).
        // When prefixLength is non-null it must be a length-1 int array:
        // prefixLength[0] is set to the number of term labels that matched, and the
        // output accumulated up to that point is returned even if the term is not
        // accepted.
        private T Run(FST<T> fst, IntsRef term, int[] prefixLength)
        {
            Debug.Assert(prefixLength == null || prefixLength.Length == 1);
            FST<T>.Arc<T> arc = fst.GetFirstArc(new FST.Arc<T>());
            T output = fst.Outputs.NoOutput;

            FST.BytesReader reader = fst.BytesReader;

            for (int i = 0; i <= term.Length; i++)
            {
                // One extra iteration past the term feeds END_LABEL.
                int label = (i == term.Length)
                    ? FST<T>.END_LABEL
                    : term.Ints[term.Offset + i];

                if (fst.FindTargetArc(label, arc, arc, reader) == null)
                {
                    if (prefixLength == null)
                    {
                        return default(T);
                    }

                    // Report how many labels matched before the mismatch.
                    prefixLength[0] = i;
                    return output;
                }

                output = fst.Outputs.Add(output, arc.Output);
            }

            if (prefixLength != null)
            {
                prefixLength[0] = term.Length;
            }

            return output;
        }
コード例 #24
0
ファイル: Dictionary.cs プロジェクト: wwb/lucenenet
        // TODO: this could be more efficient!
        /// <summary>
        /// Rewrites <paramref name="sb"/> in place: at each position, finds the longest
        /// run of chars that <paramref name="fst"/> accepts and replaces it with the
        /// mapped output, then continues after the inserted text.
        /// </summary>
        /// <param name="fst">Mapping from input char sequences to replacement text.</param>
        /// <param name="sb">The text to rewrite.</param>
        internal static void ApplyMappings(FST<CharsRef> fst, StringBuilder sb)
        {
            FST.BytesReader reader = fst.BytesReader;
            FST.Arc<CharsRef> firstArc = fst.GetFirstArc(new FST.Arc<CharsRef>());
            CharsRef noOutput = fst.Outputs.NoOutput;

            // Scratch arc, reset to the first arc for every start position.
            FST.Arc<CharsRef> arc = new FST.Arc<CharsRef>();

            for (int i = 0; i < sb.Length; i++)
            {
                arc.CopyFrom(firstArc);
                CharsRef pending = noOutput;
                int matchEnd = -1;
                CharsRef matchOutput = null;

                for (int j = i; j < sb.Length; j++)
                {
                    char ch = sb[j];
                    if (fst.FindTargetArc(ch, arc, arc, reader) == null)
                    {
                        break;
                    }

                    pending = fst.Outputs.Add(pending, arc.Output);

                    // Remember the most recent (and therefore longest) accepting position.
                    if (arc.IsFinal)
                    {
                        matchOutput = fst.Outputs.Add(pending, arc.NextFinalOutput);
                        matchEnd = j;
                    }
                }

                if (matchEnd < 0)
                {
                    continue;
                }

                sb.Remove(i, matchEnd + 1 - i);
                sb.Insert(i, matchOutput);

                // Skip past the inserted text; the loop increment supplies the final +1.
                i += matchOutput.Length - 1;
            }
        }
コード例 #25
0
        /// <summary>
        /// Traverses <paramref name="fst"/> breadth-first starting from its first arc,
        /// expanding each target node that has outgoing arcs at most once. The walk has
        /// no visitor hook and returns nothing; it only exercises the arc-reading calls.
        /// </summary>
        /// <typeparam name="T">Output type of the FST.</typeparam>
        /// <param name="fst">The FST to traverse.</param>
        private static void Walk<T>(FST<T> fst) // LUCENENET NOTE: Not referenced anywhere
        {
            // FIFO work list. Queue<T>.Dequeue is O(1), whereas removing index 0 from a
            // List<T> shifts every remaining element on each iteration.
            var queue = new Queue<FST.Arc<T>>();

            // Marks nodes that were already expanded. The Java version also used the
            // parameterless BitSet constructor; the initial capacity chosen by this
            // BitSet type is not visible here.
            var seen = new BitSet();
            var reader = fst.GetBytesReader();
            var startArc = fst.GetFirstArc(new FST.Arc<T>());

            queue.Enqueue(startArc);
            while (queue.Count > 0)
            {
                var arc = queue.Dequeue();

                long node = arc.Target;
                if (FST<T>.TargetHasArcs(arc) && !seen.Get((int)node))
                {
                    seen.Set((int)node);
                    fst.ReadFirstRealTargetArc(node, arc, reader);
                    while (true)
                    {
                        // Enqueue a copy: 'arc' is overwritten by the next read.
                        queue.Enqueue(new FST.Arc<T>().CopyFrom(arc));
                        if (arc.IsLast)
                        {
                            break;
                        }
                        fst.ReadNextRealArc(arc, reader);
                    }
                }
            }
        }
コード例 #26
0
            /// <summary>
            /// Looks up the first <paramref name="bufferLen"/> chars of <paramref name="buffer"/>
            /// in the FST dictionary, one code point at a time. When <c>ignoreCase</c> is set,
            /// each code point is lower-cased with the invariant culture before matching.
            /// </summary>
            /// <param name="buffer">Chars of the key.</param>
            /// <param name="bufferLen">Number of chars of <paramref name="buffer"/> to use.</param>
            /// <param name="scratchArc">Scratch arc that is overwritten during the lookup.</param>
            /// <param name="fstReader">Reader used for the FST lookups.</param>
            /// <returns>The value mapped to the key, or <c>null</c> if the key is not in the FST dictionary.</returns>
            public BytesRef Get(char[] buffer, int bufferLen, FST.Arc<BytesRef> scratchArc, FST.BytesReader fstReader)
            {
                BytesRef pending = fst.Outputs.NoOutput;

                fst.GetFirstArc(scratchArc);
                for (int upto = 0; upto < bufferLen;)
                {
                    int codePoint = Character.CodePointAt(buffer, upto, bufferLen);
                    int label = ignoreCase
                        ? Character.ToLower(codePoint, CultureInfo.InvariantCulture)
                        : codePoint;

                    if (fst.FindTargetArc(label, scratchArc, scratchArc, fstReader) == null)
                    {
                        return null;
                    }

                    pending = fst.Outputs.Add(pending, scratchArc.Output);

                    // Advance by the width of the original (not lower-cased) code point.
                    upto += Character.CharCount(codePoint);
                }

                // Only a final arc marks a complete key.
                return scratchArc.IsFinal
                    ? fst.Outputs.Add(pending, scratchArc.NextFinalOutput)
                    : null;
            }
コード例 #27
0
 // Use the builder to create.
 // Stores the map and, when it is non-null, caches a private copy of each arc
 // leaving the root node, keyed by the arc label converted to a char.
 private NormalizeCharMap(FST<CharsRef> map)
 {
     this.map = map;
     if (map == null)
     {
         // Nothing to pre-cache.
         return;
     }

     try
     {
         // Pre-cache root arcs:
         var cursor = new FST.Arc<CharsRef>();
         FST.BytesReader reader = map.GetBytesReader();
         map.GetFirstArc(cursor);
         if (!FST<CharsRef>.TargetHasArcs(cursor))
         {
             return;
         }

         // Start on the first root arc, advance after each cached copy, stop after the last arc.
         for (map.ReadFirstRealTargetArc(cursor.Target, cursor, reader); ; map.ReadNextRealArc(cursor, reader))
         {
             if (Debugging.AssertsEnabled)
             {
                 Debugging.Assert(cursor.Label != FST.END_LABEL);
             }

             char key = (char)cursor.Label;
             cachedRootArcs[key] = new FST.Arc<CharsRef>().CopyFrom(cursor);
             if (cursor.IsLast)
             {
                 break;
             }
         }
     }
     catch (Exception ioe) when (ioe.IsIOException())
     {
         // Bogus FST IOExceptions!!  (will never happen)
         throw RuntimeException.Create("Should never happen", ioe);
     }
 }
コード例 #28
0
        // NOTE: copied from WFSTCompletionLookup & tweaked
        private long?LookupPrefix(FST <long?> fst, FST.BytesReader bytesReader, BytesRef scratch, FST.Arc <long?> arc)
        {
            long?output = fst.Outputs.NoOutput;

            fst.GetFirstArc(arc);

            var bytes = scratch.Bytes;
            var pos   = scratch.Offset;
            var end   = pos + scratch.Length;

            while (pos < end)
            {
                if (fst.FindTargetArc(bytes[pos++] & 0xff, arc, arc, bytesReader) == null)
                {
                    return(null);
                }
                else
                {
                    output = fst.Outputs.Add(output, arc.Output);
                }
            }

            return(output);
        }
コード例 #29
0
 /// <summary>
 /// Loads <paramref name="frame"/> for the start arc (node) of the FST: sets its
 /// <c>fstArc</c> to the FST's first arc and its <c>fsaState</c> to the automaton's
 /// initial state.
 /// </summary>
 /// <param name="frame">The frame to initialize; its existing <c>fstArc</c> is passed to <c>GetFirstArc</c>.</param>
 /// <returns>The same <paramref name="frame"/> instance.</returns>
 private Frame LoadFirstFrame(Frame frame)
 {
     frame.fstArc   = fst.GetFirstArc(frame.fstArc);
     frame.fsaState = fsa.InitialState;
     return(frame);
 }
コード例 #30
0
ファイル: SynonymFilter.cs プロジェクト: eladmarg/lucene.net
        /// <summary>
        /// Looks for the longest synonym rule match that starts at the current input
        /// position (<c>nextRead</c>). Tokens are taken from the <c>futureInputs</c>
        /// lookahead buffer while available and are otherwise pulled from <c>m_input</c>;
        /// each token's code points (lower-cased first when <c>ignoreCase</c> is set)
        /// are run through <c>fst</c>, with <c>SynonymMap.WORD_SEPARATOR</c> fed
        /// between tokens.
        /// <para/>
        /// On exit, if a match was found, <c>inputSkipCount</c> is set to the number of
        /// matched input tokens and the match is handed to <c>AddOutput</c>; otherwise,
        /// if <c>nextRead != nextWrite</c>, <c>inputSkipCount</c> is set to 1 so the
        /// current token is skipped before matching is tried again.
        /// </summary>
        private void Parse()
        {
            //System.out.println("\nS: parse");

            // Precondition: no skip from a previous parse may still be pending.
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(inputSkipCount == 0);
            }

            // Scan cursor; nextRead itself is not modified by this method.
            int curNextRead = nextRead;

            // Holds the longest match we've seen so far:
            BytesRef matchOutput      = null;
            int      matchInputLength = 0;
            int      matchEndOffset   = -1;

            // Output accumulated along the FST path walked so far.
            BytesRef pendingOutput = fst.Outputs.NoOutput;

            // Reset scratchArc to the FST's first arc before matching begins.
            fst.GetFirstArc(scratchArc);

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(scratchArc.Output == fst.Outputs.NoOutput);
            }

            // Number of input tokens consumed by this scan; becomes the match length
            // whenever a final state is reached.
            int tokenCount = 0;

            while (true)
            {
                // Pull next token's chars:
                char[] buffer;
                int    bufferLen;
                //System.out.println("  cycle nextRead=" + curNextRead + " nextWrite=" + nextWrite);

                int inputEndOffset = 0;

                if (curNextRead == nextWrite)
                {
                    // We used up our lookahead buffer of input tokens
                    // -- pull next real input token:

                    if (finished)
                    {
                        break;
                    }
                    else
                    {
                        //System.out.println("  input.incrToken");
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(futureInputs[nextWrite].consumed);
                        }
                        // Not correct: a syn match whose output is longer
                        // than its input can set future inputs keepOrig
                        // to true:
                        //assert !futureInputs[nextWrite].keepOrig;
                        if (m_input.IncrementToken())
                        {
                            buffer    = termAtt.Buffer;
                            bufferLen = termAtt.Length;
                            // Record the new token's offsets in the slot at nextWrite.
                            PendingInput pendingInput = futureInputs[nextWrite];
                            lastStartOffset = pendingInput.startOffset = offsetAtt.StartOffset;
                            lastEndOffset   = pendingInput.endOffset = offsetAtt.EndOffset;
                            inputEndOffset  = pendingInput.endOffset;
                            //System.out.println("  new token=" + new String(buffer, 0, bufferLen));
                            // Capture() is called only when other tokens are already
                            // buffered; otherwise the slot is just marked unconsumed.
                            if (nextRead != nextWrite)
                            {
                                Capture();
                            }
                            else
                            {
                                pendingInput.consumed = false;
                            }
                        }
                        else
                        {
                            // No more input tokens
                            //System.out.println("      set end");
                            finished = true;
                            break;
                        }
                    }
                }
                else
                {
                    // Still in our lookahead
                    buffer         = futureInputs[curNextRead].term.Chars;
                    bufferLen      = futureInputs[curNextRead].term.Length;
                    inputEndOffset = futureInputs[curNextRead].endOffset;
                    //System.out.println("  old token=" + new String(buffer, 0, bufferLen));
                }

                tokenCount++;

                // Run each char in this token through the FST:
                int bufUpto = 0;
                while (bufUpto < bufferLen)
                {
                    int codePoint = Character.CodePointAt(buffer, bufUpto, bufferLen);
                    if (fst.FindTargetArc(ignoreCase ? Character.ToLower(codePoint, CultureInfo.InvariantCulture) : codePoint, scratchArc, scratchArc, fstReader) == null)
                    {
                        // No arc for this code point: leave the token loop entirely.
                        // Any match recorded for earlier tokens is kept.
                        //System.out.println("    stop");
                        goto byTokenBreak;
                    }

                    // Accum the output
                    pendingOutput = fst.Outputs.Add(pendingOutput, scratchArc.Output);
                    //System.out.println("    char=" + buffer[bufUpto] + " output=" + pendingOutput + " arc.output=" + scratchArc.output);
                    // Advance by the number of chars this code point occupies.
                    bufUpto += Character.CharCount(codePoint);
                }

                // OK, entire token matched; now see if this is a final
                // state:
                if (scratchArc.IsFinal)
                {
                    // Overwrites any shorter match found earlier in this scan.
                    matchOutput      = fst.Outputs.Add(pendingOutput, scratchArc.NextFinalOutput);
                    matchInputLength = tokenCount;
                    matchEndOffset   = inputEndOffset;
                    //System.out.println("  found matchLength=" + matchInputLength + " output=" + matchOutput);
                }

                // See if the FST wants to continue matching (ie, needs to
                // see the next input token):
                if (fst.FindTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc, fstReader) == null)
                {
                    // No further rules can match here; we're done
                    // searching for matching rules starting at the
                    // current input position.
                    break;
                }
                else
                {
                    // More matching is possible -- accum the output (if
                    // any) of the WORD_SEP arc:
                    pendingOutput = fst.Outputs.Add(pendingOutput, scratchArc.Output);
                    if (nextRead == nextWrite)
                    {
                        Capture();
                    }
                }

                // Move the scan cursor on to the next slot.
                curNextRead = RollIncr(curNextRead);
            }
byTokenBreak:

            // Nothing is buffered and input is not exhausted: advance the write slot.
            if (nextRead == nextWrite && !finished)
            {
                //System.out.println("  skip write slot=" + nextWrite);
                nextWrite = RollIncr(nextWrite);
            }

            if (matchOutput != null)
            {
                //System.out.println("  add matchLength=" + matchInputLength + " output=" + matchOutput);
                inputSkipCount = matchInputLength;
                AddOutput(matchOutput, matchInputLength, matchEndOffset);
            }
            else if (nextRead != nextWrite)
            {
                // Even though we had no match here, we set to 1
                // because we need to skip current input token before
                // trying to match again:
                inputSkipCount = 1;
            }
            else
            {
                // No match and nothing buffered: only expected once input has ended.
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(finished);
                }
            }

            //System.out.println("  parse done inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead + " nextWrite=" + nextWrite);
        }
コード例 #31
0
ファイル: Util.cs プロジェクト: zhuthree/lucenenet
        /// <summary>
        /// Writes a GraphViz <c>dot</c> description of an <see cref="FST{T}"/> to a
        /// <see cref="TextWriter"/> so the automaton can be visualized. Typical usage:
        ///
        /// <code>
        /// using (TextWriter sw = new StreamWriter(&quot;out.dot&quot;))
        /// {
        ///     Util.ToDot(fst, sw, true, true);
        /// }
        /// </code>
        ///
        /// followed, on the command line, by:
        ///
        /// <code>
        /// dot -Tpng -o out.png out.dot
        /// </code>
        ///
        /// <para/>
        /// Note: FSTs with more than a few thousand nodes will not render in practice.
        /// State addresses are narrowed to <c>int</c> here, so an FST larger than
        /// 2.1 GB will make this method throw strange exceptions.
        /// <para/>
        /// See also <a href="http://www.graphviz.org/">http://www.graphviz.org/</a>.
        /// </summary>
        /// <param name="fst">The automaton to dump.</param>
        /// <param name="out">Receives the <c>dot</c> text; it is flushed before returning.</param>
        /// <param name="sameRank">
        ///          If <c>true</c>, a <c>rank=same</c> group is emitted for the states
        ///          discovered on each breadth-first level, so that they are laid out in
        ///          layers. This may mess up arcs, but makes the FST's structure a bit clearer.
        /// </param>
        /// <param name="labelStates">
        ///          If <c>true</c> states will have labels equal to their offsets in their
        ///          binary format. Expands the graph considerably. If <c>false</c>, a compact
        ///          default node style is written to the prologue.
        /// </param>
        public static void ToDot <T>(FST <T> fst, TextWriter @out, bool sameRank, bool labelStates)
        {
            const string expandedNodeColor = "blue";
            const string stateShape        = "circle";
            const string finalStateShape   = "doublecircle";

            // The start arc leads from the epsilon state to the first state that has
            // outgoing transitions.
            FST.Arc<T> rootArc = fst.GetFirstArc(new FST.Arc<T>());

            // Arcs whose targets are expanded on the level being processed ...
            IList<FST.Arc<T>> currentLevel = new List<FST.Arc<T>>();

            // ... and arcs discovered meanwhile, to be expanded on the following level.
            IList<FST.Arc<T>> upcomingLevel = new List<FST.Arc<T>>();
            upcomingLevel.Add(rootArc);

            // States first reached on the current level (used for ranking).
            IList<int?> levelStates = new List<int?>();

            // Target offsets of the states that have already been emitted.
            BitArray visited = new BitArray(32);
            visited.SafeSet((int)rootArc.Target, true);

            // DOT prologue.
            @out.Write("digraph FST {\n");
            @out.Write("  rankdir = LR; splines=true; concentrate=true; ordering=out; ranksep=2.5; \n");
            if (!labelStates)
            {
                @out.Write("  node [shape=circle, width=.2, height=.2, style=filled]\n");
            }

            EmitDotState(@out, "initial", "point", "white", "");

            T   noOutput = fst.Outputs.NoOutput;
            var reader   = fst.GetBytesReader();

            // Emit the state the start arc points to, plus the edge leading into it.
            string rootColor       = fst.IsExpandedTarget(rootArc, reader) ? expandedNodeColor : null;
            bool   rootIsFinal     = rootArc.IsFinal;
            T      rootFinalOutput = default(T);
            if (rootIsFinal && !rootArc.NextFinalOutput.Equals(noOutput))
            {
                rootFinalOutput = rootArc.NextFinalOutput;
            }

            EmitDotState(
                @out,
                Convert.ToString(rootArc.Target),
                rootIsFinal ? finalStateShape : stateShape,
                rootColor,
                rootFinalOutput == null ? "" : fst.Outputs.OutputToString(rootFinalOutput));

            @out.Write("  initial -> " + rootArc.Target + "\n");

            int level = 0;

            while (upcomingLevel.Count > 0)
            {
                // Promote everything queued so far to the level being processed.
                currentLevel.AddRange(upcomingLevel);
                upcomingLevel.Clear();

                level++;
                @out.Write("\n  // Transitions and states at level: " + level + "\n");

                while (currentLevel.Count > 0)
                {
                    // Take arcs from the back of the list.
                    int        lastIndex = currentLevel.Count - 1;
                    FST.Arc<T> arc       = currentLevel[lastIndex];
                    currentLevel.RemoveAt(lastIndex);

                    if (!FST<T>.TargetHasArcs(arc))
                    {
                        continue;
                    }

                    // Remember the state being expanded; "arc" is reused below to walk
                    // that state's outgoing arcs.
                    long sourceState = arc.Target;
                    fst.ReadFirstRealTargetArc(sourceState, arc, reader);

                    bool hasMoreArcs;
                    do
                    {
                        // Emit a state the first time it is reached and queue it for the next level.
                        if (arc.Target >= 0 && !visited.SafeGet((int)arc.Target))
                        {
                            string stateColor = fst.IsExpandedTarget(arc, reader) ? expandedNodeColor : null;

                            string finalOutput = (arc.NextFinalOutput != null && !arc.NextFinalOutput.Equals(noOutput))
                                ? fst.Outputs.OutputToString(arc.NextFinalOutput)
                                : "";

                            EmitDotState(@out, Convert.ToString(arc.Target), stateShape, stateColor, finalOutput);

                            visited.SafeSet((int)arc.Target, true);
                            upcomingLevel.Add((new FST.Arc<T>()).CopyFrom(arc));
                            levelStates.Add((int)arc.Target);
                        }

                        string outs = arc.Output.Equals(noOutput)
                            ? ""
                            : ("/" + fst.Outputs.OutputToString(arc.Output));

                        if (!FST<T>.TargetHasArcs(arc) && arc.IsFinal && !arc.NextFinalOutput.Equals(noOutput))
                        {
                            // Tricky special case: due to pruning, the builder can produce an
                            // arc into the final end state (-1) that nevertheless carries a
                            // next final output; pull that output up onto this arc.
                            outs = outs + "/[" + fst.Outputs.OutputToString(arc.NextFinalOutput) + "]";
                        }

                        string arcColor = arc.Flag(FST.BIT_TARGET_NEXT) ? "red" : "black";

                        Debug.Assert(arc.Label != FST.END_LABEL);
                        @out.Write(
                            "  " + sourceState + " -> " + arc.Target
                            + " [label=\"" + PrintableLabel(arc.Label) + outs + "\""
                            + (arc.IsFinal ? " style=\"bold\"" : "")
                            + " color=\"" + arcColor + "\"]\n");

                        // Advance only while the state being expanded has further arcs.
                        hasMoreArcs = !arc.IsLast;
                        if (hasMoreArcs)
                        {
                            fst.ReadNextRealArc(arc, reader);
                        }
                    }
                    while (hasMoreArcs);
                }

                // Ranking information for the states discovered on this level.
                if (sameRank && levelStates.Count > 1)
                {
                    @out.Write("  {rank=same; ");
                    for (int i = 0; i < levelStates.Count; i++)
                    {
                        int state = (int)levelStates[i];
                        @out.Write(state + "; ");
                    }
                    @out.Write(" }\n");
                }
                levelStates.Clear();
            }

            // The terminating state is always emitted.
            @out.Write("  -1 [style=filled, color=black, shape=doublecircle, label=\"\"]\n\n");
            @out.Write("  {rank=sink; -1 }\n");

            @out.Write("}\n");
            @out.Flush();
        }