Esempio n. 1
0
            /// <summary>
            /// Returns a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </summary>
            /// <returns> a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </returns>
            /// <exception cref="IOException"> if an <see cref="IOException"/> occurs; </exception>
            public virtual StemmerOverrideMap Build()
            {
                ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;
                Builder <BytesRef>  builder = new Builder <BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);

                int[]     sort      = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
                Int32sRef intsSpare = new Int32sRef();
                int       size      = hash.Count;

                for (int i = 0; i < size; i++)
                {
                    int      id       = sort[i];
                    BytesRef bytesRef = hash.Get(id, spare);
                    UnicodeUtil.UTF8toUTF32(bytesRef, intsSpare);
                    builder.Add(intsSpare, new BytesRef(outputValues[id]));
                }
                return(new StemmerOverrideMap(builder.Finish(), ignoreCase));
            }
Esempio n. 2
0
            /// <summary>
            /// Returns an <seealso cref="StemmerOverrideMap"/> to be used with the <seealso cref="StemmerOverrideFilter"/> </summary>
            /// <returns> an <seealso cref="StemmerOverrideMap"/> to be used with the <seealso cref="StemmerOverrideFilter"/> </returns>
            /// <exception cref="IOException"> if an <seealso cref="IOException"/> occurs; </exception>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public StemmerOverrideMap build() throws java.io.IOException
            public virtual StemmerOverrideMap build()
            {
                ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;

                org.apache.lucene.util.fst.Builder <BytesRef> builder = new org.apache.lucene.util.fst.Builder <BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int[] sort = hash.sort(org.apache.lucene.util.BytesRef.getUTF8SortedAsUnicodeComparator());
                int[]   sort      = hash.sort(BytesRef.UTF8SortedAsUnicodeComparator);
                IntsRef intsSpare = new IntsRef();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int size = hash.size();
                int size = hash.size();

                for (int i = 0; i < size; i++)
                {
                    int      id       = sort[i];
                    BytesRef bytesRef = hash.get(id, spare);
                    UnicodeUtil.UTF8toUTF32(bytesRef, intsSpare);
                    builder.add(intsSpare, new BytesRef(outputValues[id]));
                }
                return(new StemmerOverrideMap(builder.finish(), ignoreCase));
            }
Esempio n. 3
0
            /// <summary>
            /// Builds an <see cref="SynonymMap"/> and returns it.
            /// </summary>
            public virtual SynonymMap Build()
            {
                ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;
                // TODO: are we using the best sharing options?
                var builder = new Builder <BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);

                BytesRef            scratch       = new BytesRef(64);
                ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();

                HashSet <int?> dedupSet;

                if (dedup)
                {
                    dedupSet = new HashSet <int?>();
                }
                else
                {
                    dedupSet = null;
                }


                var spare = new byte[5];

                ICollection <CharsRef> keys = workingSet.Keys;

                CharsRef[] sortedKeys = keys.ToArray();
#pragma warning disable 612, 618
                System.Array.Sort(sortedKeys, CharsRef.UTF16SortedAsUTF8Comparer);
#pragma warning restore 612, 618


                Int32sRef scratchIntsRef = new Int32sRef();

                //System.out.println("fmap.build");
                for (int keyIdx = 0; keyIdx < sortedKeys.Length; keyIdx++)
                {
                    CharsRef input  = sortedKeys[keyIdx];
                    MapEntry output = workingSet[input];

                    int numEntries = output.ords.Count;
                    // output size, assume the worst case
                    int estimatedSize = 5 + numEntries * 5; // numEntries + one ord for each entry

                    scratch.Grow(estimatedSize);
                    scratchOutput.Reset(scratch.Bytes, scratch.Offset, scratch.Bytes.Length);
                    Debug.Assert(scratch.Offset == 0);

                    // now write our output data:
                    int count = 0;
                    for (int i = 0; i < numEntries; i++)
                    {
                        if (dedupSet != null)
                        {
                            // box once
                            int?ent = output.ords[i];
                            if (dedupSet.Contains(ent))
                            {
                                continue;
                            }
                            dedupSet.Add(ent);
                        }
                        scratchOutput.WriteVInt32(output.ords[i]);
                        count++;
                    }

                    int pos = scratchOutput.Position;
                    scratchOutput.WriteVInt32(count << 1 | (output.includeOrig ? 0 : 1));
                    int pos2    = scratchOutput.Position;
                    int vIntLen = pos2 - pos;

                    // Move the count + includeOrig to the front of the byte[]:
                    Array.Copy(scratch.Bytes, pos, spare, 0, vIntLen);
                    Array.Copy(scratch.Bytes, 0, scratch.Bytes, vIntLen, pos);
                    Array.Copy(spare, 0, scratch.Bytes, 0, vIntLen);

                    if (dedupSet != null)
                    {
                        dedupSet.Clear();
                    }

                    scratch.Length = scratchOutput.Position - scratch.Offset;
                    //System.out.println("  add input=" + input + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + count);
                    builder.Add(Lucene.Net.Util.Fst.Util.ToUTF32(input.ToString(), scratchIntsRef), BytesRef.DeepCopyOf(scratch));
                }

                FST <BytesRef> fst = builder.Finish();
                return(new SynonymMap(fst, words, maxHorizontalContext));
            }
Esempio n. 4
0
            /// <summary>
            /// Builds an <seealso cref="SynonymMap"/> and returns it.
            /// </summary>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public SynonymMap build() throws java.io.IOException
            public virtual SynonymMap build()
            {
                ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;

                // TODO: are we using the best sharing options?
                org.apache.lucene.util.fst.Builder <BytesRef> builder = new org.apache.lucene.util.fst.Builder <BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);

                BytesRef            scratch       = new BytesRef(64);
                ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final java.util.Set<Integer> dedupSet;
                HashSet <int?> dedupSet;

                if (dedup)
                {
                    dedupSet = new HashSet <>();
                }
                else
                {
                    dedupSet = null;
                }

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final byte[] spare = new byte[5];
                sbyte[] spare = new sbyte[5];

                Dictionary <CharsRef, MapEntry> .KeyCollection keys = workingSet.Keys;
                CharsRef[] sortedKeys = keys.toArray(new CharsRef[keys.size()]);
                Arrays.sort(sortedKeys, CharsRef.UTF16SortedAsUTF8Comparator);

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final org.apache.lucene.util.IntsRef scratchIntsRef = new org.apache.lucene.util.IntsRef();
                IntsRef scratchIntsRef = new IntsRef();

                //System.out.println("fmap.build");
                for (int keyIdx = 0; keyIdx < sortedKeys.Length; keyIdx++)
                {
                    CharsRef input  = sortedKeys[keyIdx];
                    MapEntry output = workingSet[input];

                    int numEntries = output.ords.Count;
                    // output size, assume the worst case
                    int estimatedSize = 5 + numEntries * 5;     // numEntries + one ord for each entry

                    scratch.grow(estimatedSize);
                    scratchOutput.reset(scratch.bytes, scratch.offset, scratch.bytes.length);
                    Debug.Assert(scratch.offset == 0);

                    // now write our output data:
                    int count = 0;
                    for (int i = 0; i < numEntries; i++)
                    {
                        if (dedupSet != null)
                        {
                            // box once
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final Integer ent = output.ords.get(i);
                            int?ent = output.ords[i];
                            if (dedupSet.Contains(ent))
                            {
                                continue;
                            }
                            dedupSet.Add(ent);
                        }
                        scratchOutput.writeVInt(output.ords[i]);
                        count++;
                    }

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int pos = scratchOutput.getPosition();
                    int pos = scratchOutput.Position;
                    scratchOutput.writeVInt(count << 1 | (output.includeOrig ? 0 : 1));
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int pos2 = scratchOutput.getPosition();
                    int pos2 = scratchOutput.Position;
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int vIntLen = pos2-pos;
                    int vIntLen = pos2 - pos;

                    // Move the count + includeOrig to the front of the byte[]:
                    Array.Copy(scratch.bytes, pos, spare, 0, vIntLen);
                    Array.Copy(scratch.bytes, 0, scratch.bytes, vIntLen, pos);
                    Array.Copy(spare, 0, scratch.bytes, 0, vIntLen);

                    if (dedupSet != null)
                    {
                        dedupSet.Clear();
                    }

                    scratch.length = scratchOutput.Position - scratch.offset;
                    //System.out.println("  add input=" + input + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + count);
                    builder.add(Util.toUTF32(input, scratchIntsRef), BytesRef.deepCopyOf(scratch));
                }

                FST <BytesRef> fst = builder.finish();

                return(new SynonymMap(fst, words, maxHorizontalContext));
            }