/// <summary>
            /// Returns a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </summary>
            /// <returns> a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </returns>
            /// <exception cref="IOException"> if an <see cref="IOException"/> occurs. </exception>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public StemmerOverrideMap build() throws java.io.IOException
            public virtual StemmerOverrideMap build()
            {
                // Feeds every entry held in `hash` into an FST builder, in sorted order, and wraps
                // the finished FST together with the `ignoreCase` flag.
                ByteSequenceOutputs fstOutputs = ByteSequenceOutputs.Singleton;
                org.apache.lucene.util.fst.Builder<BytesRef> fstBuilder =
                    new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, fstOutputs);

                // Entry ids in the order produced by the UTF8-sorted-as-Unicode comparator.
                int[] orderedIds = hash.sort(BytesRef.UTF8SortedAsUnicodeComparator);
                IntsRef utf32Scratch = new IntsRef();
                int entryCount = hash.size();

                // Only the first `entryCount` slots of `orderedIds` are consumed.
                for (int position = 0; position < entryCount; ++position)
                {
                    int entryId = orderedIds[position];

                    // Fetch the stored key bytes and convert them from UTF-8 to UTF-32 for the FST input.
                    UnicodeUtil.UTF8toUTF32(hash.get(entryId, spare), utf32Scratch);

                    BytesRef stemOutput = new BytesRef(outputValues[entryId]);
                    fstBuilder.add(utf32Scratch, stemOutput);
                }

                StemmerOverrideMap overrideMap = new StemmerOverrideMap(fstBuilder.finish(), ignoreCase);
                return overrideMap;
            }
		/// <summary>
		/// Returns a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </summary>
		/// <returns> a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </returns>
		/// <exception cref="IOException"> if an <see cref="IOException"/> occurs. </exception>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public StemmerOverrideMap build() throws java.io.IOException
		public virtual StemmerOverrideMap build()
		{
			// Adds each hashed key (converted to UTF-32) with its output value to an FST builder,
			// walking the keys in comparator order, then returns the map built on the resulting FST.
			ByteSequenceOutputs sequenceOutputs = ByteSequenceOutputs.Singleton;
			var mapBuilder = new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, sequenceOutputs);

			int[] idsInOrder = hash.sort(BytesRef.UTF8SortedAsUnicodeComparator);
			var codePoints = new IntsRef();
			int total = hash.size();

			int cursor = 0;
			while (cursor < total)
			{
				int currentId = idsInOrder[cursor];

				// Look up the key bytes for this id, reusing the shared `spare` buffer.
				BytesRef keyBytes = hash.get(currentId, spare);
				UnicodeUtil.UTF8toUTF32(keyBytes, codePoints);

				mapBuilder.add(codePoints, new BytesRef(outputValues[currentId]));
				cursor++;
			}

			return new StemmerOverrideMap(mapBuilder.finish(), ignoreCase);
		}
Exemple #3
0
            /// <summary>
            /// Builds a <see cref="SynonymMap"/> and returns it.
            /// </summary>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public SynonymMap build() throws java.io.IOException
            public virtual SynonymMap build()
            {
                // Builds an FST with one entry per key of `workingSet`. Each entry's output is a byte
                // sequence laid out as: vInt(count << 1 | flag), followed by one vInt per retained ord,
                // where the low flag bit is 1 when the entry's includeOrig is false.
                ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;

                // TODO: are we using the best sharing options?
                org.apache.lucene.util.fst.Builder<BytesRef> builder = new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);

                BytesRef            scratch       = new BytesRef(64);
                ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();

                // Allocated only when de-duplication is requested; null switches the check off.
                // (C# has no diamond operator, so the type argument must be spelled out.)
                HashSet<int?> dedupSet = dedup ? new HashSet<int?>() : null;

                // Holds the header vInt while the ords are shifted; 5 bytes matches the per-vInt
                // worst case assumed for `estimatedSize` below.
                sbyte[] spare = new sbyte[5];

                // Snapshot the keys into an array so they can be sorted before insertion.
                // NOTE(review): Array.Sort is not a stable sort; this assumes the comparator never
                // reports two distinct dictionary keys as equal.
                Dictionary<CharsRef, MapEntry>.KeyCollection keys = workingSet.Keys;
                CharsRef[] sortedKeys = new CharsRef[keys.Count];
                keys.CopyTo(sortedKeys, 0);
                Array.Sort(sortedKeys, CharsRef.UTF16SortedAsUTF8Comparator);

                IntsRef scratchIntsRef = new IntsRef();

                //System.out.println("fmap.build");
                for (int keyIdx = 0; keyIdx < sortedKeys.Length; keyIdx++)
                {
                    CharsRef input  = sortedKeys[keyIdx];
                    MapEntry output = workingSet[input];

                    int numEntries = output.ords.Count;
                    // output size, assume the worst case
                    int estimatedSize = 5 + numEntries * 5;     // numEntries + one ord for each entry

                    scratch.grow(estimatedSize);
                    scratchOutput.reset(scratch.bytes, scratch.offset, scratch.bytes.Length);
                    Debug.Assert(scratch.offset == 0);

                    // now write our output data:
                    int count = 0;
                    for (int i = 0; i < numEntries; i++)
                    {
                        // HashSet.Add returns false when the value is already present, so a single
                        // call both tests for and records the ord.
                        if (dedupSet != null && !dedupSet.Add(output.ords[i]))
                        {
                            continue;
                        }
                        scratchOutput.writeVInt(output.ords[i]);
                        count++;
                    }

                    // The header can only be written once `count` is known, so it lands after the
                    // ords; measure its encoded length so it can be moved to the front.
                    int pos = scratchOutput.Position;
                    scratchOutput.writeVInt(count << 1 | (output.includeOrig ? 0 : 1));
                    int pos2    = scratchOutput.Position;
                    int vIntLen = pos2 - pos;

                    // Move the count + includeOrig to the front of the byte[]:
                    Array.Copy(scratch.bytes, pos, spare, 0, vIntLen);          // stash the header
                    Array.Copy(scratch.bytes, 0, scratch.bytes, vIntLen, pos);  // shift the ords right (overlapping copy)
                    Array.Copy(spare, 0, scratch.bytes, 0, vIntLen);            // header goes first

                    // Reset for the next key so de-duplication is per entry, not global.
                    if (dedupSet != null)
                    {
                        dedupSet.Clear();
                    }

                    scratch.length = scratchOutput.Position - scratch.offset;
                    //System.out.println("  add input=" + input + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + count);
                    // Deep copy because `scratch` is reused on the next iteration.
                    builder.add(Util.toUTF32(input, scratchIntsRef), BytesRef.deepCopyOf(scratch));
                }

                FST<BytesRef> fst = builder.finish();

                return new SynonymMap(fst, words, maxHorizontalContext);
            }
Exemple #4
0
		/// <summary>
		/// Builds a <see cref="SynonymMap"/> and returns it.
		/// </summary>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public SynonymMap build() throws java.io.IOException
		public virtual SynonymMap build()
		{
			// Builds an FST with one entry per key of `workingSet`. Each entry's output is a byte
			// sequence laid out as: vInt(count << 1 | flag), followed by one vInt per retained ord,
			// where the low flag bit is 1 when the entry's includeOrig is false.
			ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;
			// TODO: are we using the best sharing options?
			org.apache.lucene.util.fst.Builder<BytesRef> builder = new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);

			BytesRef scratch = new BytesRef(64);
			ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();

			// Allocated only when de-duplication is requested; null switches the check off.
			// (C# has no diamond operator, so the type argument must be spelled out.)
			HashSet<int?> dedupSet = dedup ? new HashSet<int?>() : null;

			// Holds the header vInt while the ords are shifted; 5 bytes matches the per-vInt
			// worst case assumed for `estimatedSize` below.
			sbyte[] spare = new sbyte[5];

			// Snapshot the keys into an array so they can be sorted before insertion.
			// NOTE(review): Array.Sort is not a stable sort; this assumes the comparator never
			// reports two distinct dictionary keys as equal.
			Dictionary<CharsRef, MapEntry>.KeyCollection keys = workingSet.Keys;
			CharsRef[] sortedKeys = new CharsRef[keys.Count];
			keys.CopyTo(sortedKeys, 0);
			Array.Sort(sortedKeys, CharsRef.UTF16SortedAsUTF8Comparator);

			IntsRef scratchIntsRef = new IntsRef();

			//System.out.println("fmap.build");
			for (int keyIdx = 0; keyIdx < sortedKeys.Length; keyIdx++)
			{
				CharsRef input = sortedKeys[keyIdx];
				MapEntry output = workingSet[input];

				int numEntries = output.ords.Count;
				// output size, assume the worst case
				int estimatedSize = 5 + numEntries * 5; // numEntries + one ord for each entry

				scratch.grow(estimatedSize);
				scratchOutput.reset(scratch.bytes, scratch.offset, scratch.bytes.Length);
				Debug.Assert(scratch.offset == 0);

				// now write our output data:
				int count = 0;
				for (int i = 0; i < numEntries; i++)
				{
					// HashSet.Add returns false when the value is already present, so a single
					// call both tests for and records the ord.
					if (dedupSet != null && !dedupSet.Add(output.ords[i]))
					{
						continue;
					}
					scratchOutput.writeVInt(output.ords[i]);
					count++;
				}

				// The header can only be written once `count` is known, so it lands after the
				// ords; measure its encoded length so it can be moved to the front.
				int pos = scratchOutput.Position;
				scratchOutput.writeVInt(count << 1 | (output.includeOrig ? 0 : 1));
				int pos2 = scratchOutput.Position;
				int vIntLen = pos2 - pos;

				// Move the count + includeOrig to the front of the byte[]:
				Array.Copy(scratch.bytes, pos, spare, 0, vIntLen);         // stash the header
				Array.Copy(scratch.bytes, 0, scratch.bytes, vIntLen, pos); // shift the ords right (overlapping copy)
				Array.Copy(spare, 0, scratch.bytes, 0, vIntLen);           // header goes first

				// Reset for the next key so de-duplication is per entry, not global.
				if (dedupSet != null)
				{
					dedupSet.Clear();
				}

				scratch.length = scratchOutput.Position - scratch.offset;
				//System.out.println("  add input=" + input + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + count);
				// Deep copy because `scratch` is reused on the next iteration.
				builder.add(Util.toUTF32(input, scratchIntsRef), BytesRef.deepCopyOf(scratch));
			}

			FST<BytesRef> fst = builder.finish();
			return new SynonymMap(fst, words, maxHorizontalContext);
		}