C# (CSharp) Lucene.Net.Analysis.Util CharacterUtils Examples

Programming Language: C# (CSharp)

Namespace/Package Name: Lucene.Net.Analysis.Util

Class/Type: CharacterUtils

Examples at hotexamples.com: 15

C# (CSharp) Lucene.Net.Analysis.Util CharacterUtils - 15 examples found. These are the top rated real world C# (CSharp) examples of Lucene.Net.Analysis.Util.CharacterUtils extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

GetInstance(14)

NewCharacterBuffer(5)

GetJava4Instance(3)

getInstance(2)

CodePointAt(1)

Fill(1)

ToChars(1)

ToCodePoints(1)

codePointAt(1)

fill(1)

toChars(1)

toCodePoints(1)

CharacterUtils provides a unified interface to character-related operations, implementing backwards-compatible behavior based on a Version instance. (Tagged @lucene.internal.)

CharacterUtils Class Documentation

Example #1

Show file

File: UpperCaseFilter.cs Project: Cefa68000/lucenenet

 /// <summary>
 /// Create a new UpperCaseFilter, that normalizes token text to upper case.
 /// </summary>
 /// <param name="matchVersion"> Version used to obtain the <see cref="CharacterUtils"/> instance.
 ///                See <a href="#version">above</a> </param>
 /// <param name="in"> TokenStream to filter </param>
 public UpperCaseFilter(LuceneVersion matchVersion, TokenStream @in)
     : base(@in)
 {
     // A single registration is enough; the attribute reference is stored in termAtt.
     termAtt = AddAttribute<ICharTermAttribute>();
     charUtils = CharacterUtils.GetInstance(matchVersion);
 }

Example #2

Show file

File: NGramTokenFilter.cs Project: Cefa68000/lucenenet

 /// <summary>
 /// Creates NGramTokenFilter with given min and max n-grams.
 /// </summary>
 /// <param name="version"> Lucene version to enable correct position increments.
 ///                See <a href="#version">above</a> for details. </param>
 /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
 /// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
 /// <param name="maxGram"> the largest n-gram to generate; must not be smaller than <paramref name="minGram"/> </param>
 /// <exception cref="System.ArgumentException"> if <paramref name="minGram"/> is below 1 or exceeds <paramref name="maxGram"/> </exception>
 public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram)
     : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
 {
     // Both the character utilities and the position attributes depend on this check.
     bool isLucene44OrLater = version.OnOrAfter(Version.LUCENE_44);

     this.version = version;
     if (isLucene44OrLater)
     {
         this.charUtils = CharacterUtils.GetInstance(version);
     }
     else
     {
         this.charUtils = CharacterUtils.Java4Instance;
     }

     if (minGram < 1)
     {
         throw new System.ArgumentException("minGram must be greater than zero");
     }
     if (minGram > maxGram)
     {
         throw new System.ArgumentException("minGram must not be greater than maxGram");
     }

     this.minGram = minGram;
     this.maxGram = maxGram;

     if (isLucene44OrLater)
     {
         posIncAtt = AddAttribute(typeof(PositionIncrementAttribute));
         posLenAtt = AddAttribute(typeof(PositionLengthAttribute));
     }
     else
     {
         // Older versions get stand-in attribute objects instead of registered ones.
         posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper(this);
         posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper(this);
     }
 }

Example #3

Show file

File: NGramTokenizer.cs Project: paulirwin/lucene.net

	  /// <summary>
	  /// Validates the n-gram settings, stores them, and allocates the working buffers.
	  /// </summary>
	  /// <param name="version"> Lucene version; must be 4.4 or later </param>
	  /// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
	  /// <param name="maxGram"> the largest n-gram to generate; must not be smaller than <paramref name="minGram"/> </param>
	  /// <param name="edgesOnly"> stored in the <c>edgesOnly</c> field </param>
	  /// <exception cref="System.ArgumentException"> if the version is older than 4.4, or the gram sizes are invalid </exception>
	  private void init(Version version, int minGram, int maxGram, bool edgesOnly)
	  {
		if (!version.onOrAfter(Version.LUCENE_44))
		{
		  throw new System.ArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
		}
		// The guard above guarantees version >= 4.4 here, so no pre-4.4 fallback is needed.
		charUtils = CharacterUtils.getInstance(version);
		if (minGram < 1)
		{
		  throw new System.ArgumentException("minGram must be greater than zero");
		}
		if (minGram > maxGram)
		{
		  throw new System.ArgumentException("minGram must not be greater than maxGram");
		}
		this.minGram = minGram;
		this.maxGram = maxGram;
		this.edgesOnly = edgesOnly;
		charBuffer = CharacterUtils.newCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
		// One int slot per char slot of the character buffer.
		buffer = new int[charBuffer.Buffer.Length];
		// Make the term att large enough
		termAtt.resizeBuffer(2 * maxGram);
	  }

Example #4

Show file

File: CharTokenizer.cs Project: ChristopherHaws/lucenenet

 /// <summary>
 /// LUCENENET specific: common setup shared by multiple constructors. Registers the
 /// term and offset attributes and obtains the character utilities.
 /// </summary>
 /// <param name="matchVersion"> Lucene version passed to <c>CharacterUtils.GetInstance</c> </param>
 private void Init(LuceneVersion matchVersion)
 {
     termAtt = AddAttribute<ICharTermAttribute>();
     offsetAtt = AddAttribute<IOffsetAttribute>();
     charUtils = CharacterUtils.GetInstance(matchVersion);
 }

Example #5

Show file

 /// <summary>
 /// Initializes a <see cref="CharTokenizer"/> with an explicit attribute factory.
 /// </summary>
 /// <param name="matchVersion"> Lucene version to match </param>
 /// <param name="factory"> the attribute factory to use for this <see cref="Tokenizer"/> </param>
 /// <param name="input"> the input to split up into tokens </param>
 protected CharTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader input)
     : base(factory, input)
 {
     charUtils = CharacterUtils.GetInstance(matchVersion);
     // Keep a reference to the reader in addition to handing it to the base class.
     _input = input;
 }

Example #6

Show file

 /// <summary>
 /// LUCENENET specific: common setup shared by multiple constructors. Registers the
 /// term and offset attributes and obtains the character utilities.
 /// </summary>
 /// <param name="matchVersion"> Lucene version passed to <c>CharacterUtils.GetInstance</c> </param>
 private void Init(LuceneVersion matchVersion)
 {
     termAtt = AddAttribute<ICharTermAttribute>();
     offsetAtt = AddAttribute<IOffsetAttribute>();
     charUtils = CharacterUtils.GetInstance(matchVersion);
 }

Example #7

Show file

File: NGramTokenFilter.cs Project: ChristopherHaws/lucenenet

        /// <summary>
        /// Creates NGramTokenFilter with given min and max n-grams.
        /// </summary>
        /// <param name="version"> Lucene version to enable correct position increments.
        ///                See <a href="#version">above</a> for details. </param>
        /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
        /// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
        /// <param name="maxGram"> the largest n-gram to generate; must not be smaller than <paramref name="minGram"/> </param>
        /// <exception cref="System.ArgumentException"> if <paramref name="minGram"/> is below 1 or exceeds <paramref name="maxGram"/> </exception>
        public NGramTokenFilter(LuceneVersion version, TokenStream input, int minGram, int maxGram)
            : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
        {
            // 612/618 are the compiler's obsolete-member warnings; they are suppressed
            // only around the reference to LUCENE_44.
#pragma warning disable 612, 618
            bool isLucene44OrLater = version.OnOrAfter(LuceneVersion.LUCENE_44);
#pragma warning restore 612, 618

            this.version = version;
            this.charUtils = isLucene44OrLater
                ? CharacterUtils.GetInstance(version)
                : CharacterUtils.Java4Instance;

            if (minGram < 1)
            {
                throw new System.ArgumentException("minGram must be greater than zero");
            }
            if (minGram > maxGram)
            {
                throw new System.ArgumentException("minGram must not be greater than maxGram");
            }

            this.minGram = minGram;
            this.maxGram = maxGram;

            if (isLucene44OrLater)
            {
                posIncAtt = AddAttribute<IPositionIncrementAttribute>();
                posLenAtt = AddAttribute<IPositionLengthAttribute>();
            }
            else
            {
                // Older versions get stand-in attribute objects instead of registered ones.
                posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper(this);
                posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper(this);
            }

            termAtt = AddAttribute<ICharTermAttribute>();
            offsetAtt = AddAttribute<IOffsetAttribute>();
        }

Example #8

Show file

File: CharTokenizer.cs Project: Cefa68000/lucenenet

        /// <summary>
        /// Initializes a <see cref="CharTokenizer"/> that reads from the given input.
        /// </summary>
        /// <param name="matchVersion"> Lucene version to match </param>
        /// <param name="input"> the input to split up into tokens </param>
        protected CharTokenizer(LuceneVersion matchVersion, TextReader input)
            : base(input)
        {
            // Register the attributes first, then pick the version-matched character utilities.
            termAtt = AddAttribute<ICharTermAttribute>();
            offsetAtt = AddAttribute<IOffsetAttribute>();
            charUtils = CharacterUtils.GetInstance(matchVersion);
        }

Example #9

Show file

File: CharTokenizer.cs Project: Cefa68000/lucenenet

 /// <summary>
 /// Initializes a <see cref="CharTokenizer"/> with an explicit attribute factory.
 /// </summary>
 /// <param name="matchVersion"> Lucene version to match </param>
 /// <param name="factory"> the attribute factory to use for this <see cref="Tokenizer"/> </param>
 /// <param name="input"> the input to split up into tokens </param>
 protected CharTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader input)
     : base(factory, input)
 {
     charUtils = CharacterUtils.GetInstance(matchVersion);
     // Keep a reference to the reader in addition to handing it to the base class.
     _input = input;
 }

Example #10

Show file

File: EdgeNGramTokenFilter.cs Project: Cefa68000/lucenenet

        /// <summary>
        /// Creates an EdgeNGramTokenFilter, validating and storing the n-gram settings.
        /// </summary>
        /// <param name="version"> the Lucene match version </param>
        /// <param name="input"> <see cref="TokenStream"/> passed to the base constructor </param>
        /// <param name="side"> the side setting; <c>Side.BACK</c> is rejected for version 4.4 and later </param>
        /// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
        /// <param name="maxGram"> the largest n-gram to generate; must not be smaller than <paramref name="minGram"/> </param>
        /// <exception cref="System.ArgumentException"> if any argument fails the checks below </exception>
        public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, Side side, int minGram, int maxGram)
            : base(input)
        {
            // NOTE(review): if LuceneVersion and Side are enums in this codebase, the two
            // null comparisons below can never be true - confirm against their declarations.
            if (version == null)
            {
                throw new System.ArgumentException("version must not be null");
            }

            if (version.OnOrAfter(LuceneVersion.LUCENE_44) && side == Side.BACK)
            {
                throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
            }

            if (side == null)
            {
                throw new System.ArgumentException("sideLabel must be either front or back");
            }

            if (minGram < 1)
            {
                throw new System.ArgumentException("minGram must be greater than zero");
            }

            if (minGram > maxGram)
            {
                throw new System.ArgumentException("minGram must not be greater than maxGram");
            }

            this.version = version;
            // Uses the same PascalCase member names as the version check above.
            this.charUtils = version.OnOrAfter(LuceneVersion.LUCENE_44) ? CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
            this.minGram = minGram;
            this.maxGram = maxGram;
            this.side = side;
        }

Example #11

Show file

File: CharTokenizer.cs Project: leotohill/lucene.net

 /// <summary>
 /// Initializes a <see cref="CharTokenizer"/> with an explicit attribute factory.
 /// </summary>
 /// <param name="matchVersion"> Lucene version to match </param>
 /// <param name="factory"> the attribute factory to use for this <see cref="Tokenizer"/> </param>
 /// <param name="input"> the input to split up into tokens </param>
 public CharTokenizer(Version matchVersion, AttributeFactory factory, TextReader input)
     : base(factory, input)
 {
     charUtils = CharacterUtils.getInstance(matchVersion);
     // Keep a reference to the reader in addition to handing it to the base class.
     _input = input;
 }

Example #12

Show file

File: CharTokenizer.cs Project: leotohill/lucene.net

 /// <summary>
 /// Initializes a <see cref="CharTokenizer"/> that reads from the given input.
 /// </summary>
 /// <param name="matchVersion"> Lucene version to match </param>
 /// <param name="input"> the input to split up into tokens </param>
 public CharTokenizer(Version matchVersion, TextReader input)
     : base(input)
 {
     // Character utilities are selected by the requested version.
     this.charUtils = CharacterUtils.getInstance(matchVersion);
 }

Example #13

Show file

File: EdgeNGramTokenFilter.cs Project: ChristopherHaws/lucenenet

        /// <summary>
        /// Creates an EdgeNGramTokenFilter, validating and storing the n-gram settings
        /// and registering the term, offset, position-increment and position-length attributes.
        /// </summary>
        /// <param name="version"> the Lucene match version </param>
        /// <param name="input"> <see cref="TokenStream"/> passed to the base constructor </param>
        /// <param name="side"> a defined <c>Side</c> value; <c>Side.BACK</c> is rejected for version 4.4 and later </param>
        /// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
        /// <param name="maxGram"> the largest n-gram to generate; must not be smaller than <paramref name="minGram"/> </param>
        /// <exception cref="System.ArgumentException"> if any argument fails the checks below </exception>
        public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, Side side, int minGram, int maxGram)
              : base(input)
        {
            // NOTE(review): there is no null check on version, presumably because
            // LuceneVersion is a non-nullable enum here - confirm.
            bool isLucene44OrLater = version.OnOrAfter(LuceneVersion.LUCENE_44);

            if (isLucene44OrLater && side == Side.BACK)
            {
                throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
            }

            bool sideIsDefined = Enum.IsDefined(typeof(Side), side);
            if (!sideIsDefined)
            {
                throw new System.ArgumentException("sideLabel must be either front or back");
            }

            if (minGram < 1)
            {
                throw new System.ArgumentException("minGram must be greater than zero");
            }

            if (minGram > maxGram)
            {
                throw new System.ArgumentException("minGram must not be greater than maxGram");
            }

            this.version = version;
            if (isLucene44OrLater)
            {
                this.charUtils = CharacterUtils.GetInstance(version);
            }
            else
            {
                this.charUtils = CharacterUtils.Java4Instance;
            }
            this.minGram = minGram;
            this.maxGram = maxGram;
            this.side = side;

            this.termAtt = AddAttribute<ICharTermAttribute>();
            this.offsetAtt = AddAttribute<IOffsetAttribute>();
            this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
            this.posLenAtt = AddAttribute<IPositionLengthAttribute>();
        }

Example #14

Show file

File: TestCharacterUtils.cs Project: WakeflyCBass/lucenenet

        /// <summary>
        /// Round-trips a random string through <c>toCodePoints</c> and <c>toChars</c> at random
        /// offsets and asserts that the restored chars equal the converted slice of the source.
        /// </summary>
        /// <param name="charUtils"> the <see cref="CharacterUtils"/> implementation under test </param>
        private void TestConversions(CharacterUtils charUtils)
        {
            var source = TestUtil.RandomUnicodeString(Random(), 100).toCharArray();
            var codePoints = new int[source.Length];
            var roundTripped = new char[codePoints.Length];

            // Offsets are drawn in this order: source offset, code point offset, restored offset.
            var srcOffset = TestUtil.NextInt(Random(), 0, Math.Min(5, source.Length));
            var cpOffset = TestUtil.NextInt(Random(), 0, srcOffset);
            var dstOffset = TestUtil.NextInt(Random(), 0, srcOffset);

            // Number of chars converted, starting at srcOffset.
            var srcLength = source.Length - srcOffset;

            var codePointCount = charUtils.toCodePoints(source, srcOffset, srcLength, codePoints, cpOffset);
            var charCount = charUtils.toChars(codePoints, cpOffset, codePointCount, roundTripped, dstOffset);

            assertEquals(srcLength, charCount);

            var expected = Arrays.CopyOfRange(source, srcOffset, srcOffset + charCount);
            var actual = Arrays.CopyOfRange(roundTripped, dstOffset, dstOffset + charCount);
            assertArrayEquals(expected, actual);
        }

Example #15

Show file

File: NGramTokenizer.cs Project: ChristopherHaws/lucenenet

        /// <summary>
        /// Validates the n-gram settings, registers the token attributes, stores the
        /// settings, and allocates the working buffers.
        /// </summary>
        /// <param name="version"> Lucene version; must be 4.4 or later </param>
        /// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
        /// <param name="maxGram"> the largest n-gram to generate; must not be smaller than <paramref name="minGram"/> </param>
        /// <param name="edgesOnly"> stored in the <c>edgesOnly</c> field </param>
        /// <exception cref="System.ArgumentException"> if the version is older than 4.4, or the gram sizes are invalid </exception>
        private void Init(LuceneVersion version, int minGram, int maxGram, bool edgesOnly)
        {
            // 612/618 are the compiler's obsolete-member warnings; they are suppressed
            // only around the reference to LUCENE_44.
#pragma warning disable 612, 618
            if (!version.OnOrAfter(LuceneVersion.LUCENE_44))
#pragma warning restore 612, 618
            {
                throw new System.ArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
            }
            // The guard above guarantees version >= 4.4 here, so no pre-4.4 fallback is needed.
            charUtils = CharacterUtils.GetInstance(version);
            if (minGram < 1)
            {
                throw new System.ArgumentException("minGram must be greater than zero");
            }
            if (minGram > maxGram)
            {
                throw new System.ArgumentException("minGram must not be greater than maxGram");
            }
            termAtt = AddAttribute<ICharTermAttribute>();
            posIncAtt = AddAttribute<IPositionIncrementAttribute>();
            posLenAtt = AddAttribute<IPositionLengthAttribute>();
            offsetAtt = AddAttribute<IOffsetAttribute>();
            this.minGram = minGram;
            this.maxGram = maxGram;
            this.edgesOnly = edgesOnly;
            charBuffer = CharacterUtils.NewCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
            // One int slot per char slot of the character buffer.
            buffer = new int[charBuffer.Buffer.Length];

            // Make the term att large enough
            termAtt.ResizeBuffer(2 * maxGram);
        }