/// <summary>
        /// Initializes a <see cref="CJKBigramFilterFactory"/> from its configuration arguments.
        /// The boolean options "han", "hiragana", "katakana" and "hangul" (each defaulting to
        /// <c>true</c>) decide which <see cref="CJKScript"/> flags are enabled; the boolean
        /// option "outputUnigrams" (defaulting to <c>false</c>) is stored unchanged.
        /// </summary>
        /// <param name="args"> Configuration arguments; also handed to the base constructor. </param>
        /// <exception cref="ArgumentException"> If <paramref name="args"/> still has entries
        /// once the options above have been read (presumably reading an option consumes it;
        /// verify against <c>GetBoolean</c>). </exception>
        public CJKBigramFilterFactory(IDictionary<string, string> args)
            : base(args)
        {
            // Contributes no bits when an option is switched off.
            const CJKScript none = 0;

            // Operands are evaluated left to right, so the options are read in the
            // order han, hiragana, katakana, hangul.
            this.flags =
                (GetBoolean(args, "han", true) ? CJKScript.HAN : none)
                | (GetBoolean(args, "hiragana", true) ? CJKScript.HIRAGANA : none)
                | (GetBoolean(args, "katakana", true) ? CJKScript.KATAKANA : none)
                | (GetBoolean(args, "hangul", true) ? CJKScript.HANGUL : none);

            this.outputUnigrams = GetBoolean(args, "outputUnigrams", false);

            if (args.Count > 0)
            {
                string message = string.Format(J2N.Text.StringFormatter.CurrentCulture, "Unknown parameters: {0}", args);
                throw new ArgumentException(message);
            }
        }
        /// <summary>
        /// Creates a new <see cref="CJKBigramFilterFactory"/> from its configuration arguments.
        /// The boolean options "han", "hiragana", "katakana" and "hangul" (each defaulting to
        /// <c>true</c>) select the <see cref="CJKScript"/> flags to enable, and the boolean
        /// option "outputUnigrams" defaults to <c>false</c>.
        /// </summary>
        /// <param name="args"> Configuration arguments; also passed to the base constructor. </param>
        /// <exception cref="System.ArgumentException"> If <paramref name="args"/> still contains
        /// entries after the options above have been read; the message lists those entries. </exception>
        public CJKBigramFilterFactory(IDictionary <string, string> args)
            : base(args)
        {
            CJKScript flags = 0;

            if (GetBoolean(args, "han", true))
            {
                flags |= CJKScript.HAN;
            }
            if (GetBoolean(args, "hiragana", true))
            {
                flags |= CJKScript.HIRAGANA;
            }
            if (GetBoolean(args, "katakana", true))
            {
                flags |= CJKScript.KATAKANA;
            }
            if (GetBoolean(args, "hangul", true))
            {
                flags |= CJKScript.HANGUL;
            }
            this.flags          = flags;
            this.outputUnigrams = GetBoolean(args, "outputUnigrams", false);
            if (args.Count > 0)
            {
                // Concatenating the dictionary itself relies on its ToString(), which for the
                // standard dictionary types only yields the type name. Render the leftover
                // entries explicitly as {key=value, ...} so the message names the offenders.
                System.Text.StringBuilder message = new System.Text.StringBuilder("Unknown parameters: {");
                bool first = true;
                foreach (KeyValuePair<string, string> entry in args)
                {
                    if (!first)
                    {
                        message.Append(", ");
                    }
                    message.Append(entry.Key).Append('=').Append(entry.Value);
                    first = false;
                }
                message.Append('}');
                throw new System.ArgumentException(message.ToString());
            }
        }
Beispiel #3
0
 /// <summary>
 /// Builds a <see cref="CJKBigramFilter"/> over the given stream, selecting which writing
 /// systems are bigrammed and whether unigrams are emitted as well.
 /// </summary>
 /// <param name="in"> The <see cref="TokenStream"/> to consume </param>
 /// <param name="flags"> Bitwise OR of <see cref="CJKScript.HAN"/>, <see cref="CJKScript.HIRAGANA"/>,
 ///        <see cref="CJKScript.KATAKANA"/> and <see cref="CJKScript.HANGUL"/> </param>
 /// <param name="outputUnigrams"> <c>true</c> to also output unigrams for the selected writing
 ///        systems; when <c>false</c>, a unigram is only output if there are no adjacent
 ///        characters to form a bigram </param>
 public CJKBigramFilter(TokenStream @in, CJKScript flags, bool outputUnigrams)
     : base(@in)
 {
     this.outputUnigrams = outputUnigrams;

     // A script whose bit is set gets its type marker; otherwise it is disabled with NO.
     doHan      = (flags & CJKScript.HAN) != 0 ? HAN_TYPE : NO;
     doHiragana = (flags & CJKScript.HIRAGANA) != 0 ? HIRAGANA_TYPE : NO;
     doKatakana = (flags & CJKScript.KATAKANA) != 0 ? KATAKANA_TYPE : NO;
     doHangul   = (flags & CJKScript.HANGUL) != 0 ? HANGUL_TYPE : NO;

     // Attributes are requested in the same order as before.
     termAtt      = AddAttribute<ICharTermAttribute>();
     typeAtt      = AddAttribute<ITypeAttribute>();
     offsetAtt    = AddAttribute<IOffsetAttribute>();
     posIncAtt    = AddAttribute<IPositionIncrementAttribute>();
     posLengthAtt = AddAttribute<IPositionLengthAttribute>();
 }
Beispiel #4
0
 /// <summary>
 /// Convenience overload: delegates to
 /// <see cref="CJKBigramFilter.CJKBigramFilter(TokenStream, CJKScript, bool)"/>
 /// with <c>outputUnigrams</c> set to <c>false</c>.
 /// </summary>
 /// <param name="in"> The <see cref="TokenStream"/> to consume </param>
 /// <param name="flags"> Bitwise OR of <see cref="CJKScript.HAN"/>, <see cref="CJKScript.HIRAGANA"/>,
 ///        <see cref="CJKScript.KATAKANA"/> and <see cref="CJKScript.HANGUL"/> </param>
 public CJKBigramFilter(TokenStream @in, CJKScript flags)
     : this(@in, flags, outputUnigrams: false)
 {
 }