Esempio n. 1
0
        /// <summary>
        /// Shared construction logic: registers the term, offset and type attributes,
        /// stores the reader, and builds the underlying HebMorph tokenizer over it.
        /// </summary>
        /// <param name="_input">Reader stored in <c>input</c> and passed to a new <c>HebMorph.Tokenizer</c>.</param>
        /// <param name="_prefixesTree">Prefix radix tree stored in <c>prefixesTree</c>.</param>
        private void Init(System.IO.TextReader _input, HebMorph.DataStructures.DictRadix<int> _prefixesTree)
        {
            // Attributes are registered in the same order as before: term, offset, type.
            // The position-increment attribute is not registered by this method.
            this.termAtt = AddAttribute<ITermAttribute>();
            this.offsetAtt = AddAttribute<IOffsetAttribute>();
            this.typeAtt = AddAttribute<ITypeAttribute>();

            this.input = _input;
            this.hebMorphTokenizer = new HebMorph.Tokenizer(_input);
            this.prefixesTree = _prefixesTree;
        }
        /// <summary>
        /// Shared construction logic: registers the term, offset, position-increment and
        /// type attributes, then binds the supplied lemmatizer to the input reader and
        /// stores the filter settings.
        /// </summary>
        /// <param name="input">Reader stored in <c>input</c> and handed to the lemmatizer via <c>SetStream</c>.</param>
        /// <param name="_lemmatizer">Lemmatizer to use; must not be null because it is dereferenced here.</param>
        /// <param name="_lemmaFilter">Lemma filter stored in <c>lemmaFilter</c>.</param>
        /// <param name="AlwaysSaveMarkedOriginal">Value stored in <c>alwaysSaveMarkedOriginal</c>.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="_lemmatizer"/> is null.</exception>
        private void Init(System.IO.TextReader input, HebMorph.StreamLemmatizer _lemmatizer,
            HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter, bool AlwaysSaveMarkedOriginal)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException
            // raised by the SetStream call below after attributes were already registered.
            if (_lemmatizer == null)
            {
                throw new System.ArgumentNullException("_lemmatizer");
            }

            termAtt = AddAttribute<ITermAttribute>();
            offsetAtt = AddAttribute<IOffsetAttribute>();
            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
            typeAtt = AddAttribute<ITypeAttribute>();
            // No payload attribute is registered here.

            this.input = input;
            this._streamLemmatizer = _lemmatizer;
            this._streamLemmatizer.SetStream(input);
            this.alwaysSaveMarkedOriginal = AlwaysSaveMarkedOriginal;
            this.lemmaFilter = _lemmaFilter;
        }
 /// <summary>
 /// Creates the filter over <paramref name="input"/> without saving marked originals
 /// (passes <c>false</c> for that option to <c>Init</c>).
 /// </summary>
 /// <param name="input">Reader to lemmatize.</param>
 /// <param name="_lemmatizer">Lemmatizer forwarded to <c>Init</c>.</param>
 /// <param name="_lemmaFilter">Lemma filter forwarded to <c>Init</c>.</param>
 public StreamLemmasFilter(System.IO.TextReader input, HebMorph.StreamLemmatizer _lemmatizer,
     HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter)
 {
     // Not chained to base(input): that converts to CharStream, and causes issues
     // due to a call to ReadToEnd in the ctor.
     const bool saveMarkedOriginal = false;
     this.Init(input, _lemmatizer, _lemmaFilter, saveMarkedOriginal);
 }
 /// <summary>
 /// Creates the filter over <paramref name="input"/>, forwarding every argument to <c>Init</c>.
 /// </summary>
 /// <param name="input">Reader to lemmatize.</param>
 /// <param name="_lemmatizer">Lemmatizer forwarded to <c>Init</c>.</param>
 /// <param name="_lemmaFilter">Lemma filter forwarded to <c>Init</c>.</param>
 /// <param name="AlwaysSaveMarkedOriginal">Option forwarded unchanged to <c>Init</c>.</param>
 public StreamLemmasFilter(System.IO.TextReader input, HebMorph.StreamLemmatizer _lemmatizer,
     HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter, bool AlwaysSaveMarkedOriginal)
 {
     // Not chained to base(input): that converts to CharStream, and causes issues
     // due to a call to ReadToEnd in the ctor.
     this.Init(input, _lemmatizer, _lemmaFilter, AlwaysSaveMarkedOriginal);
 }
        /// <summary>
        /// Populates the current token's attributes from <paramref name="hebToken"/>:
        /// the term text is the lemma when it is non-null, otherwise the token text with
        /// its first <c>PrefixLength</c> characters removed; the position increment is
        /// set to 0 and the type is set to the Hebrew token-type signature.
        /// </summary>
        /// <param name="hebToken">Hebrew token to emit.</param>
        /// <returns>Always <c>true</c> in this implementation.</returns>
        protected virtual bool CreateHebrewToken(HebMorph.HebrewToken hebToken)
        {
            var termText = hebToken.Lemma ?? hebToken.Text.Substring(hebToken.PrefixLength);
            SetTermText(termText);

            posIncrAtt.PositionIncrement = 0;

            // TODO: use the Acronym token type where applicable.
            var hebrewSignature = HebrewTokenizer.TokenTypeSignature(HebrewTokenizer.TOKEN_TYPES.Hebrew);
            typeAtt.Type = hebrewSignature;

            // TODO: attach a morph payload (a single byte taken from the morph result's
            // mask, with bits set selectively) once a payload attribute is registered.

            return true;
        }
Esempio n. 6
0
        /// <summary>
        /// Creates an analyzer that keeps <paramref name="hml"/> as its lemmatizer and
        /// calls <c>SetOverridesTokenStreamMethod</c> for <c>MorphAnalyzer</c>.
        /// </summary>
        /// <param name="hml">Lemmatizer stored in <c>hebMorphLemmatizer</c>.</param>
        public MorphAnalyzer(HebMorph.StreamLemmatizer hml)
        {
            // The parameterless base constructor runs implicitly.
            this.hebMorphLemmatizer = hml;
            SetOverridesTokenStreamMethod<MorphAnalyzer>();
        }
Esempio n. 7
0
 /// <summary>
 /// Creates an analyzer that keeps <paramref name="hml"/> as its lemmatizer.
 /// </summary>
 /// <param name="hml">Lemmatizer stored in <c>hebMorphLemmatizer</c>.</param>
 public MorphAnalyzer(HebMorph.StreamLemmatizer hml)
 {
     // The parameterless base constructor runs implicitly.
     this.hebMorphLemmatizer = hml;
 }
Esempio n. 8
0
 /// <summary>
 /// Creates the filter over <paramref name="input"/> without saving marked originals
 /// (passes <c>false</c> for that option to <c>Init</c>).
 /// </summary>
 /// <param name="input">Reader to lemmatize.</param>
 /// <param name="_lemmatizer">Lemmatizer forwarded to <c>Init</c>.</param>
 /// <param name="_lemmaFilter">Lemma filter forwarded to <c>Init</c>.</param>
 public StreamLemmasFilter(System.IO.TextReader input, HebMorph.StreamLemmatizer _lemmatizer,
     HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter)
 {
     // Not chained to base(input): that converts to CharStream, and causes issues
     // due to a call to ReadToEnd in the ctor.
     const bool saveMarkedOriginal = false;
     this.Init(input, _lemmatizer, _lemmaFilter, saveMarkedOriginal);
 }
Esempio n. 9
0
 /// <summary>
 /// Creates the filter over <paramref name="input"/>, forwarding every argument to <c>Init</c>.
 /// </summary>
 /// <param name="input">Reader to lemmatize.</param>
 /// <param name="_lemmatizer">Lemmatizer forwarded to <c>Init</c>.</param>
 /// <param name="_lemmaFilter">Lemma filter forwarded to <c>Init</c>.</param>
 /// <param name="AlwaysSaveMarkedOriginal">Option forwarded unchanged to <c>Init</c>.</param>
 public StreamLemmasFilter(System.IO.TextReader input, HebMorph.StreamLemmatizer _lemmatizer,
     HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter, bool AlwaysSaveMarkedOriginal)
 {
     // Not chained to base(input): that converts to CharStream, and causes issues
     // due to a call to ReadToEnd in the ctor.
     this.Init(input, _lemmatizer, _lemmaFilter, AlwaysSaveMarkedOriginal);
 }
Esempio n. 10
0
 /// <summary>
 /// Emits <paramref name="hebToken"/> by delegating to the single-argument
 /// <c>CreateHebrewToken</c> overload.
 /// </summary>
 /// <param name="hebToken">Hebrew token to emit.</param>
 /// <param name="current">Not used by this method.</param>
 /// <returns>Always <c>true</c>.</returns>
 protected bool CreateHebrewToken(HebMorph.HebrewToken hebToken, State current)
 {
     // NOTE(review): the delegate's bool result is discarded and `current` is never
     // read; confirm this is intentional before relying on the return value.
     this.CreateHebrewToken(hebToken);

     return true;
 }
Esempio n. 11
0
 /// <summary>
 /// Creates a tokenizer over <paramref name="_input"/> using the given prefix tree;
 /// all setup is delegated to <c>Init</c>.
 /// </summary>
 /// <param name="_input">Reader to tokenize.</param>
 /// <param name="_prefixesTree">Prefix radix tree forwarded to <c>Init</c>.</param>
 public HebrewTokenizer(System.IO.TextReader _input, HebMorph.DataStructures.DictRadix<int> _prefixesTree)
 {
     // Not chained to base(input): that converts to CharStream, and causes issues
     // due to a call to ReadToEnd in the ctor.
     this.Init(_input, _prefixesTree);
 }