/// <summary>
/// Shared initialization: registers the term, offset and type attributes,
/// then stores the reader, a new <c>HebMorph.Tokenizer</c> built over it, and the prefixes tree.
/// </summary>
/// <param name="_input">Reader stored in <c>input</c> and handed to the new <c>HebMorph.Tokenizer</c>.</param>
/// <param name="_prefixesTree">Radix tree stored in <c>prefixesTree</c>.</param>
private void Init(System.IO.TextReader _input, HebMorph.DataStructures.DictRadix<int> _prefixesTree)
{
    // Attribute registration (original order kept).
    this.termAtt = AddAttribute<ITermAttribute>();
    this.offsetAtt = AddAttribute<IOffsetAttribute>();
    // The position-increment attribute is currently not registered here:
    //posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    this.typeAtt = AddAttribute<ITypeAttribute>();

    // Input wiring.
    this.input = _input;
    this.hebMorphTokenizer = new HebMorph.Tokenizer(_input);
    this.prefixesTree = _prefixesTree;
}
/// <summary>
/// Shared initialization: registers the term, offset, position-increment and type attributes,
/// stores the reader, attaches the lemmatizer to it via <c>SetStream</c>, and records the filter options.
/// </summary>
/// <param name="input">Reader stored in <c>this.input</c> and passed to the lemmatizer's <c>SetStream</c>.</param>
/// <param name="_lemmatizer">Lemmatizer stored in <c>_streamLemmatizer</c>.</param>
/// <param name="_lemmaFilter">Filter stored in <c>lemmaFilter</c>.</param>
/// <param name="AlwaysSaveMarkedOriginal">Value stored in <c>alwaysSaveMarkedOriginal</c>.</param>
private void Init(System.IO.TextReader input, HebMorph.StreamLemmatizer _lemmatizer, HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter, bool AlwaysSaveMarkedOriginal)
{
    // Attribute registration (original order kept).
    this.termAtt = AddAttribute<ITermAttribute>();
    this.offsetAtt = AddAttribute<IOffsetAttribute>();
    this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    this.typeAtt = AddAttribute<ITypeAttribute>();
    // The payload attribute is currently not registered here:
    //payAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));

    // The parameter shadows the field, so the qualifier is required on this line.
    this.input = input;

    // Store the lemmatizer first, then point it at the reader.
    _streamLemmatizer = _lemmatizer;
    _streamLemmatizer.SetStream(input);

    alwaysSaveMarkedOriginal = AlwaysSaveMarkedOriginal;
    lemmaFilter = _lemmaFilter;
}
/// <summary>
/// Creates the filter over <paramref name="input"/>, passing <c>false</c> to <c>Init</c>
/// for its <c>AlwaysSaveMarkedOriginal</c> argument.
/// </summary>
/// <remarks>
/// <c>base(input)</c> is not chained: it converts to CharStream, and causes issues due to a call
/// to ReadToEnd in ctor.
/// </remarks>
/// <param name="input">Reader forwarded to <c>Init</c>.</param>
/// <param name="_lemmatizer">Lemmatizer forwarded to <c>Init</c>.</param>
/// <param name="_lemmaFilter">Lemma filter forwarded to <c>Init</c>.</param>
public StreamLemmasFilter(System.IO.TextReader input, HebMorph.StreamLemmatizer _lemmatizer, HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter)
{
    const bool saveMarkedOriginal = false;
    Init(input, _lemmatizer, _lemmaFilter, saveMarkedOriginal);
}
/// <summary>
/// Creates the filter over <paramref name="input"/>, forwarding every argument unchanged to <c>Init</c>.
/// </summary>
/// <remarks>
/// <c>base(input)</c> is not chained: it converts to CharStream, and causes issues due to a call
/// to ReadToEnd in ctor.
/// </remarks>
/// <param name="input">Reader forwarded to <c>Init</c>.</param>
/// <param name="_lemmatizer">Lemmatizer forwarded to <c>Init</c>.</param>
/// <param name="_lemmaFilter">Lemma filter forwarded to <c>Init</c>.</param>
/// <param name="AlwaysSaveMarkedOriginal">Flag forwarded to <c>Init</c>.</param>
public StreamLemmasFilter(
    System.IO.TextReader input,
    HebMorph.StreamLemmatizer _lemmatizer,
    HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter,
    bool AlwaysSaveMarkedOriginal)
{
    this.Init(input, _lemmatizer, _lemmaFilter, AlwaysSaveMarkedOriginal);
}
/// <summary>
/// Fills the term text, position increment and type attributes from a Hebrew token.
/// </summary>
/// <param name="hebToken">
/// Source token. Its <c>Lemma</c> becomes the term text; when <c>Lemma</c> is null, the term text is
/// <c>Text</c> starting at index <c>PrefixLength</c>.
/// </param>
/// <returns>Always <c>true</c>.</returns>
protected virtual bool CreateHebrewToken(HebMorph.HebrewToken hebToken)
{
    // Prefer the lemma; fall back to the surface text from PrefixLength onward.
    string termText = hebToken.Lemma;
    if (termText == null)
    {
        termText = hebToken.Text.Substring(hebToken.PrefixLength);
    }
    SetTermText(termText);

    posIncrAtt.PositionIncrement = 0;

    // TODO: typeAtt.SetType(TokenTypeSignature(TOKEN_TYPES.Acronym));
    typeAtt.Type = HebrewTokenizer.TokenTypeSignature(HebrewTokenizer.TOKEN_TYPES.Hebrew);

    // Morph payload (currently disabled):
    //   byte[] data = new byte[1];
    //   data[0] = (byte)morphResult.Mask; // TODO: Set bits selectively
    //   Payload payload = new Payload(data);
    //   payAtt.SetPayload(payload);

    return true;
}
/// <summary>
/// Creates the analyzer around the given lemmatizer and then calls
/// <c>SetOverridesTokenStreamMethod</c> for <c>MorphAnalyzer</c>.
/// </summary>
/// <param name="hml">Lemmatizer stored in <c>hebMorphLemmatizer</c>.</param>
public MorphAnalyzer(HebMorph.StreamLemmatizer hml)
{
    // The parameterless base constructor runs implicitly.
    this.hebMorphLemmatizer = hml;
    this.SetOverridesTokenStreamMethod<MorphAnalyzer>();
}
/// <summary>
/// Creates the analyzer around the given lemmatizer.
/// </summary>
/// <param name="hml">Lemmatizer stored in <c>hebMorphLemmatizer</c>.</param>
public MorphAnalyzer(HebMorph.StreamLemmatizer hml)
{
    // The parameterless base constructor runs implicitly.
    this.hebMorphLemmatizer = hml;
}
/// <summary>
/// Creates the filter over <paramref name="input"/>, passing <c>false</c> to <c>Init</c>
/// for its <c>AlwaysSaveMarkedOriginal</c> argument.
/// </summary>
/// <remarks>
/// <c>base(input)</c> is not chained: it converts to CharStream, and causes issues due to a call
/// to ReadToEnd in ctor.
/// </remarks>
/// <param name="input">Reader forwarded to <c>Init</c>.</param>
/// <param name="_lemmatizer">Lemmatizer forwarded to <c>Init</c>.</param>
/// <param name="_lemmaFilter">Lemma filter forwarded to <c>Init</c>.</param>
public StreamLemmasFilter(System.IO.TextReader input, HebMorph.StreamLemmatizer _lemmatizer, HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter)
{
    const bool saveMarkedOriginal = false;
    Init(input, _lemmatizer, _lemmaFilter, saveMarkedOriginal);
}
/// <summary>
/// Creates the filter over <paramref name="input"/>, forwarding every argument unchanged to <c>Init</c>.
/// </summary>
/// <remarks>
/// <c>base(input)</c> is not chained: it converts to CharStream, and causes issues due to a call
/// to ReadToEnd in ctor.
/// </remarks>
/// <param name="input">Reader forwarded to <c>Init</c>.</param>
/// <param name="_lemmatizer">Lemmatizer forwarded to <c>Init</c>.</param>
/// <param name="_lemmaFilter">Lemma filter forwarded to <c>Init</c>.</param>
/// <param name="AlwaysSaveMarkedOriginal">Flag forwarded to <c>Init</c>.</param>
public StreamLemmasFilter(
    System.IO.TextReader input,
    HebMorph.StreamLemmatizer _lemmatizer,
    HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter,
    bool AlwaysSaveMarkedOriginal)
{
    this.Init(input, _lemmatizer, _lemmaFilter, AlwaysSaveMarkedOriginal);
}
/// <summary>
/// Two-argument overload that delegates to <c>CreateHebrewToken(hebToken)</c>.
/// </summary>
/// <param name="hebToken">Token passed on to the single-argument overload.</param>
/// <param name="current">Not used by this method.</param>
/// <returns>Always <c>true</c>.</returns>
protected bool CreateHebrewToken(HebMorph.HebrewToken hebToken, State current)
{
    // NOTE(review): the result of the single-argument overload is discarded, and `current`
    // is never read - confirm both are intentional.
    this.CreateHebrewToken(hebToken);
    return true;
}
/// <summary>
/// Creates the tokenizer by forwarding both arguments to <c>Init</c>.
/// </summary>
/// <remarks>
/// <c>base(input)</c> is not chained: it converts to CharStream, and causes issues due to a call
/// to ReadToEnd in ctor.
/// </remarks>
/// <param name="_input">Reader forwarded to <c>Init</c>.</param>
/// <param name="_prefixesTree">Prefixes tree forwarded to <c>Init</c>.</param>
public HebrewTokenizer(
    System.IO.TextReader _input,
    HebMorph.DataStructures.DictRadix<int> _prefixesTree)
{
    this.Init(_input, _prefixesTree);
}