/// <summary>
/// Loads the optional word dictionary and the hyphenation pattern tree through
/// the supplied <paramref name="loader"/>.
/// </summary>
/// <param name="loader">
/// resource loader used to read the dictionary file (when one is configured)
/// and to open the hyphenation pattern file
/// </param>
public virtual void Inform(ResourceLoader loader)
{
    InputStream stream = null;
    try
    {
        // The dictionary is optional; it is only loaded when a file was configured.
        if (dictFile != null)
        {
            dictionary = getWordSet(loader, dictFile, false);
        }

        // TODO: Broken, because we cannot resolve real system id
        // ResourceLoader should also supply method like ClassLoader to get resource URL
        stream = loader.openResource(hypFile);

        InputSource patternSource = new InputSource(stream)
        {
            Encoding = encoding, // if it's null let xml parser decide
            SystemId = hypFile
        };

        hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(patternSource);
    }
    finally
    {
        // Always release the pattern stream, whether or not loading succeeded.
        IOUtils.CloseWhileHandlingException(stream);
    }
}
/// <summary>
/// Initializes a new <see cref="HyphenationCompoundWordTokenFilter"/> using the
/// default word and subword size limits, with the longest-match-only option
/// switched off.
/// </summary>
/// <param name="matchVersion">
/// Lucene version to enable correct Unicode 4.0 behavior in the dictionaries
/// if Version &gt; 3.0. See <see cref="CompoundWordTokenFilterBase"/> for details.
/// </param>
/// <param name="input">
/// the <see cref="TokenStream"/> to process
/// </param>
/// <param name="hyphenator">
/// the hyphenation pattern tree to use for hyphenation
/// </param>
/// <param name="dictionary">
/// the word dictionary to match against.
/// </param>
public HyphenationCompoundWordTokenFilter(
    LuceneVersion matchVersion,
    TokenStream input,
    HyphenationTree hyphenator,
    CharArraySet dictionary)
    : this(
        matchVersion,
        input,
        hyphenator,
        dictionary,
        DEFAULT_MIN_WORD_SIZE,
        DEFAULT_MIN_SUBWORD_SIZE,
        DEFAULT_MAX_SUBWORD_SIZE,
        false)
{
}
/// <summary>
/// Initializes a new <see cref="HyphenationCompoundWordTokenFilter"/> with
/// explicit size limits and matching mode.
/// </summary>
/// <param name="matchVersion">
/// Lucene version to enable correct Unicode 4.0 behavior in the dictionaries
/// if Version &gt; 3.0. See <see cref="CompoundWordTokenFilterBase"/> for details.
/// </param>
/// <param name="input">
/// the <see cref="TokenStream"/> to process
/// </param>
/// <param name="hyphenator">
/// the hyphenation pattern tree to use for hyphenation
/// </param>
/// <param name="dictionary">
/// the word dictionary to match against.
/// </param>
/// <param name="minWordSize">
/// only words longer than this get processed
/// </param>
/// <param name="minSubwordSize">
/// only subwords longer than this get to the output stream
/// </param>
/// <param name="maxSubwordSize">
/// only subwords shorter than this get to the output stream
/// </param>
/// <param name="onlyLongestMatch">
/// Add only the longest matching subword to the stream
/// </param>
public HyphenationCompoundWordTokenFilter(
    LuceneVersion matchVersion,
    TokenStream input,
    HyphenationTree hyphenator,
    CharArraySet dictionary,
    int minWordSize,
    int minSubwordSize,
    int maxSubwordSize,
    bool onlyLongestMatch)
    : base(
        matchVersion,
        input,
        dictionary,
        minWordSize,
        minSubwordSize,
        maxSubwordSize,
        onlyLongestMatch)
{
    // Everything except the pattern tree is handed to the base constructor.
    this.hyphenator = hyphenator;
}
/// <summary>
/// Creates a <see cref="HyphenationTree"/> and loads its patterns from the
/// given stream.
/// </summary>
/// <param name="hyphenationSource">
/// the stream supplying the XML grammar with the hyphenation patterns
/// </param>
/// <param name="encoding">
/// the encoding passed, together with the stream, to
/// <c>HyphenationTree.LoadPatterns</c>
/// </param>
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(Stream hyphenationSource, Encoding encoding)
{
    HyphenationTree patterns = new HyphenationTree();
    patterns.LoadPatterns(hyphenationSource, encoding);

    return patterns;
}
/// <summary>
/// Create a <see cref="HyphenationCompoundWordTokenFilter"/> with no dictionary.
/// <para>
/// Delegates to the overload taking explicit size limits, passing
/// <c>DEFAULT_MIN_WORD_SIZE</c>, <c>DEFAULT_MIN_SUBWORD_SIZE</c> and
/// <c>DEFAULT_MAX_SUBWORD_SIZE</c>.
/// </para>
/// </summary>
/// <param name="matchVersion"> the Lucene version forwarded to the delegated constructor </param>
/// <param name="input"> the <see cref="TokenStream"/> to process </param>
/// <param name="hyphenator"> the hyphenation pattern tree to use for hyphenation </param>
public HyphenationCompoundWordTokenFilter(
    LuceneVersion matchVersion,
    TokenStream input,
    HyphenationTree hyphenator)
    : this(
        matchVersion,
        input,
        hyphenator,
        DEFAULT_MIN_WORD_SIZE,
        DEFAULT_MIN_SUBWORD_SIZE,
        DEFAULT_MAX_SUBWORD_SIZE)
{
}
/// <summary>
/// Create a <see cref="HyphenationCompoundWordTokenFilter"/> with no dictionary.
/// <para>
/// Delegates to the overload that also takes a dictionary and a
/// longest-match flag, passing <c>null</c> as the dictionary and
/// <c>false</c> as the flag.
/// </para>
/// </summary>
/// <param name="matchVersion"> the Lucene version forwarded to the delegated constructor </param>
/// <param name="input"> the <see cref="TokenStream"/> to process </param>
/// <param name="hyphenator"> the hyphenation pattern tree to use for hyphenation </param>
/// <param name="minWordSize"> minimum word size, forwarded unchanged </param>
/// <param name="minSubwordSize"> minimum subword size, forwarded unchanged </param>
/// <param name="maxSubwordSize"> maximum subword size, forwarded unchanged </param>
public HyphenationCompoundWordTokenFilter(
    LuceneVersion matchVersion,
    TokenStream input,
    HyphenationTree hyphenator,
    int minWordSize,
    int minSubwordSize,
    int maxSubwordSize)
    : this(
        matchVersion,
        input,
        hyphenator,
        null, // no dictionary
        minWordSize,
        minSubwordSize,
        maxSubwordSize,
        false)
{
}
/// <summary>
/// Stores the enclosing test instance and the hyphenation tree in the
/// corresponding fields of this helper.
/// </summary>
/// <param name="outerInstance"> the enclosing <see cref="TestCompoundWordTokenFilter"/> instance </param>
/// <param name="hyphenator"> the hyphenation tree kept by this helper </param>
public AnalyzerAnonymousInnerClassHelper5(
    TestCompoundWordTokenFilter outerInstance,
    HyphenationTree hyphenator)
{
    this.hyphenator = hyphenator;
    this.outerInstance = outerInstance;
}
/// <summary>
/// Creates a <see cref="HyphenationTree"/> and loads its patterns from the
/// given input source.
/// </summary>
/// <param name="hyphenationSource">
/// the <see cref="InputSource"/> pointing to the XML grammar
/// </param>
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
{
    HyphenationTree patterns = new HyphenationTree();
    patterns.loadPatterns(hyphenationSource);

    return patterns;
}