/// <summary>
/// Static initializer: builds the default English stop word list and publishes
/// it through <c>ENGLISH_STOP_WORDS_SET</c> wrapped as an unmodifiable set.
/// </summary>
static StopAnalyzer()
{
    string[] defaultWords =
    {
        "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if",
        "in", "into", "is", "it", "no", "not", "of", "on", "or", "such",
        "that", "the", "their", "then", "there", "these", "they", "this",
        "to", "was", "will", "with"
    };

    // Sized to the word list; the second argument (ignoreCase) is false.
    CharArraySet mutableSet = new CharArraySet(defaultWords.Length, false);
    mutableSet.AddAll(defaultWords);

    ENGLISH_STOP_WORDS_SET = CharArraySet.UnmodifiableSet(mutableSet);
}
/// <summary>
/// Sole constructor. Stores the stop word set and registers the term,
/// position-increment, keyword and offset attributes.
/// </summary>
/// <param name="input">Token stream handed to the base constructor.</param>
/// <param name="stopWords">Stop word set kept in the <c>stopWords</c> field.</param>
public SuggestStopFilter(TokenStream input, CharArraySet stopWords)
    : base(input)
{
    this.stopWords = stopWords;

    termAtt = AddAttribute<ICharTermAttribute>();
    posIncAtt = AddAttribute<IPositionIncrementAttribute>();
    keywordAtt = AddAttribute<IKeywordAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
}
/// <summary>
/// Starts from a set created with capacity 0, adds every entry of
/// <c>TEST_STOP_WORDS</c>, then checks the count and membership of each entry.
/// </summary>
public virtual void TestRehash()
{
    CharArraySet growingSet = new CharArraySet(0, true);
    foreach (var word in TEST_STOP_WORDS)
    {
        growingSet.Add(word);
    }

    Assert.AreEqual(TEST_STOP_WORDS.Length, growingSet.Count);

    foreach (var word in TEST_STOP_WORDS)
    {
        Assert.IsTrue(growingSet.Contains(word));
    }
}
/// <summary>
/// Checks that a word can be looked up from the middle of a larger char
/// buffer, both via the (buffer, offset, length) overload and via a string
/// built from the same slice.
/// </summary>
public virtual void TestNonZeroOffset()
{
    string[] vocabulary = { "Hello", "World", "this", "is", "a", "test" };
    char[] padded = "xthisy".ToCharArray();

    CharArraySet lookup = new CharArraySet(10, true);
    foreach (string entry in vocabulary)
    {
        lookup.Add(entry);
    }

    // "this" occupies offset 1, length 4 of the padded buffer.
    Assert.IsTrue(lookup.Contains(padded, 1, 4));
    Assert.IsTrue(lookup.Contains(new string(padded, 1, 4)));
}
/// <summary>
/// Fills a set with <c>TEST_STOP_WORDS</c> and verifies that <c>Clear</c>
/// throws <see cref="NotSupportedException"/> and leaves the count unchanged.
/// </summary>
public virtual void TestClear()
{
    CharArraySet stopSet = new CharArraySet(10, true);
    foreach (var word in TEST_STOP_WORDS)
    {
        stopSet.Add(word);
    }

    Assert.AreEqual(TEST_STOP_WORDS.Length, stopSet.Count, "Not all words added");

    Assert.Throws<NotSupportedException>(() => stopSet.Clear(), "remove is not supported");

    Assert.AreEqual(TEST_STOP_WORDS.Length, stopSet.Count, "Not all words added");
}
/// <summary>
/// Adds a boxed integer through the object overload and checks it is found
/// again, both on the original set and on its unmodifiable wrapper.
/// </summary>
public virtual void TestObjectContains()
{
    CharArraySet boxedSet = new CharArraySet(10, true);
    int number = 1;

    boxedSet.Add((object)number);
    Assert.IsTrue(boxedSet.Contains((object)number));
    Assert.IsTrue(boxedSet.Contains((object)1));

    // Same lookups against the unmodifiable wrapper.
    boxedSet = CharArraySet.UnmodifiableSet(boxedSet);
    Assert.IsTrue(boxedSet.Contains((object)number));
    Assert.IsTrue(boxedSet.Contains((object)1));
}
/// <summary>
/// Constructs a token stream filtering the given input.
/// <para>
/// When <paramref name="stopWords"/> is already a <see cref="CharArraySet"/>
/// it is used directly and <paramref name="ignoreCase"/> has no effect, since
/// the set itself controls case sensitivity.
/// </para>
/// <para>
/// Otherwise a new <see cref="CharArraySet"/> is created with
/// <paramref name="ignoreCase"/> and filled from the values of
/// <paramref name="stopWords"/>.
/// </para>
/// </summary>
/// <param name="input">Input token stream.</param>
/// <param name="stopWords">The set of stop words.</param>
/// <param name="ignoreCase">Ignore case when stopping (only used when a new set is built).</param>
public StopFilter(TokenStream input, System.Collections.Hashtable stopWords, bool ignoreCase)
    : base(input)
{
    CharArraySet readySet = stopWords as CharArraySet;
    if (readySet == null)
    {
        this.stopWords = new CharArraySet(stopWords.Count, ignoreCase);
        foreach (System.String word in stopWords.Values)
        {
            this.stopWords.Add(word);
        }
    }
    else
    {
        this.stopWords = readySet;
    }
}
/// <summary>
/// Constructs a token stream filtering the given input.
/// <para>
/// When <paramref name="stopWords"/> is already a <see cref="CharArraySet"/>
/// it is used directly and <paramref name="ignoreCase"/> has no effect, since
/// the set itself controls case sensitivity.
/// </para>
/// <para>
/// Otherwise a new <see cref="CharArraySet"/> is created with
/// <paramref name="ignoreCase"/> and filled with all of
/// <paramref name="stopWords"/>.
/// </para>
/// </summary>
/// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
/// <param name="input">Input TokenStream</param>
/// <param name="stopWords">Set of strings representing the stop words</param>
/// <param name="ignoreCase">Case-insensitivity flag passed to the newly built set</param>
public StopFilter(bool enablePositionIncrements, TokenStream input, ISet<string> stopWords, bool ignoreCase)
    : base(input)
{
    CharArraySet readySet = stopWords as CharArraySet;
    if (readySet == null)
    {
        this.stopWords = new CharArraySet(stopWords.Count, ignoreCase);
        this.stopWords.AddAll(stopWords);
    }
    else
    {
        this.stopWords = readySet;
    }

    this.enablePositionIncrements = enablePositionIncrements;
    termAtt = AddAttribute<ITermAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
}
/// <summary>
/// Fills a set with <c>TEST_STOP_WORDS</c> and verifies that <c>Clear</c>
/// throws <see cref="System.NotSupportedException"/> without changing the count.
/// </summary>
public virtual void TestClear()
{
    CharArraySet stopSet = new CharArraySet(10, true);
    foreach (var word in TEST_STOP_WORDS)
    {
        stopSet.Add(word);
    }
    Assert.AreEqual(TEST_STOP_WORDS.Length, stopSet.Count, "Not all words added");

    try
    {
        stopSet.Clear();
        Assert.Fail("remove is not supported");
    }
    catch (System.NotSupportedException)
    {
        // Expected path: the set must still hold every word.
        Assert.AreEqual(TEST_STOP_WORDS.Length, stopSet.Count, "Not all words added");
    }
}
/// <summary>
/// Verifies that wrapping a set as unmodifiable keeps its count, and that
/// wrapping <c>null</c> throws <see cref="System.NullReferenceException"/>.
/// </summary>
public virtual void TestUnmodifiableSet()
{
    CharArraySet mutableSet = new CharArraySet(10, true);
    mutableSet.AddAll(new System.Collections.ArrayList(TEST_STOP_WORDS));
    int expectedCount = mutableSet.Count;

    CharArraySet frozen = CharArraySet.UnmodifiableSet(mutableSet);
    Assert.AreEqual(expectedCount, frozen.Count, "Set size changed due to UnmodifiableSet call");

    try
    {
        CharArraySet.UnmodifiableSet(null);
        Assert.Fail("can not make null unmodifiable");
    }
    catch (System.NullReferenceException)
    {
        // Expected: null cannot be wrapped.
    }
}
/// <summary>
/// Constructs a filter which removes words from the input token stream that
/// are named in the given array of words.
/// </summary>
/// <param name="in_Renamed">Input token stream.</param>
/// <param name="stopWords">Array of stop words.</param>
/// <param name="ignoreCase">Forwarded to <c>MakeStopSet</c>.</param>
public StopFilter(TokenStream in_Renamed, System.String[] stopWords, bool ignoreCase)
    : base(in_Renamed)
{
    var builtSet = MakeStopSet(stopWords, ignoreCase);
    this.stopWords = (CharArraySet) builtSet;
}
/// <summary>
/// Loads the default stop word set and stores it in <c>DEFAULT_SET</c>.
/// The word list is read as UTF-8 from the resource named by the namespace of
/// <c>SnowballFilter</c> followed by "." and <c>DEFAULT_STOPWORD_FILE</c>.
/// </summary>
/// <exception cref="Exception">
/// The word list could not be read; the underlying <see cref="IOException"/>
/// is attached as the inner exception so the root cause is not lost.
/// </exception>
static DefaultSetHolder()
{
    try
    {
        DEFAULT_SET = WordlistLoader.GetSnowballWordSet(
            IOUtils.GetDecodingReader(
                typeof(SnowballFilter),
                typeof(SnowballFilter).Namespace + "." + DEFAULT_STOPWORD_FILE,
                Encoding.UTF8),
#pragma warning disable 612, 618
            LuceneVersion.LUCENE_CURRENT);
#pragma warning restore 612, 618
    }
    catch (IOException ex)
    {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the original failure for diagnostics.
        throw new Exception("Unable to load default stopword set", ex);
    }
}
/// <summary>
/// Builds an analyzer with the given stop words and stemming exclusion set.
/// </summary>
/// <param name="matchVersion">lucene compatibility version</param>
/// <param name="stopwords">a stopword set</param>
/// <param name="stemExclusionSet">a stemming exclusion set</param>
public GermanAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
    : base(matchVersion, stopwords)
{
    // Copy the supplied set, then wrap the copy as unmodifiable.
    var exclusionCopy = CharArraySet.Copy(matchVersion, stemExclusionSet);
    exclusionSet = CharArraySet.UnmodifiableSet(exclusionCopy);
}
/// <summary>
/// Builds an analyzer with the given stop words, delegating to the
/// three-argument constructor with <c>CharArraySet.EMPTY_SET</c> as the
/// stemming exclusion set.
/// </summary>
/// <param name="matchVersion">lucene compatibility version</param>
/// <param name="stopwords">a stopword set</param>
public GermanAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords)
    : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
{
    // All work happens in the delegated constructor.
}
/// <summary>
/// Constructs a filter which removes words from the input token stream that
/// are named in the given array of words.
/// Deprecated: use the constructor overload that takes a set of stop words instead.
/// </summary>
/// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
/// <param name="in_Renamed">input TokenStream</param>
/// <param name="stopWords">array of stop words</param>
/// <param name="ignoreCase">true if case is ignored</param>
public StopFilter(bool enablePositionIncrements, TokenStream in_Renamed, System.String[] stopWords, bool ignoreCase)
    : base(in_Renamed)
{
    var builtSet = MakeStopSet(stopWords, ignoreCase);
    this.stopWords = (CharArraySet) builtSet;
    this.enablePositionIncrements = enablePositionIncrements;

    Init();
}
/// <summary>
/// Verifies that wrapping a set as unmodifiable keeps its count, and that
/// wrapping <c>null</c> throws <see cref="ArgumentNullException"/>.
/// </summary>
public virtual void TestUnmodifiableSet()
{
    var mutableSet = new CharArraySet(10, true);
    mutableSet.AddAll(new List<string>(TEST_STOP_WORDS));
    int expectedCount = mutableSet.Count;

    CharArraySet frozen = CharArraySet.UnmodifiableSet(mutableSet);
    Assert.AreEqual(expectedCount, frozen.Count, "Set size changed due to UnmodifiableSet call");

    Assert.Throws<ArgumentNullException>(
        () => CharArraySet.UnmodifiableSet(null),
        "can not make null unmodifiable");
}
/// <summary>
/// Creates an enumerator and remembers the set that created it.
/// </summary>
/// <param name="creator">Set stored in the <c>_Creator</c> field.</param>
protected internal CharArraySetEnumerator(CharArraySet creator)
{
    this._Creator = creator;
}
/// <summary>
/// Stores the owning set in the <c>enclosingInstance</c> field.
/// </summary>
/// <param name="enclosingInstance">Set to store.</param>
private void InitBlock(CharArraySet enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;
}
/// <summary>
/// Sole constructor. Stores the stop word set for later use.
/// </summary>
/// <param name="input">Token stream handed to the base constructor.</param>
/// <param name="stopWords">Stop word set kept in the <c>stopWords</c> field.</param>
public SuggestStopFilter(TokenStream input, CharArraySet stopWords)
    : base(input)
{
    this.stopWords = stopWords;
}
/// <summary>
/// Builds a stop word set from a list of objects, using each element's
/// <c>ToString()</c> value as the word.
/// </summary>
/// <param name="stopWords">A list of strings, char[] or any other ToString()-able objects representing the stop words</param>
/// <param name="ignoreCase">Case-insensitivity flag passed to the new <see cref="CharArraySet"/></param>
/// <returns>A set (<see cref="CharArraySet"/>) containing the words</returns>
public static ISet<string> MakeStopSet(IList<object> stopWords, bool ignoreCase)
{
    CharArraySet result = new CharArraySet(stopWords.Count, ignoreCase);
    foreach (object candidate in stopWords)
    {
        string text = candidate.ToString();
        result.Add(text);
    }
    return result;
}
/// <summary>
/// Builds a stop word set from an array of words.
/// </summary>
/// <param name="stopWords">An array of stop words</param>
/// <param name="ignoreCase">Case-insensitivity flag passed to the new set</param>
/// <returns>A set containing the words</returns>
public static ISet<string> MakeStopSet(string[] stopWords, bool ignoreCase)
{
    CharArraySet result = new CharArraySet(stopWords.Length, ignoreCase);
    result.AddAll(stopWords);
    return result;
}
/// <summary>
/// Resolves the stop word set for the current search operation.
/// <para>
/// The portal id and culture code come from the <c>SearchDocument</c> stored
/// in the thread data slot named <c>Constants.TlsSearchInfo</c>. When no
/// document is present, portal 0 and the current thread culture are used.
/// When the document has no culture code, the portal's default language is
/// used if the portal can be found.
/// </para>
/// <para>
/// If stop words are configured for that portal and culture, they replace the
/// default English set. The configured value is a comma-separated list; each
/// entry is trimmed and blank entries are skipped, so "a, the" yields "a" and
/// "the" rather than "a" and " the".
/// </para>
/// </summary>
/// <returns>
/// An unmodifiable set of the configured stop words, or
/// <c>StopAnalyzer.ENGLISH_STOP_WORDS_SET</c> when none are configured.
/// </returns>
private static ISet<string> GetStopWords()
{
    int portalId;
    string cultureCode;

    var searchDoc = Thread.GetData(Thread.GetNamedDataSlot(Constants.TlsSearchInfo)) as SearchDocument;
    if (searchDoc == null)
    {
        portalId = 0; // default
        cultureCode = Thread.CurrentThread.CurrentCulture.Name;
    }
    else
    {
        portalId = searchDoc.PortalId;
        cultureCode = searchDoc.CultureCode;
        if (string.IsNullOrEmpty(cultureCode))
        {
            var portalInfo = PortalController.Instance.GetPortal(portalId);
            if (portalInfo != null)
            {
                cultureCode = portalInfo.DefaultLanguage;
            }
        }
    }

    var stops = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    var searchStopWords = SearchHelper.Instance.GetSearchStopWords(portalId, cultureCode);

    if (searchStopWords != null && !string.IsNullOrEmpty(searchStopWords.StopWords))
    {
        //TODO Use cache from InternalSearchController
        var rawWords = searchStopWords.StopWords.Split(',');
        var set = new CharArraySet(rawWords.Length, false);
        foreach (var rawWord in rawWords)
        {
            // Surrounding whitespace would keep the entry from ever matching a token.
            var word = rawWord.Trim();
            if (word.Length > 0)
            {
                set.Add(word);
            }
        }

        stops = CharArraySet.UnmodifiableSet(set);
    }

    return stops;
}
/// <summary>
/// Builds a stop word set from an array of words, adding them one at a time.
/// </summary>
/// <param name="stopWords">An array of stop words.</param>
/// <param name="ignoreCase">Case-insensitivity flag passed to the new set.</param>
/// <returns>A set containing the words.</returns>
public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords, bool ignoreCase)
{
    CharArraySet result = new CharArraySet(stopWords.Length, ignoreCase);
    foreach (System.String word in stopWords)
    {
        result.Add(word);
    }
    return result;
}
/// <summary>
/// Builds a stop word set from a list of words.
/// </summary>
/// <param name="stopWords">A list of strings representing the stop words.</param>
/// <param name="ignoreCase">Case-insensitivity flag passed to the new set.</param>
/// <returns>A set containing the words.</returns>
public static System.Collections.Hashtable MakeStopSet(System.Collections.IList stopWords, bool ignoreCase)
{
    CharArraySet result = new CharArraySet(stopWords.Count, ignoreCase);
    result.AddAll(stopWords);
    return result;
}
/// <summary>
/// Creates the filter: builds the stop table from <c>STOP_WORDS</c>
/// (ignoreCase = false) and registers the term attribute.
/// </summary>
/// <param name="_in">Token stream handed to the base constructor.</param>
public ChineseFilter(TokenStream _in)
    : base(_in)
{
    IEnumerable<string> stopWordSource = (IEnumerable<string>)STOP_WORDS;
    stopTable = new CharArraySet(stopWordSource, false);

    termAtt = AddAttribute<ITermAttribute>();
}
/// <summary>
/// Verifies that every mutating operation on an unmodifiable set throws
/// <see cref="NotSupportedException"/> and leaves the set's content and count
/// untouched.
/// </summary>
public virtual void TestModifyOnUnmodifiable()
{
    CharArraySet stopSet = new CharArraySet(10, true);
    stopSet.AddAll(TEST_STOP_WORDS);
    int expectedCount = stopSet.Count;

    stopSet = CharArraySet.UnmodifiableSet(stopSet);
    Assert.AreEqual(expectedCount, stopSet.Count, "Set size changed due to UnmodifiableSet call");

    string notInSet = "SirGallahad";
    Assert.IsFalse(stopSet.Contains(notInSet), "Test String already exists in set");

    // Add(char[])
    Assert.Throws<NotSupportedException>(() => stopSet.Add(notInSet.ToCharArray()), "Modified unmodifiable set");
    Assert.IsFalse(stopSet.Contains(notInSet), "Test String has been added to unmodifiable set");
    Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");

    // Add(string)
    Assert.Throws<NotSupportedException>(() => stopSet.Add(notInSet), "Modified unmodifiable set");
    Assert.IsFalse(stopSet.Contains(notInSet), "Test String has been added to unmodifiable set");
    Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");

    // Add(StringBuilder)
    Assert.Throws<NotSupportedException>(() => stopSet.Add(new System.Text.StringBuilder(notInSet)), "Modified unmodifiable set");
    Assert.IsFalse(stopSet.Contains(notInSet), "Test String has been added to unmodifiable set");
    Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");

    // Clear()
    Assert.Throws<NotSupportedException>(() => stopSet.Clear(), "Modified unmodifiable set");
    Assert.IsFalse(stopSet.Contains(notInSet), "Changed unmodifiable set");
    Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");

    // Add(object)
    Assert.Throws<NotSupportedException>(() => stopSet.Add((object)notInSet), "Modified unmodifiable set");
    Assert.IsFalse(stopSet.Contains(notInSet), "Test String has been added to unmodifiable set");
    Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");

    // RemoveAll
    Assert.Throws<NotSupportedException>(() => stopSet.RemoveAll(new List<string>(TEST_STOP_WORDS)), "Modified unmodifiable set");
    Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");

    // RetainAll
    Assert.Throws<NotSupportedException>(() => stopSet.RetainAll(new List<string>(new[] { notInSet })), "Modified unmodifiable set");
    Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");

    // AddAll
    Assert.Throws<NotSupportedException>(() => stopSet.AddAll(new List<string>(new[] { notInSet })), "Modified unmodifiable set");
    Assert.IsFalse(stopSet.Contains(notInSet), "Test String has been added to unmodifiable set");

    // Every original word must still be present.
    foreach (var word in TEST_STOP_WORDS)
    {
        Assert.IsTrue(stopSet.Contains(word));
    }
}
/// <summary>
/// Verifies that every mutating operation on an unmodifiable set throws
/// <see cref="System.NotSupportedException"/> and leaves the set's content
/// and count untouched.
/// </summary>
public virtual void TestModifyOnUnmodifiable()
{
    CharArraySet stopSet = new CharArraySet(10, true);
    stopSet.AddAll(TEST_STOP_WORDS);
    int expectedCount = stopSet.Count;

    stopSet = CharArraySet.UnmodifiableSet(stopSet);
    Assert.AreEqual(expectedCount, stopSet.Count, "Set size changed due to UnmodifiableSet call");

    string notInSet = "SirGallahad";
    Assert.IsFalse(stopSet.Contains(notInSet), "Test String already exists in set");

    // Add(char[])
    try
    {
        stopSet.Add(notInSet.ToCharArray());
        Assert.Fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        Assert.IsFalse(stopSet.Contains(notInSet), "Test String has been added to unmodifiable set");
        Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");
    }

    // Add(string)
    try
    {
        stopSet.Add(notInSet);
        Assert.Fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        Assert.IsFalse(stopSet.Contains(notInSet), "Test String has been added to unmodifiable set");
        Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");
    }

    // Add(StringBuilder)
    try
    {
        stopSet.Add(new System.Text.StringBuilder(notInSet));
        Assert.Fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        Assert.IsFalse(stopSet.Contains(notInSet), "Test String has been added to unmodifiable set");
        Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");
    }

    // Clear()
    try
    {
        stopSet.Clear();
        Assert.Fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        Assert.IsFalse(stopSet.Contains(notInSet), "Changed unmodifiable set");
        Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");
    }

    // Add(object)
    try
    {
        stopSet.Add((object)notInSet);
        Assert.Fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        Assert.IsFalse(stopSet.Contains(notInSet), "Test String has been added to unmodifiable set");
        Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");
    }

    // RemoveAll
    try
    {
        stopSet.RemoveAll(new System.Collections.ArrayList(TEST_STOP_WORDS));
        Assert.Fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");
    }

    // RetainAll
    try
    {
        stopSet.RetainAll(new System.Collections.ArrayList(new System.String[] { notInSet }));
        Assert.Fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        Assert.AreEqual(expectedCount, stopSet.Count, "Size of unmodifiable set has changed");
    }

    // AddAll
    try
    {
        stopSet.AddAll(new System.Collections.ArrayList(new System.String[] { notInSet }));
        Assert.Fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        Assert.IsFalse(stopSet.Contains(notInSet), "Test String has been added to unmodifiable set");
    }

    // Every original word must still be present.
    foreach (var word in TEST_STOP_WORDS)
    {
        Assert.IsTrue(stopSet.Contains(word));
    }
}
/// <summary>
/// Constructs a token stream filtering the given input.
/// <para>
/// When <paramref name="stopWords"/> is already a <see cref="CharArraySet"/>
/// it is used directly and <paramref name="ignoreCase"/> has no effect, since
/// the set itself controls case sensitivity.
/// </para>
/// <para>
/// Otherwise a new <see cref="CharArraySet"/> is created with
/// <paramref name="ignoreCase"/> and <paramref name="stopWords"/> is handed
/// to its <c>Add</c> method.
/// </para>
/// </summary>
/// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
/// <param name="input">Input TokenStream</param>
/// <param name="stopWords">The set of stop words.</param>
/// <param name="ignoreCase">Ignore case when stopping (only used when a new set is built).</param>
public StopFilter(bool enablePositionIncrements, TokenStream input, System.Collections.Hashtable stopWords, bool ignoreCase)
    : base(input)
{
    CharArraySet readySet = stopWords as CharArraySet;
    if (readySet == null)
    {
        this.stopWords = new CharArraySet(stopWords.Count, ignoreCase);
        // NOTE(review): the whole table is passed to Add as one argument;
        // presumably CharArraySet.Add unpacks a Hashtable - confirm.
        this.stopWords.Add(stopWords);
    }
    else
    {
        this.stopWords = readySet;
    }

    this.enablePositionIncrements = enablePositionIncrements;
    Init();
}
/// <summary>
/// Returns an unmodifiable <see cref="CharArraySet"/>. This makes it possible
/// to hand out internal sets for "read-only" use.
/// </summary>
/// <param name="set_Renamed">The set for which the unmodifiable set is returned.</param>
/// <returns>A new unmodifiable <see cref="CharArraySet"/>.</returns>
/// <exception cref="System.NullReferenceException">If the given set is <c>null</c>.</exception>
public static CharArraySet UnmodifiableSet(CharArraySet set_Renamed)
{
    if (set_Renamed == null)
    {
        throw new System.NullReferenceException("Given set is null");
    }

    // The result does not delegate to the given set; it is constructed from
    // the source set's entries, ignoreCase and count members.
    return new UnmodifiableCharArraySet(
        set_Renamed.entries,
        set_Renamed.ignoreCase,
        set_Renamed.count);
}
/// <summary>
/// Builds a stop word set from an array of words, adding them in bulk through
/// an <see cref="System.Collections.ArrayList"/> wrapper.
/// </summary>
/// <param name="stopWords">An array of stop words.</param>
/// <param name="ignoreCase">Case-insensitivity flag passed to the new set.</param>
/// <returns>A set containing the words.</returns>
public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords, bool ignoreCase)
{
    CharArraySet result = new CharArraySet(stopWords.Length, ignoreCase);
    System.Collections.ArrayList wordList = new System.Collections.ArrayList(stopWords);
    result.AddAll(wordList);
    return result;
}
/// <summary>
/// Creates an iterator over the given set: records the owning set via
/// <c>InitBlock</c>, then calls <c>GoNext</c> once.
/// </summary>
/// <param name="enclosingInstance">Set this iterator belongs to.</param>
internal CharArraySetIterator(CharArraySet enclosingInstance)
{
    // The owner is recorded before the first GoNext() call.
    this.InitBlock(enclosingInstance);
    this.GoNext();
}
/// <summary>
/// Constructs a filter which removes words from the input token stream that
/// are named in the given array of words.
/// </summary>
/// <param name="enablePositionIncrements">Value stored in the <c>enablePositionIncrements</c> field.</param>
/// <param name="in_Renamed">Input token stream handed to the base constructor.</param>
/// <param name="stopWords">Array of stop words.</param>
/// <param name="ignoreCase">Forwarded to <c>MakeStopSet</c>.</param>
public StopFilter(bool enablePositionIncrements, TokenStream in_Renamed, string[] stopWords, bool ignoreCase)
    : base(in_Renamed)
{
    var builtSet = MakeStopSet(stopWords, ignoreCase);
    this.stopWords = (CharArraySet)builtSet;
    this.enablePositionIncrements = enablePositionIncrements;

    Init();
}