Exemplo n.º 1
0
 /// <summary>
 /// Reads a stem dictionary file whose entries overrule the stemming algorithm.
 /// The file is expected to be a text file that contains, per line,
 /// <c>word\tstem</c>, i.e. two tab-separated words.
 /// </summary>
 /// <param name="stemdictFile">File handed to <c>WordlistLoader.GetStemDict</c>.</param>
 /// <exception cref="Exception">
 /// Thrown when loading fails with an <see cref="IOException"/>; the original
 /// error is available through <see cref="Exception.InnerException"/>.
 /// </exception>
 public void SetStemDictionary(FileInfo stemdictFile)
 {
     try
     {
         stemdict            = WordlistLoader.GetStemDict(stemdictFile);
         PreviousTokenStream = null; // force a new stemmer to be created
     }
     catch (IOException e)
     {
         // The wrapper type stays System.Exception so callers that already catch it
         // (or inspect InnerException) keep working; only the previously empty
         // message is replaced by one that names the operation and the file.
         // String concatenation uses FileInfo.ToString() and tolerates null.
         throw new Exception("Unable to load the stem dictionary from file '" + stemdictFile + "'.", e);
     }
 }
Exemplo n.º 2
0
 /// <summary>
 /// Builds an exclusion list from the words contained in the given file.
 /// </summary>
 /// <remarks>
 /// Deprecated: use the <c>DutchAnalyzer(Version, Set, Set)</c> constructor instead.
 /// </remarks>
 /// <param name="exclusionlist">File handed to <c>WordlistLoader.GetWordSet</c>.</param>
 /// <exception cref="Exception">
 /// Thrown when loading fails with an <see cref="IOException"/>; the original
 /// error is available through <see cref="Exception.InnerException"/>.
 /// </exception>
 public void SetStemExclusionTable(FileInfo exclusionlist)
 {
     try
     {
         excltable           = WordlistLoader.GetWordSet(exclusionlist);
         PreviousTokenStream = null; // force a new stemmer to be created
     }
     catch (IOException e)
     {
         // The wrapper type stays System.Exception so callers that already catch it
         // (or inspect InnerException) keep working; only the previously empty
         // message is replaced by one that names the operation and the file.
         // String concatenation uses FileInfo.ToString() and tolerates null.
         throw new Exception("Unable to load the stem exclusion list from file '" + exclusionlist + "'.", e);
     }
 }
Exemplo n.º 3
0
 /// <summary>
 /// Builds an analyzer with the stop words read from the given file.
 /// </summary>
 /// <remarks>
 /// Deprecated: use the <c>DutchAnalyzer(Version, Set)</c> constructor instead.
 /// </remarks>
 /// <param name="matchVersion">Version value stored in the <c>matchVersion</c> field.</param>
 /// <param name="stopwords">File handed to <c>WordlistLoader.GetWordSet</c>.</param>
 /// <exception cref="Exception">
 /// Thrown when loading fails with an <see cref="IOException"/>; the original
 /// error is available through <see cref="Exception.InnerException"/>.
 /// </exception>
 public DutchAnalyzer(Version matchVersion, FileInfo stopwords)
 {
     // NOTE (kept from the original author): this is completely broken!
     SetOverridesTokenStreamMethod <DutchAnalyzer>();
     try
     {
         stoptable = WordlistLoader.GetWordSet(stopwords);
     }
     catch (IOException e)
     {
         // The wrapper type stays System.Exception so callers that already catch it
         // (or inspect InnerException) keep working; only the previously empty
         // message is replaced by one that names the operation and the file.
         // String concatenation uses FileInfo.ToString() and tolerates null.
         throw new Exception("Unable to load the stop word set from file '" + stopwords + "'.", e);
     }
     this.matchVersion = matchVersion;
 }
Exemplo n.º 4
0
            /// <summary>
            /// Loads the default stop word set: a reader is obtained from
            /// <c>IOUtils.GetDecodingReader</c> for <c>typeof(SnowballFilter)</c> and
            /// <c>DEFAULT_STOPWORD_FILE</c> using UTF-8, and is handed to
            /// <c>WordlistLoader.GetSnowballWordSet</c>.
            /// </summary>
            /// <returns>The set returned by <c>WordlistLoader.GetSnowballWordSet</c>.</returns>
            /// <exception cref="Exception">
            /// Thrown when an <see cref="IOException"/> occurs while loading; the
            /// original error is attached as the inner exception.
            /// </exception>
            private static CharArraySet LoadDefaultStopSet() // LUCENENET: Avoid static constructors (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006)
            {
                try
                {
                    var stopwordReader = IOUtils.GetDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, Encoding.UTF8);

                    // Warnings 612/618 (obsolete member usage) are suppressed only
                    // around the LUCENE_CURRENT reference.
#pragma warning disable 612, 618
                    return WordlistLoader.GetSnowballWordSet(stopwordReader, LuceneVersion.LUCENE_CURRENT);
#pragma warning restore 612, 618
                }
                catch (IOException ex)
                {
                    // Per the original note: the default set should always be present
                    // because it is part of the distribution (JAR).
                    throw new Exception("Unable to load default stopword set", ex);
                }
            }
Exemplo n.º 5
0
            /// <summary>
            /// Static initializer: assigns <c>DEFAULT_STOP_SET</c> from the result of
            /// <c>WordlistLoader.GetSnowballWordSet</c> and fills <c>DEFAULT_STEM_DICT</c>
            /// with four fixed word-to-stem entries.
            /// </summary>
            /// <exception cref="Exception">
            /// Thrown when an <see cref="IOException"/> occurs while loading the stop
            /// word set; the original error is attached as the inner exception.
            /// </exception>
            static DefaultSetHolder()
            {
                try
                {
                    DEFAULT_STOP_SET = WordlistLoader.GetSnowballWordSet(
                        IOUtils.GetDecodingReader(typeof(SnowballFilter), typeof(SnowballFilter).Namespace + "." + DEFAULT_STOPWORD_FILE, Encoding.UTF8),
                        LuceneVersion.LUCENE_CURRENT);
                }
                catch (IOException ex)
                {
                    // default set should always be present as it is part of the
                    // distribution (JAR)
                    // Keep the caught IOException as InnerException so the root cause
                    // is not lost when this surfaces from the type initializer.
                    throw new Exception("Unable to load default stopword set", ex);
                }

                DEFAULT_STEM_DICT = new CharArrayMap <string>(LuceneVersion.LUCENE_CURRENT, 4, false);
                DEFAULT_STEM_DICT.Put("fiets", "fiets");         //otherwise fiet
                DEFAULT_STEM_DICT.Put("bromfiets", "bromfiets"); //otherwise bromfiet
                DEFAULT_STEM_DICT.Put("ei", "eier");
                DEFAULT_STEM_DICT.Put("kind", "kinder");
            }
Exemplo n.º 6
0
 /// <summary>
 /// Loads a stem dictionary from the given file and stores it in <c>_stemdict</c>.
 /// Entries in the dictionary overrule the stemming algorithm. The file is a
 /// text file containing one tab-separated <c>word\tstem</c> pair per line.
 /// </summary>
 /// <param name="stemdict">File handed to <c>WordlistLoader.GetStemDict</c>.</param>
 public void SetStemDictionary(FileInfo stemdict)
 {
     var loadedDictionary = WordlistLoader.GetStemDict(stemdict);
     _stemdict = loadedDictionary;
 }
Exemplo n.º 7
0
 /// <summary>
 /// Builds the stem exclusion list from the words contained in the given file
 /// and stores it in <c>excltable</c>.
 /// </summary>
 /// <param name="exclusionlist">File handed to <c>WordlistLoader.GetWordtable</c>.</param>
 public void SetStemExclusionTable(FileInfo exclusionlist)
 {
     var loadedExclusions = WordlistLoader.GetWordtable(exclusionlist);
     excltable = loadedExclusions;
 }
Exemplo n.º 8
0
 /// <summary>
 /// Builds an analyzer whose stop words are read from the given file; the
 /// loaded table is stored in <c>stoptable</c>.
 /// </summary>
 /// <param name="stopwords">File handed to <c>WordlistLoader.GetWordtable</c>.</param>
 public DutchAnalyzer(FileInfo stopwords)
 {
     var loadedStopwords = WordlistLoader.GetWordtable(stopwords);
     stoptable = loadedStopwords;
 }