/// <summary>
/// Sets up the service with the <c>LUCENE_30</c> compatibility version, a
/// <c>StandardAnalyzer</c> built for that version, and the index directory.
/// </summary>
public TheIndexService()
{
    _version = Version.LUCENE_30;
    _analyzer = new StandardAnalyzer(_version);

    // The index location is the application base directory with the
    // "pathIndex" app setting appended by plain string concatenation.
    string indexPath = AppDomain.CurrentDomain.BaseDirectory + ConfigurationManager.AppSettings["pathIndex"];
    _path = new DirectoryInfo(indexPath);
}
/// <summary>
/// Populates the supported language ids together with the file suffix and
/// analyzer registered for each of them.
/// </summary>
/// <param name="version">Version handed to every analyzer created here.</param>
private void InitLanguages(Version version)
{
    const int croatian = 1;
    const int english = 2;

    langs = new HashSet<int> { croatian, english };
    langSuffix = new Dictionary<int, string>();
    langAnalyzer = new Dictionary<int, Analyzer>();

    // Croatian
    langSuffix[croatian] = @"hr";
    langAnalyzer[croatian] = new CroAnalyzer(version);

    // English
    langSuffix[english] = @"en";
    langAnalyzer[english] = new StandardAnalyzer(version);

    // Currently disabled languages (kept for reference):
    //   3 - German,  suffix "de", GermanAnalyzer
    //   4 - Italian, suffix "it", analyzer not chosen yet
    //   5 - Czech,   suffix "cz", CzechAnalyzer
}
/// <summary>
/// Stores the supplied analyzer, Lucene version and index directory, and
/// starts with <c>Debug</c> switched off.
/// </summary>
/// <param name="directory">Value assigned to <c>IndexDirectory</c>.</param>
/// <param name="analyzer">Value assigned to <c>Analyzer</c>.</param>
/// <param name="version">Value assigned to <c>CurrentLuceneVersion</c>.</param>
public LuceneTesterBase(
    LuceneDirectory directory,
    LuceneAnalyzer analyzer,
    LuceneVersion version)
{
    this.Analyzer = analyzer;
    this.CurrentLuceneVersion = version;
    this.IndexDirectory = directory;
    this.Debug = false;
}
/// <summary>
/// Initialize this factory via a set of key-value pairs.
/// </summary>
/// <param name="args">
/// Factory arguments. A copy is kept in <c>originalArgs</c>; the
/// <c>CLASS_NAME</c> entry is removed from the passed-in dictionary.
/// </param>
protected internal AbstractAnalysisFactory(IDictionary<string, string> args)
{
    // Snapshot the arguments before any of them are consumed. C# has no
    // diamond operator, so the type arguments must be spelled out.
    originalArgs = Collections.UnmodifiableMap(new Dictionary<string, string>(args));

    // NOTE(review): presumably get(...) also consumes the entry - confirm against its definition.
    string version = get(args, LUCENE_MATCH_VERSION_PARAM);
    luceneMatchVersion = version == null ? null : Version.ParseLeniently(version);

    args.Remove(CLASS_NAME); // consume the class arg
}
/// <summary>
/// Captures the collaborators supplied by the caller; every argument is
/// stored unchanged in the field of the same name.
/// </summary>
/// <param name="directory">Index directory.</param>
/// <param name="analyzer">Analyzer to keep.</param>
/// <param name="version">Lucene version to keep.</param>
/// <param name="indexWriter">Index writer to keep.</param>
/// <param name="transactionLock">Lock object to keep.</param>
public Context(
    Directory directory,
    Analyzer analyzer,
    Version version,
    IIndexWriter indexWriter,
    object transactionLock)
{
    this.directory = directory;
    this.analyzer = analyzer;
    this.version = version;
    this.indexWriter = indexWriter;
    this.transactionLock = transactionLock;
}
/// <summary>
/// Creates an index description from the given type name, directory,
/// builders and version, starting with an empty index-path table.
/// </summary>
/// <param name="typeName">Value assigned to <c>TypeName</c>.</param>
/// <param name="indexDir">Value assigned to <c>Directory</c>.</param>
/// <param name="docBuilder">Value assigned to <c>DocumentBuilder</c>.</param>
/// <param name="pathBuilder">Value assigned to <c>IndexPathBuilder</c>.</param>
/// <param name="version">Lucene version stored on the instance.</param>
public LuceneIndex(
    string typeName,
    string indexDir,
    IDocumentBuilder docBuilder,
    IIndexPathBuilder pathBuilder,
    LN.Util.Version version)
{
    TypeName = typeName;
    Directory = indexDir;
    this.version = version;
    DocumentBuilder = docBuilder;
    IndexPathBuilder = pathBuilder;

    indexPaths = new Dictionary<string, LuceneIndexPath>();
}
/// <summary>
/// Creates the service with a fresh lock object, the <c>LUCENE_29</c>
/// version, a <c>RAMDirectory</c>, and an <c>IndexWriter</c> over that
/// directory using a <c>StandardAnalyzer</c> and unlimited field length.
/// </summary>
public SearchService()
{
    _lock = new object();
    _version = Version.LUCENE_29;
    _directory = new RAMDirectory();

    var writerAnalyzer = new StandardAnalyzer(_version);
    _writer = new IndexWriter(_directory, writerAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
}
/// <summary>
/// Builds a <c>BooleanQuery</c> containing one required (<c>Occur.MUST</c>)
/// clause per entry in <c>values</c>.
/// </summary>
/// <param name="analyzer">Analyzer handed to each per-field <c>QueryParser</c>.</param>
/// <param name="version">Version handed to each per-field <c>QueryParser</c>.</param>
/// <returns>The combined query.</returns>
/// <exception cref="InvalidOperationException">Thrown when <c>Empty</c> is true.</exception>
public Query ToQuery(Analyzer analyzer, Version version)
{
    if (Empty)
    {
        throw new InvalidOperationException("No key fields defined.");
    }

    var query = new BooleanQuery();

    values.Apply(kvp =>
    {
        // One parser per entry, keyed on the entry's field name.
        var parser = new QueryParser(version, kvp.Key, analyzer);
        var clause = Parse(parser, ConvertToQueryExpression(kvp));
        query.Add(clause, Occur.MUST);
    });

    return query;
}
/// <summary>
/// Opens (or creates, when none exists yet) the Lucene index stored at the
/// given application-relative location and prepares a writer and analyzer.
/// </summary>
/// <param name="indexLocation">
/// Location appended to "~" and resolved through <c>Server.MapPath</c>.
/// </param>
/// <exception cref="FileNotFoundException">
/// Thrown when <paramref name="indexLocation"/> is null or empty.
/// </exception>
public SearchManager(string indexLocation)
{
    if (string.IsNullOrEmpty(indexLocation))
    {
        throw new FileNotFoundException("The lucene index could not be found.");
    }

    _luceneVersion = Version.LUCENE_30;

    string virtualPath = "~" + indexLocation;
    string physicalPath = HttpContext.Current.Server.MapPath(virtualPath);
    _directory = FSDirectory.Open(new DirectoryInfo(physicalPath));

    // Only ask the writer to create the index when none exists yet.
    bool indexMissing = !IndexReader.IndexExists(_directory);
    _writer = new IndexWriter(
        _directory,
        new StandardAnalyzer(_luceneVersion),
        indexMissing,
        IndexWriter.MaxFieldLength.UNLIMITED);

    _analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(_luceneVersion));
}
/// <summary>
/// Creates the analyzer and fills the stop table from the embedded
/// stop-word resource, one entry per line. Nothing is loaded when the
/// resource cannot be found.
/// </summary>
/// <param name="version">Version stored on the instance.</param>
public PersianAnalyzer(Version version)
{
    _version = version;

    var resourceName = "Lucene.Net.Analysis.Fa." + DefaultStopwordFile;
    var stopwordStream = System.Reflection.Assembly.GetAssembly(GetType()).GetManifestResourceStream(resourceName);
    if (stopwordStream == null)
    {
        return;
    }

    using (var reader = new StreamReader(stopwordStream))
    {
        while (!reader.EndOfStream)
        {
            var line = reader.ReadLine();
            if (line != null)
            {
                // Each word is stored as both key and value.
                _stoptable.Add(line, line);
            }
        }
    }
}
/// <summary>
/// Creates the analyzer and loads the stop words from the default
/// stop-word file under the relative <c>..\..\..\data\</c> directory.
/// Every line is normalized with <c>RefineAndFilterPersianWord</c> before
/// it is added to the stop table.
/// </summary>
/// <param name="version">Version stored on the instance.</param>
public MyAnalyzer(Version version)
{
    _version = version;

    // Open read-only: the file is only read, and the two-argument FileStream
    // constructor would request read/write access. Both disposables sit in
    // using blocks so the handle is released even if the reader fails.
    using (var fileStream = new FileStream(@"..\..\..\data\" + DefaultStopwordFile, FileMode.Open, FileAccess.Read))
    using (var reader = new StreamReader(fileStream))
    {
        while (!reader.EndOfStream)
        {
            var word = reader.ReadLine();
            if (word != null)
            {
                // Normalize characters of stop words
                word = SCICT.NLP.Utility.StringUtil.RefineAndFilterPersianWord(word);
                _stoptable.Add(word);
            }
        }
    }
}
/// <summary>
/// Builds an analyzer whose stop words are read from the given file via
/// <c>WordlistLoader.GetWordSet</c>.
/// </summary>
/// <remarks>Deprecated: use the constructor taking a stop-word set instead.</remarks>
/// <param name="matchVersion">Version forwarded to the set-based constructor.</param>
/// <param name="stopwords">File containing the stop words.</param>
public CzechAnalyzer(Version matchVersion, FileInfo stopwords)
    : this(matchVersion, WordlistLoader.GetWordSet(stopwords))
{
}
/// <summary>
/// Creates a reflection-based document mapper for <typeparamref name="TModel"/>.
/// </summary>
/// <param name="version">Version passed to the mapper's constructor.</param>
/// <returns>A new <c>ReflectionDocumentMapper</c> instance.</returns>
public IDocumentMapper<TModel> ToDocumentMapper(Version version)
{
    return new ReflectionDocumentMapper<TModel>(version);
}
/// <summary>
/// Creates the parser by forwarding all arguments to the base constructor.
/// </summary>
/// <param name="matchVersion">Version forwarded to the base parser.</param>
/// <param name="f">Field argument forwarded to the base parser.</param>
/// <param name="a">Analyzer forwarded to the base parser.</param>
public RangeQueryParser(Version matchVersion, string f, Analyzer a)
    : base(matchVersion, f, a)
{
}
/// <summary>
/// Creates a new <c>StandardAnalyzer</c> for the given version.
/// </summary>
/// <param name="version">Version passed to the analyzer.</param>
/// <returns>A freshly constructed analyzer on every call.</returns>
public Analyzer GetAnalyzer(Version version)
{
    Analyzer standard = new StandardAnalyzer(version);
    return standard;
}
/// <summary>
/// Creates a new StandardTokenizer that uses the given attribute factory.
/// </summary>
/// <param name="matchVersion">Version passed on to <c>Init</c>.</param>
/// <param name="factory">Attribute factory handed to the base constructor.</param>
/// <param name="input">Text to tokenize.</param>
public StandardTokenizer(Version matchVersion, AttributeFactory factory, System.IO.TextReader input)
    : base(factory)
{
    InitBlock();

    // The scanner is created before Init runs, as in the original ordering.
    scanner = new StandardTokenizerImpl(input);
    Init(input, matchVersion);
}
/// <summary>
/// Wraps a <c>StandardAnalyzer</c> created for the given version.
/// </summary>
/// <param name="matchVersion">Version passed to the wrapped analyzer.</param>
public ShingleAnalyzerWrapper(Version matchVersion)
{
    defaultAnalyzer = new StandardAnalyzer(matchVersion);
    SetOverridesTokenStreamMethod<ShingleAnalyzerWrapper>();
}
/// <summary>
/// Creates the parser, using <c>DefaultSearchFieldName</c> as the field
/// argument passed to the base constructor.
/// </summary>
/// <param name="matchVersion">Version forwarded to the base parser.</param>
/// <param name="documentMapper">Document mapper forwarded to the base parser.</param>
public NuGetQueryParser(Version matchVersion, IDocumentMapper<LucenePackage> documentMapper)
    : base(matchVersion, DefaultSearchFieldName, documentMapper)
{
}
/// <summary>
/// Builds an analyzer with the given stop words, converted to a set with
/// <c>StopFilter.MakeStopSet</c>.
/// </summary>
/// <remarks>Deprecated: use the constructor taking a stop-word set instead.</remarks>
/// <param name="matchVersion">Version forwarded to the set-based constructor.</param>
/// <param name="stopwords">Stop words to filter out.</param>
public RussianAnalyzer(Version matchVersion, params string[] stopwords)
    : this(matchVersion, StopFilter.MakeStopSet(stopwords))
{
}
/// <summary>
/// Builds an analyzer that uses <c>DefaultSetHolder.DEFAULT_STOP_SET</c>
/// as its stop words.
/// </summary>
/// <param name="matchVersion">Version forwarded to the set-based constructor.</param>
public RussianAnalyzer(Version matchVersion)
    : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
{
}
/// <summary>
/// Builds an analyzer whose stop words are the keys of the given dictionary.
/// </summary>
/// <remarks>
/// TODO: create a Set version of this ctor.
/// Deprecated: use the constructor taking a stop-word set instead.
/// </remarks>
/// <param name="matchVersion">Version forwarded to the array-based constructor.</param>
/// <param name="stopwords">Dictionary whose keys are used as stop words; the values are ignored.</param>
public RussianAnalyzer(Version matchVersion, IDictionary<string, string> stopwords)
    : this(matchVersion, stopwords.Keys.ToArray())
{
}
/// <summary>
/// Builds an analyzer with the given stop words.
/// </summary>
/// <param name="matchVersion">Lucene compatibility version.</param>
/// <param name="stopwords">A stopword set; it is copied and the copy wrapped as unmodifiable.</param>
public RussianAnalyzer(Version matchVersion, ISet<string> stopwords)
{
    var stopwordCopy = CharArraySet.Copy(stopwords);
    stopSet = CharArraySet.UnmodifiableSet(stopwordCopy);
    this.matchVersion = matchVersion;
}
/// <summary>
/// Wraps a <c>StandardAnalyzer</c> and sets the maximum shingle size.
/// </summary>
/// <param name="matchVersion">Version forwarded to the single-argument constructor.</param>
/// <param name="nGramSize">Value stored as <c>maxShingleSize</c>.</param>
public ShingleAnalyzerWrapper(Version matchVersion, int nGramSize)
    : this(matchVersion)
{
    maxShingleSize = nGramSize;
}
/// <summary>
/// Builds an analyzer whose stop words are the keys of the given dictionary.
/// </summary>
/// <remarks>
/// TODO: create a Set version of this ctor.
/// Deprecated: use the constructor taking a stop-word set instead.
/// </remarks>
/// <param name="matchVersion">Version forwarded to the array-based constructor.</param>
/// <param name="stopwords">Dictionary whose keys are used as stop words; the values are ignored.</param>
public RussianAnalyzer(Version matchVersion, IDictionary<string, string> stopwords)
    : this(matchVersion, stopwords.Keys.ToArray())
{
}
/// <summary>
/// Creates the analyzer by forwarding the version to the base constructor.
/// </summary>
/// <param name="matchVersion">Version forwarded to the base analyzer.</param>
public GermanSubclassAnalyzer(Version matchVersion)
    : base(matchVersion)
{
}
/// <summary>
/// Constructs a new instance with a client-provided <see cref="Analyzer"/>.
/// No index writer is supplied (null is forwarded) and a new lock object
/// is created for this instance.
/// </summary>
/// <param name="directory">Index directory.</param>
/// <param name="externalAnalyzer">Analyzer supplied by the caller.</param>
/// <param name="version">Lucene version forwarded to the full constructor.</param>
public LuceneDataProvider(Directory directory, Analyzer externalAnalyzer, Version version)
    : this(directory, externalAnalyzer, version, null, new object())
{
}
/// <summary>
/// Builds an analyzer with the stop words from the given file.
/// </summary>
/// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)"/>
/// <param name="matchVersion">
/// Version used to pick the default for <c>enablePositionIncrements</c>.
/// </param>
/// <param name="stopwordsFile">File to load stop words from.</param>
public StopAnalyzer(Version matchVersion, System.IO.FileInfo stopwordsFile)
{
    this.stopWords = WordlistLoader.GetWordSet(stopwordsFile);
    this.enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
}
/// <summary>
/// Constructs a new instance. Neither an analyzer nor an index writer is
/// supplied (null is forwarded for both) and a new lock object is created.
/// </summary>
/// <param name="directory">Index directory.</param>
/// <param name="version">Lucene version forwarded to the full constructor.</param>
public LuceneDataProvider(Directory directory, Version version)
    : this(directory, null, version, null, new object())
{
}
/// <summary>
/// Creates the analyzer by forwarding the version to the base constructor.
/// </summary>
/// <param name="matchVersion">Version forwarded to the base analyzer.</param>
public DutchSubclassAnalyzer(Version matchVersion)
    : base(matchVersion)
{
}
/// <summary>
/// Constructs a new instance with an externally provided <see cref="IndexWriter"/>.
/// The writer is wrapped in an <c>IndexWriterAdapter</c>; no analyzer is
/// supplied (null is forwarded) and a new lock object is created.
/// </summary>
/// <param name="directory">Index directory.</param>
/// <param name="version">Lucene version forwarded to the full constructor.</param>
/// <param name="externalWriter">Writer supplied by the caller.</param>
public LuceneDataProvider(Directory directory, Version version, IndexWriter externalWriter)
    : this(directory, null, version, new IndexWriterAdapter(externalWriter), new object())
{
}
/// <summary>
/// Creates the analyzer and stores the given compatibility version.
/// </summary>
/// <param name="matchVersion">Version stored on the instance.</param>
public ThaiAnalyzer(Version matchVersion)
{
    SetOverridesTokenStreamMethod<ThaiAnalyzer>();
    this.matchVersion = matchVersion;
}
/// <summary>
/// Constructs a new instance with a client-provided <see cref="Analyzer"/>
/// and <see cref="IndexWriter"/>. The writer is wrapped in an
/// <c>IndexWriterAdapter</c> and a new lock object is created.
/// </summary>
/// <param name="directory">Index directory.</param>
/// <param name="externalAnalyzer">Analyzer supplied by the caller.</param>
/// <param name="version">Lucene version forwarded to the full constructor.</param>
/// <param name="indexWriter">Writer supplied by the caller.</param>
public LuceneDataProvider(Directory directory, Analyzer externalAnalyzer, Version version, IndexWriter indexWriter)
    : this(directory, externalAnalyzer, version, new IndexWriterAdapter(indexWriter), new object())
{
}
/// <summary>
/// Opens the index directory named by the
/// "Adxstudio.Xrm.Search.Index.DirectoryPath" site setting.
/// </summary>
/// <param name="version">Not used by this implementation.</param>
/// <returns>
/// The opened directory, or null when the setting is null or empty.
/// </returns>
public Directory GetDirectory(Version version)
{
    var indexPathSetting = _portal.ServiceContext.GetSiteSettingValueByName(_portal.Website, "Adxstudio.Xrm.Search.Index.DirectoryPath");

    if (string.IsNullOrEmpty(indexPathSetting))
    {
        return null;
    }

    return FSDirectory.Open(new DirectoryInfo(indexPathSetting));
}
/// <summary>
/// Constructs a new instance around an externally supplied index writer;
/// no analyzer is supplied (null is forwarded).
/// If the supplied writer will be written to outside of this instance of
/// LuceneDataProvider, the <paramref name="transactionLock"/> will be used
/// to coordinate writes.
/// </summary>
/// <param name="directory">Index directory.</param>
/// <param name="version">Lucene version forwarded to the full constructor.</param>
/// <param name="externalWriter">Writer supplied by the caller.</param>
/// <param name="transactionLock">Lock object forwarded to the full constructor.</param>
public LuceneDataProvider(Directory directory, Version version, IIndexWriter externalWriter, object transactionLock)
    : this(directory, null, version, externalWriter, transactionLock)
{
}
/// <summary>
/// Creates a new <c>PorterStemAnalyzer</c> for the given version.
/// Subclasses may override this to supply a different analyzer.
/// </summary>
/// <param name="version">Version passed to the analyzer.</param>
/// <returns>A freshly constructed analyzer on every call.</returns>
protected virtual Analyzer GetAnalyzer(Version version)
{
    Analyzer stemming = new PorterStemAnalyzer(version);
    return stemming;
}
/// <summary>
/// Constructs an instance that will create an <see cref="Analyzer"/>
/// using metadata on public properties on the type <typeparamref name="T"/>.
/// No external analyzer is supplied (null is forwarded).
/// </summary>
/// <param name="version">Version compatibility for analyzers and indexers.</param>
protected DocumentMapperBase(Version version)
    : this(version, null)
{
}
/// <summary>
/// Builds an analyzer with the given stop words.
/// </summary>
/// <remarks>Deprecated: use the constructor taking an <c>ISet</c> of stop words instead.</remarks>
/// <param name="matchVersion">Version forwarded to the set-based constructor.</param>
/// <param name="stopwords">Stop words; passed on as an <c>ISet</c> of strings.</param>
public CzechAnalyzer(Version matchVersion, HashSet<string> stopwords)
    : this(matchVersion, (ISet<string>)stopwords)
{
}
/// <summary>
/// Constructs an instance with an externally supplied analyzer
/// and the compatibility version of the index.
/// </summary>
/// <param name="version">Version compatibility for analyzers and indexers.</param>
/// <param name="externalAnalyzer">Analyzer supplied by the caller; stored as given.</param>
protected DocumentMapperBase(Version version, Analyzer externalAnalyzer)
{
    this.version = version;
    this.externalAnalyzer = externalAnalyzer;

    // The internal analyzer always starts as a PerFieldAnalyzer over a KeywordAnalyzer.
    var fallback = new KeywordAnalyzer();
    this.analyzer = new PerFieldAnalyzer(fallback);
}
/// <summary>
/// Builds an analyzer with the given stop words.
/// </summary>
/// <param name="matchVersion">Lucene compatibility version.</param>
/// <param name="stopwords">A stopword set; it is copied and the copy wrapped as unmodifiable.</param>
public RussianAnalyzer(Version matchVersion, ISet<string> stopwords)
{
    var stopwordCopy = CharArraySet.Copy(stopwords);
    stopSet = CharArraySet.UnmodifiableSet(stopwordCopy);
    this.matchVersion = matchVersion;
}
/// <summary>
/// Creates the factory and initializes the language tables for the given version.
/// </summary>
/// <param name="version">Lucene version; defaults to <c>LUCENE_30</c>.</param>
public AnalyzerFactory(Version version = Version.LUCENE_30)
{
    this.version = version;
    this.InitLanguages(version);
}
/// <summary>
/// Builds an analyzer which removes the words in <c>ENGLISH_STOP_WORDS_SET</c>.
/// </summary>
/// <param name="matchVersion">
/// Version used to pick the default for <c>enablePositionIncrements</c>.
/// </param>
public StopAnalyzer(Version matchVersion)
{
    this.stopWords = ENGLISH_STOP_WORDS_SET;
    this.enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
}
/// <summary>
/// Asks each factory in turn for a directory and returns the first
/// non-null result.
/// </summary>
/// <param name="version">Version passed to every factory.</param>
/// <returns>
/// The first non-null directory, or null when no factory returns one.
/// </returns>
public Directory GetDirectory(Version version)
{
    // Lazily evaluated: factories after the first match are never consulted.
    var candidates = _factories.Select(factory => factory.GetDirectory(version));
    return candidates.FirstOrDefault(directory => directory != null);
}
/// <summary>
/// Builds an analyzer with the given stop words.
/// </summary>
/// <param name="matchVersion">Version stored on the instance.</param>
/// <param name="stopWords">Stop-word set; it is copied and the copy wrapped as unmodifiable.</param>
public CJKAnalyzer(Version matchVersion, ISet<string> stopWords)
{
    var stopWordCopy = CharArraySet.Copy(stopWords);
    stopTable = CharArraySet.UnmodifiableSet(stopWordCopy);
    this.matchVersion = matchVersion;
}
/// <summary>
/// Constructs a new instance with the given parameters.
/// </summary>
/// <param name="matchVersion">
/// Compatibility version stored on the instance. Per the original
/// documentation, from LUCENE_29 onwards StopFilter.enablePositionIncrement
/// is set to true.
/// </param>
/// <param name="Regex">
/// A regular expression delimiting tokens. When it equals one of the
/// predefined patterns (per <c>EqRegex</c>), the predefined instance is
/// used instead.
/// </param>
/// <param name="toLowerCase">
/// If true, returns tokens after applying lower-casing.
/// </param>
/// <param name="stopWords">
/// If non-null, ignores all tokens that are contained in the given stop
/// set (after previously having applied lower-casing, if applicable). An
/// empty set is treated the same as null. Such a set can, for example, be
/// created with StopFilter.MakeStopSet or WordlistLoader.
/// </param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="Regex"/> is null.
/// </exception>
public PatternAnalyzer(Version matchVersion, Regex Regex, bool toLowerCase, ISet<string> stopWords)
{
    if (Regex == null)
    {
        throw new ArgumentException("Regex must not be null");
    }

    // Swap in the shared predefined instance when the pattern is equivalent.
    if (EqRegex(NON_WORD_PATTERN, Regex))
    {
        Regex = NON_WORD_PATTERN;
    }
    else if (EqRegex(WHITESPACE_PATTERN, Regex))
    {
        Regex = WHITESPACE_PATTERN;
    }

    // An empty stop set carries no information; normalize it to null.
    if (stopWords != null && stopWords.Count == 0)
    {
        stopWords = null;
    }

    this.Regex = Regex;
    this.toLowerCase = toLowerCase;
    this.stopWords = stopWords;
    this.matchVersion = matchVersion;
}
/// <summary>
/// Builds an analyzer with the stop words from the given set. The set is
/// stored as given, without copying.
/// </summary>
/// <param name="matchVersion">
/// Version used to pick the default for <c>enablePositionIncrements</c>.
/// </param>
/// <param name="stopWords">Set of stop words.</param>
public StopAnalyzer(Version matchVersion, ISet<string> stopWords)
{
    this.stopWords = stopWords;
    this.enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
}
/// <summary>
/// Creates a new StandardTokenizer that uses the given attribute factory.
/// </summary>
/// <param name="matchVersion">Version passed on to <c>Init</c>.</param>
/// <param name="factory">Attribute factory handed to the base constructor.</param>
/// <param name="input">Text to tokenize.</param>
public StandardTokenizer(Version matchVersion, AttributeFactory factory, System.IO.TextReader input)
    : base(factory)
{
    InitBlock();

    // The scanner is created before Init runs, as in the original ordering.
    scanner = new StandardTokenizerImpl(input);
    Init(input, matchVersion);
}
/// <summary>
/// Creates a MultiFieldQueryParser.
/// <p/>
/// When a query string is parsed, it constructs a query like this
/// (assuming the query consists of two terms and you specify the two
/// fields <c>title</c> and <c>body</c>):
/// <code>
/// (title:term1 body:term1) (title:term2 body:term2)
/// </code>
/// When the default operator is set to AND, the result will be:
/// <code>
/// +(title:term1 body:term1) +(title:term2 body:term2)
/// </code>
/// In other words, all the query's terms must appear, but it doesn't
/// matter in what fields they appear.
/// </summary>
/// <param name="matchVersion">Version forwarded to the base parser.</param>
/// <param name="fields">Fields to search; stored on the instance.</param>
/// <param name="analyzer">Analyzer forwarded to the base parser.</param>
public MultiFieldQueryParser(Version matchVersion, System.String[] fields, Analyzer analyzer)
    : base(matchVersion, null, analyzer) // null is passed as the base parser's field argument
{
    this.fields = fields;
}
/// <summary>
/// Creates the analyzer with the given version and gram-size bounds.
/// </summary>
/// <param name="version">Version stored on the instance.</param>
/// <param name="mingram">Minimum gram size as text; converted with <c>Convert.ToInt16</c>.</param>
/// <param name="maxgram">Maximum gram size as text; converted with <c>Convert.ToInt16</c>.</param>
public EdgeNGramAnalyzer(Version version, string mingram, string maxgram)
{
    this._version = version;
    this._mingram = System.Convert.ToInt16(mingram);
    this._maxgram = System.Convert.ToInt16(maxgram);
}
//~ Constructors -----------------------------------------------------------

/// <summary>
/// Builds an analyzer that uses <c>DefaultSetHolder.DEFAULT_STOP_SET</c>
/// as its stop words.
/// </summary>
/// <param name="matchVersion">Version forwarded to the set-based constructor.</param>
public CJKAnalyzer(Version matchVersion)
    : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
{
}
/// <summary>
/// Creates a MultiFieldQueryParser. Allows passing of a map with term to
/// boost, and the boost to apply to each term.
/// <p/>
/// When a query string is parsed, it constructs a query like this
/// (assuming the query consists of two terms and you specify the two
/// fields <c>title</c> and <c>body</c>):
/// <code>
/// (title:term1 body:term1) (title:term2 body:term2)
/// </code>
/// When the default operator is set to AND, the result will be:
/// <code>
/// +(title:term1 body:term1) +(title:term2 body:term2)
/// </code>
/// When you pass a boost (title=>5 body=>10) you can get:
/// <code>
/// +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
/// </code>
/// In other words, all the query's terms must appear, but it doesn't
/// matter in what fields they appear.
/// </summary>
/// <param name="matchVersion">Version forwarded to the three-argument constructor.</param>
/// <param name="fields">Fields to search.</param>
/// <param name="analyzer">Analyzer forwarded to the three-argument constructor.</param>
/// <param name="boosts">Boost map; stored on the instance.</param>
public MultiFieldQueryParser(Version matchVersion, string[] fields, Analyzer analyzer, IDictionary<string, float> boosts)
    : this(matchVersion, fields, analyzer)
{
    this.boosts = boosts;
}
/// <summary>
/// Builds an analyzer which removes words in the provided array.
/// </summary>
/// <param name="matchVersion">Version stored on the instance.</param>
/// <param name="stopWords">Stop word array; converted with <c>StopFilter.MakeStopSet</c>.</param>
public CJKAnalyzer(Version matchVersion, params string[] stopWords)
{
    this.stopTable = StopFilter.MakeStopSet(stopWords);
    this.matchVersion = matchVersion;
}
/// <summary>
/// Creates the mapper by forwarding the version to the base constructor.
/// </summary>
/// <param name="version">Version forwarded to the base mapper.</param>
public MoreLikeThisDocumentMapper(Version version)
    : base(version)
{
}
/// <summary>
/// Constructs a query parser.
/// </summary>
/// <param name="matchVersion">
/// Lucene version to match. Position increments are enabled when this is
/// <c>LUCENE_29</c> or later, and disabled otherwise.
/// </param>
/// <param name="f">The default field for query terms.</param>
/// <param name="a">Used to find terms in the query text.</param>
public QueryParser(Version matchVersion, System.String f, Analyzer a)
    : this(new FastCharStream(new System.IO.StringReader("")))
{
    analyzer = a;
    field = f;
    enablePositionIncrements = matchVersion.OnOrAfter(Version.LUCENE_29);
}
/// <summary>
/// Creates the analyzer with the given version and synonym engine.
/// </summary>
/// <param name="currentVersion">Value assigned to <c>CURRENT_VERSION</c>.</param>
/// <param name="engine">Value assigned to <c>SynonymEngine</c>.</param>
public SynonymAnalyzer(Version currentVersion, ISynonymEngine engine)
{
    this.CURRENT_VERSION = currentVersion;
    this.SynonymEngine = engine;
}
/// <summary>
/// Delegates to the <c>Init</c> overload taking a flag: the flag is true
/// when <paramref name="matchVersion"/> is <c>LUCENE_24</c> or later and
/// false otherwise.
/// </summary>
/// <param name="input">Reader forwarded to the other overload.</param>
/// <param name="matchVersion">Version compared against <c>LUCENE_24</c>.</param>
private void Init(System.IO.TextReader input, Version matchVersion)
{
    bool onOrAfter24 = matchVersion.OnOrAfter(Version.LUCENE_24);
    Init(input, onOrAfter24);
}
/// <summary>
/// Creates the parser with per-field boosts and keeps the given index reader.
/// </summary>
/// <param name="matchVersion">Version forwarded to the base parser.</param>
/// <param name="fields">Fields forwarded to the base parser.</param>
/// <param name="analyzer">Analyzer forwarded to the base parser.</param>
/// <param name="boosts">Boost map forwarded to the base parser.</param>
/// <param name="indexReader">Index reader stored on the instance.</param>
public PrefixExpandMultiFieldQueryParser(
    Version matchVersion,
    string[] fields,
    Analyzer analyzer,
    IDictionary<string, float> boosts,
    IndexReader indexReader)
    : base(matchVersion, fields, analyzer, boosts)
{
    this._indexReader = indexReader;
}
/// <summary>
/// Creates the analyzer with the given version and gram-size bounds.
/// </summary>
/// <param name="version">Version stored on the instance.</param>
/// <param name="mingram">Minimum gram size as text; converted with <c>Convert.ToInt16</c>.</param>
/// <param name="maxgram">Maximum gram size as text; converted with <c>Convert.ToInt16</c>.</param>
public EdgeNGramWithStemmingAnalyzer(Version version, string mingram, string maxgram)
{
    this._version = version;
    this._mingram = System.Convert.ToInt16(mingram);
    this._maxgram = System.Convert.ToInt16(maxgram);
}
/// <summary>
/// Creates the parser without boosts and keeps the given index reader.
/// </summary>
/// <param name="matchVersion">Version forwarded to the base parser.</param>
/// <param name="fields">Fields forwarded to the base parser.</param>
/// <param name="analyzer">Analyzer forwarded to the base parser.</param>
/// <param name="indexReader">Index reader stored on the instance.</param>
public PrefixExpandMultiFieldQueryParser(
    Version matchVersion,
    string[] fields,
    Analyzer analyzer,
    IndexReader indexReader)
    : base(matchVersion, fields, analyzer)
{
    this._indexReader = indexReader;
}