/// <summary>
/// Create instance of Lucy Engine from the YAML or JSON text of a lucy document.
/// </summary>
/// <remarks>
/// Text whose first non-whitespace character is '{' is parsed as JSON; anything else is parsed as YAML.
/// </remarks>
/// <param name="yamlOrJson">YAML or JSON of a lucy document.</param>
/// <param name="exactAnalyzer">Optional analyzer override to use for token matching.</param>
/// <param name="fuzzyAnalyzer">Optional analyzer override to use for fuzzy token matching.</param>
/// <param name="useAllBuiltIns">Enable all built ins (default is to only enable built ins that are referred to). This argument is only useful for design time.</param>
/// <exception cref="ArgumentNullException"><paramref name="yamlOrJson"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="yamlOrJson"/> deserializes to a null document.</exception>
public LucyEngine(string yamlOrJson, Analyzer exactAnalyzer = null, Analyzer fuzzyAnalyzer = null, bool useAllBuiltIns = false)
{
    if (yamlOrJson == null)
    {
        throw new ArgumentNullException(nameof(yamlOrJson));
    }

    LucyDocument document;

    // The char overload of StartsWith is an ordinal comparison, so format detection
    // does not depend on the current culture.
    if (yamlOrJson.TrimStart().StartsWith('{'))
    {
        document = JsonConvert.DeserializeObject<LucyDocument>(yamlOrJson);
    }
    else
    {
        using (var reader = new StringReader(yamlOrJson))
        {
            document = yamlDeserializer.Deserialize<LucyDocument>(reader);
        }
    }

    // Fail here with a descriptive error rather than with a NullReferenceException
    // while the document is being loaded.
    if (document == null)
    {
        throw new ArgumentException("The supplied text did not deserialize to a lucy document.", nameof(yamlOrJson));
    }

    LoadDocument(document, exactAnalyzer, fuzzyAnalyzer, useAllBuiltIns);
}
/// <summary>
/// Initializes this engine from a lucy document: selects the analyzers, creates the pattern
/// parser, turns every entity pattern of the model into a regex, wildcard or regular entity
/// pattern, works out which built-in entities are enabled and finally validates the model.
/// </summary>
/// <param name="model">The lucy document to load.</param>
/// <param name="exactAnalyzer">Analyzer override for token matching; when null the analyzer for the model's locale is used.</param>
/// <param name="fuzzyAnalyzer">Analyzer override for fuzzy token matching; when null the exact override (if any) or a double-metaphone analyzer is used.</param>
/// <param name="useAllBuiltIns">When true every known built-in entity is enabled, not only the referenced ones.</param>
/// <exception cref="ArgumentNullException"><paramref name="model"/> is null.</exception>
private void LoadDocument(LucyDocument model, Analyzer exactAnalyzer, Analyzer fuzzyAnalyzer, Boolean useAllBuiltIns)
{
    if (model == null)
    {
        throw new ArgumentNullException(nameof(model));
    }

    // Default pattern for datetime = (all permutations of datetime).
    const string defaultDatetimePattern = "(@datetimeV2.date|@datetimeV2.time|@datetimeV2.datetime|@datetimeV2.daterange|@datetimeV2.timerange|@datetimeV2.datetimerange|@datetimeV2.duration)";

    // The default datetime pattern must be registered at most once, no matter how many
    // patterns refer to datetime and whether useAllBuiltIns is set as well.
    bool datetimePatternAdded = false;

    this._lucyModel = model;
    this._exactAnalyzer = exactAnalyzer ?? GetAnalyzerForLocale(model.Locale);

    // An explicit fuzzy override always wins. Without one, an explicit exact override is
    // reused for fuzzy matching; otherwise a double-metaphone analyzer is built.
    this._fuzzyAnalyzer = fuzzyAnalyzer ?? exactAnalyzer ?? Analyzer.NewAnonymous((field, textReader) =>
    {
        Tokenizer tokenizer = new StandardTokenizer(LuceneVersion.LUCENE_48, textReader);
        TokenStream stream = new DoubleMetaphoneFilter(tokenizer, 6, false);
        return new TokenStreamComponents(tokenizer, stream);
    });

    this._patternParser = new PatternParser(this._exactAnalyzer, this._fuzzyAnalyzer);

    if (_lucyModel.Macros == null)
    {
        _lucyModel.Macros = new Dictionary<string, string>();
    }

    if (_lucyModel.Entities != null)
    {
        foreach (var entityModel in _lucyModel.Entities)
        {
            if (entityModel.Patterns == null)
            {
                continue;
            }

            foreach (var patternModel in entityModel.Patterns)
            {
                // The first alternative doubles as the resolution value, unless it contains
                // pattern syntax (entity references, operators or a "___" wildcard).
                var first = patternModel.First();
                bool firstHasPatternSyntax =
                    first.Any(ch => ch == '@' || ch == '|' || ch == '+' || ch == '*' || ch == '?') ||
                    first.Contains("___");
                string resolution = firstHasPatternSyntax ? null : first.Trim('~', '(', ')');

                // Macros are expanded first, then the alternatives are processed longest first.
                foreach (var pattern in patternModel.Select(pat => ExpandMacros(pat)).OrderByDescending(pat => pat.Length))
                {
                    // A pattern wrapped in slashes is a regular expression.
                    if (pattern.StartsWith('/') && pattern.EndsWith('/'))
                    {
                        RegexEntityPatterns.Add(new RegexEntityRecognizer(entityModel.Name, pattern.Trim('/')));
                        continue;
                    }

                    var patternMatcher = _patternParser.Parse(pattern, entityModel.FuzzyMatch);
                    if (patternMatcher == null)
                    {
                        continue;
                    }

                    // Each ignore text contributes the token of its first tokenized entity.
                    var ignoreWords = entityModel.Ignore?.Select(ignoreText => ((TokenResolution)Tokenize(ignoreText).First().Resolution).Token)
                        ?? Array.Empty<string>();

                    var entityPattern = new EntityPattern(entityModel.Name, resolution, patternMatcher, ignoreWords);
                    if (patternMatcher.ContainsWildcard())
                    {
                        // we want to process wildcard patterns last
                        WildcardEntityPatterns.Add(entityPattern);
                    }
                    else
                    {
                        EntityPatterns.Add(entityPattern);
                    }
                }
            }
        }

        // Auto detect all references to built in entities. The loop runs over a snapshot
        // because the default datetime pattern may be appended to EntityPatterns inside it.
        foreach (var pattern in this.EntityPatterns.ToList())
        {
            foreach (var reference in pattern.PatternMatcher.GetEntityReferences().Select(r => r.TrimStart('@')))
            {
                if (reference == "datetime" || reference == "datetimeV2")
                {
                    this.BuiltinEntities.Add("datetime");

                    if (!datetimePatternAdded)
                    {
                        EntityPatterns.Add(new EntityPattern("datetime", _patternParser.Parse(defaultDatetimePattern)));
                        datetimePatternAdded = true;
                    }
                }

                if (builtinEntities.Contains(reference) || builtinEntities.Contains(reference.Split('.').First()))
                {
                    this.BuiltinEntities.Add(reference);
                }
            }
        }

        // NOTE(review): external entities are only honored when the model declares entities,
        // exactly as before - confirm whether this nesting is intentional.
        if (model.ExternalEntities != null)
        {
            foreach (var externalEntity in model.ExternalEntities)
            {
                if (builtinEntities.Contains(externalEntity) || builtinEntities.Contains(externalEntity.Split('.').First()))
                {
                    this.BuiltinEntities.Add(externalEntity);
                }
            }
        }
    }

    if (useAllBuiltIns)
    {
        BuiltinEntities = new HashSet<string>(builtinEntities);

        if (!datetimePatternAdded)
        {
            EntityPatterns.Add(new EntityPattern("datetime", _patternParser.Parse(defaultDatetimePattern)));
            datetimePatternAdded = true;
        }
    }

    ValidateModel();
}
/// <summary>
/// Create instance of Lucy Engine from an already constructed lucy document.
/// </summary>
/// <param name="document">The lucy document to load.</param>
/// <param name="exactAnalyzer">Optional analyzer override to use for token matching.</param>
/// <param name="fuzzyAnalyzer">Optional analyzer override to use for fuzzy token matching.</param>
/// <param name="useAllBuiltIns">Enable all built ins (default is to only enable built ins that are referred to). This argument is only useful for design time.</param>
public LucyEngine(LucyDocument document, Analyzer exactAnalyzer = null, Analyzer fuzzyAnalyzer = null, bool useAllBuiltIns = false)
{
    // All initialization is delegated to LoadDocument.
    this.LoadDocument(
        model: document,
        exactAnalyzer: exactAnalyzer,
        fuzzyAnalyzer: fuzzyAnalyzer,
        useAllBuiltIns: useAllBuiltIns);
}