Ejemplo n.º 1
0
        /// <summary>
        /// Create instance of Lucy Engine from the text of a lucy document.
        /// </summary>
        /// <param name="yamlOrJson">YAML or JSON of a lucy document.</param>
        /// <param name="exactAnalyzer">Optional analyzer override to use for token matching.</param>
        /// <param name="fuzzyAnalyzer">Optional analyzer override to use for fuzzy token matching.</param>
        /// <param name="useAllBuiltIns">Enable all built ins (default is to only enable built ins that are referred to). This argument is only useful for design time.</param>
        /// <exception cref="ArgumentNullException"><paramref name="yamlOrJson"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="yamlOrJson"/> deserialized to no document at all.</exception>
        public LucyEngine(string yamlOrJson, Analyzer exactAnalyzer = null, Analyzer fuzzyAnalyzer = null, bool useAllBuiltIns = false)
        {
            if (yamlOrJson == null)
            {
                throw new ArgumentNullException(nameof(yamlOrJson));
            }

            LucyDocument document;

            // Text whose first non-whitespace character is '{' is treated as JSON;
            // everything else is handed to the YAML deserializer. The char overload
            // of StartsWith is an ordinal comparison, so the check is culture-independent.
            if (yamlOrJson.TrimStart().StartsWith('{'))
            {
                document = JsonConvert.DeserializeObject<LucyDocument>(yamlOrJson);
            }
            else
            {
                using (var reader = new StringReader(yamlOrJson))
                {
                    document = yamlDeserializer.Deserialize<LucyDocument>(reader);
                }
            }

            // A deserializer can hand back null instead of throwing (presumably for
            // blank input); report that against the argument rather than failing later.
            if (document == null)
            {
                throw new ArgumentException("The supplied text does not contain a lucy document.", nameof(yamlOrJson));
            }

            LoadDocument(document, exactAnalyzer, fuzzyAnalyzer, useAllBuiltIns);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Initializes the engine from a lucy document: selects the analyzers, creates the pattern
        /// parser, compiles every entity pattern into the regex / wildcard / regular pattern lists,
        /// works out which built-in entities are enabled, and finally validates the model.
        /// </summary>
        /// <param name="model">The lucy document to load.</param>
        /// <param name="exactAnalyzer">Optional analyzer override for token matching; when null an analyzer is chosen from the model's locale.</param>
        /// <param name="fuzzyAnalyzer">Optional analyzer override for fuzzy token matching; when null, <paramref name="exactAnalyzer"/> is reused, and when that is null too a double-metaphone analyzer is created.</param>
        /// <param name="useAllBuiltIns">When true, every known built-in entity is enabled regardless of whether the model refers to it.</param>
        /// <exception cref="ArgumentNullException"><paramref name="model"/> is null.</exception>
        private void LoadDocument(LucyDocument model, Analyzer exactAnalyzer, Analyzer fuzzyAnalyzer, Boolean useAllBuiltIns)
        {
            if (model == null)
            {
                throw new ArgumentNullException(nameof(model));
            }

            // Default pattern for datetime = (all permutations of datetime).
            const string DefaultDateTimePattern = "(@datetimeV2.date|@datetimeV2.time|@datetimeV2.datetime|@datetimeV2.daterange|@datetimeV2.timerange|@datetimeV2.datetimerange|@datetimeV2.duration)";

            this._lucyModel = model;

            this._exactAnalyzer = exactAnalyzer ?? GetAnalyzerForLocale(model.Locale);

            // An explicit fuzzy override always wins. Without one, the caller's exact
            // analyzer is reused, and only when neither was supplied is the default
            // double-metaphone analyzer built.
            this._fuzzyAnalyzer = fuzzyAnalyzer ?? exactAnalyzer ??
                                  Analyzer.NewAnonymous((field, textReader) =>
            {
                Tokenizer tokenizer = new StandardTokenizer(LuceneVersion.LUCENE_48, textReader);
                TokenStream stream  = new DoubleMetaphoneFilter(tokenizer, 6, false);
                // Disabled alternative: Beider-Morse phonetic filter.
                //TokenStream stream = new BeiderMorseFilterFactory(new Dictionary<string, string>()
                //    {
                //        { "nameType", NameType.GENERIC.ToString()},
                //        { "ruleType", RuleType.APPROX.ToString() },
                //        { "languageSet", "auto"}
                //    }).Create(tokenizer);
                return new TokenStreamComponents(tokenizer, stream);
            });

            this._patternParser = new PatternParser(this._exactAnalyzer, this._fuzzyAnalyzer);

            if (_lucyModel.Macros == null)
            {
                _lucyModel.Macros = new Dictionary<string, string>();
            }

            // The default datetime pattern must be registered at most once, no matter
            // how many patterns refer to datetime or whether useAllBuiltIns is also set.
            bool defaultDateTimePatternAdded = false;

            if (_lucyModel.Entities != null)
            {
                foreach (var entityModel in _lucyModel.Entities)
                {
                    if (entityModel.Patterns == null)
                    {
                        continue;
                    }

                    foreach (var patternModel in entityModel.Patterns)
                    {
                        // The first pattern doubles as the resolution value, but only when it
                        // is plain text: anything containing @ | + * ? or "___" yields null.
                        var    first      = patternModel.First();
                        string resolution = first.Any(ch => ch == '@' || ch == '|' || ch == '+' || ch == '*' || ch == '?') || first.Contains("___") ? null : first.Trim('~', '(', ')');

                        // Macros are expanded first; the longest expanded patterns are registered first.
                        foreach (var pattern in patternModel.Select(pat => ExpandMacros(pat)).OrderByDescending(pat => pat.Length))
                        {
                            // "/.../" denotes a regular expression pattern.
                            if (pattern.StartsWith('/') && pattern.EndsWith('/'))
                            {
                                RegexEntityPatterns.Add(new RegexEntityRecognizer(entityModel.Name, pattern.Trim('/')));
                                continue;
                            }

                            var patternMatcher = _patternParser.Parse(pattern, entityModel.FuzzyMatch);
                            if (patternMatcher == null)
                            {
                                continue;
                            }

                            // Each ignore entry contributes the token of its first tokenized element.
                            var ignoreWords = entityModel.Ignore?.Select(ignoreText => ((TokenResolution)Tokenize(ignoreText).First().Resolution).Token) ?? Array.Empty<string>();

                            // Trace.TraceInformation($"{expandedPattern} => {patternMatcher}");
                            if (patternMatcher.ContainsWildcard())
                            {
                                // we want to process wildcard patterns last
                                WildcardEntityPatterns.Add(new EntityPattern(entityModel.Name, resolution, patternMatcher, ignoreWords));
                            }
                            else
                            {
                                EntityPatterns.Add(new EntityPattern(entityModel.Name, resolution, patternMatcher, ignoreWords));
                            }
                        }
                    }
                }

                // Auto detect all references to built in entities.
                // Iterate over a snapshot because the loop body may append to EntityPatterns.
                foreach (var pattern in this.EntityPatterns.ToList())
                {
                    foreach (var reference in pattern.PatternMatcher.GetEntityReferences().Select(r => r.TrimStart('@')))
                    {
                        if (reference == "datetime" || reference == "datetimeV2")
                        {
                            this.BuiltinEntities.Add("datetime");

                            if (!defaultDateTimePatternAdded)
                            {
                                EntityPatterns.Add(new EntityPattern("datetime", _patternParser.Parse(DefaultDateTimePattern)));
                                defaultDateTimePatternAdded = true;
                            }
                        }

                        // A reference counts as built-in when either its full name or the part
                        // before the first '.' is a known built-in entity.
                        if (builtinEntities.Contains(reference) ||
                            builtinEntities.Contains(reference.Split('.').First()))
                        {
                            this.BuiltinEntities.Add(reference);
                        }
                    }
                }

                // NOTE(review): external entities are only examined when the model declares
                // Entities; confirm that skipping them otherwise is intended.
                if (model.ExternalEntities != null)
                {
                    foreach (var externalEntity in model.ExternalEntities)
                    {
                        if (builtinEntities.Contains(externalEntity) ||
                            builtinEntities.Contains(externalEntity.Split('.').First()))
                        {
                            this.BuiltinEntities.Add(externalEntity);
                        }
                    }
                }
            }

            if (useAllBuiltIns)
            {
                BuiltinEntities = new HashSet<string>(builtinEntities);

                if (!defaultDateTimePatternAdded)
                {
                    EntityPatterns.Add(new EntityPattern("datetime", _patternParser.Parse(DefaultDateTimePattern)));
                    defaultDateTimePatternAdded = true;
                }
            }

            ValidateModel();
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Create instance of Lucy Engine from an already constructed lucy document.
 /// </summary>
 /// <param name="document">a lucy document.</param>
 /// <param name="exactAnalyzer">Optional analyzer override to use for token matching.</param>
 /// <param name="fuzzyAnalyzer">Optional analyzer override to use for fuzzy token matching.</param>
 /// <param name="useAllBuiltIns">Enable all built ins (default is to only enable built ins that are referred to). This argument is only useful for design time.</param>
 /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
 public LucyEngine(LucyDocument document, Analyzer exactAnalyzer = null, Analyzer fuzzyAnalyzer = null, bool useAllBuiltIns = false)
 {
     // Fail at the public boundary with the caller's parameter name instead of
     // letting a null document blow up somewhere inside the loading logic.
     if (document == null)
     {
         throw new ArgumentNullException(nameof(document));
     }

     LoadDocument(document, exactAnalyzer, fuzzyAnalyzer, useAllBuiltIns);
 }