/// <summary>
/// Builds a <see cref="RuleBasedWithPatternsIndex"/> from two tab-separated fixture files.
/// </summary>
/// <remarks>
/// <para>
/// "RuleBasedMatchingWithPatternsData-Patterns.tsv" is turned into the dictionary handed to the factory:
/// column 0 is the key and columns 2 onward become the value list. Rows with fewer than three columns
/// are skipped. Duplicate keys make <c>ToDictionary</c> throw.
/// </para>
/// <para>
/// "RuleBasedMatchingWithPatternsData.tsv" supplies the rules: column 0 holds one or more '|'-separated
/// alternatives, each a ';'-separated keyword list; column 1 is a <see cref="MatchingRuleType"/> name
/// (parsed case-insensitively); columns 2 onward are passed through to <c>MatchingRuleItem.Create</c>.
/// Rows that are too short or carry an unknown rule type are logged and skipped.
/// </para>
/// </remarks>
/// <returns>The populated index; never <c>null</c>.</returns>
/// <exception cref="InvalidOperationException">
/// The factory did not return a <see cref="RuleBasedWithPatternsIndex"/>.
/// </exception>
private RuleBasedWithPatternsIndex CreateRuleBasedIndexWithPatterns()
{
    var patterns = File.ReadLines("RuleBasedMatchingWithPatternsData-Patterns.tsv")
        .Select(x => x.Split('\t'))
        .Where(y => y.Length >= 3)
        .ToDictionary(z => z[0], z => z.Skip(2).ToList());

    var ruleBasedIndex = RuleBasedIndexFactory.Create(5, patterns) as RuleBasedWithPatternsIndex;
    if (ruleBasedIndex == null)
    {
        // Fail here with a clear message instead of a NullReferenceException at the first Add
        // (or a silent null return when the data file contains no valid rows).
        throw new InvalidOperationException(
            "RuleBasedIndexFactory.Create did not return a RuleBasedWithPatternsIndex.");
    }

    foreach (var line in File.ReadLines("RuleBasedMatchingWithPatternsData.tsv"))
    {
        var splits = line.Split('\t');
        if (splits.Length < 3)
        {
            OutputHelper.WriteLine($"[ERROR] Invalid line: {line}");
            continue;
        }

        if (!Enum.TryParse(splits[1], true, out MatchingRuleType ruleType))
        {
            OutputHelper.WriteLine($"[ERROR] Invalid ruletype: {line}");
            continue;
        }

        // Each '|'-separated alternative becomes its own rule item sharing the same trailing columns and type.
        foreach (var temp in splits[0].Split('|'))
        {
            ruleBasedIndex.Add(MatchingRuleItem.Create(
                temp.Split(';').Where(x => !string.IsNullOrEmpty(x)),
                splits.Skip(2),
                ruleType));
        }
    }

    return ruleBasedIndex;
}
/// <summary>
/// Loads rules from "RuleBasedMatchingData.tsv" into a <see cref="RuleBasedIndex"/> and checks retrieval.
/// </summary>
/// <remarks>
/// <para>
/// Each data row needs at least three tab-separated columns: a ';'-separated keyword list, a
/// <see cref="MatchingRuleType"/> name (parsed case-insensitively), and one or more trailing columns
/// passed through to <c>MatchingRuleItem.Create</c>. Invalid rows are logged and skipped.
/// </para>
/// <para>
/// Two checks are performed: every key in <c>Perfect</c> must retrieve a first result of type
/// <see cref="MatchingRuleType.Perfect"/>, and three synthetic variations of every entry in
/// <c>RuleItems</c> must retrieve a first result of type <see cref="MatchingRuleType.Contain"/>.
/// The elapsed time of the retrieval phase (including logging) is written to the output.
/// </para>
/// </remarks>
public void Match()
{
    var ruleBasedIndex = RuleBasedIndexFactory.Create(5) as RuleBasedIndex;

    // Surface an unexpected factory result as an assertion failure rather than a NullReferenceException below.
    Assert.NotNull(ruleBasedIndex);

    foreach (var line in File.ReadLines("RuleBasedMatchingData.tsv"))
    {
        var splits = line.Split('\t');
        if (splits.Length < 3)
        {
            OutputHelper.WriteLine($"[ERROR] Invalid line: {line}");
            continue;
        }

        if (!Enum.TryParse(splits[1], true, out MatchingRuleType ruleType))
        {
            OutputHelper.WriteLine($"[ERROR] Invalid ruletype: {line}");
            continue;
        }

        ruleBasedIndex.Add(MatchingRuleItem.Create(
            splits[0].Split(';').Where(x => !string.IsNullOrEmpty(x)),
            splits.Skip(2),
            ruleType));
    }

    var stopWatch = Stopwatch.StartNew();

    // Every key stored under Perfect must come back as a Perfect match.
    foreach (var perfect in ruleBasedIndex.Perfect)
    {
        var ret = ruleBasedIndex.Retrieve(perfect.Key).FirstOrDefault();
        Assert.NotNull(ret);
        OutputHelper.WriteLine($"Variation={perfect.Key}. Ret={JsonConvert.SerializeObject(ret)}");
        Assert.Equal(MatchingRuleType.Perfect, ret.Type);
    }

    foreach (var ruleItems in ruleBasedIndex.RuleItems)
    {
        foreach (var ruleItem in ruleItems.Value)
        {
            var variations = new List<string>
            {
                // Keywords concatenated with no separator.
                string.Join("", ruleItem.KeyWords),
                // Keywords joined by "---" and wrapped in filler text.
                "fdfd" + string.Join("---", ruleItem.KeyWords) + "fdfdfd",
                // Keywords in random order with filler between them.
                // NOTE: the Guid-based shuffle makes this input differ from run to run.
                string.Join(";fdfd", ruleItem.KeyWords.OrderBy(a => Guid.NewGuid()))
            };

            foreach (var variation in variations)
            {
                var ret = ruleBasedIndex.Retrieve(variation).FirstOrDefault();
                Assert.NotNull(ret);
                OutputHelper.WriteLine($"Variation={variation}. Ret={JsonConvert.SerializeObject(ret)}");
                Assert.Equal(MatchingRuleType.Contain, ret.Type);
            }
        }
    }

    OutputHelper.WriteLine($"Elapsed={stopWatch.ElapsedMilliseconds}");
}