/// <summary>
/// Builds one <c>LogSectionParser</c> for every entry of <c>LogSections</c> and publishes
/// the result as the read-only <c>SectionParsers</c> list.
/// </summary>
static LogParser()
        {
            var sectionParsers = new List <LogSectionParser>(LogSections.Count);

            foreach (var section in LogSections)
            {
                var sectionParser = new LogSectionParser
                {
                    // Sections without a per-line callback get a no-op that completes immediately.
                    OnLineCheckAsync = section.OnNewLineAsync ?? ((l, s) => Task.CompletedTask),
                    OnSectionEnd     = section.OnSectionEnd,
                    EndTrigger       = section.EndTrigger.ToLatin8BitEncoding(),
                };

                // Sections that declare extractors get an Aho-Corasick trie keyed by the
                // (Latin 8-bit encoded) data markers; every marker found in a line hands the
                // buffer, the marker's extractor and the parse state to OnExtractorHit.
                var extractors = section.Extractors;
                if (extractors != null && extractors.Count > 0)
                {
                    var markerActions = extractors.Select(extractorPair => new SectionAction(
                                                              extractorPair.Key.ToLatin8BitEncoding(),
                                                              (buffer, state) => OnExtractorHit(buffer, extractorPair.Value, state)));
                    var markerTrie = new AhoCorasickDoubleArrayTrie <Action <string, LogParseState> >(markerActions, true);

                    sectionParser.OnExtract = (line, buffer, state) =>
                    {
                        markerTrie.ParseText(line, hit => { hit.Value(buffer, state); });
                    };
                }

                sectionParsers.Add(sectionParser);
            }

            SectionParsers = sectionParsers.AsReadOnly();
        }
        /// <summary>
        /// Placeholder for writing a binary cache of the trie and its values.
        /// The body has not been ported from Java yet: no file is written and none of the
        /// arguments are used.
        /// </summary>
        /// <param name="path">Base path of the cache file (the Java reference appends <c>Predefine.BIN_EXT</c>).</param>
        /// <param name="trie">Trie that the Java reference serializes after the values.</param>
        /// <param name="entrySet">Key/value entries whose values the Java reference writes out.</param>
        /// <returns>Always <c>true</c>.</returns>
        static bool saveDat(String path, AhoCorasickDoubleArrayTrie <String> trie, HashSet <KeyValuePair <String, String> > entrySet)
        {
            // Java reference implementation, kept until the port is done:
            //
            // try
            // {
            //     DataOutputStream out = new DataOutputStream(new FileOutputStream(path + Predefine.BIN_EXT));
            //     out.writeInt(entrySet.size());
            //     for (Map.Entry<String, String> entry : entrySet)
            //     {
            //         char[] charArray = entry.getValue().toCharArray();
            //         out.writeInt(charArray.length);
            //         for (char c : charArray)
            //         {
            //             out.writeChar(c);
            //         }
            //     }
            //     trie.save(out);
            //     out.close();
            // }
            // catch (Exception e)
            // {
            //     logger.warning("caching values dat " + path + " failed");
            //     return false;
            // }

            return true;
        }
 /// <summary>
 /// Placeholder for longest-match segmentation. The body has not been ported from Java
 /// yet, so neither argument is used.
 /// </summary>
 /// <param name="charArray">Text to scan.</param>
 /// <param name="trie">Trie whose hits supply the replacement values.</param>
 /// <returns>Always <c>null</c>.</returns>
 protected static String segLongest(char[] charArray, AhoCorasickDoubleArrayTrie <String> trie)
 {
     // Java reference implementation, kept until the port is done. It records the longest
     // hit starting at each offset, then concatenates the hit values while copying
     // unmatched characters through unchanged:
     //
     // final String[] wordNet = new String[charArray.length];
     // final int[] lengthNet = new int[charArray.length];
     // trie.parseText(charArray, new AhoCorasickDoubleArrayTrie.IHit<String>()
     // {
     //     @Override
     //     public void hit(int begin, int end, String value)
     //     {
     //         int length = end - begin;
     //         if (length > lengthNet[begin])
     //         {
     //             wordNet[begin] = value;
     //             lengthNet[begin] = length;
     //         }
     //     }
     // });
     // StringBuilder sb = new StringBuilder(charArray.length);
     // for (int offset = 0; offset < wordNet.length; )
     // {
     //     if (wordNet[offset] == null)
     //     {
     //         sb.append(charArray[offset]);
     //         ++offset;
     //         continue;
     //     }
     //     sb.append(wordNet[offset]);
     //     offset += lengthNet[offset];
     // }
     // return sb.toString();

     return null;
 }
 /// <summary>
 /// Placeholder for restoring a trie from its binary cache. The body has not been ported
 /// from Java yet: nothing is read and <paramref name="trie"/> is left untouched.
 /// </summary>
 /// <param name="path">Base path of the cache file (the Java reference appends <c>Predefine.BIN_EXT</c>).</param>
 /// <param name="trie">Trie that the Java reference populates from the cache.</param>
 /// <returns>Always <c>true</c>.</returns>
 static bool loadDat(String path, AhoCorasickDoubleArrayTrie <String> trie)
 {
     // Java reference implementation, kept until the port is done:
     //
     // ByteArray byteArray = ByteArray.createByteArray(path + Predefine.BIN_EXT);
     // if (byteArray == null) return false;
     // int size = byteArray.nextInt();
     // String[] valueArray = new String[size];
     // for (int i = 0; i < valueArray.length; ++i)
     // {
     //     valueArray[i] = byteArray.nextString();
     // }
     // trie.load(byteArray, valueArray);

     return true;
 }
        /// <summary>
        /// Builds a trie that maps every skill in <c>SkillSetMapper.SkillSet</c> to its index,
        /// scans <paramref name="line"/> with it, and checks that at least one skill is found,
        /// that every hit maps back to an existing skill, and that the first hit is "MATLAB".
        /// </summary>
        /// <param name="line">Text to scan for skills.</param>
        public void TestAhoCorasickDoubleArrayTrieForSingleLine(string line)
        {
            // Arrange: each skill is keyed by its text and carries its index as the value.
            var trie         = new AhoCorasickDoubleArrayTrie <string>();
            var indexedSkill = SkillSetMapper.SkillSet.Select((skill, index) => new KeyValuePair <string, string>(skill, index.ToString()));
            trie.Build(indexedSkill, true);

            // Act: collect the value (skill index) of every hit.
            var hitValues = new List <string>();
            trie.ParseText(line, hit =>
            {
                hitValues.Add(hit.Value);
                return true;
            });

            // Assert
            Assert.IsNotEmpty(hitValues);

            var unmappedValues = hitValues.Where(value => SkillSetMapper.SkillSet.ElementAtOrDefault(int.Parse(value)) == null);
            Assert.IsEmpty(unmappedValues);

            var firstIndex = int.Parse(hitValues.FirstOrDefault());
            var keyWord    = SkillSetMapper.SkillSet[firstIndex].Trim();
            Assert.True(keyWord == "MATLAB");
        }
Beispiel #6
0
        /// <summary>
        /// Rebuilds the per-context trigger tries from the enabled <c>Piracystring</c> rows in
        /// the database and swaps the result into <c>filters</c>.
        /// A context without any enabled trigger is mapped to <c>null</c>.
        /// </summary>
        public static void RebuildMatcher()
        {
            var rebuilt = new Dictionary <FilterContext, AhoCorasickDoubleArrayTrie <Piracystring> >();

            using (var db = new BotDb())
            {
                foreach (FilterContext ctx in Enum.GetValues(typeof(FilterContext)))
                {
                    var triggers = db.Piracystring.Where(ps => ps.Disabled == false && ps.Context.HasFlag(ctx)).AsNoTracking().ToList();
                    if (triggers.Count == 0)
                    {
                        rebuilt[ctx] = null;
                        continue;
                    }

                    try
                    {
                        rebuilt[ctx] = new AhoCorasickDoubleArrayTrie <Piracystring>(triggers.ToDictionary(s => s.String, s => s), true);
                    }
                    catch (ArgumentException)
                    {
                        // Report which trigger strings occur more than once for this context.
                        var duplicate = triggers
                                        .GroupBy(entry => entry.String)
                                        .Where(g => g.Count() > 1)
                                        .Select(g => g.Key)
                                        .ToList();
                        Config.Log.Error($"Duplicate triggers defined for Context {ctx}: {string.Join(", ", duplicate)}");

                        // Retry with a dictionary filled through the indexer, so the last row
                        // for a given trigger string overwrites the earlier ones.
                        var lastWins = new Dictionary <string, Piracystring>();
                        foreach (var trigger in triggers)
                        {
                            lastWins[trigger.String] = trigger;
                        }
                        rebuilt[ctx] = new AhoCorasickDoubleArrayTrie <Piracystring>(lastWins, true);
                    }
                }
            }

            filters = rebuilt;
        }
        // For this test to run, create a "Resumes" folder in the test execution directory and put some test résumés in it.
        // Only technical skill matching is set up, so test with technical profiles.
        /// <summary>
        /// Scans every file in the "Resumes" folder with a trie built from
        /// <c>SkillSetMapper.SkillSet</c> and asserts that each file yields at least one hit.
        /// </summary>
        public void TestAhoCorasickDoubleArrayTrieForManyResumes()
        {
            var processor   = new ResumeProcessor(new JsonOutputFormatter());
            var resumePaths = Directory.GetFiles("Resumes").Select(Path.GetFullPath);

            // Each skill is keyed by its text and carries its index as the value.
            var trie          = new AhoCorasickDoubleArrayTrie <string>();
            var indexedSkills = SkillSetMapper.SkillSet.Select((skill, index) => new KeyValuePair <string, string>(skill, index.ToString()));
            trie.Build(indexedSkills, true);

            foreach (var filePath in resumePaths)
            {
                var fileName = Path.GetFileName(filePath);
                var lines    = processor._inputReaders.ReadIntoList(filePath);

                var hitValues = new List <string>();
                foreach (var line in lines)
                {
                    trie.ParseText(line, hit =>
                    {
                        hitValues.Add(hit.Value);
                        return true;
                    });
                }

                Assert.IsNotEmpty(hitValues, $"No match found in file: {filePath}");
            }
        }
Beispiel #8
0
        /// <summary>
        /// Micro-benchmark: builds a trie from <paramref name="list"/>, saves it to
        /// "AhoCorasickDoubleArrayTrie.dat" in the current directory, then parses
        /// <paramref name="txt"/> 100000 times and prints the elapsed milliseconds.
        /// </summary>
        /// <param name="list">Keywords to match; each keyword is used as both key and value.</param>
        /// <param name="txt">Text that is parsed repeatedly.</param>
        private static void AhoCorasickDoubleArrayTrieSearch(List <string> list, string txt)
        {
            const int iterations = 100000;

            var keywords = new Dictionary <string, string>();
            foreach (var keyword in list)
            {
                keywords[keyword] = keyword;
            }

            var matcher = new AhoCorasickDoubleArrayTrie <string>(keywords);

            // File.Create truncates an existing file (File.OpenWrite does not, which would leave
            // stale trailing bytes behind a shorter save), and the using block releases the
            // handle even when Save throws.
            using (var fs = File.Create("AhoCorasickDoubleArrayTrie.dat"))
            {
                matcher.Save(fs, true);
            }

            var watch = Stopwatch.StartNew();
            for (int i = 0; i < iterations; i++)
            {
                matcher.ParseText(txt);
            }
            watch.Stop();

            Console.WriteLine(" AhoCorasickDoubleArrayTrie: " + watch.ElapsedMilliseconds.ToString("N0") + "ms");
        }
 /// <summary>
 /// Reads a dictionary into <paramref name="trie"/>.
 /// Placeholder: the body has not been ported from Java yet, so nothing is loaded and
 /// none of the arguments are used.
 /// </summary>
 /// <param name="path">Path of the dictionary file.</param>
 /// <param name="trie">Trie to populate.</param>
 /// <param name="reverse">Whether the dictionary should be reversed.</param>
 /// <returns>Always <c>true</c>.</returns>
 static bool load(String path, AhoCorasickDoubleArrayTrie <String> trie, bool reverse)
 {
     // Java reference implementation, kept until the port is done:
     //
     // String datPath = path;
     // if (reverse)
     // {
     //     datPath += Predefine.REVERSE_EXT;
     // }
     // if (loadDat(datPath, trie)) return true;
     // // Load from the text file and try to generate the dat cache
     // StringDictionary dictionary = new StringDictionary("=");
     // if (!dictionary.load(path)) return false;
     // if (reverse) dictionary = dictionary.reverse();
     // HashSet<KeyValuePair<String, String>> entrySet = dictionary;
     // dictionary<String, String> map = new Dictionary<String, String>();
     // for (Map.Entry<String, String> entry : entrySet)
     // {
     //     map.put(entry.getKey(), entry.getValue());
     // }
     // logger.info("Building AhoCorasickDoubleArrayTrie, source: " + path);
     // trie.build(map);
     // logger.info("Caching double array " + datPath);
     // saveDat(datPath, trie, entrySet);

     return true;
 }
Beispiel #10
0
        /// <summary>
        /// Counts how often each of the markers ".class", ".method", "interface", ".property"
        /// and ".assembly" occurs in the text file at <paramref name="temp_txt_Location"/> and
        /// stores the counts, plus <paramref name="length"/> under the key "length", in one of
        /// the two result dictionaries.
        /// </summary>
        /// <param name="temp_txt_Location">Path of the text file to scan.</param>
        /// <param name="length">Integer, as text, stored under the "length" key.</param>
        /// <param name="dllPath">Not used by this method.</param>
        /// <param name="entityMissDic1">Receives the results when <paramref name="flag3"/> is <c>true</c>.</param>
        /// <param name="entityMissDic2">Receives the results when <paramref name="flag3"/> is <c>false</c>.</param>
        /// <param name="flag3">Selects which dictionary is filled.</param>
        /// <returns>Always <c>true</c>; failures surface as exceptions.</returns>
        private static bool EntitiesReader(string temp_txt_Location, string length, string dllPath, Dictionary <string, int> entityMissDic1, Dictionary <string, int> entityMissDic2, bool flag3)
        {
            // Read the whole file up front; the using block closes the reader even when a
            // later step (parsing, length conversion) throws.
            string text;
            using (var sr = new StreamReader(temp_txt_Location))
            {
                text = sr.ReadToEnd();
            }

            // Doubles as the trie's keyword set and as the running hit counters.
            var keywords = new Dictionary <string, int>()
            {
                { ".class", 0 },
                { ".method", 0 },
                { "interface", 0 },
                { ".property", 0 },
                { ".assembly", 0 },
            };
            var matcher = new AhoCorasickDoubleArrayTrie <int>(keywords);

            matcher.ParseText(text, (hit) =>
            {
                // Recover the matched keyword from the text and bump its counter; anything
                // that is not one of the known keywords is ignored.
                var matched = text.Substring(hit.Begin, hit.Length);
                int seen;
                if (keywords.TryGetValue(matched, out seen))
                {
                    keywords[matched] = seen + 1;
                }
            });

            // Convert before touching the target so that an invalid length does not leave a
            // half-updated dictionary behind.
            int parsedLength = Convert.ToInt32(length);

            var target = flag3 ? entityMissDic1 : entityMissDic2;
            foreach (var counter in keywords)
            {
                target[counter.Key] = counter.Value;
            }
            target["length"] = parsedLength;

            return true;
        }
Beispiel #11
0
 /// <summary>
 /// Rebuilds <c>matcher</c> from <c>PiracyStrings</c>, using each string as both key and
 /// value. With no strings defined the matcher is set to <c>null</c>.
 /// </summary>
 private static void RebuildMatcher()
 {
     if (PiracyStrings.Count == 0)
     {
         matcher = null;
         return;
     }

     matcher = new AhoCorasickDoubleArrayTrie <string>(PiracyStrings.ToDictionary(s => s, s => s), true);
 }
Beispiel #12
0
 /// <summary>
 /// Creates the skill matcher: a trie in which every entry of <c>SkillSet</c> is a key
 /// whose value is the entry's index rendered as a string.
 /// </summary>
 public SkillSetMapper()
 {
     __skillSetMatcher = new AhoCorasickDoubleArrayTrie <string>();

     var indexedSkills = SkillSet.Select((skill, index) => new KeyValuePair <string, string>(skill, index.ToString()));
     __skillSetMatcher.Build(indexedSkills, true);
 }
 /// <summary>
 /// Loads the contents of <paramref name="path"/> into <paramref name="trie"/> by
 /// delegating to the three-argument overload with <c>reverse</c> set to <c>false</c>.
 /// </summary>
 /// <param name="path">Path of the dictionary file.</param>
 /// <param name="trie">Trie to populate.</param>
 /// <returns>Whatever the three-argument overload returns.</returns>
 static bool load(String path, AhoCorasickDoubleArrayTrie <String> trie)
 {
     return load(path, trie, reverse: false);
 }