Beispiel #1
0
        /// <summary>
        /// Console entry point: loads the target data set, compares a fixed sample
        /// text against the content of the first target entry, and prints the
        /// comparison result together with the elapsed time in milliseconds.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var targetDataSet = GetTargetDataset();
            // Regenerating the result file via BuildResultDataset(targetDataSet) is disabled.

            // Only the disabled per-item comparison loop consumed this value; the call
            // itself is kept so any work done by GetResultDataset() still happens.
            var resultDataSet = GetResultDataset();
            ITextAnalyser analyser = new SimHashAnalyser();

            // Timing starts after the analyser has been constructed.
            var stopwatch = Stopwatch.StartNew();
            var testText = "您好呀,我是叶敏华";

            // Disabled: a loop that compared testText against item.TextHashVector for
            // every item of resultDataSet and printed each result.

            var firstEntry = targetDataSet.First();
            var tagItem = firstEntry.Content;
            Console.WriteLine($"目标内容是:{tagItem}");

            var result = analyser.GetSimilarityValue(testText, tagItem);
            Console.WriteLine($"海明距离:{result}");

            stopwatch.Stop();
            Console.WriteLine($"用时:{stopwatch.ElapsedMilliseconds} ms");
        }
Beispiel #2
0
        /// <summary>
        /// Writes one line per entry of <paramref name="spamWordses"/> to the file
        /// "result.txt" in the form "{text hash},{QID}", then prints a completion
        /// message to the console.
        /// </summary>
        /// <param name="spamWordses">
        /// Entries to process; each entry's Content is passed through ReplaceHtmlTag
        /// before hashing, and its QID is written next to the hash.
        /// </param>
        static void BuildResultDataset(IEnumerable <SpamWords> spamWordses)
        {
            // A single analyser serves every entry instead of allocating one per iteration.
            // NOTE(review): assumes SimHashAnalyser keeps no per-call state — confirm.
            ITextAnalyser analyser = new SimHashAnalyser();

            // The using block flushes and closes the writer on exit (including when an
            // exception is thrown), so no explicit Close() call is needed.
            using (var writer = new StreamWriter("result.txt"))
            {
                foreach (var entry in spamWordses)
                {
                    var text     = ReplaceHtmlTag(entry.Content);
                    var textHash = analyser.GetTextHashVector(text);
                    writer.WriteLine($"{textHash.ToString()},{entry.QID}");
                }
            }

            Console.WriteLine("生成样例数据成功......");
        }
Beispiel #3
0
 /// <summary>
 /// Returns the likeness value that a <c>SimHashAnalyser</c> reports for the two
 /// strings, multiplied by 100.
 /// </summary>
 /// <param name="str1">First string to compare.</param>
 /// <param name="str2">Second string to compare.</param>
 /// <returns>
 /// The scaled likeness value (presumably a percentage — confirm the range
 /// returned by GetLikenessValue).
 /// </returns>
 private static float GetSimHash(string str1, string str2)
 {
     IAnalyser simHash = new SimHashAnalyser();
     var likeness = simHash.GetLikenessValue(str1, str2);
     return likeness * 100;
 }
Beispiel #4
0
        // Builds the pairwise (2-element) combinations of the given strings.
        // Each pair is stored in a dedicated type, UrlCombination.
        /// <summary>
        /// For every row produced by <c>new Combination(list.Count, 2).GetRows()</c>,
        /// resolves the row to a pair of strings and records the pair together with
        /// its likeness value multiplied by 100.
        /// </summary>
        /// <param name="list">Strings to combine two at a time.</param>
        /// <returns>One <c>UrlCombination</c> per generated row.</returns>
        private static List<UrlCombination> GetCombinatorics(List<string> list)
        {
            var combinations = new List<UrlCombination>();
            IAnalyser simHash = new SimHashAnalyser();

            // Each row describes one "choose 2 out of list.Count" selection
            // (presumably as element indices — confirm against Combination).
            var rows = new Combination(list.Count, 2).GetRows();
            foreach (var row in rows)
            {
                // Permute maps the row back onto the actual strings of the list.
                List<string> pair = Combination.Permute(row, list);

                combinations.Add(new UrlCombination
                {
                    Url1 = pair[0],
                    Url2 = pair[1],
                    // SimHash likeness of the two strings, scaled by 100.
                    SimHash = simHash.GetLikenessValue(pair[0], pair[1]) * 100
                });
            }

            return combinations;
        }