コード例 #1
0
        /// <summary>
        /// Segments <paramref name="strWords"/> using the mode selected in
        /// <c>comboBoxCutMode</c> and returns the resulting tokens that are not
        /// present in <c>stopwordsList</c>.
        /// </summary>
        /// <param name="strWords">Text to segment.</param>
        /// <returns>Tokens, in order, with stop words removed.</returns>
        private List <string> WordSplitResult(string strWords)
        {
            IEnumerable <string> pieces;
            var                  mode = comboBoxCutMode.SelectedIndex;

            // Index 0 and 1 use the segmenter; index 2 uses TF-IDF; anything else uses TextRank.
            if (mode == 0)
            {
                pieces = segmenter.Cut(strWords);
            }
            else if (mode == 1)
            {
                pieces = segmenter.CutForSearch(strWords);
            }
            else if (mode == 2)
            {
                pieces = new TfidfExtractor().ExtractTags(strWords, 20, Constants.NounAndVerbPos);
            }
            else
            {
                pieces = new TextRankExtractor().ExtractTags(strWords, 20, Constants.NounAndVerbPos);
            }

            // Joining and re-splitting on a single space means a segment that itself
            // contains a space is broken into several tokens.
            string[]      tokens = string.Join(" ", pieces).Split(' ');
            List <string> kept   = new List <string>();

            foreach (string token in tokens)
            {
                if (stopwordsList.Contains(token))
                {
                    continue;
                }
                kept.Add(token);
            }
            return kept;
        }
コード例 #2
0
        /// <summary>
        /// Runs <c>TextRankExtractor.ExtractTagsWithWeight</c> on a sample passage and
        /// prints each returned word together with its weight.
        /// </summary>
        public void TestTextRankExtractorWithWeight()
        {
            // Sample passage fed to the extractor.
            var text = "此外,公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元,增资后,吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目 2013年,实现营业收入0万元,实现净利润-139.13万元。";

            foreach (var pair in new TextRankExtractor().ExtractTagsWithWeight(text))
            {
                Console.WriteLine("({0}, {1})", pair.Word, pair.Weight);
            }
        }
コード例 #3
0
        /// <summary>
        /// Computes keyword weights for each sub-directory of <c>D:\data\news\</c> and writes
        /// one <c>.js</c> file per sub-directory into <c>D:\data\wordcloud\</c>, for use in
        /// generating a word cloud.
        /// </summary>
        /// <remarks>
        /// One thread is started per sub-directory; the method returns only after every
        /// thread has been joined. Each thread increments the shared counter <c>c</c>
        /// under <c>locker</c> when it finishes.
        /// </remarks>
        public static void WordWeightCal()
        {
            string        wordcloud = @"D:\data\wordcloud\";
            DirectoryInfo theFolder = new DirectoryInfo(@"D:\data\news\");

            if (!Directory.Exists(wordcloud))
            {
                Directory.CreateDirectory(wordcloud);
            }
            DirectoryInfo[] dirInfo = theFolder.GetDirectories();
            var             threads = new List <Thread>();

            foreach (var item in dirInfo)
            {
                // Copy the loop variable so each thread captures its own directory,
                // regardless of the compiler's foreach-capture semantics.
                var dir = item;

                var thread = new Thread(() =>
                {
                    // Concatenate every *.txt file below this directory into one text.
                    var text = new System.Text.StringBuilder();
                    foreach (var file in dir.GetFiles("*.txt", SearchOption.AllDirectories))
                    {
                        using (StreamReader sr = new StreamReader(file.FullName))
                        {
                            text.Append(sr.ReadToEnd());
                        }
                    }

                    var extractor = new TextRankExtractor();
                    var tags      = extractor.ExtractTagsWithWeight(text.ToString());

                    using (StreamWriter sw = new StreamWriter(wordcloud + dir.Name + ".js", false))
                    {
                        sw.WriteLine("words = [");
                        foreach (var result in tags)
                        {
                            // Escape backslashes and single quotes so the word cannot
                            // terminate the single-quoted JavaScript string literal.
                            string word = result.Word.Replace("\\", "\\\\").Replace("'", "\\'");

                            // Format with the invariant culture: a comma decimal separator
                            // from the current culture would produce invalid JavaScript.
                            string size = Convert.ToString(result.Weight, System.Globalization.CultureInfo.InvariantCulture);

                            sw.WriteLine("{ text: '" + word + "', size: " + size + ", href: 'https://www.baidu.com/s?wd=" + word + "' },");
                        }
                        sw.WriteLine("];");
                    }

                    lock (locker)
                    {
                        c++;
                    }
                });
                threads.Add(thread);
                thread.Start();
            }

            // Wait for every per-directory worker before reporting completion.
            foreach (var t in threads)
            {
                t.Join();
            }
            Console.WriteLine("分词完成!");
        }
コード例 #4
0
        /// <summary>
        /// Checks <c>TextRankExtractor.ExtractTags</c> on a sample passage: the default call
        /// must return "吉林", while a call restricted to the "n" part-of-speech tag must
        /// return neither "吉林" nor "实现".
        /// </summary>
        public void TestTextRankExtractorWithoutWeights()
        {
            var text     = "此外,公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元,增资后,吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目 2013年,实现营业收入0万元,实现净利润-139.13万元。";
            var textRank = new TextRankExtractor();

            // Default extraction.
            var defaultTags = textRank.ExtractTags(text);
            Assert.That(defaultTags, Contains.Item("吉林"));

            // Extraction restricted to the "n" part-of-speech tag.
            var nounTags = textRank.ExtractTags(text, allowPos: new [] { "n" });
            Assert.That(nounTags, Is.Not.Contains("吉林"));
            Assert.That(nounTags, Is.Not.Contains("实现"));
        }
コード例 #5
0
 /// <summary>
 /// Lazily creates the two keyword extractors and points <c>Extractor</c> at the one
 /// matching the current <c>Algorithm</c>.
 /// </summary>
 /// <remarks>
 /// Each extractor is guarded by its own null check. Guarding both on <c>Extractor</c>
 /// would assign null to <c>Extractor</c> if it had been set while the concrete
 /// extractors were still uncreated, and would re-create both extractors on every
 /// call whenever <c>Algorithm</c> matches neither value below.
 /// </remarks>
 void init()
 {
     if (TextRankExtractor == null)
     {
         TextRankExtractor = new TextRankExtractor();
     }
     if (TfidfExtractor == null)
     {
         TfidfExtractor = new TfidfExtractor();
     }

     // Any other Algorithm value leaves Extractor unchanged.
     var algorithm = Algorithm;
     if (algorithm == ExtractAlgorithm.TextRank)
     {
         Extractor = TextRankExtractor;
     }
     else if (algorithm == ExtractAlgorithm.TF_IDF)
     {
         Extractor = TfidfExtractor;
     }
 }
コード例 #6
0
        /// <summary>
        /// Extracts keywords from <paramref name="text"/>, splits the text into word-segmented
        /// sentences, and passes both to <c>GetSentenceList</c>.
        /// </summary>
        /// <param name="text">Text to process.</param>
        /// <param name="num">Count passed to the keyword extractor's <c>ExtractTags</c> call.</param>
        /// <param name="type">
        /// Extraction type: 1 uses <c>TfidfExtractor</c>, 2 uses <c>TextRankExtractor</c>;
        /// any other value leaves the keyword list empty.
        /// </param>
        public void GetList(string text, int num, int type)
        {
            keywordList.Clear();

            // Build the keyword list with the requested extractor.
            switch (type)
            {
            case 1:
            {
                TfidfExtractor te = new TfidfExtractor();
                keywordList = te.ExtractTags(text, num).ToList();
            }
            break;

            case 2:
            {
                TextRankExtractor te = new TextRankExtractor();
                keywordList = te.ExtractTags(text, num).ToList();
            }
            break;
            }

            AllsentenceList.Clear();
            keySentenceList.Clear();

            // Treat each line break as a sentence boundary, then split into sentences.
            text = text.Replace(Environment.NewLine, " 。");

            // Trim BEFORE filtering: the " 。" inserted above leaves whitespace-only
            // fragments, which would otherwise survive the emptiness check and turn
            // into sentences consisting solely of "。".
            AllsentenceList = text.Split('。', '?')
                                  .Select(x => x.Trim())
                                  .Where(x => x.Length > 0 && x != "undefined")
                                  .ToList();

            // Segment each sentence into space-separated words, remembering its position.
            List <Sentence> temp = new List <Sentence>();

            for (int i = 0; i < AllsentenceList.Count; i++)
            {
                // Restore the terminator removed by Split.
                AllsentenceList[i] = AllsentenceList[i] + "。";
                var      sentence = segmenter.Cut(AllsentenceList[i]);
                Sentence v        = new Sentence();
                v.Sen   = string.Join(" ", sentence);
                v.Index = i;
                temp.Add(v);
            }
            GetSentenceList(keywordList, temp);
        }