示例#1
0
        /// <summary>
        /// Demo: runs TF-IDF keyword extraction over a fixed Chinese passage and prints
        /// up to ten tags, restricted to the POS set <c>Constants.NounAndVerbPos</c>.
        /// </summary>
        public void ExtractTagsDemo2()
        {
            // Sample passage; the verbatim literal keeps the line breaks and indentation as-is.
            string passage = @"在数学和计算机科学/算学之中,算法/算则法(Algorithm)为一个计算的具体步骤,常用于计算、数据处理和自动推理。精确而言,算法是一个表示为有限长列表的有效方法。算法应包含清晰定义的指令用于计算函数。
                         算法中的指令描述的是一个计算,当其运行时能从一个初始状态和初始输入(可能为空)开始,经过一系列有限而清晰定义的状态最终产生输出并停止于一个终态。一个状态到另一个状态的转移不一定是确定的。随机化算法在内的一些算法,包含了一些随机输入。
                         形式化算法的概念部分源自尝试解决希尔伯特提出的判定问题,并在其后尝试定义有效计算性或者有效方法中成形。这些尝试包括库尔特·哥德尔、雅克·埃尔布朗和斯蒂芬·科尔·克莱尼分别于1930年、1934年和1935年提出的递归函数,阿隆佐·邱奇于1936年提出的λ演算,1936年Emil Leon Post的Formulation 1和艾伦·图灵1937年提出的图灵机。即使在当前,依然常有直觉想法难以定义为形式化算法的情况。";

            var tfidf = new TfidfExtractor();

            // Print each extracted tag on its own line.
            foreach (var tag in tfidf.ExtractTags(passage, 10, Constants.NounAndVerbPos))
            {
                Console.WriteLine(tag);
            }
        }
示例#2
0
        /// <summary>
        /// Loads a custom stop-word file into a TF-IDF extractor, extracts up to 30 tags
        /// from the "article.txt" test resource, and prints each tag.
        /// </summary>
        public void TestSetStopWords()
        {
            var extractor = new TfidfExtractor();

            // Swap in a stop-word list that is smaller than the default one.
            var stopWordsPath = TestHelper.GetResourceFilePath("stop_words_test.txt");
            extractor.SetStopWords(stopWordsPath);

            var articlePath = TestHelper.GetResourceFilePath("article.txt");
            var article     = GetFileContents(articlePath);

            foreach (var extracted in extractor.ExtractTags(article, 30))
            {
                Console.WriteLine(extracted);
            }
        }
示例#3
0
        /// <summary>
        /// Extracts keywords from <paramref name="text"/>, splits the text into sentences,
        /// segments every sentence into space-separated tokens, and passes the keywords and
        /// the segmented sentences to <c>GetSentenceList</c>.
        /// </summary>
        /// <param name="text">The article text.</param>
        /// <param name="num">
        /// Count passed to the keyword extractor's <c>ExtractTags</c> call.
        /// NOTE(review): the original comment described this as the number of key sentences;
        /// confirm how <c>GetSentenceList</c> uses it, if at all.
        /// </param>
        /// <param name="type">
        /// Extraction type: 1 uses <c>TfidfExtractor</c>, 2 uses <c>TextRankExtractor</c>;
        /// any other value leaves the keyword list empty.
        /// </param>
        public void GetList(string text, int num, int type)
        {
            // Reset keyword state left over from a previous call.
            keywordList.Clear();

            // Build the keyword list with the requested extractor.
            switch (type)
            {
                case 1:
                    keywordList = new TfidfExtractor().ExtractTags(text, num).ToList();
                    break;

                case 2:
                    keywordList = new TextRankExtractor().ExtractTags(text, num).ToList();
                    break;
            }

            AllsentenceList.Clear();
            keySentenceList.Clear();

            // Treat every line break as a sentence boundary, then split on sentence terminators.
            text = text.Replace(Environment.NewLine, " 。");

            // Trim BEFORE filtering: the replacement above produces whitespace-only fragments
            // (e.g. "。\r\n" becomes "。 。"), which would otherwise survive the emptiness check
            // and end up as bogus sentences consisting of a lone "。".
            AllsentenceList = text.Split('。', '?')
                                  .Select(fragment => fragment.Trim())
                                  .Where(fragment => fragment.Length > 0 && fragment != "undefined")
                                  .ToList();

            List<Sentence> segmented = new List<Sentence>();

            for (int i = 0; i < AllsentenceList.Count; i++)
            {
                // Restore the terminator that Split removed, then segment the sentence.
                AllsentenceList[i] = AllsentenceList[i] + "。";
                var tokens = segmenter.Cut(AllsentenceList[i]);

                segmented.Add(new Sentence
                {
                    Sen   = string.Join(" ", tokens),
                    Index = i
                });
            }

            GetSentenceList(keywordList, segmented);
        }
示例#4
0
        /// <summary>
        /// Picks a price out of <paramref name="text"/>: extracts up to 20 TF-IDF tags with
        /// POS tag "m", takes the longest distinct tag whose length is between 3 and 5
        /// characters, and parses it as a decimal.
        /// </summary>
        /// <param name="text">Text to scan.</param>
        /// <returns>The parsed value, or 0 when no tag qualifies or parsing fails.</returns>
        public static decimal GetHousePrice(string text)
        {
            var tagger   = new TfidfExtractor();
            var numerals = tagger.ExtractTags(text, 20, new List <string> { "m" });

            if (keywordsMissing(numerals))
            {
                return(0);
            }

            // Longest candidate first; the sort is stable, so ties keep extraction order.
            var candidate = numerals.Distinct()
                                    .Where(token => token.Length >= 3 && token.Length <= 5)
                                    .OrderByDescending(token => token.Length)
                                    .FirstOrDefault();

            // TryParse leaves 0 in the out variable on failure (including a null candidate).
            decimal parsed;
            decimal.TryParse(candidate, out parsed);
            return(parsed);
        }

        /// <summary>Returns true when the extractor produced no sequence at all.</summary>
        private static bool keywordsMissing(IEnumerable <string> tags)
        {
            return(tags == null);
        }
示例#5
0
        /// <summary>
        /// Picks a price out of <paramref name="text"/>: extracts up to 20 TF-IDF tags with
        /// POS tag "m", parses each distinct tag as an integer, and returns the first value
        /// that lies in the range 500..30000 (inclusive).
        /// </summary>
        /// <param name="text">Text to scan.</param>
        /// <returns>The first in-range value, or 0 when there is none.</returns>
        public static int GetHousePrice(string text)
        {
            var tagger   = new TfidfExtractor();
            var numerals = tagger.ExtractTags(text, 20, new List <string> { "m" });

            if (numerals == null)
            {
                return(0);
            }

            foreach (var token in numerals.Distinct())
            {
                // Unparseable tokens yield 0, which the range check below rejects.
                int parsed;
                int.TryParse(token, out parsed);

                if (parsed >= 500 && parsed <= 30000)
                {
                    return(parsed);
                }
            }

            return(0);
        }
示例#6
0
        /// <summary>
        /// Reads the file at <paramref name="filename"/>, extracts up to 30 TF-IDF keywords
        /// restricted to <c>Constants.NounAndVerbPos</c>, echoes the file name and each keyword
        /// to the console, and appends the keywords (one per line) to a ".csv" file under
        /// "E:\词云\JieBaResult\".
        /// </summary>
        /// <param name="filename">
        /// Backslash-separated path of the input file. The third path segment (index 2)
        /// names the output file, so paths with fewer than three segments throw
        /// <see cref="IndexOutOfRangeException"/>.
        /// NOTE(review): confirm callers always pass paths of that shape.
        /// </param>
        private static void oprateJieBa(string filename)
        {
            string[] filenames = filename.Split('\\');

            // Destination for the keyword results of this input file.
            string filename1 = "E:\\词云\\JieBaResult\\" + filenames[2] + ".csv";
            string text      = File.ReadAllText(filename);
            var    extractor = new TfidfExtractor();
            var    keywords  = extractor.ExtractTags(text, 30, Constants.NounAndVerbPos);

            Console.WriteLine(filename);

            // Fully qualified so the block does not depend on a "using System.Text;" directive.
            var buffer = new System.Text.StringBuilder();

            foreach (var keyword in keywords)
            {
                buffer.Append(keyword).Append("\n");
                Console.WriteLine(keyword);
            }

            // "using" guarantees the writer is flushed and the file handle released
            // even if Write throws; "true" opens the file in append mode.
            using (var writer = new StreamWriter(filename1, true))
            {
                writer.Write(buffer.ToString());
            }
        }
示例#7
0
        /// <summary>
        /// Extracts the keyword set of an article.
        /// </summary>
        /// <param name="objStr">The article text.</param>
        /// <returns>
        /// Up to 10 TF-IDF tags restricted to <c>Constants.NounAndVerbPos</c> (nouns and verbs).
        /// </returns>
        public static IEnumerable <string> GetArticleKeywords(string objStr)
        {
            var extractor = new TfidfExtractor();

            // Nouns and verbs only.
            var tags = extractor.ExtractTags(objStr, 10, Constants.NounAndVerbPos);

            return(tags);
        }