/// <summary>
/// Resets <c>CandidateWord</c> and then runs, in a fixed order, every
/// extraction pass whose configured feature collection is non-empty.
/// </summary>
/// <param name="root">Root HTML node handed unchanged to each extraction pass.</param>
/// <remarks>
/// A pass is skipped entirely when its feature collection has length zero.
/// The order of the passes is: leading colon keywords, trailing keywords,
/// mark (wrapping symbol) features, start/end string features, regular
/// expression features.
/// </remarks>
public void Extract(MyRootHtmlNode root)
{
    // Start from an empty candidate set on every call.
    CandidateWord.Clear();

    // Leading-keyword list.
    if (LeadingColonKeyWordList.Length > 0)
    {
        ExtractByColonKeyWord(root);
    }

    // Trailing-keyword list.
    if (TrailingWordList.Length > 0)
    {
        ExtractByTrailingKeyWord(root);
    }

    // Features where the target is wrapped in marker symbols.
    if (MarkFeature.Length > 0)
    {
        ExtractByMarkFeature(root);
    }

    // Start-string / end-string features.
    if (StartEndFeature.Length > 0)
    {
        ExtractByStartEndStringFeature(root);
    }

    // Regular-expression search.
    if (RegularExpressFeature.Length > 0)
    {
        ExtractByRegularExpressFeature(root);
    }
}
// Candidate words
// (The region name below means "plain text".)
#region 常规文本
/// <summary>
/// Plain-text counterpart of the HTML extraction entry point: resets
/// <c>CandidateWord</c> and runs every text-file extraction pass whose
/// configured feature collection is non-empty.
/// </summary>
/// <param name="filename">Path of the text file to extract from.</param>
/// <remarks>
/// When <paramref name="filename"/> does not exist the method returns
/// immediately and <c>CandidateWord</c> is left untouched (it is NOT cleared).
/// The order of the passes is: leading colon keywords, start/end string
/// features, leading colon keywords inside Chinese brackets.
/// </remarks>
public void ExtractFromTextFile(string filename)
{
    // Nothing to do for a missing file; note this happens before the reset below.
    if (!File.Exists(filename))
    {
        return;
    }

    // Start from an empty candidate set.
    CandidateWord.Clear();

    // Leading-keyword list.
    if (LeadingColonKeyWordList.Length > 0)
    {
        ExtractTextByColonKeyWord(filename);
    }

    // Start-string / end-string features.
    if (StartEndFeature.Length > 0)
    {
        ExtractByStartEndStringFeature(filename);
    }

    // Leading keywords that appear inside Chinese brackets.
    if (LeadingColonKeyWordListInChineseBrackets.Length > 0)
    {
        ExtractTextByInChineseBracketsColonKeyWord(filename);
    }
}