/// <summary>
/// Scans <paramref name="input"/> for runs of consecutive tokens accepted by
/// <c>IsCapital</c> and returns the distinct phrases found.
/// </summary>
/// <remarks>
/// The input is first passed through <c>NGramKeyword.removeHTMLWithLineAndSpace</c>,
/// split into segments by <c>stringSeparate</c>, and each segment is normalized with
/// <c>NormalizeStringToExtract</c> before being split on spaces.
/// A run is emitted when it contains more than two tokens or when
/// <c>ForeignDetect</c> accepts it. Any single token accepted by <c>IsCode</c>
/// is emitted on its own as well. Emitted text has spaces, '.', ',' and '…'
/// trimmed from both ends.
/// </remarks>
/// <param name="input">Raw text to analyze.</param>
/// <returns>
/// Distinct detected phrases, with the empty string and the literal
/// "AloBacsi" removed.
/// </returns>
public static List<string> DoDetect(string input)
{
    input = NGramKeyword.removeHTMLWithLineAndSpace(input);

    // Hoisted so the arrays are not re-allocated for every token.
    char[] trimChars = { ' ', '.', ',', '…' };
    char[] tokenSeparators = { ' ' };

    List<string> R = new List<string>();
    string[] segments = stringSeparate(input);

    for (int k = 0; k < segments.Length; k++)
    {
        string[] sp = NormalizeStringToExtract(segments[k])
            .Split(tokenSeparators, StringSplitOptions.RemoveEmptyEntries);

        string cache = ""; // current run of capitalized tokens, space separated
        int c = 0;         // number of tokens in the current run

        for (int i = 0; i < sp.Length; i++)
        {
            bool isCapital = IsCapital(sp[i]);

            if (isCapital)
            {
                cache += sp[i] + " ";
                c++;

                if (IsCode(sp[i]))
                {
                    R.Add(sp[i].Trim(trimChars));
                }
            }

            // A run ends at the first non-capital token or at the end of the segment.
            if (!isCapital || i == sp.Length - 1)
            {
                if (c > 2 || ForeignDetect(cache))
                {
                    // Trim the run as it stands now. The previous code trimmed it
                    // before the current token was appended, so a run that reached
                    // the end of the segment lost its final word.
                    R.Add(cache.Trim(trimChars));
                }

                cache = "";
                c = 0;
            }
        }
    }

    var rx = R.Distinct().ToList();
    rx.Remove("");
    rx.Remove("AloBacsi");
    return rx;
}
/// <summary>
/// Returns the distinct space-separated tokens of <paramref name="input"/>.
/// </summary>
/// <remarks>
/// The input is passed through <c>NGramKeyword.removeHTMLWithLineAndSpace</c>,
/// split into segments by <c>stringSeparate</c>, and every segment is normalized
/// with <c>NormalizeStringToExtract</c> before being split on spaces
/// (empty entries are discarded).
/// </remarks>
/// <param name="input">Raw text to tokenize.</param>
/// <returns>Distinct tokens collected across all segments.</returns>
public static List<string> ExtractKeywordBasic(string input)
{
    char[] tokenSeparators = { ' ' };
    string cleaned = NGramKeyword.removeHTMLWithLineAndSpace(cleanedInput(input));

    return stringSeparate(cleaned)
        .SelectMany(segment => NormalizeStringToExtract(segment)
            .Split(tokenSeparators, StringSplitOptions.RemoveEmptyEntries))
        .Distinct()
        .ToList();

    // Identity pass-through; keeps the pipeline above reading top to bottom.
    string cleanedInput(string raw) => raw;
}