コード例 #1
0
        /// <summary>
        /// Collects candidate phrases from <paramref name="input"/>: runs of consecutive
        /// tokens accepted by <c>IsCapital</c>, plus single tokens accepted by <c>IsCode</c>.
        /// </summary>
        /// <remarks>
        /// The input is first passed through <c>NGramKeyword.removeHTMLWithLineAndSpace</c>,
        /// cut into segments by <c>stringSeparate</c>, normalized per segment with
        /// <c>NormalizeStringToExtract</c> and split on spaces. A run of tokens is kept when
        /// it holds more than two tokens or when <c>ForeignDetect</c> accepts it.
        /// The exact criteria of those helpers are defined elsewhere and not visible here.
        /// </remarks>
        /// <param name="input">Raw text to scan.</param>
        /// <returns>
        /// The distinct detected phrases, with the empty string and the literal
        /// "AloBacsi" removed.
        /// </returns>
        public static List <string> DoDetect(string input)
        {
            // Characters stripped from both ends of every phrase before it is stored.
            char[] trimChars = { ' ', '.', ',', '…' };
            char[] tokenSeparator = { ' ' };

            input = NGramKeyword.removeHTMLWithLineAndSpace(input);
            List <string> R = new List <string>();

            string[] _sp = stringSeparate(input);
            for (int k = 0; k < _sp.Length; k++)
            {
                string[] sp    = NormalizeStringToExtract(_sp[k]).Split(tokenSeparator, StringSplitOptions.RemoveEmptyEntries);
                string   cache = "";   // space-joined tokens of the current run
                int      c     = 0;    // number of tokens in the current run
                for (int i = 0; i < sp.Length; i++)
                {
                    if (IsCapital(sp[i]))
                    {
                        cache += sp[i] + " ";
                        c++;
                        if (IsCode(sp[i]))
                        {
                            R.Add(sp[i].Trim(trimChars));
                        }

                        // Keep extending the run unless this was the final token of the
                        // segment, in which case the run has to be flushed right now.
                        if (i < sp.Length - 1)
                        {
                            continue;
                        }
                    }

                    // The run ended (non-matching token or end of segment). Trim the cache
                    // here, at flush time, so that the most recently appended token is part
                    // of the stored phrase; trimming before the append dropped the last word
                    // of any run that reached the end of a segment.
                    if (c > 2 || ForeignDetect(cache))
                    {
                        R.Add(cache.Trim(trimChars));
                    }
                    cache = "";
                    c     = 0;
                }
            }
            var rx = R.Distinct().ToList();

            // A flush of an empty run can store "", so drop it together with the
            // hard-coded excluded term.
            rx.Remove("");
            rx.Remove("AloBacsi");

            return(rx);
        }
コード例 #2
0
        /// <summary>
        /// Returns the distinct space-separated tokens found in <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// The text is passed through <c>NGramKeyword.removeHTMLWithLineAndSpace</c>, cut
        /// into segments by <c>stringSeparate</c>, and each segment is normalized with
        /// <c>NormalizeStringToExtract</c> before being split on spaces (empty entries
        /// are discarded).
        /// </remarks>
        /// <param name="input">Raw text to tokenize.</param>
        /// <returns>Each token once, in order of first appearance.</returns>
        public static List <string> ExtractKeywordBasic(string input)
        {
            char[] spaceSeparator = { ' ' };
            string cleaned = NGramKeyword.removeHTMLWithLineAndSpace(input);

            var tokens = new List <string>();
            foreach (string segment in stringSeparate(cleaned))
            {
                string normalized = NormalizeStringToExtract(segment);
                string[] pieces = normalized.Split(spaceSeparator, StringSplitOptions.RemoveEmptyEntries);
                tokens.AddRange(pieces);
            }

            return tokens.Distinct().ToList();
        }