Ejemplo n.º 1
0
        /// <summary>
        /// Scores a URL given only its text: scans the full URL string and the
        /// URL's host name, then hands both result sets to the three-argument
        /// overload of this method.
        /// </summary>
        /// <param name="url_to_test">The URL string to evaluate.</param>
        /// <returns>Whatever the three-argument overload returns for these results.</returns>
        public int quantifyResultsForUrl(String url_to_test)
        {
            // The full-URL scan runs before the host-name scan.
            KernelResults full_url_hits = searchAll(url_to_test);
            KernelResults host_hits     = searchHostNameOfUrl(url_to_test);

            return quantifyResultsForUrl(url_to_test, host_hits, full_url_hits);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs every scanner (bad URL, good URL, postbad URL, bad phrase) over
        /// the given buffer, accumulating all matches into one result object.
        /// </summary>
        /// <param name="buf">Text to scan.</param>
        /// <returns>A new <c>KernelResults</c> populated by the four scanners.</returns>
        public KernelResults searchAll(String buf)
        {
            var hits = new KernelResults();

            // Scanners are applied in a fixed order, all writing to the same target.
            bad_url_scanner.ScanBuffer(buf, hits);
            good_url_scanner.ScanBuffer(buf, hits);
            postbad_url_scanner.ScanBuffer(buf, hits);
            bad_phrase_scanner.ScanBuffer(buf, hits);

            return hits;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Classifies URL data from the scan results of its text and of its links.
        /// </summary>
        /// <remarks>
        /// Only the badness and postbad totals are consulted; goodness is never
        /// examined here, so this method never returns 1.
        /// </remarks>
        /// <param name="text_results">Scan results for the text.</param>
        /// <param name="link_results">Scan results for the links.</param>
        /// <returns>
        /// -1 if either result set has a positive badness or postbad total;
        /// otherwise 0.
        /// </returns>
        public int quantifyResultsForUrlData(KernelResults text_results, KernelResults link_results)
        {
            // Any single positive bad/postbad total is enough to mark the data bad.
            bool is_bad = link_results.getTotalBadness() > 0
                       || link_results.getTotalPostbad() > 0
                       || text_results.getTotalBadness() > 0
                       || text_results.getTotalPostbad() > 0;

            return is_bad ? -1 : 0;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Extracts the host name from a URL string and scans just that host
        /// name with the bad-URL, postbad-URL and bad-phrase scanners.
        /// </summary>
        /// <param name="url_string">
        /// URL to parse with <c>System.Uri</c>; no exception handling is done
        /// here, so any parse failure propagates to the caller.
        /// </param>
        /// <returns>A new <c>KernelResults</c> holding the host-name matches.</returns>
        public KernelResults searchHostNameOfUrl(String url_string)
        {
            var hits = new KernelResults();

            String host_name = new Uri(url_string).Host;

            bad_url_scanner.ScanBuffer(host_name, hits);
            // The good-URL scanner is not run against the host name; its call is disabled:
            //good_url_scanner.ScanBuffer(host_name, hits);
            postbad_url_scanner.ScanBuffer(host_name, hits);
            bad_phrase_scanner.ScanBuffer(host_name, hits);

            return hits;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Combines host-name and full-URL scan results into a single verdict.
        /// </summary>
        /// <remarks>
        /// Rules are applied in order, later ones overriding earlier ones:
        /// host badness (bad), host goodness (good), then any postbad hit on
        /// the host or the URL (bad). Only if none of those fired is the rest
        /// of the URL consulted: URL badness marks it bad, but a good match
        /// whose extracted word equals the URL (optionally prefixed with
        /// "http://" or "http://www.") marks it good and takes precedence.
        /// </remarks>
        /// <param name="url">The URL string being classified.</param>
        /// <param name="hostname_results">Scan results for the URL's host name.</param>
        /// <param name="url_results">Scan results for the full URL.</param>
        /// <returns>-1 if bad, 1 if good, 0 if nothing conclusive was found.</returns>
        public int quantifyResultsForUrl(String url, KernelResults hostname_results,
                                         KernelResults url_results)
        {
            const int BAD     = -1;
            const int UNKNOWN = 0;
            const int GOOD    = 1;

            int verdict = UNKNOWN;

            // if the hostname is known bad then we are bad
            if (hostname_results.getTotalBadness() > 0)
            {
                verdict = BAD;
            }

            // if the hostname is known good then we are good
            if (hostname_results.getTotalGoodness() > 0)
            {
                verdict = GOOD;
            }

            // if the hostname or the full url is known postbad then we are bad
            if (hostname_results.getTotalPostbad() > 0 || url_results.getTotalPostbad() > 0)
            {
                verdict = BAD;
            }

            // The site is already classified; the rest of the url is not consulted.
            if (verdict != UNKNOWN)
            {
                return verdict;
            }

            // if the rest of the url is known bad then we are bad
            // (a url postbad hit cannot occur here: it was handled above)
            if (url_results.getTotalBadness() > 0)
            {
                verdict = BAD;
            }

            // An exact good match on the whole url overrides url badness.
            if (url_results.getTotalGoodness() > 0)
            {
                foreach (TreeNode match in url_results.getMatches())
                {
                    if (match.getFlags().getGoodness() <= 0)
                    {
                        continue;
                    }

                    String extracted = match.extractWord();

                    if (url == extracted ||
                        url == "http://" + extracted ||
                        url == "http://www." + extracted)
                    {
                        verdict = GOOD;
                        break;
                    }
                }
            }

            return verdict;
        }