/// <summary>
/// Scores a URL by scanning the full URL string and its host name, then
/// handing both result sets to the three-argument overload.
/// </summary>
/// <param name="url_to_test">URL string to evaluate.</param>
/// <returns>Whatever the three-argument overload returns for this URL.</returns>
public int quantifyResultsForUrl(String url_to_test)
{
    // The full-URL scan runs before the host-name scan.
    KernelResults fullUrlMatches = searchAll(url_to_test);
    KernelResults hostMatches = searchHostNameOfUrl(url_to_test);

    return quantifyResultsForUrl(url_to_test, hostMatches, fullUrlMatches);
}
/// <summary>
/// Runs every scanner over <paramref name="buf"/> and collects the output
/// in a single, freshly created <c>KernelResults</c>.
/// </summary>
/// <param name="buf">Text to scan.</param>
/// <returns>The result object that all four scanners were given.</returns>
public KernelResults searchAll(String buf)
{
    KernelResults matches = new KernelResults();

    // Scanner order: bad URLs, good URLs, postbad URLs, bad phrases.
    bad_url_scanner.ScanBuffer(buf, matches);
    good_url_scanner.ScanBuffer(buf, matches);
    postbad_url_scanner.ScanBuffer(buf, matches);
    bad_phrase_scanner.ScanBuffer(buf, matches);

    return matches;
}
/// <summary>
/// Scores two scan-result sets: returns "bad" if either one reports any
/// badness or postbad total.
/// </summary>
/// <param name="text_results">First result set to inspect (presumably the scanned text; confirm against callers).</param>
/// <param name="link_results">Second result set to inspect (presumably the scanned link; confirm against callers).</param>
/// <returns>
/// -1 when either result set has a badness total or postbad total above
/// zero; otherwise 0. No goodness total is consulted, so this method never
/// returns a positive value.
/// </returns>
public int quantifyResultsForUrlData(KernelResults text_results, KernelResults link_results)
{
    // The previous version tested link_results.getTotalPostbad() twice in one
    // condition and carried flags that were never read (is_unknown) or never
    // set (is_good). Only the four checks below influenced the result.
    bool linkIsBad = link_results.getTotalBadness() > 0
        || link_results.getTotalPostbad() > 0;
    bool textIsBad = text_results.getTotalBadness() > 0
        || text_results.getTotalPostbad() > 0;

    return (linkIsBad || textIsBad) ? -1 : 0;
}
/// <summary>
/// Extracts the host from <paramref name="url_string"/> and runs the bad-URL,
/// postbad-URL and bad-phrase scanners over that host only.
/// </summary>
/// <param name="url_string">
/// URL to take the host from. It is parsed with <c>new Uri(...)</c>, so a
/// string that constructor rejects results in an exception from here.
/// </param>
/// <returns>A new <c>KernelResults</c> holding what the scanners reported for the host.</returns>
public KernelResults searchHostNameOfUrl(String url_string)
{
    KernelResults hostMatches = new KernelResults();

    String hostName = new Uri(url_string).Host;

    bad_url_scanner.ScanBuffer(hostName, hostMatches);
    // NOTE(review): the good-URL scanner is not run against the host; the call
    // below was already disabled. Confirm whether that is intentional.
    //good_url_scanner.ScanBuffer(hostName, hostMatches);
    postbad_url_scanner.ScanBuffer(hostName, hostMatches);
    bad_phrase_scanner.ScanBuffer(hostName, hostMatches);

    return hostMatches;
}
/// <summary>
/// Combines host-name scan results and full-URL scan results into a single
/// verdict for <paramref name="url"/>.
/// </summary>
/// <param name="url">The URL being judged; compared against good-list matches.</param>
/// <param name="hostname_results">Scan results for the URL's host name.</param>
/// <param name="url_results">Scan results for the full URL string.</param>
/// <returns>-1 for bad, 1 for good, 0 when nothing decisive matched.</returns>
public int quantifyResultsForUrl(String url, KernelResults hostname_results, KernelResults url_results)
{
    const int Bad = -1;
    const int Unknown = 0;
    const int Good = 1;

    int verdict = Unknown;

    // Host-level checks. Each later check overrides the earlier ones, so the
    // effective precedence is: postbad (host or URL) > host good > host bad.
    if (hostname_results.getTotalBadness() > 0)
    {
        verdict = Bad;
    }
    if (hostname_results.getTotalGoodness() > 0)
    {
        verdict = Good;
    }
    if (hostname_results.getTotalPostbad() > 0 || url_results.getTotalPostbad() > 0)
    {
        verdict = Bad;
    }

    // A decisive host-level result ends the evaluation.
    if (verdict != Unknown)
    {
        return verdict;
    }

    // The site itself is unknown, so fall back to the full-URL results.
    // The URL postbad total is not re-tested here: had it been above zero,
    // the check above would already have produced a Bad verdict.
    if (url_results.getTotalBadness() > 0)
    {
        verdict = Bad;
    }

    // An exact good-list match for this URL overrides URL-level badness.
    if (url_results.getTotalGoodness() > 0 && isExactGoodUrlMatch(url, url_results))
    {
        verdict = Good;
    }

    return verdict;
}

// Returns true when some match flagged as good extracts to a word that equals
// the URL exactly, or equals it once prefixed with "http://" or "http://www.".
// Other schemes (e.g. "https://") are not tried.
private static bool isExactGoodUrlMatch(String url, KernelResults url_results)
{
    foreach (TreeNode match in url_results.getMatches())
    {
        TreeFlag flags = match.getFlags();
        if (flags.getGoodness() <= 0)
        {
            continue;
        }

        String word = match.extractWord();
        if (url == word || url == "http://" + word || url == "http://www." + word)
        {
            return true;
        }
    }

    return false;
}