/// <summary>
/// Looks up the given user agent string in the data file and hands the matching
/// crawler / client / OS / device class rows to the corresponding prepare* helpers.
/// </summary>
/// <remarks>
/// Does nothing when <paramref name="_userAgent"/> is null or empty. When the data
/// source is not connected, only the "Unrecognized" defaults are stored.
/// </remarks>
/// <param name="_userAgent">User agent string to analyse.</param>
private void parseUA(string _userAgent)
{
    if (string.IsNullOrEmpty(_userAgent))
    {
        return;
    }

    int client_id = 0;
    int client_class_id = -1;
    int os_id = 0;
    int deviceclass_id = 0;

    // Defaults that stay in place when nothing below recognises the user agent.
    userAgent.UaString = this.ua;
    userAgent.UaClass = "Unrecognized";
    userAgent.UaClassCode = "unrecognized";

    if (!dt.Connected)
    {
        return;
    }

    // The user agent is untrusted input and ends up inside a quoted SQL literal:
    // double every single quote so it cannot terminate the literal early.
    // NOTE(review): a parameterised query would be preferable if the data layer offers one.
    string escapedUserAgent = _userAgent.Replace("'", "''");

    // crawler
    DataTable crawlerList = dt.selectQuery(@"SELECT udger_crawler_list.id as botid,name,ver,ver_major,last_seen,respect_robotstxt,family,family_code,family_homepage,family_icon,vendor,vendor_code,vendor_homepage,crawler_classification,crawler_classification_code FROM udger_crawler_list LEFT JOIN udger_crawler_class ON udger_crawler_class.id = udger_crawler_list.class_id WHERE ua_string = '" + escapedUserAgent + "'");

    if (crawlerList != null && crawlerList.Rows.Count > 0)
    {
        client_class_id = 99;
        this.prepareCrawler(crawlerList.Rows[0]);
    }
    else
    {
        // client
        DataTable clientRegex = dt.selectQuery(@"SELECT class_id,client_id,regstring,name,name_code,homepage,icon,icon_big,engine,vendor,vendor_code,vendor_homepage,uptodate_current_version,client_classification,client_classification_code FROM udger_client_regex JOIN udger_client_list ON udger_client_list.id = udger_client_regex.client_id JOIN udger_client_class ON udger_client_class.id = udger_client_list.class_id ORDER BY sequence ASC");

        if (clientRegex != null)
        {
            foreach (DataRow row in clientRegex.Rows)
            {
                System.Text.RegularExpressions.Regex clientPattern =
                    new PerlRegExpConverter(row["regstring"].ToString(), "", Encoding.UTF8).Regex;

                // A single Match call is enough: a successful match always has at
                // least one group (the whole match), so no group-count check is needed.
                var match = clientPattern.Match(_userAgent);
                if (match.Success)
                {
                    this.prepareClientRegex(row, match, ref client_id, ref client_class_id);
                    break;
                }
            }
        }
    }

    // OS
    DataTable osRegex = dt.selectQuery(@"SELECT os_id,regstring,family,family_code,name,name_code,homepage,icon,icon_big,vendor,vendor_code,vendor_homepage FROM udger_os_regex JOIN udger_os_list ON udger_os_list.id = udger_os_regex.os_id ORDER BY sequence ASC");

    if (osRegex != null)
    {
        foreach (DataRow row in osRegex.Rows)
        {
            System.Text.RegularExpressions.Regex osPattern =
                new PerlRegExpConverter(row["regstring"].ToString(), "", Encoding.UTF8).Regex;

            if (osPattern.IsMatch(_userAgent))
            {
                this.prepareOs(row, ref os_id);
                break;
            }
        }
    }

    // client_os_relation: fall back to the OS related to the detected client
    // when no OS regex matched.
    if (os_id == 0 && client_id != 0)
    {
        DataTable clientOSRelations = dt.selectQuery(@"SELECT os_id,family,family_code,name,name_code,homepage,icon,icon_big,vendor,vendor_code,vendor_homepage FROM udger_client_os_relation JOIN udger_os_list ON udger_os_list.id = udger_client_os_relation.os_id WHERE client_id = '" + client_id.ToString() + "'");

        if (clientOSRelations != null && clientOSRelations.Rows.Count > 0)
        {
            this.prepareOs(clientOSRelations.Rows[0], ref os_id);
        }
    }

    // device
    DataTable device = dt.selectQuery(@"SELECT deviceclass_id,regstring,name,name_code,icon,icon_big FROM udger_deviceclass_regex JOIN udger_deviceclass_list ON udger_deviceclass_list.id=udger_deviceclass_regex.deviceclass_id ORDER BY sequence ASC");

    if (device != null)
    {
        foreach (DataRow row in device.Rows)
        {
            System.Text.RegularExpressions.Regex devicePattern =
                new PerlRegExpConverter(row["regstring"].ToString(), "", Encoding.UTF8).Regex;

            if (devicePattern.IsMatch(_userAgent))
            {
                this.prepareDevice(row, ref deviceclass_id);
                break;
            }
        }
    }

    // Fall back to the device class linked to the client class when no device
    // regex matched and a client (or crawler) class was determined above.
    if (deviceclass_id == 0 && client_class_id != -1)
    {
        DataTable deviceList = dt.selectQuery(@"SELECT deviceclass_id,name,name_code,icon,icon_big FROM udger_deviceclass_list JOIN udger_client_class ON udger_client_class.deviceclass_id = udger_deviceclass_list.id WHERE udger_client_class.id = '" + client_class_id.ToString() + "'");

        if (deviceList != null && deviceList.Rows.Count > 0)
        {
            this.prepareDevice(deviceList.Rows[0], ref deviceclass_id);
        }
    }
}
/// <summary>
/// Check if useragent string and/or IP address is bot
/// </summary>
/// <param name="_useragent">user agent string; null is treated like an empty string</param>
/// <param name="_ip">IP address v4 or v6; null is treated like an empty string</param>
/// <returns>
/// Dictionary that always contains "flag": 0 = lookup completed, 1 = both parameters
/// missing, 2 = IP address is not valid, 3 = data file not found. For flags 1-3 the
/// dictionary also contains "errortext". For flag 0 it contains "is_bot", "bot_by_ua",
/// "bot_by_ip", "harmony_ua_ip", "bot_name" and "bot_udger_url".
/// </returns>
public Dictionary<string, object> isBot(string _useragent = "", string _ip = "")
{
    this.WriteDebug("isBot: start");
    Dictionary<string, object> ret = new Dictionary<string, object>();

    // Treat null exactly like the default "" so a null argument cannot slip past
    // the guards below and reach IsValidIp / CreateMD5.
    bool hasUserAgent = !string.IsNullOrEmpty(_useragent);
    bool hasIp = !string.IsNullOrEmpty(_ip);

    if (!hasUserAgent && !hasIp)
    {
        this.WriteDebug("isBot: Missing mandatory parameter");
        ret.Add("flag", 1);
        ret.Add("errortext", "missing mandatory parameter");
        return ret;
    }

    if (hasIp && !this.IsValidIp(_ip))
    {
        this.WriteDebug("isBot: IP address is not valid");
        ret.Add("flag", 2);
        ret.Add("errortext", "ip address is not valid");
        return ret;
    }

    dt.connect(this);
    if (!dt.Connected)
    {
        this.WriteDebug("Data file not found, download the data manually");
        ret.Add("flag", 3);
        ret.Add("errortext", "data file not found");
        return ret;
    }

    bool botInfo = false;
    bool botInfoUA = false;
    bool botInfoIP = false;
    bool harmony = false;
    string botName = "";
    string family = "";
    string botURL = "";

    if (hasUserAgent)
    {
        this.WriteDebug("isBot: test useragent");
        DataTable uaMatches = dt.selectQuery("SELECT name,family FROM c_robots where md5='" + this.CreateMD5(_useragent) + "'");

        if (uaMatches != null && uaMatches.Rows.Count > 0)
        {
            DataRow uaRow = uaMatches.Rows[0];
            botInfo = true;
            botInfoUA = true;
            botName = uaRow["name"].ToString();
            family = uaRow["family"].ToString();
            botURL = "http://udger.com/resources/ua-list/bot-detail?bot=" + family;
        }
    }

    if (hasIp)
    {
        this.WriteDebug("isBot: test IP address");
        DataTable ipMatches = dt.selectQuery("SELECT name,family from c_robots AS C JOIN bot_ip as B ON C.id=B.robot and B.md5='" + this.CreateMD5(_ip) + "' ");

        if (ipMatches != null && ipMatches.Rows.Count > 0)
        {
            DataRow ipRow = ipMatches.Rows[0];
            string ipFamily = ipRow["family"].ToString();

            botInfo = true;
            botInfoIP = true;

            // "Harmony" means the user agent lookup and the IP lookup point to the same bot family.
            if (family == ipFamily)
            {
                harmony = true;
            }

            // The IP based result takes precedence over the user agent based one.
            botName = ipRow["name"].ToString();
            botURL = "http://udger.com/resources/ua-list/bot-detail?bot=" + ipFamily;
        }
    }

    this.WriteDebug("isBot: completed");
    ret.Add("flag", 0);
    ret.Add("is_bot", botInfo);
    ret.Add("bot_by_ua", botInfoUA);
    ret.Add("bot_by_ip", botInfoIP);
    ret.Add("harmony_ua_ip", harmony);
    ret.Add("bot_name", botName);
    ret.Add("bot_udger_url", botURL);
    return ret;
}