/// <summary>
/// Computes the SHA-256 digest of the string form of a canonical URL.
/// </summary>
/// <param name="url">Canonical URL; the output of its <c>ToString()</c> is what gets hashed.</param>
/// <returns>SHA-256 hash of the ASCII-encoded URL string.</returns>
private byte[] ComputeHash(CanonicalURL url)
{
    byte[] bytes = Encoding.ASCII.GetBytes(url.ToString());

    // HashAlgorithm implements IDisposable; release it deterministically
    // instead of leaving the cleanup to the finalizer.
    using (SHA256 algo = new SHA256Managed())
    {
        return algo.ComputeHash(bytes);
    }
}
/// <summary>
/// Builds the host + path lookup strings for a canonical URL that have to be
/// checked against the database.
/// </summary>
/// <param name="url">Canonical URL</param>
/// <returns>List of url combinations to be checked</returns>
public IEnumerable<string> GenerateCombinations(CanonicalURL url)
{
    // Host candidates: the exact host first. For non-IP hosts, add the entries
    // returned by SplitHost with the first and the last one dropped, keeping
    // at most the final four of what remains.
    var hostCandidates = new List<string> { url.Host };
    if (!IsIpAddress(url.Host))
    {
        var suffixes = SplitHost(url.Host).Skip(1);
        var suffixCount = suffixes.Count();
        hostCandidates.AddRange(suffixes.Take(suffixCount - 1).Skip(Math.Max(0, suffixCount - 5)));
    }

    // Path candidates: the exact path first, then up to four entries from
    // SplitPath (first entry skipped, remainder taken in reverse order).
    var pathCandidates = new List<string> { url.Path };
    pathCandidates.AddRange(SplitPath(url.Path).Skip(1).Reverse().Take(4));

    var exactPath = pathCandidates[0];
    var hasQuery = !String.IsNullOrEmpty(url.Query);

    var combinations = new List<string>();
    foreach (var host in hostCandidates)
    {
        // The query string is only ever combined with the exact path.
        if (hasQuery)
        {
            combinations.Add(host + exactPath + url.Query);
        }
        combinations.Add(host + exactPath);

        for (var i = 1; i < pathCandidates.Count; i++)
        {
            combinations.Add(host + pathCandidates[i]);
        }
    }

    return combinations;
}
/// <summary>
/// Produces every host/path expression of a canonical URL that must be looked
/// up in the database.
/// </summary>
/// <param name="url">Canonical URL</param>
/// <returns>List of url combinations to be checked</returns>
public IEnumerable<string> GenerateCombinations(CanonicalURL url)
{
    var hosts = new List<string>();
    hosts.Add(url.Host);

    // IP addresses are used as-is; other hosts also contribute a window of the
    // entries SplitHost returns (first and last entry excluded, last four kept).
    if (!IsIpAddress(url.Host))
    {
        var parts = SplitHost(url.Host).Skip(1).ToList();
        int leading = Math.Max(0, parts.Count - 5);
        hosts.AddRange(parts.Take(parts.Count - 1).Skip(leading));
    }

    // Shortened paths: up to four SplitPath entries, first one skipped,
    // remainder in reverse order.
    var shorterPaths = SplitPath(url.Path).Skip(1).Reverse().Take(4).ToList();
    bool withQuery = !String.IsNullOrEmpty(url.Query);

    var result = new List<string>();
    foreach (var host in hosts)
    {
        // Exact path with the query, then without it, then the shortened paths.
        if (withQuery)
        {
            result.Add(host + url.Path + url.Query);
        }
        result.Add(host + url.Path);

        var currentHost = host;
        result.AddRange(shorterPaths.Select(p => currentHost + p));
    }

    return result;
}
/// <summary>
/// A client may request the list of full-length hashes for a hash prefix.
/// This usually occurs when a client is about to download content from a url whose
/// calculated hash starts with a prefix listed in a blacklist.
/// </summary>
/// <param name="targetURL">Canonical URL whose first four hash bytes are posted to the "gethash" endpoint.</param>
/// <returns>
/// List of full length hashes parsed from the response, or <c>null</c> when the
/// response status is not 200 OK.
/// </returns>
public IEnumerable<byte[]> GetFullHashes(CanonicalURL targetURL)
{
    var requestUrl = String.Format(ApiURL, "gethash", ApiKey, ClientVersion);
    var request = (HttpWebRequest)WebRequest.Create(requestUrl);
    if (request.Proxy != null)
    {
        request.Proxy.Credentials = CredentialCache.DefaultNetworkCredentials;
    }
    request.Method = "POST";

    var hash = ComputeHash(targetURL);

    // We only send one hash and assume a prefix size of 4:
    // the header "4:4\n" followed by the first four bytes of the hash.
    var header = Encoding.ASCII.GetBytes("4:4\n");
    using (var body = request.GetRequestStream())
    {
        body.Write(header, 0, header.Length);
        body.Write(hash, 0, 4);
    }

    // The response owns the underlying connection; dispose it on every path
    // so the connection is returned to the pool.
    using (var response = (HttpWebResponse)request.GetResponse())
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return null;
        }

        using (var responseStream = response.GetResponseStream())
        {
            var hashes = ParseFullHashes(responseStream);

            // Copy the result while the stream is still open, in case
            // ParseFullHashes reads it lazily.
            return hashes == null ? null : new List<byte[]>(hashes);
        }
    }
}
/// <summary>
/// Get canonical url according to http://code.google.com/intl/cs-CZ/apis/safebrowsing/developers_guide_v2.html#Canonicalization
/// </summary>
/// <param name="url">Raw URL; the schema is optional and defaults to "http://".</param>
/// <returns>Canonical URL with encoded schema, host, path and query parts.</returns>
/// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
/// <exception cref="ArgumentException">The URL does not match the expected format.</exception>
public static CanonicalURL Get(string url)
{
    if (url == null)
    {
        throw new ArgumentNullException("url");
    }

    //remove CR, tab, LF and vertical tab characters
    url = Regex.Replace(url, @"\r|\t|\n|\v", String.Empty);

    //remove leading and trailing whitespace
    url = url.Trim(' ');

    //remove fragment
    url = Regex.Replace(url, @"#.*", String.Empty);

    //repeatedly unescape
    url = Unescape(url);

    //split into schema, host, path and query
    Match urlMatch = Regex.Match(url, @"^((?:http|https|ftp)\://)?(.+?)(?:(/.*?)|)(\?.+)?$");
    if (!urlMatch.Success)
    {
        throw new ArgumentException("Supplied URL was not in valid format " + url);
    }

    var schema = urlMatch.Groups[1].Value;
    if (String.IsNullOrEmpty(schema))
    {
        schema = "http://";
    }

    //remove all leading and trailing dots
    var host = urlMatch.Groups[2].Value.Trim('.');

    //replace consecutive dots with a single dot
    //(replacing with an empty string would glue the labels together: "a..b" -> "ab")
    host = Regex.Replace(host, @"\.\.+", ".");

    //lower case
    host = host.ToLowerInvariant();

    //a host given as one integer that fits in 32 bits is rewritten as a dotted quad;
    //values outside that range are left untouched instead of being masked into an unrelated address
    long intHost;
    if (Int64.TryParse(host, out intHost) && intHost >= 0 && intHost <= UInt32.MaxValue)
    {
        host = String.Format("{0}.{1}.{2}.{3}",
            (intHost >> 24) & 255,
            (intHost >> 16) & 255,
            (intHost >> 8) & 255,
            intHost & 255);
    }

    var path = urlMatch.Groups[3].Value;

    //collapse "/./" and runs of slashes (of any length) into a single slash
    path = Regex.Replace(path, @"/(?:\.?/)+", "/");

    //resolve "/segment/.." by removing only that segment and the ".." after it;
    //repeat until stable so nested sequences such as "/a/b/../../c" are fully resolved
    string previousPath;
    do
    {
        previousPath = path;
        path = Regex.Replace(path, @"/[^/]+/\.\.(?=/|$)", String.Empty);
    }
    while (path != previousPath);

    if (String.IsNullOrEmpty(path))
    {
        path = "/";
    }

    var query = urlMatch.Groups[4].Value;

    return new CanonicalURL()
    {
        Schema = Encode(schema),
        Host = Encode(host),
        Path = Encode(path),
        Query = Encode(query)
    };
}
/// <summary>
/// Hashes the string representation of a canonical URL with SHA-256.
/// </summary>
/// <param name="url">Canonical URL; <c>url.ToString()</c> is encoded as ASCII and hashed.</param>
/// <returns>The SHA-256 digest of the encoded URL string.</returns>
private byte[] ComputeHash(CanonicalURL url)
{
    byte[] bytes = Encoding.ASCII.GetBytes(url.ToString());

    // The hash algorithm is IDisposable, so dispose it as soon as the digest
    // has been computed rather than waiting for finalization.
    using (SHA256 algo = new SHA256Managed())
    {
        return algo.ComputeHash(bytes);
    }
}
/// <summary>
/// A client may request the list of full-length hashes for a hash prefix.
/// This usually occurs when a client is about to download content from a url whose
/// calculated hash starts with a prefix listed in a blacklist.
/// </summary>
/// <param name="targetURL">Canonical URL; the first four bytes of its hash are sent as the prefix.</param>
/// <returns>
/// List of full length hashes, or <c>null</c> when the server replies with a
/// status other than 200 OK.
/// </returns>
public IEnumerable<byte[]> GetFullHashes(CanonicalURL targetURL)
{
    var requestUrl = String.Format(ApiURL, "gethash", ApiKey, ClientVersion);
    var request = (HttpWebRequest)WebRequest.Create(requestUrl);
    if (request.Proxy != null)
    {
        request.Proxy.Credentials = CredentialCache.DefaultNetworkCredentials;
    }
    request.Method = "POST";

    var hash = ComputeHash(targetURL);

    // Request body: header "4:4\n" followed by the prefix bytes.
    // We only send one hash and assume a prefix size of 4.
    var header = Encoding.ASCII.GetBytes("4:4\n");
    using (var requestStream = request.GetRequestStream())
    {
        requestStream.Write(header, 0, header.Length);
        requestStream.Write(hash, 0, 4);
    }

    // Dispose the response on every exit path so the connection it holds
    // is released.
    using (var response = (HttpWebResponse)request.GetResponse())
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return null;
        }

        using (var responseStream = response.GetResponseStream())
        {
            var hashes = ParseFullHashes(responseStream);

            // Copy into a list before the stream closes, in case the parser
            // enumerates lazily.
            return hashes == null ? null : new List<byte[]>(hashes);
        }
    }
}
/// <summary>
/// Get canonical url according to http://code.google.com/intl/cs-CZ/apis/safebrowsing/developers_guide_v2.html#Canonicalization
/// </summary>
/// <param name="url">Raw URL; when no schema is present "http://" is assumed.</param>
/// <returns>Canonical URL whose schema, host, path and query have been passed through <c>Encode</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
/// <exception cref="ArgumentException">The URL does not match the expected format.</exception>
public static CanonicalURL Get(string url)
{
    if (url == null)
    {
        throw new ArgumentNullException("url");
    }

    //remove CR, tab, LF and vertical tab characters
    url = Regex.Replace(url, @"\r|\t|\n|\v", String.Empty);

    //remove leading and trailing whitespace
    url = url.Trim(' ');

    //remove fragment
    url = Regex.Replace(url, @"#.*", String.Empty);

    //repeatedly unescape
    url = Unescape(url);

    //split into schema, host, path and query
    Match urlMatch = Regex.Match(url, @"^((?:http|https|ftp)\://)?(.+?)(?:(/.*?)|)(\?.+)?$");
    if (!urlMatch.Success)
    {
        throw new ArgumentException("Supplied URL was not in valid format " + url);
    }

    var schema = urlMatch.Groups[1].Value;
    if (String.IsNullOrEmpty(schema))
    {
        schema = "http://";
    }

    //remove all leading and trailing dots
    var host = urlMatch.Groups[2].Value.Trim('.');

    //replace consecutive dots with a single dot; an empty replacement would
    //merge neighbouring labels ("a..b" -> "ab")
    host = Regex.Replace(host, @"\.\.+", ".");

    //lower case
    host = host.ToLowerInvariant();

    //an all-numeric host within the 32-bit range becomes a dotted quad;
    //anything outside that range is left as-is rather than masked into a different address
    long intHost;
    if (Int64.TryParse(host, out intHost) && intHost >= 0 && intHost <= UInt32.MaxValue)
    {
        host = String.Format("{0}.{1}.{2}.{3}",
            (intHost >> 24) & 255,
            (intHost >> 16) & 255,
            (intHost >> 8) & 255,
            intHost & 255);
    }

    var path = urlMatch.Groups[3].Value;

    //collapse "/./" and slash runs of any length into one slash
    path = Regex.Replace(path, @"/(?:\.?/)+", "/");

    //drop each "/segment/.." pair (only that one segment), looping until nothing
    //changes so that stacked sequences like "/a/b/../../c" resolve completely
    string previousPath;
    do
    {
        previousPath = path;
        path = Regex.Replace(path, @"/[^/]+/\.\.(?=/|$)", String.Empty);
    }
    while (path != previousPath);

    if (String.IsNullOrEmpty(path))
    {
        path = "/";
    }

    var query = urlMatch.Groups[4].Value;

    return new CanonicalURL()
    {
        Schema = Encode(schema),
        Host = Encode(host),
        Path = Encode(path),
        Query = Encode(query)
    };
}