Canonical url according to http://code.google.com/intl/cs-CZ/apis/safebrowsing/developers_guide_v2.html#Canonicalization
Esempio n. 1
0
        /// <summary>
        /// Computes the SHA-256 digest of the string form of a canonical URL.
        /// </summary>
        /// <param name="url">URL whose <c>ToString()</c> text is hashed.</param>
        /// <returns>The digest bytes returned by the SHA-256 implementation.</returns>
        private byte[] ComputeHash(CanonicalURL url)
        {
            // The URL text is converted to bytes with the ASCII encoding before hashing.
            byte[] bytes = Encoding.ASCII.GetBytes(url.ToString());

            // Hash algorithm objects are IDisposable; release the instance as soon as
            // the digest has been produced instead of leaving it to the finalizer.
            using (SHA256 algo = SHA256.Create())
            {
                return algo.ComputeHash(bytes);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Generate valid url combinations that have to be checked against the database
        /// </summary>
        /// <remarks>
        /// For every candidate host the result contains, in this order: the full path followed by
        /// the query (only when the query is non-empty), the full path alone, and then each of the
        /// shorter path candidates.
        /// </remarks>
        /// <param name="url">Canonical URL</param>
        /// <returns>List of url combinations to be checked</returns>
        public IEnumerable <string> GenerateCombinations(CanonicalURL url)
        {
            // The exact host is always the first candidate.
            var hostnames = new List <string> { url.Host };

            // An IP address host is only checked as-is; other hosts also contribute
            // some of the components returned by SplitHost.
            if (!IsIpAddress(url.Host))
            {
                // Materialize once: the query used to be enumerated three times
                // (two Count() calls plus the Take itself).
                var components = SplitHost(url.Host).Skip(1).ToList();

                // Drop the last component, then keep at most the final four of what remains.
                hostnames.AddRange(components
                                   .Take(components.Count - 1)
                                   .Skip(Math.Max(0, components.Count - 5)));
            }

            // Shorter path candidates: SplitPath results without the first entry,
            // reversed, limited to four.
            var subpaths = SplitPath(url.Path).Skip(1).Reverse().Take(4).ToList();

            // The full path is the same for every host, so read it once.
            var path     = url.Path;
            var hasQuery = !String.IsNullOrEmpty(url.Query);

            var list = new List <string>();

            foreach (var hostname in hostnames)
            {
                if (hasQuery)
                {
                    list.Add(hostname + path + url.Query);
                }

                list.Add(hostname + path);

                foreach (var subpath in subpaths)
                {
                    list.Add(hostname + subpath);
                }
            }

            return(list);
        }
        /// <summary>
        /// Generate valid url combinations that have to be checked against the database
        /// </summary>
        /// <remarks>
        /// Output order per host: full path plus query (skipped when the query is empty),
        /// full path, then every shorter path candidate.
        /// </remarks>
        /// <param name="url">Canonical URL</param>
        /// <returns>List of url combinations to be checked</returns>
        public IEnumerable<string> GenerateCombinations(CanonicalURL url)
        {
            // The exact host always comes first.
            var hostnames = new List<string> { url.Host };

            // Only non-IP hosts are broken into additional host candidates.
            if (!IsIpAddress(url.Host))
            {
                // Snapshot the deferred query so it is evaluated a single time
                // rather than once per Count()/Take() call.
                var parts = SplitHost(url.Host).Skip(1).ToList();
                var withoutLast = parts.Take(parts.Count - 1);

                // Keep at most the last four of the remaining components.
                hostnames.AddRange(withoutLast.Skip(Math.Max(0, parts.Count - 5)));
            }

            // Full path first, followed by up to four shorter candidates taken from
            // the reversed SplitPath result (its first entry is skipped).
            var fullPath = url.Path;
            var shorterPaths = SplitPath(url.Path).Skip(1).Reverse().Take(4).ToList();

            var list = new List<string>();
            foreach (var hostname in hostnames)
            {
                if (!String.IsNullOrEmpty(url.Query))
                {
                    list.Add(hostname + fullPath + url.Query);
                }

                list.Add(hostname + fullPath);
                list.AddRange(shorterPaths.Select(shorter => hostname + shorter));
            }

            return list;
        }
Esempio n. 4
0
        /// <summary>
        /// A client may request the list of full-length hashes for a hash prefix.
        /// This usually occurs when a client is about to download content from a url whose
        /// calculated hash starts with a prefix listed in a blacklist.
        /// </summary>
        /// <remarks>
        /// Sends a POST request whose body is the header <c>4:4\n</c> followed by the first
        /// four bytes of the URL hash, then hands the response stream to <c>ParseFullHashes</c>.
        /// </remarks>
        /// <param name="targetURL">Canonical URL whose hash prefix is looked up.</param>
        /// <returns>List of full length hashes, or null when the response status is not OK.</returns>
        public IEnumerable <byte[]> GetFullHashes(CanonicalURL targetURL)
        {
            // Number of leading hash bytes sent to the server.
            const int prefixLength = 4;

            var requestUrl = String.Format(ApiURL, "gethash", ApiKey, ClientVersion);

            var request = (HttpWebRequest)WebRequest.Create(requestUrl);

            if (request.Proxy != null)
            {
                request.Proxy.Credentials = CredentialCache.DefaultNetworkCredentials;
            }

            request.Method = "POST";

            var hash = ComputeHash(targetURL);

            //we only send one hash and assume prefix size of 4
            var header = Encoding.ASCII.GetBytes("4:4\n");

            // Dispose the request stream even when a write fails.
            using (var body = request.GetRequestStream())
            {
                body.Write(header, 0, header.Length);
                body.Write(hash, 0, prefixLength);
            }

            // The response holds a network connection; release it on every path.
            using (var response = (HttpWebResponse)request.GetResponse())
            {
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    return(null);
                }

                using (var stream = response.GetResponseStream())
                {
                    IEnumerable <byte[]> hashes = ParseFullHashes(stream);

                    // Copy the result before the stream is disposed, in case the parser
                    // produces its items lazily while reading from the stream.
                    return(hashes == null ? null : new List <byte[]>(hashes));
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Get canonical url according to http://code.google.com/intl/cs-CZ/apis/safebrowsing/developers_guide_v2.html#Canonicalization
        /// </summary>
        /// <param name="url">Raw URL text; the scheme is optional and defaults to <c>http://</c>.</param>
        /// <returns>A <c>CanonicalURL</c> whose schema, host, path and query have each been passed through <c>Encode</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        /// <exception cref="ArgumentException">The cleaned-up URL does not match the expected format.</exception>
        public static CanonicalURL Get(string url)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }

            //remove CR, tab, LF and vertical-tab characters
            url = Regex.Replace(url, @"\r|\t|\n|\v", String.Empty);

            //remove leading and trailing spaces
            url = url.Trim(' ');

            //remove fragment
            url = Regex.Replace(url, @"#.*", String.Empty);

            //unescape (the Unescape helper is expected to do this repeatedly)
            url = Unescape(url);

            //split into optional scheme, host, optional path and optional query
            Match urlMatch = Regex.Match(url, @"^((?:http|https|ftp)\://)?(.+?)(?:(/.*?)|)(\?.+)?$");

            if (!urlMatch.Success)
            {
                throw new ArgumentException("Supplied URL was not in valid format " + url);
            }

            var schema = urlMatch.Groups[1].Value;

            if (String.IsNullOrEmpty(schema))
            {
                schema = "http://";
            }

            //remove all leading and trailing dots
            var host = urlMatch.Groups[2].Value.Trim('.');

            //replace consecutive dots with a single dot
            //(this used to delete the dots entirely, gluing host labels together)
            host = Regex.Replace(host, @"\.{2,}", ".");

            //lower case
            host = host.ToLowerInvariant();

            //a host that is a single integer is rewritten as four dot-separated
            //byte values, most significant byte first
            long intHost;

            if (Int64.TryParse(host, out intHost))
            {
                host = String.Format("{0}.{1}.{2}.{3}", (intHost >> 24) & 255,
                                     (intHost >> 16) & 255,
                                     (intHost >> 8) & 255,
                                     (intHost) & 255);
            }

            var path = urlMatch.Groups[3].Value;

            //collapse every run of slashes into a single slash
            path = Regex.Replace(path, @"/{2,}", "/");

            //drop "." segments: "/./" becomes "/"
            path = Regex.Replace(path, @"/\.(?=/)", String.Empty);

            //resolve ".." segments: remove each one together with the segment before it.
            //A ".." directly under the root has no parent and is simply dropped.
            //Repeat until stable because removing one pair can expose another.
            string unresolved;

            do
            {
                unresolved = path;
                path       = Regex.Replace(path, @"(?:^|/(?!\.\.(?:/|$))[^/]+)/\.\.(?:/|$)", "/");
            }
            while (path != unresolved);

            if (String.IsNullOrEmpty(path))
            {
                path = "/";
            }

            //the query (including its leading '?') is taken over unchanged
            var query = urlMatch.Groups[4].Value;

            var curl = new CanonicalURL()
            {
                Schema = Encode(schema),
                Host   = Encode(host),
                Path   = Encode(path),
                Query  = Encode(query)
            };

            return(curl);
        }
        /// <summary>
        /// Hashes the textual form of a canonical URL with SHA-256.
        /// </summary>
        /// <param name="url">URL to hash; its <c>ToString()</c> result is the hashed text.</param>
        /// <returns>The SHA-256 digest of the ASCII-encoded URL text.</returns>
        private byte[] ComputeHash(CanonicalURL url)
        {
            byte[] bytes = Encoding.ASCII.GetBytes(url.ToString());

            // The algorithm object is disposable, so scope it with using
            // rather than leaving cleanup to the garbage collector.
            using (SHA256 algo = SHA256.Create())
            {
                return algo.ComputeHash(bytes);
            }
        }
        /// <summary>
        /// A client may request the list of full-length hashes for a hash prefix. 
        /// This usually occurs when a client is about to download content from a url whose 
        /// calculated hash starts with a prefix listed in a blacklist.
        /// </summary>
        /// <remarks>
        /// The POST body consists of the header <c>4:4\n</c> and the first four bytes of the
        /// URL hash; the response stream is parsed by <c>ParseFullHashes</c>.
        /// </remarks>
        /// <param name="targetURL">Canonical URL whose hash prefix is looked up.</param>
        /// <returns>List of full length hashes, or null when the response status is not OK.</returns>
        public IEnumerable<byte[]> GetFullHashes(CanonicalURL targetURL)
        {
            // Number of leading hash bytes sent to the server.
            const int prefixLength = 4;

            var requestUrl = String.Format(ApiURL, "gethash", ApiKey, ClientVersion);

            var request = (HttpWebRequest)WebRequest.Create(requestUrl);

            if (request.Proxy != null)
            {
                request.Proxy.Credentials = CredentialCache.DefaultNetworkCredentials;
            }

            request.Method = "POST";

            var hash = ComputeHash(targetURL);

            //we only send one hash and assume prefix size of 4
            var header = Encoding.ASCII.GetBytes("4:4\n");

            // using guarantees the request stream is closed even if a write throws.
            using (var body = request.GetRequestStream())
            {
                body.Write(header, 0, header.Length);
                body.Write(hash, 0, prefixLength);
            }

            // Dispose the response on both the early-return and the success path.
            using (var response = (HttpWebResponse)request.GetResponse())
            {
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    return null;
                }

                using (var stream = response.GetResponseStream())
                {
                    IEnumerable<byte[]> hashes = ParseFullHashes(stream);

                    // Take a copy while the stream is still open; a lazily evaluated
                    // parser result would otherwise read from a disposed stream.
                    return hashes == null ? null : new List<byte[]>(hashes);
                }
            }
        }
        /// <summary>
        /// Get canonical url according to http://code.google.com/intl/cs-CZ/apis/safebrowsing/developers_guide_v2.html#Canonicalization
        /// </summary>
        /// <param name="url">Raw URL text; when no scheme is present <c>http://</c> is assumed.</param>
        /// <returns>A <c>CanonicalURL</c> built from the encoded schema, host, path and query.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        /// <exception cref="ArgumentException">The cleaned-up URL does not match the expected format.</exception>
        public static CanonicalURL Get(string url)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }

            //remove CR, tab, LF and vertical-tab characters
            url = Regex.Replace(url, @"\r|\t|\n|\v", String.Empty);

            //remove leading and trailing spaces
            url = url.Trim(' ');

            //remove fragment
            url = Regex.Replace(url, @"#.*", String.Empty);

            //unescape (the Unescape helper is expected to do this repeatedly)
            url = Unescape(url);

            //split into optional scheme, host, optional path and optional query
            Match urlMatch = Regex.Match(url, @"^((?:http|https|ftp)\://)?(.+?)(?:(/.*?)|)(\?.+)?$");

            if (!urlMatch.Success)
            {
                throw new ArgumentException("Supplied URL was not in valid format " + url);
            }

            var schema = urlMatch.Groups[1].Value;
            if (String.IsNullOrEmpty(schema))
            {
                schema = "http://";
            }

            //remove all leading and trailing dots
            var host = urlMatch.Groups[2].Value.Trim('.');

            //replace consecutive dots with a single dot
            //(previously the dots were removed altogether, merging host labels)
            host = Regex.Replace(host, @"\.{2,}", ".");

            //lower case
            host = host.ToLowerInvariant();

            //a purely numeric host is expanded into four dot-separated byte values,
            //most significant byte first
            long intHost;
            if (Int64.TryParse(host, out intHost))
            {
                host = String.Format("{0}.{1}.{2}.{3}", (intHost >> 24) & 255,
                                                        (intHost >> 16) & 255,
                                                        (intHost >> 8) & 255,
                                                        (intHost) & 255);
            }

            var path = urlMatch.Groups[3].Value;

            //collapse every run of slashes into a single slash
            path = Regex.Replace(path, @"/{2,}", "/");

            //drop "." segments: "/./" becomes "/"
            path = Regex.Replace(path, @"/\.(?=/)", String.Empty);

            //resolve ".." segments by removing each one with its preceding segment;
            //a ".." directly under the root is dropped. Loop until nothing changes,
            //since resolving one pair may bring another pair together.
            string before;
            do
            {
                before = path;
                path = Regex.Replace(path, @"(?:^|/(?!\.\.(?:/|$))[^/]+)/\.\.(?:/|$)", "/");
            }
            while (path != before);

            if (String.IsNullOrEmpty(path))
            {
                path = "/";
            }

            //the query (including its leading '?') is taken over unchanged
            var query = urlMatch.Groups[4].Value;

            var curl = new CanonicalURL()
            {
                Schema = Encode(schema),
                Host = Encode(host),
                Path = Encode(path),
                Query = Encode(query)
            };

            return curl;
        }