Esempio n. 1
0
        /// <summary>
        /// Click handler for <c>btnGetPaths</c>: downloads the page whose address is in
        /// <c>tbGetPathsFrom</c>, resolves every hyperlink matched by <c>_hyperLinkRegex</c>
        /// against the page's base URI, and lists the distinct directory paths of those
        /// links (sorted) in <c>dgvPathFilter</c>.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        /// <remarks>
        /// The grid is left untouched when the response status is not 200 OK or when the
        /// page contains no matches. Any exception is shown to the user in a message box
        /// instead of being propagated.
        /// </remarks>
        private void btnGetPaths_Click(object sender, EventArgs e)
        {
            try
            {
                HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(tbGetPathsFrom.Text);
                httpWebRequest.ReadWriteTimeout = 30000;
                httpWebRequest.Timeout          = 30000;
                httpWebRequest.UserAgent        = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.71 Safari/534.24";

                string html;

                // Dispose the response itself (not only the reader) so the underlying
                // connection is released on every path, including a non-OK status.
                using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                {
                    if (httpWebResponse.StatusCode != HttpStatusCode.OK)
                    {
                        return;
                    }

                    using (StreamReader streamReader = new StreamReader(httpWebResponse.GetResponseStream()))
                    {
                        html = streamReader.ReadToEnd();
                    }
                }

                MatchCollection matchCollection = _hyperLinkRegex.Matches(html);

                if (matchCollection.Count == 0)
                {
                    return;
                }

                // Default base: the requested address without trailing '/' or '#'.
                Uri baseUri = new Uri(tbGetPathsFrom.Text.TrimEnd('/').TrimEnd('#'), UriKind.Absolute);

                // The first "base" match decides the base URI. If its value is not a valid
                // absolute URI the requested address stays in effect. Parsing goes through a
                // temporary because a failed TryCreate nulls its out argument.
                foreach (Match match in matchCollection)
                {
                    if (!string.Equals(match.Groups["Tag"].Value, "base", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    Uri declaredBase;
                    if (Uri.TryCreate(match.Groups["HyperLink"].Value, UriKind.Absolute, out declaredBase))
                    {
                        baseUri = declaredBase;
                    }

                    break;
                }

                // Built before the trailing slash is appended below, so query-only links
                // ("?a=b") are applied to the base exactly as it was given.
                UriBuilder uriBuilder = new UriBuilder(baseUri);

                // A base whose last segment has no '.' is treated as a directory: append '/'
                // so relative links resolve beneath it rather than beside it.
                if (!baseUri.AbsoluteUri.EndsWith("/", StringComparison.Ordinal) && !baseUri.Segments[baseUri.Segments.Length - 1].Contains("."))
                {
                    baseUri = new Uri(baseUri.AbsoluteUri + "/");
                }

                HashSet<string> paths = new HashSet<string>();

                foreach (Match match in matchCollection)
                {
                    string groupValue = _discoveryManager.GetGroupValue(match, "HyperLink").TrimEnd('/').TrimEnd('#');

                    Uri hyperLinkDiscovery;
                    if (!Uri.TryCreate(groupValue, UriKind.RelativeOrAbsolute, out hyperLinkDiscovery))
                    {
                        // Unparseable link: skip it instead of dereferencing null below.
                        continue;
                    }

                    if (!hyperLinkDiscovery.IsAbsoluteUri)
                    {
                        if (groupValue.StartsWith("?", StringComparison.Ordinal))
                        {
                            // Query-only link: same location as the base, different query.
                            uriBuilder.Query = groupValue.TrimStart('?');

                            hyperLinkDiscovery = uriBuilder.Uri;
                        }
                        else if (!string.IsNullOrEmpty(groupValue))
                        {
                            if (!Uri.TryCreate(baseUri, hyperLinkDiscovery, out hyperLinkDiscovery))
                            {
                                // Could not be combined with the base: skip this link.
                                continue;
                            }
                        }
                        else
                        {
                            // Empty link (e.g. only "/" or "#" before trimming): the base itself.
                            hyperLinkDiscovery = new Uri(baseUri.AbsoluteUri.TrimEnd('/').TrimEnd('#'));
                        }
                    }

                    string absolutePath = hyperLinkDiscovery.AbsolutePath;
                    if (string.IsNullOrEmpty(absolutePath))
                    {
                        continue;
                    }

                    string directoryName = Path.GetDirectoryName(absolutePath);
                    if (string.IsNullOrEmpty(directoryName))
                    {
                        continue;
                    }

                    // Path.GetDirectoryName may return backslash separators; restore URL form.
                    directoryName = directoryName.Replace("\\", "/");

                    if (directoryName != "/")
                    {
                        // HashSet.Add ignores values that are already present.
                        paths.Add(directoryName);
                    }
                }

                dgvPathFilter.Rows.Clear();

                foreach (string path in paths.OrderBy(p => p))
                {
                    dgvPathFilter.Rows.Add(path, "Crawl", "Scrape");
                }
            }
            catch (Exception exception)
            {
                MessageBox.Show(exception.Message + Environment.NewLine + exception.StackTrace, _formText);
            }
        }