Exemplo n.º 1
0
        /// <summary>
        /// Handles the Open button: lets the user pick a CSV file, rebuilds the URL sets
        /// and the white/black domain lists from the files next to it, then starts
        /// iterating over the loaded URLs.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void btnOpen_Click(object sender, EventArgs e)
        {
            if (openFileDialog.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            String directory = Path.GetDirectoryName(openFileDialog.FileName);
            saveFileDialog.InitialDirectory = directory;

            UrlSet = new Hashtable();
            NewUrlSet = new Hashtable();
            WhiteDomains = new StringDictionary();
            BlackDomains = new StringDictionary();

            //Read in the CSV file and create a hashtable keyed off the URLs
            FileInfo fi = new FileInfo(openFileDialog.FileName);
            //"using" guarantees the file is closed even if a row fails to parse
            using (StreamReader sr = fi.OpenText())
            {
                while (!sr.EndOfStream)
                {
                    String[] parts = sr.ReadLine().Trim().Split(',');
                    String url = parts[0].Trim();
                    if (url.Length == 0)
                    {
                        continue;//blank line or row without a URL: nothing to key the entry on
                    }
                    UrlMD md;
                    if (parts.Length < 4)
                    {
                        //allows for a simple URL list on 1st invocation; rows with too few
                        //fields are treated the same way instead of indexing past the array
                        md = new UrlMD();
                    }
                    else
                    {
                        md = new UrlMD(parts[1].Trim(), Convert.ToInt32(parts[2].Trim()), parts[3].Trim());
                    }
                    ProtectedAdd2UrlSet(CleanTrailingSlash(url), md);
                }
            }

            //Repeat for the "New" list
            String newListPath = fi.FullName.Replace(".csv", "_New.csv");
            FileInfo fiNew = new FileInfo(newListPath);
            //if the name contains no ".csv" the path is unchanged; do not re-read the main file as the "New" list
            if (newListPath != fi.FullName && fiNew.Exists)
            {
                using (StreamReader sr = fiNew.OpenText())
                {
                    while (!sr.EndOfStream)
                    {
                        String[] parts = sr.ReadLine().Trim().Split(',');
                        String url = parts[0].Trim();
                        if (url.Length == 0)
                        {
                            continue;
                        }
                        UrlMD md;
                        if (parts.Length < 3)
                        {
                            md = new UrlMD();//simple URL list, or a row with too few fields
                        }
                        else
                        {
                            md = new UrlMD(parts[1].Trim(), Convert.ToInt32(parts[2].Trim()));
                        }
                        ProtectedAdd2NewUrlSet(CleanTrailingSlash(url), md);
                    }
                }
            }

            //read from whitelist and blacklist files (one domain per line) from same directory
            ReadDomainListFile(Path.Combine(directory, "WhiteDomains"), WhiteDomains);
            ReadDomainListFile(Path.Combine(directory, "BlackDomains"), BlackDomains);

            //get the enumerator and get the 1st URL
            currentIndex = 0;
            UrlSetBare = new ArrayList(UrlSet.Keys);
            lblUrlCount.Text = UrlSetBare.Count.ToString();
            UrlSetEnumerator = UrlSetBare.GetEnumerator();
            UrlSetIterate();

            btnSave.Enabled = true;
        }

        /// <summary>
        /// Reads a domain list file (one domain per line) into <paramref name="domains"/>.
        /// Blank lines and domains already present are skipped; a missing file is ignored.
        /// </summary>
        /// <param name="path">Full path of the domain list file.</param>
        /// <param name="domains">Dictionary that receives each domain as both key and value.</param>
        private static void ReadDomainListFile(String path, StringDictionary domains)
        {
            FileInfo listFile = new FileInfo(path);
            if (!listFile.Exists)
            {
                return;
            }
            using (StreamReader sr = listFile.OpenText())
            {
                while (!sr.EndOfStream)
                {
                    String domain = sr.ReadLine().Trim();
                    if (domain.Length > 0 && !domains.ContainsKey(domain))
                    {
                        domains.Add(domain, domain);
                    }
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Advances the URL enumerator to the next entry whose type is still empty and
        /// loads it in the embedded browser; when the enumerator is exhausted, reports
        /// that no URLs remain and sets the progress bar to 100.
        /// </summary>
        public void UrlSetIterate()
        {
            //step forward at least once, then keep stepping over entries that already have a type
            bool haveUrl;
            do
            {
                currentIndex++;
                haveUrl = UrlSetEnumerator.MoveNext();
                if (haveUrl)
                {
                    currentEntryUrl = UrlSetEnumerator.Current.ToString();
                    currentEntryMD = (UrlMD)UrlSet[currentEntryUrl];
                }
            }
            while (haveUrl && currentEntryMD.Type != "");

            //nothing left: report all done
            if (!haveUrl)
            {
                MessageBox.Show("No more URLs in loaded set.\r\nReload the CSV file if new URLs have been added.");
                grpDecisions.Enabled = false;
                grpExtract.Enabled = false;
                progressBar.Value = 100;
                return;
            }

            //lock the UI down while the page loads; only Abort stays available
            btnOpen.Enabled = false;
            grpDecisions.Enabled = false;
            grpExtract.Enabled = false;
            btnAbort.Enabled = true;
            abort = false;
            try
            {
                webBrowser.Navigate(currentEntryUrl);
                txtAddress.Text = currentEntryUrl;
                //pump the message loop until the browser finishes loading or the abort flag is set
                while (true)
                {
                    if (webBrowser.ReadyState == WebBrowserReadyState.Complete || abort)
                    {
                        break;
                    }
                    Application.DoEvents();
                }
                grpExtract.Enabled = true;
            }
            catch (Exception)
            {
                webBrowser.Stop();
                MessageBox.Show("Problem loading:" +
                    "\r\n1. select \"defer\" or \"Reject\";" +
                    "\r\n2. save;" +
                    "\r\n3. close the program.");
            }
            btnAbort.Enabled = false;
            grpDecisions.Enabled = true;

            //show progress
            int percentDone = 100 * currentIndex / UrlSet.Count;
            progressBar.Value = percentDone;
        }
Exemplo n.º 3
0
 /// <summary>
 /// Adds a URL to the hashtable with some checking: the trailing slash is cleaned,
 /// and the entry is ignored when the resulting URL is empty or already present.
 /// </summary>
 /// <param name="URL">URL to add; it is passed through CleanTrailingSlash before use as the key.</param>
 /// <param name="MD">Metadata stored as the value for the URL.</param>
 public void ProtectedAdd2UrlSet(String URL, UrlMD MD)
 {
     URL = CleanTrailingSlash(URL);
     //an empty key would later be handed to the browser as a URL; reject it
     //(same rule ProtectedAdd2NewUrlSet applies)
     if (URL.Length == 0)
     {
         return;
     }
     if (!UrlSet.ContainsKey(URL))
     {
         UrlSet.Add(URL, MD);
     }
 }
Exemplo n.º 4
0
 /// <summary>
 /// Records the chosen type and feed URL for the current entry. If the browser ended
 /// up at a different URL than the entry's, the original entry is marked "reject" and
 /// the browser's URL is added with the chosen metadata instead.
 /// </summary>
 /// <param name="Type">Type to assign to the current entry.</param>
 /// <param name="FeedUrl">Feed URL to assign to the current entry.</param>
 public void UpdateCurrentEntryType(String Type, String FeedUrl)
 {
     currentEntryMD.Type = Type;
     currentEntryMD.FeedUrl = FeedUrl;
     //check if the user navigated away.
     //webBrowser.Url is null when no document is loaded (e.g. the page failed to load);
     //treat that as "did not navigate away" instead of dereferencing null
     Uri browserUrl = webBrowser.Url;
     String navUrl = (browserUrl == null)
         ? currentEntryUrl
         : CleanTrailingSlash(browserUrl.ToString());
     if (navUrl == currentEntryUrl)
     {
         UrlSet[currentEntryUrl] = currentEntryMD;
     }
     else
     {
         //they ended up at a different URL: "reject" the old one and add the new one with the chosen type
         UrlSet[currentEntryUrl] = new UrlMD("reject");
         currentEntryUrl = navUrl;
         ProtectedAdd2UrlSet(currentEntryUrl, currentEntryMD);
     }
     grpFile.Enabled = true;
     btnSave.Enabled = true;
 }
Exemplo n.º 5
0
 /// <summary>
 /// Adds a URL to the hashtable of NEW URLs. After cleaning the trailing slash, the
 /// URL is ignored when it is empty or already present in either the new set or the
 /// main URL set.
 /// </summary>
 /// <param name="URL">URL to add; it is passed through CleanTrailingSlash before use as the key.</param>
 /// <param name="MD">Metadata stored as the value for the URL.</param>
 public void ProtectedAdd2NewUrlSet(String URL, UrlMD MD)
 {
     URL = CleanTrailingSlash(URL);

     //nothing to key on
     if (URL.Length <= 0)
     {
         return;
     }

     //already known, either as a new URL or in the main set
     if (NewUrlSet.ContainsKey(URL) || UrlSet.ContainsKey(URL))
     {
         return;
     }

     NewUrlSet.Add(URL, MD);
 }