private void btnOpen_Click(object sender, EventArgs e)
{
    if (openFileDialog.ShowDialog() == DialogResult.OK)
    {
        saveFileDialog.InitialDirectory = Path.GetDirectoryName(openFileDialog.FileName);
        UrlSet = new Hashtable();
        NewUrlSet = new Hashtable();
        WhiteDomains = new StringDictionary();
        BlackDomains = new StringDictionary();

        // Read in the CSV file and create a hashtable keyed off the URLs
        FileInfo fi = new FileInfo(openFileDialog.FileName);
        StreamReader sr = fi.OpenText();
        while (!sr.EndOfStream)
        {
            String line = sr.ReadLine().Trim();
            String[] parts = line.Split(',');
            UrlMD md;
            if (parts.Length == 1)
            {
                md = new UrlMD(); // allows for a simple URL list on the first invocation
            }
            else
            {
                md = new UrlMD(parts[1].Trim(), Convert.ToInt32(parts[2].Trim()), parts[3].Trim());
            }
            ProtectedAdd2UrlSet(CleanTrailingSlash(parts[0].Trim()), md);
        }
        sr.Close();

        // Repeat for the "New" list
        FileInfo fiNew = new FileInfo(fi.FullName.Replace(".csv", "_New.csv"));
        if (fiNew.Exists)
        {
            sr = fiNew.OpenText();
            while (!sr.EndOfStream)
            {
                String line = sr.ReadLine().Trim();
                String[] parts = line.Split(',');
                UrlMD md;
                if (parts.Length == 1)
                {
                    md = new UrlMD(); // allows for a simple URL list on the first invocation
                }
                else
                {
                    md = new UrlMD(parts[1].Trim(), Convert.ToInt32(parts[2].Trim()));
                }
                ProtectedAdd2NewUrlSet(CleanTrailingSlash(parts[0].Trim()), md);
            }
            sr.Close();
        }

        // Read the whitelist and blacklist files (one domain per line) from the same directory
        fi = new FileInfo(Path.Combine(Path.GetDirectoryName(openFileDialog.FileName), "WhiteDomains"));
        if (fi.Exists)
        {
            sr = fi.OpenText();
            while (!sr.EndOfStream)
            {
                String white = sr.ReadLine().Trim();
                if (!WhiteDomains.ContainsKey(white) && white.Length > 0)
                {
                    WhiteDomains.Add(white, white);
                }
            }
            sr.Close(); // close inside the Exists check so we never re-close a reader that was never opened here
        }

        fi = new FileInfo(Path.Combine(Path.GetDirectoryName(openFileDialog.FileName), "BlackDomains"));
        if (fi.Exists)
        {
            sr = fi.OpenText();
            while (!sr.EndOfStream)
            {
                String black = sr.ReadLine().Trim();
                if (!BlackDomains.ContainsKey(black) && black.Length > 0)
                {
                    BlackDomains.Add(black, black);
                }
            }
            sr.Close();
        }

        // Get the enumerator and load the first URL
        currentIndex = 0;
        UrlSetBare = new ArrayList(UrlSet.Keys);
        lblUrlCount.Text = UrlSetBare.Count.ToString();
        UrlSetEnumerator = UrlSetBare.GetEnumerator();
        UrlSetIterate();
        btnSave.Enabled = true;
    }
}
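// For reference, the CSV row layout the loader above expects, inferred from the parts[]
// indexing. The column names and sample values below are illustrative assumptions on my
// part, not taken from the original data files:
//
//   url,type,count,feedUrl
//   http://example.com/blog,accept,3,http://example.com/feed
//
// A row containing only a URL is also accepted, which is what allows a bare URL list to be
// loaded on the first invocation.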
public void UrlSetIterate()
{
    currentIndex++;
    bool url2get = UrlSetEnumerator.MoveNext();

    // Skip over entries that have already been given a type
    if (url2get)
    {
        currentEntryUrl = UrlSetEnumerator.Current.ToString();
        currentEntryMD = (UrlMD)UrlSet[currentEntryUrl];
        while ((currentEntryMD.Type != "") && url2get)
        {
            currentIndex++;
            url2get = UrlSetEnumerator.MoveNext();
            if (url2get)
            {
                currentEntryUrl = UrlSetEnumerator.Current.ToString();
                currentEntryMD = (UrlMD)UrlSet[currentEntryUrl];
            }
        }
    }

    // Now load up the browser, or report that the set is exhausted
    if (url2get)
    {
        btnOpen.Enabled = false;
        grpDecisions.Enabled = false;
        grpExtract.Enabled = false;
        btnAbort.Enabled = true;
        abort = false;
        try
        {
            webBrowser.Navigate(currentEntryUrl);
            txtAddress.Text = currentEntryUrl;
            // Wait for the browser control to finish loading the page
            while ((webBrowser.ReadyState != WebBrowserReadyState.Complete) && !abort)
            {
                Application.DoEvents();
            }
            grpExtract.Enabled = true;
        }
        catch (Exception ex)
        {
            webBrowser.Stop();
            MessageBox.Show("Problem loading: " + ex.Message +
                "\r\n1. Select \"Defer\" or \"Reject\";" +
                "\r\n2. Save;" +
                "\r\n3. Close the program.");
        }
        btnAbort.Enabled = false;
        grpDecisions.Enabled = true;

        // Show progress
        progressBar.Value = 100 * currentIndex / UrlSet.Count;
    }
    else
    {
        MessageBox.Show("No more URLs in the loaded set.\r\nReload the CSV file if new URLs have been added.");
        grpDecisions.Enabled = false;
        grpExtract.Enabled = false;
        progressBar.Value = 100;
    }
}
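// Note: the ReadyState/Application.DoEvents() loop above is a busy-wait while the page loads.
// A minimal event-driven sketch of an alternative (an assumption on my part, not part of the
// original tool) would subscribe once to WebBrowser.DocumentCompleted, e.g. in the form's
// constructor, and move the post-load UI updates into the handler:
//
//   webBrowser.DocumentCompleted += webBrowser_DocumentCompleted;

private void webBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    // Runs when the browser control has finished loading the navigated page
    grpExtract.Enabled = true;
    btnAbort.Enabled = false;
    grpDecisions.Enabled = true;
    progressBar.Value = 100 * currentIndex / UrlSet.Count;
}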
/// <summary>
/// Adds a URL to the hashtable with some checking
/// </summary>
public void ProtectedAdd2UrlSet(String URL, UrlMD MD)
{
    URL = CleanTrailingSlash(URL);
    if (!UrlSet.ContainsKey(URL))
    {
        UrlSet.Add(URL, MD);
    }
}
/// <summary>
/// Records the chosen type (and feed URL) for the current entry, handling the case
/// where the user navigated away from the original URL.
/// </summary>
/// <param name="Type">Decision type chosen by the user</param>
/// <param name="FeedUrl">Feed address associated with the entry, if any</param>
public void UpdateCurrentEntryType(String Type, String FeedUrl)
{
    currentEntryMD.Type = Type;
    currentEntryMD.FeedUrl = FeedUrl;

    // Check whether the user navigated away. If they ended up at a different URL,
    // mark the original entry as "reject" and add the final URL with the chosen type.
    String navUrl = CleanTrailingSlash(webBrowser.Url.ToString());
    if (navUrl == currentEntryUrl)
    {
        UrlSet[currentEntryUrl] = currentEntryMD;
    }
    else
    {
        UrlSet[currentEntryUrl] = new UrlMD("reject");
        //UrlSet.Remove(currentEntryUrl);
        currentEntryUrl = navUrl;
        ProtectedAdd2UrlSet(currentEntryUrl, currentEntryMD);
    }
    grpFile.Enabled = true;
    btnSave.Enabled = true;
}
/// <summary>
/// Adds a URL to the hashtable of NEW URLs with some checking
/// </summary>
public void ProtectedAdd2NewUrlSet(String URL, UrlMD MD)
{
    URL = CleanTrailingSlash(URL);
    if (URL.Length > 0)
    {
        if (!NewUrlSet.ContainsKey(URL) && !UrlSet.ContainsKey(URL))
        {
            NewUrlSet.Add(URL, MD);
        }
    }
}
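// UrlMD itself is not shown in this listing. A minimal sketch of what it must look like,
// inferred from the constructor calls and property accesses above. Type and FeedUrl appear
// in the code; the name of the integer field (Count here) and the exact constructor overloads
// are assumptions:

public class UrlMD
{
    public String Type = "";     // decision recorded for the URL ("", "reject", etc.)
    public String FeedUrl = "";  // feed address associated with the URL
    public int Count = 0;        // integer column read from the CSV (name assumed)

    public UrlMD() { }

    public UrlMD(String type)
    {
        Type = type;
    }

    public UrlMD(String type, int count)
    {
        Type = type;
        Count = count;
    }

    public UrlMD(String type, int count, String feedUrl)
    {
        Type = type;
        Count = count;
        FeedUrl = feedUrl;
    }
}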