/// <summary>
/// Runs the crawl over the supplied URL list on a background thread while
/// keeping the progress form responsive, and returns the paths of the files
/// collected in <c>lstFisiere</c>.
/// </summary>
/// <param name="data">The URLs to crawl; cast to <c>string[]</c>.</param>
/// <param name="MyApplication">Application object handed to the worker thread as its start argument.</param>
/// <returns>The contents of <c>lstFisiere</c> as an array.</returns>
object ProcessingBlock.ProcessData(object data, Agora.Builder.System.BaseApplication MyApplication)
{
    this.data = data;
    string[] urls = (string[])data;

    // Reset the completion flag so a stale 'true' cannot make the wait loop
    // below fall through before the worker has done anything.
    done = false;

    try
    {
        fp.progressBar1.Maximum = urls.Length;
        fp.progressBar1.Value = 0;
        fp.Show();
        Application.DoEvents();

        Thread t = new Thread(new ParameterizedThreadStart(Crawl));
        t.Start(MyApplication);

        // Poll for completion while pumping the message loop so the progress
        // form (including its cancel control) stays responsive.
        while (!done)
        {
            Thread.Sleep(10);
            Application.DoEvents();
            if (fp.Cancel)
            {
                t.Abort();
                break;
            }
        }
    }
    finally
    {
        // Always tear the progress form down, even if setup or polling threw.
        fp.Close();
        fp.Dispose();
    }

    return (lstFisiere.ToArray());
}
/// <summary>
/// Initializes the configuration form: stores the application reference,
/// builds the designer-generated controls, calls <c>getTopics()</c>, and
/// writes the path <c>C:\temp\arch</c> into slot 0 of the application's
/// <c>Memory</c>.
/// </summary>
/// <param name="MyApplication">Application object kept by the form and whose <c>Memory[0]</c> is set here.</param>
public frmConfiguration(Agora.Builder.System.BaseApplication MyApplication)
{
    // Keep the application reference first; getTopics() runs after the
    // controls exist.
    this.MyApplication = MyApplication;

    InitializeComponent();
    getTopics();

    // NOTE(review): hard-coded output location; presumably read back later as
    // the base directory for generated files -- confirm against consumers.
    MyApplication.Memory[0] = "C:\\temp\\arch";
}
/// <summary>
/// Shows <c>frmMain</c> as a modal dialog centered on the screen and waits
/// for it to close.
/// </summary>
/// <param name="data">Unused by this block.</param>
/// <param name="MyApplication">Unused by this block.</param>
/// <returns>Always <c>null</c>.</returns>
object ProcessingBlock.ProcessData(object data, Agora.Builder.System.BaseApplication MyApplication)
{
    // A form shown with ShowDialog() is not disposed when it closes, so it
    // must be disposed explicitly to release its window handle.
    using (frmMain fc = new frmMain())
    {
        fc.StartPosition = FormStartPosition.CenterScreen;
        fc.ShowDialog();
    }

    return (null);
}
/// <summary>
/// Shows the configuration form as a modal dialog centered on the screen and
/// returns the form's <c>ArticleList</c> once the dialog is closed.
/// </summary>
/// <param name="data">Unused by this block.</param>
/// <param name="MyApplication">Application object passed to the configuration form's constructor.</param>
/// <returns>The value of <c>ArticleList</c> read from the form after the dialog closes.</returns>
object ProcessingBlock.ProcessData(object data, Agora.Builder.System.BaseApplication MyApplication)
{
    // A form shown with ShowDialog() is not disposed when it closes, so it
    // must be disposed explicitly. The result is read before disposal.
    using (frmConfiguration fc = new frmConfiguration(MyApplication))
    {
        fc.StartPosition = FormStartPosition.CenterScreen;
        fc.ShowDialog();
        return (fc.ArticleList);
    }
}
/// <summary>
/// Thread entry point: downloads every URL in <c>data</c> once per language
/// suffix in <c>Limbi</c>, strips the HTML, writes the text to
/// "&lt;base&gt;&lt;index&gt;_&lt;language&gt;.txt", and appends one line per
/// file (path, URL, word count) to "descriptor.txt" in the base directory.
/// Each written file path is also added to <c>lstFisiere</c>.
/// </summary>
/// <param name="o">
/// The <c>Agora.Builder.System.BaseApplication</c> whose <c>Memory[0]</c>
/// holds the base output directory.
/// </param>
private void Crawl(object o)
{
    try
    {
        Agora.Builder.System.BaseApplication MyApplication = (Agora.Builder.System.BaseApplication)o;
        TextWebBrowser wb = new TextWebBrowser();

        // Normalize the base directory so it ends with a separator. The length
        // check avoids indexing into an empty string.
        String BasePath = MyApplication.Memory[0].ToString();
        if (BasePath.Length > 0 && !BasePath.EndsWith("\\", StringComparison.Ordinal))
        {
            BasePath += "\\";
        }

        // 'using' guarantees the descriptor file is flushed and closed even if
        // the loop is left through an exception or a thread abort.
        using (TextWriter twMaster = new StreamWriter(BasePath + "descriptor.txt"))
        {
            string[] urls = (string[])data;
            int i = 0;
            foreach (string url in urls)
            {
                if (fp.Cancel)
                {
                    break;
                }

                SetText(url);
                i++;
                SetValue(i);

                foreach (string limba in Limbi)
                {
                    try
                    {
                        string response = wb.Navigate(url + limba);
                        string text = RemoveHTML(response);
                        string filePath = BasePath + "" + i + "_" + limba + ".txt";

                        using (TextWriter tw = new StreamWriter(filePath))
                        {
                            tw.Write(text);
                        }

                        // Word count: number of non-empty tokens between
                        // spaces, commas, periods and semicolons.
                        int wordCount = text.Split(new char[] { ' ', ',', '.', ';' }, StringSplitOptions.RemoveEmptyEntries).Length;
                        twMaster.WriteLine(filePath + " " + url + " " + wordCount);
                        lstFisiere.Add(filePath);
                    }
                    catch (Exception e)
                    {
                        // A failure on one URL/language pair is reported and
                        // the crawl continues with the next one.
                        MessageBox.Show(e.ToString());
                    }
                }
            }
        }
    }
    finally
    {
        // Signal completion on every exit path so code polling 'done' does
        // not wait on a worker that has already stopped.
        done = true;
    }
}