/// <summary>
/// Raises <c>DataCollected</c> three times for one crawled page: once with the
/// text of the already-extracted Romanian page, then once each for the English
/// and German pages, which are extracted here from their URLs.
/// </summary>
/// <param name="cid">Identifier copied into <c>CrawlerID</c> of every event.</param>
/// <param name="roUrl">URL of the Romanian page that <paramref name="wde"/> was built from.</param>
/// <param name="enUrl">URL of the English page; may be empty when no link was found.</param>
/// <param name="deUrl">URL of the German page; may be empty when no link was found.</param>
/// <param name="wde">Extraction result for the Romanian page.</param>
private void LaunchAllEventsAndClearMemory(int cid, string roUrl, string enUrl, string deUrl, WikitravelDataExtraction wde)
{
    // Each event gets its own args object. Reusing a single instance would let
    // a subscriber that stored the RO args see them overwritten by EN/DE data.
    DataCollectedEventArgs roArgs = new DataCollectedEventArgs();
    roArgs.CrawlerID = cid;
    roArgs.Text = wde.TextContents;
    roArgs.URL = roUrl;
    roArgs.Language = LanguageType.RO;
    PublishDataCollected(roArgs);

    // NOTE(review): enUrl/deUrl can be empty strings; the extraction is still
    // attempted so that exactly three events are raised per page, as before.
    // Confirm how WikitravelDataExtraction behaves for an empty URL.
    wde = new WikitravelDataExtraction(enUrl);
    DataCollectedEventArgs enArgs = new DataCollectedEventArgs();
    enArgs.CrawlerID = cid;
    enArgs.Text = wde.TextContents;
    enArgs.URL = enUrl;
    enArgs.Language = LanguageType.EN;
    PublishDataCollected(enArgs);

    wde = new WikitravelDataExtraction(deUrl);
    DataCollectedEventArgs deArgs = new DataCollectedEventArgs();
    deArgs.CrawlerID = cid;
    deArgs.Text = wde.TextContents;
    deArgs.URL = deUrl;
    deArgs.Language = LanguageType.DE;
    PublishDataCollected(deArgs);
}

/// <summary>
/// Invokes the <c>DataCollected</c> subscribers, if any, with the given arguments.
/// </summary>
/// <param name="e">Event data to hand to the subscribers.</param>
private void PublishDataCollected(DataCollectedEventArgs e)
{
    // Copy the delegate first so an unsubscribe on another thread between the
    // null check and the call cannot cause a NullReferenceException.
    var handler = this.DataCollected;
    if (handler != null)
    {
        handler(e);
    }
}
/// <summary>
/// Crawl loop: starting from <c>baseURL + serverLocation</c>, processes queued
/// Romanian pages in discovery order until the queue is exhausted or
/// <c>needed</c> pages have been processed, raising the data events for each
/// page and finally <c>NoMoreData</c>.
/// </summary>
private void WorkingThread()
{
    // Index of the page currently being processed (also the number of pages done).
    int currentFileProgress = 0;

    // Pages waiting to be processed, in discovery order.
    List<string> crawlingStack = new List<string>();

    // Every URL ever queued, so cross-linked pages are not crawled repeatedly
    // and do not consume the "needed" budget more than once.
    HashSet<string> queuedUrls = new HashSet<string>();

    // The first URL is the base one.
    string startUrl = baseURL + serverLocation;
    crawlingStack.Add(startUrl);
    queuedUrls.Add(startUrl);

    // Continue while there is something left to process and the limit is not reached.
    while ((currentFileProgress < crawlingStack.Count) && (currentFileProgress < needed))
    {
        string roUrl = crawlingStack[currentFileProgress];
        string enUrl = "";
        string deUrl = "";
        WikitravelDataExtraction wde = new WikitravelDataExtraction(roUrl);

        // Look for the equivalents in the other languages and for Romanian
        // links that should be followed. If several EN/DE links are present,
        // the last one wins.
        foreach (string link in wde.FollowLinks)
        {
            if (link.Contains("wikitravel.org/de/"))
            {
                deUrl = link;
            }
            else if (link.Contains("wikitravel.org/en/"))
            {
                enUrl = link;
            }
            else if (link.Contains("href=\"/ro"))
            {
                // The link must be unwrapped before it can be queued.
                string target = this.baseURL + StripHrefWrapper(link);
                if (queuedUrls.Add(target))
                {
                    crawlingStack.Add(target);
                }
            }
        }

        // Stays empty when no link for that language was found on the page.
        enUrl = StripHrefWrapper(enUrl);
        deUrl = StripHrefWrapper(deUrl);

        // Raise the events for this page (RO, then EN and DE).
        LaunchAllEventsAndClearMemory(currentFileProgress, roUrl, enUrl, deUrl, wde);

        // No forced GC.Collect() here: the extraction object becomes
        // unreachable at the end of the iteration and is reclaimed normally.
        currentFileProgress++;
    }

    // Copy the delegate so a concurrent unsubscribe cannot null it between
    // the check and the call.
    var noMoreDataHandler = NoMoreData;
    if (noMoreDataHandler != null)
    {
        noMoreDataHandler();
    }
}

/// <summary>
/// Removes the first six characters and the last character of a link string.
/// Presumably the link has the form <c>href="..."</c>, so this drops the
/// <c>href="</c> prefix and the closing quote — confirm against FollowLinks.
/// </summary>
/// <param name="link">Raw link text; may be empty.</param>
/// <returns>
/// The unwrapped target, or an empty string when <paramref name="link"/> is
/// null or too short to contain both the prefix and the trailing character.
/// </returns>
private static string StripHrefWrapper(string link)
{
    const int prefixLength = 6;
    const int suffixLength = 1;

    if (string.IsNullOrEmpty(link) || link.Length < prefixLength + suffixLength)
    {
        return string.Empty;
    }

    return link.Substring(prefixLength, link.Length - prefixLength - suffixLength);
}