/// <summary>
/// Collects the text of the entry links found on an index page.
/// </summary>
/// <param name="url">Address handed to <c>Wiktionary.GetDocument</c> to obtain the page.</param>
/// <returns>
/// The text content of every anchor matched by <c>EN_INDEX_ENTRY_SELECTOR</c> that has an
/// <c>href</c> not containing <c>redlink=1</c>. An empty list is returned if loading or
/// querying the page throws (best-effort: the failure is deliberately not propagated).
/// </returns>
private static ImmutableList<string> EntriesFromIndex(string url)
{
    try
    {
        IDocument doc = Wiktionary.GetDocument(url);

        return doc.QuerySelectorAll(EN_INDEX_ENTRY_SELECTOR)
                  .Where(a =>
                  {
                      // GetAttribute yields null when the anchor has no href. Skip such
                      // anchors instead of letting a NullReferenceException reach the
                      // catch below and throw away every other entry on the page.
                      string href = a.GetAttribute("href");
                      return href != null && !href.Contains("redlink=1");
                  })
                  .Select(a => a.TextContent)
                  .ToImmutableList();
    }
    catch (Exception)
    {
        // Best-effort scraping: any failure yields "no entries" for this page.
        return ImmutableList.Create<string>();
    }
}
/// <summary>
/// Gathers the links found on an index page, each formatted through <c>EN_ABSOLUTE_URL</c>.
/// </summary>
/// <param name="url">Address handed to <c>Wiktionary.GetDocument</c> to obtain the page.</param>
/// <returns>
/// One string per anchor matched by <c>EN_INDEX_LINK_SELECTOR</c>, produced by passing the
/// anchor's <c>href</c> value to <c>String.Format(EN_ABSOLUTE_URL, ...)</c>. An empty list is
/// returned if anything in the process throws.
/// </returns>
private static ImmutableList<string> LinksFromIndexPage(string url)
{
    ImmutableList<string> formattedLinks;

    try
    {
        IDocument page = Wiktionary.GetDocument(url);
        var anchors = page.QuerySelectorAll(EN_INDEX_LINK_SELECTOR);

        formattedLinks = (from anchor in anchors
                          let target = anchor.GetAttribute("href")
                          select String.Format(EN_ABSOLUTE_URL, target)).ToImmutableList();
    }
    catch (Exception)
    {
        // Any failure is reported to the caller as "no links".
        formattedLinks = ImmutableList<string>.Empty;
    }

    return formattedLinks;
}
/// <summary>
/// Collects the text of the entry links found on a category page.
/// </summary>
/// <param name="url">Address handed to <c>Wiktionary.GetDocument</c> to obtain the page.</param>
/// <returns>
/// The text content of every anchor matched by <c>EN_CATEGORY_ENTRY_SELECTOR</c> that has an
/// <c>href</c> not containing <c>redlink=1</c> and whose text contains no <c>:</c>. An empty
/// list is returned if loading or querying the page throws (best-effort: the failure is
/// deliberately not propagated).
/// </returns>
private static ImmutableList<string> EntriesFromCategory(string url)
{
    try
    {
        IDocument doc = Wiktionary.GetDocument(url);

        return doc.QuerySelectorAll(EN_CATEGORY_ENTRY_SELECTOR)
                  .Where(a =>
                  {
                      // GetAttribute yields null when the anchor has no href. Skip such
                      // anchors instead of letting a NullReferenceException reach the
                      // catch below and throw away every other entry on the page.
                      string href = a.GetAttribute("href");

                      // "redlink=1" marks links without entries.
                      return href != null && !href.Contains("redlink=1");
                  })
                  // A colon in the text eliminates Appendix: and Template: entries.
                  .Where(a => !a.TextContent.Contains(":"))
                  .Select(a => a.TextContent)
                  .ToImmutableList();
    }
    catch (Exception)
    {
        // Best-effort scraping: any failure yields "no entries" for this page.
        return ImmutableList.Create<string>();
    }
}
/// <summary>
/// Gathers the follow-up links found on a category page and normalizes them to full URLs.
/// </summary>
/// <param name="url">Address handed to <c>Wiktionary.GetDocument</c> to obtain the page.</param>
/// <returns>
/// For every anchor matched by <c>EN_CATEGORY_LINK_SELECTOR</c> whose <c>href</c> starts with
/// <c>/wiki/</c> or contains <c>en.wiktionary.org</c>: the href itself (prefixed with
/// <c>http:</c> when it starts with <c>//</c>) if it contains <c>en.wiktionary.org</c>,
/// otherwise the href formatted through <c>EN_ABSOLUTE_URL</c>. An empty list is returned if
/// loading or querying the page throws (best-effort: the failure is deliberately not propagated).
/// </returns>
private static ImmutableList<string> LinksFromCategoryPage(string url)
{
    try
    {
        IDocument doc = Wiktionary.GetDocument(url);

        return doc.QuerySelectorAll(EN_CATEGORY_LINK_SELECTOR)
                  .Select(a => a.GetAttribute("href"))
                  // GetAttribute yields null for anchors without an href. Drop them so a
                  // single such anchor cannot void the whole page via the catch below.
                  .Where(href => href != null)
                  // URL prefixes are not linguistic text, so compare ordinally rather than
                  // with the current culture.
                  .Where(href => href.StartsWith("/wiki/", StringComparison.Ordinal)
                                 || href.Contains("en.wiktionary.org"))
                  // Protocol-relative links ("//host/...") get an explicit scheme.
                  .Select(href => href.StartsWith("//", StringComparison.Ordinal) ? "http:" + href : href)
                  // Links that do not already name the host are expanded via EN_ABSOLUTE_URL.
                  .Select(href => href.Contains("en.wiktionary.org")
                                      ? href
                                      : String.Format(EN_ABSOLUTE_URL, href))
                  .ToImmutableList();
    }
    catch (Exception)
    {
        // Best-effort scraping: any failure yields "no links" for this page.
        return ImmutableList.Create<string>();
    }
}