Пример #1
0
 /// <summary>
 /// Loads the index page at <paramref name="url"/> and collects the text of every
 /// element matched by <c>EN_INDEX_ENTRY_SELECTOR</c> whose <c>href</c> does not
 /// contain <c>redlink=1</c>.
 /// </summary>
 /// <param name="url">Address of the index page to load.</param>
 /// <returns>
 /// The text content of the matching elements, or an empty list when loading or
 /// querying the page throws.
 /// </returns>
 private static ImmutableList <string> EntriesFromIndex(string url)
 {
     try
     {
         IDocument doc = Wiktionary.GetDocument(url);
         return doc.QuerySelectorAll(EN_INDEX_ENTRY_SELECTOR)
                .Where(a =>
                {
                    // An element without an href would otherwise throw on Contains and,
                    // via the catch below, wipe out the results for the whole page.
                    string href = a.GetAttribute("href");
                    return href != null && !href.Contains("redlink=1");
                })
                .Select(a => a.TextContent)
                .ToImmutableList();
     }
     catch (Exception)
     {
         // Best effort: a page that cannot be fetched or parsed contributes no entries.
         return ImmutableList <string>.Empty;
     }
 }
Пример #2
0
 /// <summary>
 /// Loads the index page at <paramref name="url"/> and returns the <c>href</c> of every
 /// element matched by <c>EN_INDEX_LINK_SELECTOR</c>, each one substituted into the
 /// <c>EN_ABSOLUTE_URL</c> format string.
 /// </summary>
 /// <param name="url">Address of the index page to load.</param>
 /// <returns>
 /// The formatted links, or an empty list when loading or querying the page throws.
 /// </returns>
 private static ImmutableList <string> LinksFromIndexPage(string url)
 {
     try
     {
         IDocument doc = Wiktionary.GetDocument(url);
         return doc.QuerySelectorAll(EN_INDEX_LINK_SELECTOR)
                .Select(a => a.GetAttribute("href"))
                // A missing or empty href would be formatted as an empty string and
                // produce a link that carries no page path at all, so skip it.
                .Where(href => !String.IsNullOrEmpty(href))
                .Select(href => String.Format(EN_ABSOLUTE_URL, href))
                .ToImmutableList();
     }
     catch (Exception)
     {
         // Best effort: a page that cannot be fetched or parsed contributes no links.
         return ImmutableList <string>.Empty;
     }
 }
Пример #3
0
 /// <summary>
 /// Loads the category page at <paramref name="url"/> and collects the text of every
 /// element matched by <c>EN_CATEGORY_ENTRY_SELECTOR</c> that links to an existing
 /// entry and whose text contains no colon.
 /// </summary>
 /// <param name="url">Address of the category page to load.</param>
 /// <returns>
 /// The text content of the matching elements, or an empty list when loading or
 /// querying the page throws.
 /// </returns>
 private static ImmutableList <string> EntriesFromCategory(string url)
 {
     try
     {
         IDocument doc = Wiktionary.GetDocument(url);
         return doc.QuerySelectorAll(EN_CATEGORY_ENTRY_SELECTOR)
                .Where(a =>
                {
                    // An element without an href would otherwise throw on Contains and,
                    // via the catch below, wipe out the results for the whole page.
                    string href = a.GetAttribute("href");
                    if (href == null || href.Contains("redlink=1"))
                    {
                        return false; // eliminates links without entries
                    }

                    // eliminates Appendix: and Template: entries
                    return !a.TextContent.Contains(":");
                })
                .Select(a => a.TextContent)
                .ToImmutableList();
     }
     catch (Exception)
     {
         // Best effort: a page that cannot be fetched or parsed contributes no entries.
         return ImmutableList <string>.Empty;
     }
 }
Пример #4
0
 /// <summary>
 /// Loads the category page at <paramref name="url"/> and returns the links of the
 /// elements matched by <c>EN_CATEGORY_LINK_SELECTOR</c> as absolute URLs.
 /// </summary>
 /// <remarks>
 /// Only hrefs that start with <c>/wiki/</c> or mention <c>en.wiktionary.org</c> are kept.
 /// Protocol-relative hrefs (<c>//host/...</c>) get an <c>http:</c> prefix, and
 /// <c>/wiki/</c> hrefs are substituted into the <c>EN_ABSOLUTE_URL</c> format string.
 /// </remarks>
 /// <param name="url">Address of the category page to load.</param>
 /// <returns>
 /// The resulting links, or an empty list when loading or querying the page throws.
 /// </returns>
 private static ImmutableList <string> LinksFromCategoryPage(string url)
 {
     try
     {
         IDocument doc = Wiktionary.GetDocument(url);
         return doc.QuerySelectorAll(EN_CATEGORY_LINK_SELECTOR)
                .Select(a => a.GetAttribute("href"))
                // Skip elements without an href instead of letting a null dereference
                // discard every link on the page through the catch below.
                .Where(href => href != null)
                .Where(href => href.StartsWith("/wiki/", StringComparison.Ordinal) || href.Contains("en.wiktionary.org"))
                .Select(href =>
                {
                    // URL prefixes are compared ordinally; culture rules do not apply to them.
                    if (href.StartsWith("//", StringComparison.Ordinal))
                    {
                        return "http:" + href;
                    }

                    // Decide by prefix, so a relative link whose path happens to mention
                    // the host name is still made absolute.
                    if (href.StartsWith("/wiki/", StringComparison.Ordinal))
                    {
                        return String.Format(EN_ABSOLUTE_URL, href);
                    }

                    return href;
                })
                .ToImmutableList();
     }
     catch (Exception)
     {
         // Best effort: a page that cannot be fetched or parsed contributes no links.
         return ImmutableList <string>.Empty;
     }
 }