/// <summary>
/// Builds the resource information (content, categories, links) for a leeched Wikipedia item.
/// </summary>
/// <remarks>
/// Lookup order: an <c>allpages</c> query bounded to the item's exact title, then a
/// <c>search</c> query on the title, and finally a locally built placeholder made from the
/// data already held by the item. The links of the chosen result are copied onto the item
/// before the result is handed to <c>cleanContent</c>.
/// </remarks>
/// <param name="leechedResource">The item to describe; must be a <see cref="WikipediaItem"/>.</param>
/// <returns>The value returned by <c>cleanContent</c> for the gathered information.</returns>
/// <exception cref="ArgumentNullException"><paramref name="leechedResource"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="leechedResource"/> is not a <see cref="WikipediaItem"/>.</exception>
protected override ResourceInformation GetResourceInformation(LeechedResourceBase leechedResource)
{
    if (leechedResource == null)
    {
        throw new ArgumentNullException("leechedResource");
    }

    WikipediaItem resource = leechedResource as WikipediaItem;
    if (resource == null)
    {
        // Fail with a clear message instead of a NullReferenceException further down.
        throw new ArgumentException("The leeched resource must be a WikipediaItem.", "leechedResource");
    }

    WikipediaResourceInformation ret = extractResourceInformation(
        resource,
        _wiki.Query
             .allpages()
             .Where(p => p.from == resource.Title && p.to == resource.Title).Pages);

    // It's not a page, so try with a different search mode.
    if (ret == null)
    {
        ret = extractResourceInformation(resource, _wiki.Query.search(resource.Title).Pages);
    }

    // Failsafe: if the resource info cannot be obtained from the wiki, make it up with what we already have.
    if (ret == null)
    {
        List<ResourceCategory> fallbackCategories = new List<ResourceCategory>();

        // An item without a parent has no category to borrow; leave the list empty rather than throwing.
        if (resource.ParentItem != null)
        {
            fallbackCategories.Add(new ResourceCategory { Name = resource.ParentItem.Title });
        }

        ret = new WikipediaResourceInformation
        {
            Content = resource.Title,
            Categories = fallbackCategories
        };
    }

    // leechPages reads Links after yielding the item, so they must be stored on the item here.
    resource.Links = ret.Links;

    return cleanContent(ret);
}
/// <summary>
/// Creates an item from a page info result. The item type is always set to the
/// <c>page</c> category-member type.
/// </summary>
/// <param name="page">Info result supplying the page id and title.</param>
/// <param name="parent">The item this page was reached from; stored as <c>ParentItem</c>.</param>
public WikipediaItem(infoResult page, WikipediaItem parent)
{
    // GetValueOrDefault() supplies the fallback id when pageid has no value.
    this.PageId = page.pageid.GetValueOrDefault();
    this.Type = categorymemberstype.page.Value;
    this.Title = page.title;
    this.ParentItem = parent;
}
/// <summary>
/// Creates an item from a category-member result, copying its page id, type and title.
/// </summary>
/// <param name="cm">Category member supplying the page id, member type and title.</param>
/// <param name="parent">The item this member was reached from; stored as <c>ParentItem</c>.</param>
public WikipediaItem(categorymembersSelect cm, WikipediaItem parent)
{
    this.PageId = cm.pageid;
    this.Type = cm.type.Value;
    this.Title = cm.title;
    this.ParentItem = parent;
}
/// <summary>
/// Projects the first page of <paramref name="page"/> into a
/// <see cref="WikipediaResourceInformation"/> for <paramref name="resource"/>.
/// </summary>
/// <remarks>
/// Content is the resource title, a line break, and the first revision value. Categories are
/// the page's non-hidden categories; when none are found, a single category named after the
/// resource's parent item is used instead. Links (article namespace only) are collected only
/// while <c>resource.Depth</c> is below <c>_maximumDepth</c>; otherwise the list is empty.
/// </remarks>
/// <param name="resource">The item being described.</param>
/// <param name="page">The page query to read from.</param>
/// <returns>
/// The extracted information, or <c>null</c> when the query yields no page or any exception
/// is raised (the exception is written to the debug output).
/// </returns>
private WikipediaResourceInformation extractResourceInformation(WikipediaItem resource, PagesSource<Page> page)
{
    try
    {
        // The projection is kept exactly as written: the query provider receives this lambda,
        // so its shape is part of the request that gets built.
        WikipediaResourceInformation info = page
            .Select(p => new WikipediaResourceInformation
            {
                Content = resource.Title + Environment.NewLine + p.revisions().Select(r => r.value).ToEnumerable().FirstOrDefault(),
                Categories = p.categories().Where(c => c.show == categoriesshow.not_hidden)
                              .Select(c => new WikipediaCategory { Name = c.title })
                              .ToList().Cast<ResourceCategory>().ToList(),
                Links = resource.Depth < _maximumDepth
                            ? p.links().Where(l => l.ns == Namespace.Article).Select(l => l.title).ToList()
                            : new List<string>()
            })
            .ToEnumerable()
            .FirstOrDefault();

        if (info == null)
        {
            return null;
        }

        bool hasCategories = info.Categories != null && info.Categories.Count > 0;
        if (!hasCategories)
        {
            // No visible category on the page: fall back to the parent item's title.
            info.Categories = new List<ResourceCategory>
            {
                new WikipediaCategory { Name = resource.ParentItem.Title }
            };
        }

        return info;
    }
    catch (Exception ex)
    {
        Debug.WriteLine("Error occurred extracting the Resource Information: {0}{1} * Resource Title: {2}", ex, Environment.NewLine, resource.Title);
        return null;
    }
}
/// <summary>
/// Lazily yields a <see cref="WikipediaItem"/> for every page in <paramref name="pagesSource"/>
/// and, after each one, the pages reached through that item's <c>Links</c>.
/// </summary>
/// <remarks>
/// <para>
/// <c>Links</c> is read only after the item has been yielded, so the links followed are
/// whatever has been assigned to the item by the time the caller asks for the next element
/// (<c>GetResourceInformation</c> assigns them).
/// </para>
/// <para>
/// Enumeration is best-effort: exceptions are written to the debug output and end the
/// enumeration that raised them. The enumerators are advanced by hand because C# does not
/// allow <c>yield return</c> inside a <c>try</c> block that has a <c>catch</c> clause.
/// </para>
/// </remarks>
/// <param name="pagesSource">The page query to enumerate.</param>
/// <param name="parent">The item recorded as parent of every page yielded from <paramref name="pagesSource"/>.</param>
/// <returns>A deferred sequence of the pages and their linked pages.</returns>
private IEnumerable<LeechedResourceBase> leechPages(PagesSource<Page> pagesSource, WikipediaItem parent)
{
    IEnumerator<infoResult> pagesEnumerator = null;
    try
    {
        pagesEnumerator = pagesSource.Select(p => p.info).ToEnumerable().GetEnumerator();
    }
    catch (Exception ex)
    {
        Debug.WriteLine("Exception encountered enumerating the pages of \"{0}\": {1}", parent.Title, ex);
    }

    if (pagesEnumerator == null)
    {
        yield break;
    }

    // 'using' releases the enumerator even when the caller stops iterating early.
    using (pagesEnumerator)
    {
        infoResult page;
        do
        {
            try
            {
                page = pagesEnumerator.MoveNext() ? pagesEnumerator.Current : null;
            }
            catch (Exception ex)
            {
                Debug.WriteLine("Exception encountered leeching the childs of \"{0}\": {1}", parent.Title, ex);

                // Stop here: keeping the previous value would yield the same page again
                // and could retry a permanently failing MoveNext forever.
                page = null;
            }

            if (page != null)
            {
                WikipediaItem pageItem = new WikipediaItem(page, parent);
                yield return pageItem;

                if (pageItem.Links != null && pageItem.Links.Count > 0)
                {
                    foreach (string link in pageItem.Links)
                    {
                        IEnumerator<LeechedResourceBase> childPagesEnumerator = null;
                        try
                        {
                            // Bounding allpages by the same title on both ends selects that exact page.
                            var childPages = _wiki.Query.allpages()
                                                  .Where(c => c.from == link && c.to == link)
                                                  .Pages;
                            childPagesEnumerator = leechPages(childPages, pageItem).GetEnumerator();
                        }
                        catch (Exception ex)
                        {
                            Debug.WriteLine("Exception encountered enumerating the link \"{2}\" of \"{0}\": {1}", pageItem.Title, ex, link);
                        }

                        if (childPagesEnumerator == null)
                        {
                            continue;
                        }

                        using (childPagesEnumerator)
                        {
                            LeechedResourceBase childPage;
                            do
                            {
                                try
                                {
                                    childPage = childPagesEnumerator.MoveNext() ? childPagesEnumerator.Current : null;
                                }
                                catch (Exception ex)
                                {
                                    Debug.WriteLine("Exception encountered leeching the link \"{2}\" of \"{0}\": {1}", pageItem.Title, ex, link);

                                    // Same reasoning as above: never reuse the stale element.
                                    childPage = null;
                                }

                                if (childPage != null)
                                {
                                    yield return childPage;
                                }
                            } while (childPage != null);
                        }
                    }
                }
            }
        } while (page != null);
    }
}
/// <summary>
/// Lazily yields the contents of the category represented by <paramref name="parent"/>:
/// first its member pages (via <c>leechPages</c>), then, recursively, the contents of each
/// subcategory that is browsable and has not been visited yet.
/// </summary>
/// <remarks>
/// <para>
/// A subcategory is visited only when <c>categoryNameBrowsable</c> accepts its title and no
/// entry of <c>browsedCategories</c> has the same page id; it is added to
/// <c>browsedCategories</c> before being descended into.
/// </para>
/// <para>
/// Enumeration is best-effort: exceptions are written to the debug output and end the
/// enumeration that raised them. The enumerators are advanced by hand because C# does not
/// allow <c>yield return</c> inside a <c>try</c> block that has a <c>catch</c> clause.
/// </para>
/// </remarks>
/// <param name="parent">The category item whose members are enumerated.</param>
/// <returns>A deferred sequence of the leeched resources found under the category.</returns>
private IEnumerable<LeechedResourceBase> leechContents(WikipediaItem parent)
{
    // ---- Pages that are direct members of the category ----
    IEnumerator<LeechedResourceBase> pagesEnumerator = null;
    try
    {
        var pages = _wiki.Query.categorymembers()
                         .Where(c => c.title == parent.Title && c.type == categorymemberstype.page)
                         .Pages;
        pagesEnumerator = leechPages(pages, parent).GetEnumerator();
    }
    catch (Exception ex)
    {
        Debug.WriteLine("Exception encountered enumerating the members of \"{0}\": {1}", parent.Title, ex);
    }

    if (pagesEnumerator != null)
    {
        // 'using' releases the enumerator even when the caller stops iterating early.
        using (pagesEnumerator)
        {
            LeechedResourceBase page;
            do
            {
                try
                {
                    page = pagesEnumerator.MoveNext() ? pagesEnumerator.Current : null;
                }
                catch (Exception ex)
                {
                    Debug.WriteLine("Exception encountered leeching the members of \"{0}\": {1}", parent.Title, ex);

                    // Stop here: keeping the previous value would yield the same page again
                    // and could retry a permanently failing MoveNext forever.
                    page = null;
                }

                if (page != null)
                {
                    yield return page;
                }
            } while (page != null);
        }
    }

    // ---- Subcategories, descended into recursively ----
    IEnumerator<categorymembersSelect> categoriesEnumerator = null;
    try
    {
        categoriesEnumerator = _wiki.Query.categorymembers()
                                    .Where(c => c.title == parent.Title && c.type == categorymemberstype.subcat)
                                    .ToEnumerable()
                                    .GetEnumerator();
    }
    catch (Exception ex)
    {
        Debug.WriteLine("Exception encountered enumerating the subcategories of \"{0}\": {1}", parent.Title, ex);
    }

    if (categoriesEnumerator != null)
    {
        using (categoriesEnumerator)
        {
            categorymembersSelect category;
            do
            {
                try
                {
                    category = categoriesEnumerator.MoveNext() ? categoriesEnumerator.Current : null;
                }
                catch (Exception ex)
                {
                    Debug.WriteLine("Exception encountered leeching the subcategories of \"{0}\": {1}", parent.Title, ex);

                    // Same reasoning as above: never reprocess the stale element.
                    category = null;
                }

                if (category != null)
                {
                    WikipediaItem categoryItem = new WikipediaItem(category, parent);

                    if (categoryNameBrowsable(categoryItem.Title)
                        && browsedCategories.All(bc => bc.PageId != categoryItem.PageId))
                    {
                        // Record the category before descending so it is not visited a second time.
                        browsedCategories.Add(categoryItem);

                        foreach (LeechedResourceBase leechedResource in leechContents(categoryItem))
                        {
                            yield return leechedResource;
                        }
                    }
                }
            } while (category != null);
        }
    }
}