/// <summary>
/// Decides whether a page title passes the supplied word filter.
/// </summary>
/// <param name="title">Title text to test.</param>
/// <param name="filter">
/// Filter settings; this method reads <c>IgnoreWordsWithUpper</c>, <c>MinSymbols</c>,
/// <c>IgnoreSameLetters</c> and <c>IgnoreSymbols</c>.
/// </param>
/// <returns>
/// <c>false</c> as soon as any enabled rule rejects the title; <c>true</c> when no rule rejects it.
/// </returns>
private static bool WordFilter(string title, WikiWordFilter filter)
{
    // Short-circuit: the title is only scanned for upper-case characters when the rule is enabled.
    if (filter.IgnoreWordsWithUpper && title.Any(char.IsUpper))
    {
        return false;
    }

    if (title.Length < filter.MinSymbols)
    {
        return false;
    }

    // The same-letters rule only applies to titles shorter than five characters.
    // The Length > 0 guard keeps title[0] from throwing on an empty title, which can
    // reach this point when MinSymbols is zero or negative.
    if (filter.IgnoreSameLetters && title.Length > 0 && title.Length < 5)
    {
        // NOTE(review): CountWords is defined elsewhere; presumably it counts occurrences
        // of the title's first character within the title - confirm against its definition.
        if (CountWords(title, title[0].ToString()) > 3)
        {
            return false;
        }
    }

    // Reject the title if it contains any of the configured substrings.
    foreach (string sym in filter.IgnoreSymbols)
    {
        if (title.Contains(sym))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Downloads the members of a category page by page and returns those whose titles
/// pass <c>WordFilter</c>.
/// </summary>
/// <param name="category">Category identifier handed to <c>BuildCategoryUrlApi</c>.</param>
/// <param name="progress">
/// Status text updated by this method: "Downloading catalog: N" after each page that has a
/// continuation, and "finished" just before returning.
/// </param>
/// <param name="filter">Filter settings applied to every member title.</param>
/// <returns>
/// The accepted pages. If any exception occurs while fetching or reading a page, the loop
/// stops and the pages gathered up to that point are returned.
/// </returns>
public static List<WikiPage> ParseCategory(string category, ref string progress, WikiWordFilter filter)
{
    List<WikiPage> accepted = new List<WikiPage>();
    string continuationToken = "";
    int processed = 0;

    // 'run' is declared outside this method; the loop stops requesting pages once it is false.
    while (run)
    {
        try
        {
            string requestUrl = BuildCategoryUrlApi(category, continuationToken);
            JObject response = DownloadJson(requestUrl);
            List<JToken> members = response["query"]["categorymembers"].ToList<JToken>();

            foreach (JToken member in members)
            {
                string title = member["title"].ToString();
                string id = member["pageid"].ToString();

                if (!WordFilter(title, filter))
                {
                    continue;
                }

                accepted.Add(new WikiPage { title = title, id = id });
            }

            // No "continue" property in the response means there are no further pages.
            if (response.Property("continue") == null)
            {
                break;
            }

            continuationToken = response["continue"]["cmcontinue"].ToString();

            // The counter advances by a fixed 500 per fetched page.
            // NOTE(review): presumably matches the page size requested by BuildCategoryUrlApi - confirm.
            processed += 500;
            progress = "Downloading catalog: " + processed;
        }
        catch (Exception)
        {
            // Best effort: any failure ends the download and keeps what was collected so far.
            break;
        }
    }

    progress = "finished";
    return accepted;
}