Exemplo n.º 1
0
    /// <summary>
    /// Decides whether a parsed page passes the supplied filter.
    /// </summary>
    /// <remarks>
    /// The checks run in this order, and the first one that hits rejects the page:
    /// <list type="number">
    /// <item><description><c>page.morph</c> contains an entry of <c>filter.IgnoreMorphology</c>;</description></item>
    /// <item><description><c>page.value</c> contains an entry of <c>filter.IgnoreValue</c>;</description></item>
    /// <item><description>every '\n'-separated line of <c>page.value</c> contains the same entry of <c>filter.IgnoreValueText</c>;</description></item>
    /// <item><description><c>page.giponims</c> contains an entry of <c>filter.IgnoreGiponims</c>;</description></item>
    /// <item><description><c>page.giperonims</c> contains an entry of <c>filter.IgnoreGiperonims</c>.</description></item>
    /// </list>
    /// NOTE(review): whether <c>Contains</c> on morph/giponims/giperonims is a substring or an
    /// element test depends on the field types, which are declared elsewhere - confirm.
    /// </remarks>
    /// <param name="page">Parsed page content to examine.</param>
    /// <param name="filter">Ignore lists to test the page against.</param>
    /// <returns><c>true</c> if no ignore rule matched; otherwise <c>false</c>.</returns>
    public static bool PageFilter(WikiPageContent page, WikiPageFilter filter)
    {
        foreach (string bannedMorph in filter.IgnoreMorphology)
        {
            if (page.morph.Contains(bannedMorph))
            {
                return false;
            }
        }

        foreach (string bannedValue in filter.IgnoreValue)
        {
            if (page.value.Contains(bannedValue))
            {
                return false;
            }
        }

        // A text rule only rejects the page when it occurs in every single line of the value.
        string[] valueLines = page.value.Split('\n');
        foreach (string bannedText in filter.IgnoreValueText)
        {
            bool everyLineMatches = true;
            foreach (string line in valueLines)
            {
                if (!line.Contains(bannedText))
                {
                    everyLineMatches = false;
                    break;
                }
            }

            if (everyLineMatches)
            {
                return false;
            }
        }

        foreach (string bannedGiponim in filter.IgnoreGiponims)
        {
            if (page.giponims.Contains(bannedGiponim))
            {
                return false;
            }
        }

        foreach (string bannedGiperonim in filter.IgnoreGiperonims)
        {
            if (page.giperonims.Contains(bannedGiperonim))
            {
                return false;
            }
        }

        return true;
    }
Exemplo n.º 2
0
    /// <summary>
    /// Parses every page in <paramref name="pages"/> and returns those accepted by
    /// <c>PageFilter</c>, keyed by page title.
    /// </summary>
    /// <remarks>
    /// Processing is best-effort: a page whose parsing or filtering throws is skipped and the
    /// failure is written to <see cref="System.Diagnostics.Trace"/> instead of being silently
    /// discarded. When the same title occurs more than once, the first accepted occurrence is kept.
    /// The loop stops early as soon as the <c>run</c> flag is false.
    /// </remarks>
    /// <param name="pages">Pages to process; each page's <c>title</c> is passed to <c>ParsePage</c>.</param>
    /// <param name="progress">
    /// Updated after each page with "Downloading pages: done/total" and set to "finished"
    /// before returning (also when the loop was stopped early via <c>run</c>).
    /// </param>
    /// <param name="filter">Filter handed to <c>PageFilter</c> for every parsed page.</param>
    /// <returns>Dictionary mapping page title to the parsed content of each accepted page.</returns>
    public static Dictionary <string, WikiPageContent> ParsePages(List <WikiPage> pages, ref string progress, WikiPageFilter filter)
    {
        var accepted  = new Dictionary <string, WikiPageContent>();
        int processed = 0;

        foreach (var page in pages)
        {
            if (!run)
            {
                break;
            }

            try
            {
                WikiPageContent currentPage = ParsePage(page.title);

                // Explicit duplicate check: the first occurrence of a title wins, without
                // relying on Dictionary.Add throwing and the exception being swallowed.
                if (PageFilter(currentPage, filter) && !accepted.ContainsKey(page.title))
                {
                    accepted.Add(page.title, currentPage);
                }
            }
            catch (Exception ex)
            {
                // Best-effort batch: one bad page must not abort the run, but the failure
                // should stay diagnosable. Guard against a null page so logging cannot throw.
                string failedTitle = page != null ? page.title : null;
                System.Diagnostics.Trace.TraceWarning("ParsePages: skipped page '{0}': {1}", failedTitle, ex);
            }

            processed++;
            progress = String.Format("Downloading pages: {0}/{1}", processed, pages.Count);
        }

        progress = "finished";
        return accepted;
    }