Exemplo n.º 1
0
Arquivo: Page.cs Projeto: dswisher/nlp
        /// <summary>
        /// Reads through a &lt;revision&gt; element and stores the result of parsing its
        /// &lt;text&gt; child in <c>Text</c>.
        /// </summary>
        /// <param name="reader">
        /// The XML reader; it is handed to <c>ExpectStartElement("revision")</c> first, so it is
        /// presumably required to be positioned on the opening &lt;revision&gt; tag.
        /// </param>
        /// <exception cref="ParseException">
        /// The reader runs out of input before a closing &lt;/revision&gt; tag is seen.
        /// </exception>
        private void ParseRevision(XmlTextReader reader)
        {
            reader.ExpectStartElement("revision");

            // Remembered so an unclosed element can be reported with its starting line.
            var startLine = reader.LineNumber;

            while (reader.Read())
            {
                switch (reader.NodeType)
                {
                case XmlNodeType.Element:
                    // Elements not listed below are not consumed here; the loop simply
                    // keeps reading through their content.
                    switch (reader.Name)
                    {
                    case "id":
                    case "parentid":
                        // Ignore these things
                        reader.SkipElement();
                        break;

                    case "text":
                        Text = reader.ParseTextElement();
                        break;
                    }
                    break;

                case XmlNodeType.EndElement:
                    if (reader.Name == "revision")
                    {
                        // Leave the reader on the closing tag and hand control back.
                        return;
                    }
                    break;
                }
            }

            // Ran out of input without seeing </revision>: report it the same way Parse
            // reports an unclosed <page>, instead of returning as if all were well.
            throw new ParseException("The <revision> element starting on line {0} is not closed!", startLine);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Walks a &lt;mediawiki&gt; element and collects one <c>Page</c> for every
        /// &lt;page&gt; element encountered.
        /// </summary>
        /// <param name="reader">
        /// The XML reader; it is handed to <c>ExpectStartElement("mediawiki")</c> before
        /// any reading takes place.
        /// </param>
        /// <returns>
        /// The pages gathered up to the closing &lt;/mediawiki&gt; tag, or up to the end of
        /// the input if that tag never appears.
        /// </returns>
        private static List<Page> ParseMediaWiki(XmlTextReader reader)
        {
            reader.ExpectStartElement("mediawiki");

            var collected = new List<Page>();

            while (reader.Read())
            {
                var kind = reader.NodeType;

                // The closing root tag ends the scan.
                if (kind == XmlNodeType.EndElement && reader.Name == "mediawiki")
                {
                    break;
                }

                // Anything other than the start of a <page> is of no interest here.
                if (kind != XmlNodeType.Element || reader.Name != "page")
                {
                    continue;
                }

                collected.Add(Page.Parse(reader));

                // Page.Parse returns once it has read </page>; check that against the reader.
                reader.ExpectEndElement("page");
            }

            return collected;
        }
Exemplo n.º 3
0
Arquivo: Page.cs Projeto: dswisher/nlp
        /// <summary>
        /// Builds a <c>Page</c> from a &lt;page&gt; element, reading its id, namespace,
        /// title, redirect target and revision.
        /// </summary>
        /// <param name="reader">
        /// The XML reader; it is handed to <c>ExpectStartElement("page")</c> first, so it is
        /// presumably required to be positioned on the opening &lt;page&gt; tag.
        /// </param>
        /// <returns>
        /// The populated page. On return the reader is on the closing &lt;/page&gt; tag.
        /// </returns>
        /// <exception cref="ParseException">
        /// An element other than the ones handled below is encountered, or the input ends
        /// before the closing &lt;/page&gt; tag.
        /// </exception>
        public static Page Parse(XmlTextReader reader)
        {
            reader.ExpectStartElement("page");

            // Remembered so an unclosed element can be reported with its starting line.
            var startLine = reader.LineNumber;
            var page      = new Page();

            // The dump is machine-readable data, so numbers are parsed with the invariant
            // culture; the thread's current culture may use a different negative sign.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            while (reader.Read())
            {
                switch (reader.NodeType)
                {
                case XmlNodeType.Element:
                    switch (reader.Name)
                    {
                    case "id":
                        page.Id = int.Parse(reader.ParseSimpleElement(), invariant);
                        break;

                    case "ns":
                        page.Namespace = int.Parse(reader.ParseSimpleElement(), invariant);
                        break;

                    case "redirect":
                        // The redirect target is carried in the element's title attribute.
                        page.Redirect = reader.GetAttribute("title");
                        break;

                    case "revision":
                        page.ParseRevision(reader);
                        break;

                    case "restrictions":
                        // Ignore these things
                        reader.SkipElement();
                        break;

                    case "title":
                        page.Title = reader.ParseSimpleElement();
                        break;

                    default:
                        throw new ParseException(reader, "Unexpected element, '{0}', at top-level of <page>.", reader.Name);
                    }
                    break;

                case XmlNodeType.EndElement:
                    if (reader.Name == "page")
                    {
                        return page;
                    }
                    break;
                }
            }

            // Ran out of input without ever seeing </page>.
            throw new ParseException("The <page> element starting on line {0} is not closed!", startLine);
        }