/// <summary>
/// Reads every <c>div</c> with class 'EXAMPLES' from the page and writes one tab-separated
/// card line per example to <paramref name="stream"/>. Missing examples, definitions or
/// sentences are reported through <c>reportStream</c>.
/// </summary>
/// <param name="stream">Destination for the card lines (passed by ref, never reassigned here).</param>
/// <param name="document">Parsed HTML page to extract the cards from.</param>
/// <param name="link">Address of the page; only used in failure messages.</param>
/// <param name="userLabels">Extra labels appended after the generated "macmillan &lt;word&gt;" labels; may be null or empty.</param>
void ParsePage(ref CardsStream stream, HtmlDocument document, string link, string userLabels)
        {
            var examples = document.DocumentNode.SelectNodes("//div[@class='EXAMPLES']");

            if (examples == null)
            {
                reportStream.Write("Failure. Examples was not found. Link: " + link + "\n");
                return;
            }

            //NOTE(review): getText is assumed to return "" for a missing node (its result is compared with "" below) - confirm
            var word = getText(document.DocumentNode.SelectSingleNode("//span[@class='BASE']"));

            //The transcription is page-wide, so it is cleaned once instead of once per example
            var gbrTranscription = getText(document.DocumentNode.SelectSingleNode("//span[@class='PRON']")).Replace("/", "");

            //Label: dictionary name, then the word with spaces replaced by hyphens, then the user's labels.
            //IsNullOrEmpty keeps a null userLabels from leaving a trailing space in the label column.
            var labels = "macmillan " + word.Replace(' ', '-');
            if (!string.IsNullOrEmpty(userLabels))
            {
                labels += " " + userLabels;
            }

            foreach (HtmlNode example in examples)
            {
                var card = new Card();
                var parentNode = example.ParentNode;

                //Getting a structure
                var structure = example.SelectSingleNode("strong");
                if (structure == null)
                {
                    //DIV#SENSE_BODY -> DIV -> LI
                    //Walk up step by step so an example close to the document root cannot cause a NullReferenceException
                    var senseBody = parentNode.ParentNode;
                    var senseRoot = (senseBody == null) ? null : senseBody.ParentNode;
                    if (senseRoot != null)
                    {
                        structure = senseRoot.SelectSingleNode("div/h2/span[@class='BASE']");
                    }
                }

                card.Sentence = getText(example.SelectSingleNode("p[@id='EXAMPLE']"));
                card.Definition = getText(parentNode.SelectSingleNode("span[@class='DEFINITION']"));
                card.GbrTranscription = gbrTranscription;
                card.SimpleStructure = getText(structure).Replace(":", "");

                //Incomplete cards are reported but still written to the stream
                if (card.Definition == "")
                {
                    reportStream.Write("Failure. Definition was not found. Link: " + link + " Example: '" + card.Sentence + "'\n");
                }
                if (card.Sentence == "")
                {
                    reportStream.Write("Failure. Example was not found. Link: " + link + "\n");
                }

                //Column order: sentence, interpretation, word, GBR transcription, USA transcription, structure, definition, labels
                var outStr = card.Sentence + "\t" + card.Interpretation +
                   "\t" + word + "\t" + card.GbrTranscription +
                   "\t" + card.UsaTranscription + "\t" +
                   card.SimpleStructure + "\t" + card.Definition +
                   "\t" + labels + "\n";
                stream.Write(outStr);
                count_++;
            }
        }
Пример #2
0
        /// <summary>
        /// Reads every <c>span</c> with class 'x-g' from the page and writes one tab-separated
        /// card line per example to <paramref name="stream"/>. Missing examples, definitions or
        /// sentences are reported through <c>reportStream</c>.
        /// </summary>
        /// <param name="stream">Destination for the card lines (passed by ref, never reassigned here).</param>
        /// <param name="document">Parsed HTML page to extract the cards from.</param>
        /// <param name="link">Address of the page; only used in failure messages.</param>
        /// <param name="userLabels">Extra labels appended after the generated "oald8 &lt;word&gt;" labels; may be null or empty.</param>
        void ParsePage(ref CardsStream stream, HtmlDocument document, string link, string userLabels)
        {
            var examples = document.DocumentNode.SelectNodes("//span[@class='x-g']");

            if (examples == null)
            {
                reportStream.Write("Failure. Examples was not found. Link: " + link + "\n");
                return;
            }

            //Transcription: page-wide, so it is extracted once instead of once per example
            var usaTranscription = getText(document.DocumentNode.SelectSingleNode("//span[@class='y']"));
            var gbrTranscription = getText(document.DocumentNode.SelectSingleNode("//span[@class='i']"));

            //Word "name"
            //NOTE(review): getText is assumed to return "" for a missing node (its result is compared with "" below) - confirm
            var word = getText(document.DocumentNode.SelectSingleNode("//h2[@class='h']"));

            //Label: dictionary name, then the word with spaces replaced by hyphens, then the user's labels.
            //IsNullOrEmpty keeps a null userLabels from leaving a trailing space in the label column.
            var labels = "oald8 " + word.Replace(' ', '-');
            if (!string.IsNullOrEmpty(userLabels))
            {
                labels += " " + userLabels;
            }

            //Matches a " (= ...)" explanation inside an example; built once and reused for every example
            var explanation = new Regex(" \\(=.*?\\)");

            foreach (HtmlNode example in examples)
            {
                var card = new Card();
                var parentNode = example.ParentNode;
                var parentName = GetName(parentNode);

                //Getting a structure: heading(s) of the enclosing group first, the example's own 'cf' span last.
                //For any other kind of parent no structure is recorded at all.
                var ownStructure = example.SelectSingleNode("span[@class='cf']");
                HtmlNode[] structureNodes = null;

                if (parentName == "pv-g")
                {
                    structureNodes = new HtmlNode[]
                    {
                        parentNode.SelectSingleNode("h4[@class='pv']"),
                        ownStructure
                    };
                }
                else if (parentName == "n-g" && GetName(parentNode.ParentNode) == "pv-g")
                {
                    structureNodes = new HtmlNode[]
                    {
                        parentNode.ParentNode.SelectSingleNode("h4[@class='pv']"),
                        parentNode.SelectSingleNode("span[@class='vs-g']"),
                        ownStructure
                    };
                }
                else if (parentName == "n-g" || parentName == "h-g")
                {
                    structureNodes = new HtmlNode[]
                    {
                        parentNode.SelectSingleNode("span[@class='cf']"),
                        ownStructure
                    };
                }
                else if (parentName == "id-g")
                {
                    structureNodes = new HtmlNode[]
                    {
                        parentNode.SelectSingleNode("h4[@class='id']"),
                        ownStructure
                    };
                }

                if (structureNodes != null)
                {
                    foreach (HtmlNode structureNode in structureNodes)
                    {
                        if (structureNode != null)
                        {
                            card.Structure.Add(structureNode.InnerText);
                        }
                    }
                }

                //Getting a definition: for 'id-g' and 'h-g' parents look inside the 'def_block' div when there is one,
                //otherwise directly under the parent
                HtmlNode definitionHost = null;
                if (parentName == "id-g" || parentName == "h-g")
                {
                    definitionHost = parentNode.SelectSingleNode("div[@class='def_block']");
                }
                if (definitionHost == null)
                {
                    definitionHost = parentNode;
                }

                //A 'ud' span wins over a 'd' span
                var definition = definitionHost.SelectSingleNode("span[@class='ud']")
                    ?? definitionHost.SelectSingleNode("span[@class='d']");

                //An example itself: the sentence is the example text without the " (= ...)" explanation
                card.Interpretation = getText(example.SelectSingleNode("span[@class='x']"));
                card.Sentence = explanation.Replace(card.Interpretation, "");

                card.Definition = getText(definition).Replace("    ", " ");

                card.UsaTranscription = usaTranscription;
                card.GbrTranscription = gbrTranscription;

                //The interpretation column is only filled when it differs from the sentence
                if (card.Interpretation == card.Sentence)
                {
                    card.Interpretation = "";
                }

                //Incomplete cards are reported but still written to the stream
                if (card.Definition == "")
                {
                    reportStream.Write("Failure. Definition was not found. Link: " + link + " Example: '" + card.Sentence + "'\n");
                }
                if (card.Sentence == "")
                {
                    reportStream.Write("Failure. Example was not found. Link: " + link + "\n");
                }

                //Column order: sentence, interpretation, word, GBR transcription, USA transcription, structure, definition, labels
                var outStr = card.Sentence + "\t" + card.Interpretation +
                    "\t" + word + "\t" + card.GbrTranscription +
                    "\t" + card.UsaTranscription + "\t" +
                    PrintList(card.Structure) + "\t" + card.Definition +
                    "\t" + labels + "\n";
                stream.Write(outStr);
                count_++;
            }
        }