Example #1
0
        /// <summary>
        /// Checks that ";" term lines and ":" definition lines are rendered as one
        /// definition list containing a term entry followed by its definition entries.
        /// </summary>
        public void WikipediaParser_List_Definition()
        {
            // Expected rendering of the single section produced from the markup below.
            const string expectedHtml =
                @"<dl>
<dt>term1</dt>
<dd>definition1</dd>
<dd>definition2</dd>
<dt>term2</dt>
<dd>definition3</dd>
<dd>definition4</dd>
</dl>
";
            const string markup =
                @"
; term1
:     definition1
:     definition2
; term2
:     definition3
:     definition4
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #2
0
        /// <summary>
        /// Handles the Click event of the btnOpenEditor control: parses the text in
        /// <c>txtResponse</c> into a movie and, on success, opens it in a modal
        /// <c>MovieForm</c>. When that dialog is confirmed, this form is closed with
        /// <see cref="DialogResult.OK"/>.
        /// </summary>
        /// <param name="sender">The source of the event.</param>
        /// <param name="e">The <see cref="System.EventArgs"/> instance containing the event data.</param>
        private void btnOpenEditor_Click(object sender, EventArgs e)
        {
            // Nothing to parse: report and bail out early.
            if (this.txtResponse.Text.Length == 0)
            {
                StaticWindows.ErrorBox("Fehler: Es ist kein Wikipedia Artikel ausgewählt!");
                return;
            }

            WikipediaParser parser = new WikipediaParser(
                Configuration.ConnectionString
                );

            Movie mov = parser.Parse(this.txtResponse.Text);

            // TODO: search for movie with title like mov.Title

            if (mov == null)
            {
                StaticWindows.ErrorBox("Fehler: Es konnte kein Film geparst werden!");
                return;
            }

            // A form shown with ShowDialog is only hidden when it closes, so it must be
            // disposed explicitly; the using block releases its window resources.
            using (MovieForm form = new MovieForm(mov))
            {
                if (form.ShowDialog(this) == DialogResult.OK)
                {
                    // close everything
                    this.DialogResult = DialogResult.OK;
                    this.Close();

                    // NOTE(review): re-asserted after Close() exactly as in the original;
                    // presumably guards against Close() altering the result - confirm.
                    this.DialogResult = DialogResult.OK;
                }
            }
        }
Example #3
0
        /// <summary>
        /// Checks that a two-level numbered list is rendered as nested ordered lists,
        /// with each sub-list placed inside its parent list item.
        /// </summary>
        public void WikipediaParser_List_Numbered_TwoLevels()
        {
            const string expectedHtml =
                @"<ol>
<li>one
<ol>
<li>1.1</li>
<li>1.2</li>
</ol>
</li>
<li>two</li>
<li>three
<ol>
<li>3.1</li>
<li>3.2</li>
</ol>
</li>
</ol>
";

            // The template uses '&' as a stand-in that is swapped for '#' before parsing
            // (presumably to keep '#' out of the first column of this source file).
            const string template =
                @"
& one
&&1.1
&&1.2
& two
& three
&&3.1
&& 3.2
";
            var markup = template.Replace('&', '#');
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #4
0
        /// <summary>
        /// Checks that a two-level bulleted list is rendered as nested unordered lists,
        /// with each sub-list placed inside its parent list item.
        /// </summary>
        public void WikipediaParser_List_Bullet_TwoLevels()
        {
            const string expectedHtml =
                @"<ul>
<li>one
<ul>
<li>1.1</li>
<li>1.2</li>
</ul>
</li>
<li>two</li>
<li>three
<ul>
<li>3.1</li>
<li>3.2</li>
</ul>
</li>
</ul>
";
            const string markup =
                @"
* one
**1.1
**1.2
* two
* three
**3.1
** 3.2
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #5
0
        /// <summary>
        /// Checks that jumping from list level one straight to level three produces an
        /// empty intermediate list item wrapping the deeper list.
        /// </summary>
        public void WikipediaParser_List_LevelJumping()
        {
            const string expectedHtml =
                @"<ol>
<li>One</li>
<li>Two
<ol>
<li>
<ol>
<li>Level Jump</li>
</ol>
</li>
</ol>
</li>
</ol>
";

            // '&' is a placeholder that is replaced with '#' before the text is parsed.
            const string template =
                @"
& One
& Two
&&& Level Jump
";
            var markup = template.Replace('&', '#');
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #6
0
        /// <summary>
        /// Checks that ":" indentation lines are rendered as nested definition lists,
        /// one nesting level per colon, including empty entries for skipped levels.
        /// </summary>
        public void WikipediaParser_List_Indentation()
        {
            const string expectedHtml =
                @"<dl>
<dd>Single indent
<dl>
<dd>Double indent
<dl>
<dd>
<dl>
<dd>
<dl>
<dd>Multiple indent</dd>
</dl>
</dd>
</dl>
</dd>
</dl>
</dd>
</dl>
</dd>
</dl>
";
            const string markup =
                @"
: Single indent
:: Double indent
::::: Multiple indent
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #7
0
        /// <summary>
        /// Pages through Bing search results for <c>_query</c> (restricted to en.wikipedia.org
        /// pages containing "dead link"), parses the links out of each result page and adds one
        /// <c>WikipediaSearchResult</c> per link to <c>_store</c> through the application dispatcher.
        /// </summary>
        /// <param name="cancellationToken">The task cancellation token; checked together with the
        /// <c>_cancellationToken</c> field before every request.</param>
        /// <returns>Always <c>true</c>; any exception is swallowed and simply ends the paging loop.</returns>
        private async Task <bool> LoadMoreItemsInternalAsync(CancellationToken cancellationToken)
        {
            string baseUrl    = "http://www.bing.com";
            // NOTE(review): _query is inserted into the URL as-is (no escaping here) - confirm callers pre-encode it.
            string requestUrl = $"{baseUrl}/search?q=\"{_query}\"+(site:en.wikipedia.org+inbody:\"dead+link\")";

            // Links seen so far; handed to ParseBingResults on every page
            // (presumably so already-seen links can be skipped - confirm).
            List <string> usedUrls = new List <string>();

            // Keep requesting pages until either token is cancelled or there is no next-page URL.
            while (!_cancellationToken.IsCancellationRequested && !cancellationToken.IsCancellationRequested && !requestUrl.IsNullOrEmpty())
            {
                try
                {
                    string response = await Downloader.DownloadTextAsync(requestUrl, note : "Requesting webpage from Bing...");

                    if (string.IsNullOrEmpty(response))
                    {
                        // Empty response: retry the same URL immediately (no delay, no retry limit).
                        // The loop condition above still honours cancellation between attempts.
                        continue;
                    }

                    var links = WikipediaParser.ParseBingResults(response, usedUrls);
                    // NOTE(review): links is enumerated here and again by Select below; if
                    // ParseBingResults is lazy and filters against usedUrls, the second pass
                    // could see a different sequence - confirm it returns a materialized list.
                    usedUrls.AddRange(links);

                    // Fetch and parse every linked wiki page concurrently; each link becomes one result
                    // whose Domains are produced by running _parser over the parsed page entries.
                    IEnumerable <WikipediaSearchResult> results = await Task.WhenAll(links.Select(async s =>
                                                                                                  new WikipediaSearchResult()
                    {
                        Source        = "Wikipedia",
                        SourceAddress = s,
                        Domains       = (await WikipediaParser.ParseWiki(s)).SelectMany(d => _parser.Invoke(d, "Wikipedia", _query))
                    }));

                    // _store is updated on the application's dispatcher thread.
                    await System.Windows.Application.Current.Dispatcher.BeginInvoke(
                        System.Windows.Threading.DispatcherPriority.Normal, new Action(() =>
                    {
                        foreach (var result in results)
                        {
                            // Only the field token is consulted here; results stop being added once it is cancelled.
                            if (!_cancellationToken.IsCancellationRequested)
                            {
                                _store.Add(result);
                            }
                        }
                    }));

                    // Advance to the next result page; a null/empty value ends the loop.
                    requestUrl = WikipediaParser.ParseNextPage(response, baseUrl);
                }
                catch (Exception)
                {
                    // Best effort: any failure (download, parsing, dispatching) silently stops paging.
                    requestUrl = null;
                }
            }

            return(true);
        }
Example #8
0
 /// <summary>
 /// Exercises <c>WikipediaParser.ProcessTemplates</c> with plain text, unbalanced braces,
 /// simple and nested templates, and the CURRENTYEAR template.
 /// </summary>
 public void WikipediaParser_ProcessTemplates()
 {
     // Each row is { expected output, input text }; rows are asserted in order.
     string[][] literalCases =
     {
         new[] { "This is a test.", "This is a test." },
         new[] { "This is a {{test.", "This is a {{test." },          // Unbalanced
         new[] { "This is a {{test{{.", "This is a {{test{{." },      // Unbalanced
         new[] { "This is a }}test.", "This is a }}test." },          // Unbalanced
         new[] { "This is a test.", "This is a test." },
         new[] { "This is a test.", "This is {{XXX {{YYY}} }}a test." },
         new[] { "This is a test.", "This is a test.{{XXX {{YYY}} }}" },
         new[] { "This is a test.", "{{XXX}}This is a test." },
         new[] { "This is a test.", "This{{XXX}} is a test." },
         new[] { "This is a test.", "This is a test.{{XXX}}" },
     };

     foreach (string[] row in literalCases)
     {
         Assert.AreEqual(row[0], WikipediaParser.ProcessTemplates(row[1]));
     }

     // CURRENTYEAR is expected to expand to the four-digit UTC year.
     string currentYearText = string.Format("This is a test: {0:yyyy}", DateTime.UtcNow);
     Assert.AreEqual(currentYearText, WikipediaParser.ProcessTemplates("This is a test: {{CURRENTYEAR}}"));

     // A known template nested inside an unknown one is removed along with its parent.
     Assert.AreEqual("This is a test.", WikipediaParser.ProcessTemplates("This is {{XXX {{CURRENTYEAR}} }}a test."));
 }
Example #9
0
        /// <summary>
        /// Checks that letters immediately following a wiki link ("[[ Help ]]ers")
        /// are folded into the rendered link text.
        /// </summary>
        public void WikipediaParser_Link_Endings()
        {
            const string expectedHtml =
                @"<p>
<a href=""http://www.wikipedia.org/wiki/Help"" target=""_blank"" rel=""nofollow"">Helpers</a>
</p>
";
            const string markup =
                @"
[[ Help ]]ers
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #10
0
        /// <summary>
        /// Checks that a "File:" wiki link is dropped from the output, leaving only
        /// the surrounding text.
        /// </summary>
        public void WikipediaParser_Link_DiscardFile()
        {
            const string expectedHtml =
                @"<p>
This is a test.
</p>
";
            const string markup =
                @"
[[ File:Test Link ]] This is a test.
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #11
0
        /// <summary>
        /// Checks that a wiki link with "|" alternate text links to the page name
        /// while displaying the alternate text.
        /// </summary>
        public void WikipediaParser_Link_AltText()
        {
            const string expectedHtml =
                @"<p>
<a href=""http://www.wikipedia.org/wiki/Test_Link"" target=""_blank"" rel=""nofollow"">Hello World!</a>
</p>
";
            const string markup =
                @"
[[ Test Link | Hello World! ]]
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #12
0
        /// <summary>
        /// Checks that a bare https URL in running text is turned into an anchor
        /// whose text is the URL itself.
        /// </summary>
        public void WikipediaParser_Link_Https()
        {
            const string expectedHtml =
                @"<p>
Link: <a href=""https://www.lilltek.com"" target=""_blank"" rel=""nofollow"">https://www.lilltek.com</a>
</p>
";
            const string markup =
                @"
Link: https://www.lilltek.com
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #13
0
        /// <summary>
        /// Checks that a single line of plain text is rendered as one paragraph.
        /// </summary>
        public void WikipediaParser_Paragraph_SingleLine()
        {
            const string expectedHtml =
                @"<p>
This is a test of the emergency broadcasting system.
</p>
";
            const string markup =
                @"
This is a test of the emergency broadcasting system.
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #14
0
        /// <summary>
        /// Checks that empty parentheses, with or without a space inside, are removed
        /// from the rendered text.
        /// </summary>
        public void WikipediaParser_Artifact_Removal()
        {
            const string expectedHtml =
                @"<p>
before-after
before-after
</p>
";
            const string markup =
                @"
before()-after
before( )-after
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #15
0
        /// <summary>
        /// Parses the embedded "Seattle.txt" and "Lynnwood_Washington.txt" resources, checks the
        /// source URI reported for each and renders each as an HTML page under
        /// C:\Temp\WikipediaTest for manual inspection.
        /// </summary>
        public void WikipediaParser_Wikipedia_Test()
        {
            const string outputFolder = @"C:\Temp\WikipediaTest";

            // Each row is { resource file name, expected source URI }.
            string[][] cases =
            {
                new[] { @"Seattle.txt", "http://www.wikipedia.org/wiki/Seattle" },
                new[] { @"Lynnwood_Washington.txt", "http://www.wikipedia.org/wiki/Lynnwood,_Washington" },
            };

            // Make sure the output folder exists so the test does not depend on a
            // manually prepared machine; this is a no-op when the folder is already there.
            Directory.CreateDirectory(outputFolder);

            foreach (string[] testCase in cases)
            {
                var inputFile  = testCase[0];
                var outputPath = string.Format(@"{0}\{1}.htm", outputFolder, Path.GetFileNameWithoutExtension(inputFile));
                var parser     = new WikipediaParser(ReadResourceText(inputFile));

                Assert.AreEqual(testCase[1], parser.SourceUri);

                parser.RenderAsHtmlPage(outputPath);
            }
        }
Example #16
0
        /// <summary>
        /// Checks that a "----" line is rendered as a horizontal rule inside the
        /// current paragraph.
        /// </summary>
        public void WikipediaParser_HRTag()
        {
            const string expectedHtml =
                @"<p>
Test
<hr />
</p>
";
            const string markup =
                @"
Test
----
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #17
0
        /// <summary>
        /// Checks that a wiki link carrying both a "#anchor" and "|" alternate text,
        /// placed inside page XML, keeps the anchor in the URL and shows the alternate text.
        /// </summary>
        public void WikipediaParser_Link_PageLinkAnchorAltText()
        {
            const string expectedHtml =
                @"<p>
<a href=""http://www.wikipedia.org/wiki/Another_Page#anchor"" target=""_blank"" rel=""nofollow"">My Page</a>
</p>
";
            const string pageXml =
                @"
<page><title>Test Page</title><text>
[[ Another Page#anchor | My Page ]]
</text></page>
";
            WikipediaParser wiki = new WikipediaParser(pageXml);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #18
0
        /// <summary>
        /// Checks a numbered list that mixes gradual nesting, a return to the top
        /// level and a level jump, all rendered as nested ordered lists.
        /// </summary>
        public void WikipediaParser_List_Complex()
        {
            const string expectedHtml =
                @"<ol>
<li>Start each line</li>
<li>with a number sign.
<ol>
<li>More number signs gives deeper
<ol>
<li>and deeper</li>
<li>levels.</li>
</ol>
</li>
</ol>
</li>
<li>Line breaks don't break levels.
<ol>
<li>
<ol>
<li>But jumping levels creates empty space.</li>
</ol>
</li>
</ol>
</li>
<li>Blank lines end the list</li>
</ol>
";

            // '&' is a placeholder that is replaced with '#' before the text is parsed.
            const string template =
                @"
& Start each line
& with a number sign.
&& More number signs gives deeper
&&& and deeper
&&& levels.
& Line breaks don't break levels.
&&& But jumping levels creates empty space.
& Blank lines end the list
";
            var markup = template.Replace('&', '#');
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #19
0
        /// <summary>
        /// Checks that a flat bulleted list is rendered as a single unordered list.
        /// </summary>
        public void WikipediaParser_List_Bullet_OneLevel()
        {
            const string expectedHtml =
                @"<ul>
<li>one</li>
<li>two</li>
<li>three</li>
</ul>
";
            const string markup =
                @"
* one
* two
* three
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #20
0
        /// <summary>
        /// Checks that wiki links inside bullet items are rendered as anchors within
        /// the corresponding list items.
        /// </summary>
        public void WikipediaParser_List_BulletsWithLinks()
        {
            const string expectedHtml =
                @"<ul>
<li><a href=""http://www.wikipedia.org/wiki/Link1"" target=""_blank"" rel=""nofollow"">Link1</a></li>
<li><a href=""http://www.wikipedia.org/wiki/Link2"" target=""_blank"" rel=""nofollow"">Link2</a></li>
<li><a href=""http://www.wikipedia.org/wiki/Link3"" target=""_blank"" rel=""nofollow"">Link3</a></li>
</ul>
";
            const string markup =
                @"
* [[ Link1 ]]
* [[ Link2 ]]
* [[ Link3 ]]
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #21
0
        /// <summary>
        /// Checks that lines starting with a single space are rendered as one
        /// preformatted block with the leading space removed.
        /// </summary>
        public void WikipediaParser_Preformatted()
        {
            const string expectedHtml =
                @"<pre>
This is a test of the emergency broadcasting system.
This is only a test.  In the event of a real emergency
we'd be pretty much screwed right now.
</pre>
";

            // Every content line below intentionally begins with one space.
            const string markup =
                @"
 This is a test of the emergency broadcasting system.
 This is only a test.  In the event of a real emergency
 we'd be pretty much screwed right now.
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #22
0
        /// <summary>
        /// Checks that several wiki links on consecutive lines are each rendered as
        /// an anchor, with spaces in page names replaced by underscores in the URL.
        /// </summary>
        public void WikipediaParser_Link_MultipleWiki()
        {
            const string expectedHtml =
                @"<p>
Link1: <a href=""http://www.wikipedia.org/wiki/Test_Link_1"" target=""_blank"" rel=""nofollow"">Test Link 1</a>
Link2: <a href=""http://www.wikipedia.org/wiki/Test_Link_2"" target=""_blank"" rel=""nofollow"">Test Link 2</a>
Link3: <a href=""http://www.wikipedia.org/wiki/Test_Link_3"" target=""_blank"" rel=""nofollow"">Test Link 3</a>
</p>
";
            const string markup =
                @"
Link1: [[ Test Link 1 ]]
Link2: [[ Test Link 2 ]]
Link3: [[ Test Link 3 ]]
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #23
0
        /// <summary>
        /// Checks that an empty wiki link ("[[ ]]") is removed from the output while
        /// the surrounding text, including the space before it, is kept.
        /// </summary>
        public void WikipediaParser_Link_Empty()
        {
            // Note: the expected "Link: " line ends with a trailing space.
            const string expectedHtml =
                @"<p>
Hello
Link: 
World
</p>
";
            const string markup =
                @"
Hello
Link: [[ ]]
World
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #24
0
        /// <summary>
        /// Checks that several bare http/https URLs, including one with a query
        /// string, are each rendered as an anchor whose text is the URL.
        /// </summary>
        public void WikipediaParser_Link_MultipleHttp()
        {
            const string expectedHtml =
                @"<p>
Link1: <a href=""http://www.lilltek.com"" target=""_blank"" rel=""nofollow"">http://www.lilltek.com</a>
Link2: <a href=""http://www.google.com/test.aspx?hello=world"" target=""_blank"" rel=""nofollow"">http://www.google.com/test.aspx?hello=world</a>
Link3: <a href=""https://microsoft.com/"" target=""_blank"" rel=""nofollow"">https://microsoft.com/</a>
</p>
";
            const string markup =
                @"
Link1: http://www.lilltek.com
Link2: http://www.google.com/test.aspx?hello=world
Link3: https://microsoft.com/
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #25
0
        /// <summary>
        /// Checks that <c>WikipediaParser.StripXmlAndRedirect</c> returns only the article
        /// text for plain input, page XML (single- and multi-line, with and without
        /// attributes on the text element) and input starting with a redirect line.
        /// </summary>
        public void WikipediaParser_StripXml()
        {
            const string expected = "Hello World!";

            // Multi-line page XML inputs; their results are trimmed before comparison.
            const string multiLinePage =
                @"<page>
<text>
Hello World!
</text>
</page>";
            const string multiLinePageWithAttribute =
                @"<page>
<text foo=""bar"">
Hello World!
</text>
</page>";

            // Single-line inputs.
            Assert.AreEqual(expected, WikipediaParser.StripXmlAndRedirect("<page><text>Hello World!</text></page>").Trim());
            Assert.AreEqual(expected, WikipediaParser.StripXmlAndRedirect("Hello World!"));

            // Redirect lines are dropped regardless of case.
            Assert.AreEqual(expected, WikipediaParser.StripXmlAndRedirect("#REDIRECT [xxx]\nHello World!"));
            Assert.AreEqual(expected, WikipediaParser.StripXmlAndRedirect("#redirect [xxx]\nHello World!"));
            Assert.AreEqual(expected, WikipediaParser.StripXmlAndRedirect("<page><text>#REDIRECT [xxx]\nHello World!</text></page>").Trim());

            Assert.AreEqual(expected, WikipediaParser.StripXmlAndRedirect(multiLinePage).Trim());

            Assert.AreEqual(expected, WikipediaParser.StripXmlAndRedirect(multiLinePageWithAttribute).Trim());
        }
Example #26
0
        /// <summary>
        /// Checks single-bracket external links: links with a caption show the caption,
        /// links without one are rendered as bracketed numbers counting up from 1.
        /// </summary>
        public void WikipediaParser_Link_External()
        {
            const string expectedHtml =
                @"<p>
Link1: <a href=""http://www.microsoft.com/"" target=""_blank"" rel=""nofollow"">Microsoft</a>
Link2: [<a href=""http://google.com"" target=""_blank"" rel=""nofollow"">1</a>]
Link3: <a href=""https://www.lilltek.com/"" target=""_blank"" rel=""nofollow"">LillTek</a>
Link4: [<a href=""https://forbes.com"" target=""_blank"" rel=""nofollow"">2</a>]
</p>
";
            const string markup =
                @"
Link1: [ http://www.microsoft.com/ Microsoft ]
Link2: [ http://google.com ]
Link3: [ https://www.lilltek.com/ LillTek ]
Link4: [ https://forbes.com ]
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #27
0
        /// <summary>
        /// Parses a full page XML element and checks the level and title of the first
        /// three sections: the page title at level 1, then the two "==" headings at level 2.
        /// </summary>
        public void WikipediaParser_ParsePageXml()
        {
            const string pageXml =
                @"  <page>
    <title>Lynnwood, Washington</title>
    <id>138213</id>
    <revision>
      <id>421566368</id>
      <timestamp>2011-03-30T23:40:34Z</timestamp>
      <contributor>
        <username>L5gcw0b</username>
        <id>14294239</id>
      </contributor>
      <minor />
      <comment>/* Neighborhood parks */</comment>
      <text xml:space=""preserve"">{{redirect|Lynnwood}}
Hello World!

==History==
This is the history

==Geography==
This is the geography.
      </text>
    </revision>
  </page>";

            // Expected values, indexed by section position.
            int[]    expectedLevels = { 1, 2, 2 };
            string[] expectedTitles = { "Lynnwood, Washington", "History", "Geography" };

            WikipediaParser wiki = new WikipediaParser(pageXml);

            for (int index = 0; index < expectedTitles.Length; index++)
            {
                Assert.AreEqual(expectedLevels[index], wiki.Sections[index].Level);
                Assert.AreEqual(expectedTitles[index], wiki.Sections[index].Title);
            }
        }
Example #28
0
        /// <summary>
        /// Checks that a wiki link containing other links nested inside it is removed
        /// entirely, while a following ordinary link is still rendered.
        /// </summary>
        public void WikipediaParser_Link_Nested()
        {
            // Note: the expected "Link1: " line ends with a trailing space.
            const string expectedHtml =
                @"<p>
Hello
Link1: 
Link2: <a href=""http://www.wikipedia.org/wiki/Test_Link_2"" target=""_blank"" rel=""nofollow"">Test Link 2</a>
World
</p>
";
            const string markup =
                @"
Hello
Link1: [[ Test Link 1 | [[ Nested Link ]] [[ Another Nested ]] ]]
Link2: [[ Test Link 2 ]]
World
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #29
0
        /// <summary>
        /// Checks that wiki links and bare http/https URLs interleaved on consecutive
        /// lines are each rendered as the appropriate anchor.
        /// </summary>
        public void WikipediaParser_Link_MultipleMixed()
        {
            const string expectedHtml =
                @"<p>
Link1: <a href=""http://www.wikipedia.org/wiki/Test_Link_1"" target=""_blank"" rel=""nofollow"">Test Link 1</a>
Link2: <a href=""http://www.google.com/test.aspx?hello=world"" target=""_blank"" rel=""nofollow"">http://www.google.com/test.aspx?hello=world</a>
Link3: <a href=""http://www.wikipedia.org/wiki/Test_Link_3"" target=""_blank"" rel=""nofollow"">Test Link 3</a>
Link4: <a href=""https://microsoft.com/"" target=""_blank"" rel=""nofollow"">https://microsoft.com/</a>
Link5: <a href=""http://www.wikipedia.org/wiki/Test_Link_5"" target=""_blank"" rel=""nofollow"">Test Link 5</a>
</p>
";
            const string markup =
                @"
Link1: [[ Test Link 1 ]]
Link2: http://www.google.com/test.aspx?hello=world
Link3: [[ Test Link 3 ]]
Link4: https://microsoft.com/
Link5: [[ Test Link 5 ]]
";
            WikipediaParser wiki = new WikipediaParser(markup);

            Assert.AreEqual(1, wiki.Sections.Count);

            var renderedHtml = wiki.Sections[0].Html;
            Assert.AreEqual(expectedHtml, renderedHtml);
        }
Example #30
0
        /// <summary>
        /// Checks that <c>WikipediaParser.StripMagicWords</c> removes the __NOTOC__,
        /// __NONEWSECTIONLINK__ and __NOINDEX__ markers without touching the rest of the text.
        /// </summary>
        public void WikipediaParser_StripMagicWords()
        {
            const string expected = "This is a test of the emergency broadcasting system.";
            const string input    = "This is a test__NOTOC__ of the__NONEWSECTIONLINK__ emergency__NOINDEX__ broadcasting system.";

            var stripped = WikipediaParser.StripMagicWords(input);

            Assert.AreEqual(expected, stripped);
        }