// Definition-list markup (";" term lines and ":" definition lines) is expected
// to produce a single section whose HTML is a dl element with dt/dd children.
public void WikipediaParser_List_Definition()
{
    const string expectedHtml = @"<dl> <dt>term1</dt> <dd>definition1</dd> <dd>definition2</dd> <dt>term2</dt> <dd>definition3</dd> <dd>definition4</dd> </dl> ";
    const string markup = @" ; term1 : definition1 : definition2 ; term2 : definition3 : definition4 ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
/// <summary>
/// Handles the Click event of the btnOpenEditor control: parses the text in
/// <c>txtResponse</c> into a <c>Movie</c> and opens it in a modal <c>MovieForm</c>.
/// When the editor is confirmed with OK, this form is closed with
/// <see cref="DialogResult.OK"/> as well.
/// </summary>
/// <param name="sender">The source of the event.</param>
/// <param name="e">The <see cref="System.EventArgs"/> instance containing the event data.</param>
private void btnOpenEditor_Click(object sender, EventArgs e)
{
    // Nothing to parse: report it and leave.
    if (this.txtResponse.Text.Length == 0)
    {
        StaticWindows.ErrorBox("Fehler: Es ist kein Wikipedia Artikel ausgewählt!");
        return;
    }

    WikipediaParser parser = new WikipediaParser(Configuration.ConnectionString);
    Movie mov = parser.Parse(this.txtResponse.Text);

    // The parser signals "no movie found" with null.
    if (mov == null)
    {
        StaticWindows.ErrorBox("Fehler: Es konnte kein Film geparst werden!");
        return;
    }

    // A form shown with ShowDialog is only hidden when it closes, not disposed,
    // so it has to be disposed explicitly to release its window resources.
    using (MovieForm form = new MovieForm(mov))
    {
        if (form.ShowDialog(this) == DialogResult.OK)
        {
            // Close everything.
            this.DialogResult = DialogResult.OK;
            this.Close();

            // NOTE(review): the result is assigned again after Close(); this looks
            // redundant but is kept in case a closing handler resets it - confirm.
            this.DialogResult = DialogResult.OK;
        }
    }
}
// Two-level numbered list: nested items are expected to be rendered as an
// ol element inside the parent li. The input is written with '&' as a
// placeholder that is swapped for '#' before parsing.
public void WikipediaParser_List_Numbered_TwoLevels()
{
    const string expectedHtml = @"<ol> <li>one <ol> <li>1.1</li> <li>1.2</li> </ol> </li> <li>two</li> <li>three <ol> <li>3.1</li> <li>3.2</li> </ol> </li> </ol> ";
    const string template = @" & one &&1.1 &&1.2 & two & three &&3.1 && 3.2 ";

    var markup = template.Replace('&', '#');
    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Two-level bullet list: "**" items are expected to be rendered as a ul
// element nested inside the preceding "*" item's li.
public void WikipediaParser_List_Bullet_TwoLevels()
{
    const string expectedHtml = @"<ul> <li>one <ul> <li>1.1</li> <li>1.2</li> </ul> </li> <li>two</li> <li>three <ul> <li>3.1</li> <li>3.2</li> </ul> </li> </ul> ";
    const string markup = @" * one **1.1 **1.2 * two * three **3.1 ** 3.2 ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Jumping from list level 1 straight to level 3 is expected to produce an
// intermediate li that contains only the nested list. The input uses '&' as
// a placeholder that is swapped for '#' before parsing.
public void WikipediaParser_List_LevelJumping()
{
    const string expectedHtml = @"<ol> <li>One</li> <li>Two <ol> <li> <ol> <li>Level Jump</li> </ol> </li> </ol> </li> </ol> ";
    const string template = @" & One & Two &&& Level Jump ";

    var markup = template.Replace('&', '#');
    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Colon-indented lines are expected to be rendered as nested dl/dd elements,
// one nesting level per colon, with empty dd wrappers for skipped levels.
public void WikipediaParser_List_Indentation()
{
    const string expectedHtml = @"<dl> <dd>Single indent <dl> <dd>Double indent <dl> <dd> <dl> <dd> <dl> <dd>Multiple indent</dd> </dl> </dd> </dl> </dd> </dl> </dd> </dl> </dd> </dl> ";
    const string markup = @" : Single indent :: Double indent ::::: Multiple indent ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
/// <summary>
/// Pages through Bing results for the current query (restricted to
/// en.wikipedia.org pages containing "dead link"), parses every newly found
/// Wikipedia link and adds one <see cref="WikipediaSearchResult"/> per link
/// to the store on the application dispatcher.
/// </summary>
/// <param name="cancellationToken">
/// The task cancellation token; checked together with the instance-level
/// token before each page request.
/// </param>
/// <returns>Always <c>true</c> once the paging loop has ended.</returns>
private async Task<bool> LoadMoreItemsInternalAsync(CancellationToken cancellationToken)
{
    string bingRoot = "http://www.bing.com";
    string nextPageUrl = $"{bingRoot}/search?q=\"{_query}\"+(site:en.wikipedia.org+inbody:\"dead+link\")";
    List<string> visitedUrls = new List<string>();

    // Builds the search result for a single Wikipedia link.
    Func<string, Task<WikipediaSearchResult>> toSearchResult = async s => new WikipediaSearchResult()
    {
        Source = "Wikipedia",
        SourceAddress = s,
        Domains = (await WikipediaParser.ParseWiki(s)).SelectMany(d => _parser.Invoke(d, "Wikipedia", _query))
    };

    while (!(_cancellationToken.IsCancellationRequested
             || cancellationToken.IsCancellationRequested
             || nextPageUrl.IsNullOrEmpty()))
    {
        try
        {
            string pageHtml = await Downloader.DownloadTextAsync(nextPageUrl, note: "Requesting webpage from Bing...");

            if (string.IsNullOrEmpty(pageHtml))
            {
                // Empty response: the URL is left unchanged, so the same page
                // is requested again on the next iteration.
                continue;
            }

            var freshLinks = WikipediaParser.ParseBingResults(pageHtml, visitedUrls);
            visitedUrls.AddRange(freshLinks);

            IEnumerable<WikipediaSearchResult> results = await Task.WhenAll(freshLinks.Select(toSearchResult));

            // Store updates are marshalled to the application dispatcher.
            Action publishResults = () =>
            {
                foreach (var result in results)
                {
                    if (_cancellationToken.IsCancellationRequested)
                    {
                        continue;
                    }

                    _store.Add(result);
                }
            };

            await System.Windows.Application.Current.Dispatcher.BeginInvoke(
                System.Windows.Threading.DispatcherPriority.Normal,
                publishResults);

            nextPageUrl = WikipediaParser.ParseNextPage(pageHtml, bingRoot);
        }
        catch (Exception)
        {
            // Any failure ends paging: a null URL fails the loop condition.
            nextPageUrl = null;
        }
    }

    return true;
}
// Exercises WikipediaParser.ProcessTemplates: plain text and unbalanced braces
// are expected to pass through unchanged, balanced (possibly nested) templates
// are expected to be removed, and {{CURRENTYEAR}} is expected to expand to the
// current UTC year when it is not nested inside another template.
public void WikipediaParser_ProcessTemplates()
{
    // Each entry is { expected output, input }.
    string[][] literalCases =
    {
        new[] { "This is a test.", "This is a test." },
        new[] { "This is a {{test.", "This is a {{test." },         // Unbalanced
        new[] { "This is a {{test{{.", "This is a {{test{{." },     // Unbalanced
        new[] { "This is a }}test.", "This is a }}test." },         // Unbalanced
        new[] { "This is a test.", "This is a test." },
        new[] { "This is a test.", "This is {{XXX {{YYY}} }}a test." },
        new[] { "This is a test.", "This is a test.{{XXX {{YYY}} }}" },
        new[] { "This is a test.", "{{XXX}}This is a test." },
        new[] { "This is a test.", "This{{XXX}} is a test." },
        new[] { "This is a test.", "This is a test.{{XXX}}" },
    };

    foreach (string[] pair in literalCases)
    {
        Assert.AreEqual(pair[0], WikipediaParser.ProcessTemplates(pair[1]));
    }

    // The expected value depends on the clock, so it is built at assertion time.
    Assert.AreEqual(
        string.Format("This is a test: {0:yyyy}", DateTime.UtcNow),
        WikipediaParser.ProcessTemplates("This is a test: {{CURRENTYEAR}}"));

    Assert.AreEqual(
        "This is a test.",
        WikipediaParser.ProcessTemplates("This is {{XXX {{CURRENTYEAR}} }}a test."));
}
// Text that directly follows a wiki link ("ers" after [[ Help ]]) is expected
// to become part of the link text ("Helpers").
public void WikipediaParser_Link_Endings()
{
    const string markup = @" [[ Help ]]ers ";
    const string expectedHtml = @"<p> <a href=""http://www.wikipedia.org/wiki/Help"" target=""_blank"" rel=""nofollow"">Helpers</a> </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// A [[ File:... ]] link is expected to be dropped from the rendered output,
// leaving only the surrounding text.
public void WikipediaParser_Link_DiscardFile()
{
    const string markup = @" [[ File:Test Link ]] This is a test. ";
    const string expectedHtml = @"<p> This is a test. </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// A wiki link with alternate text ([[ target | text ]]) is expected to link to
// the target page while displaying the alternate text.
public void WikipediaParser_Link_AltText()
{
    const string markup = @" [[ Test Link | Hello World! ]] ";
    const string expectedHtml = @"<p> <a href=""http://www.wikipedia.org/wiki/Test_Link"" target=""_blank"" rel=""nofollow"">Hello World!</a> </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// A bare https:// URL in the text is expected to be turned into an anchor
// whose href and text are both the URL.
public void WikipediaParser_Link_Https()
{
    const string markup = @" Link: https://www.lilltek.com ";
    const string expectedHtml = @"<p> Link: <a href=""https://www.lilltek.com"" target=""_blank"" rel=""nofollow"">https://www.lilltek.com</a> </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// A single line of plain text is expected to be rendered as one p element
// in a single section.
public void WikipediaParser_Paragraph_SingleLine()
{
    const string markup = @" This is a test of the emergency broadcasting system. ";
    const string expectedHtml = @"<p> This is a test of the emergency broadcasting system. </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Empty parentheses, with or without a space inside, are expected to be
// removed from the rendered text.
public void WikipediaParser_Artifact_Removal()
{
    const string markup = @" before()-after before( )-after ";
    const string expectedHtml = @"<p> before-after before-after </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Parses two embedded article resources, checks the source URI derived for
// each one, and renders each article to an .htm file under
// C:\Temp\WikipediaTest named after the resource.
public void WikipediaParser_Wikipedia_Test()
{
    // Parallel arrays: resource file name and the SourceUri expected for it.
    string[] resourceNames = { @"Seattle.txt", @"Lynnwood_Washington.txt" };
    string[] expectedUris =
    {
        "http://www.wikipedia.org/wiki/Seattle",
        "http://www.wikipedia.org/wiki/Lynnwood,_Washington",
    };

    for (int i = 0; i < resourceNames.Length; i++)
    {
        string resourceName = resourceNames[i];
        string htmlPath = string.Format(@"C:\Temp\WikipediaTest\{0}.htm", Path.GetFileNameWithoutExtension(resourceName));

        WikipediaParser article = new WikipediaParser(ReadResourceText(resourceName));

        Assert.AreEqual(expectedUris[i], article.SourceUri);
        article.RenderAsHtmlPage(htmlPath);
    }
}
// A "----" line is expected to be rendered as an hr element.
public void WikipediaParser_HRTag()
{
    const string markup = @" Test ---- ";
    const string expectedHtml = @"<p> Test <hr /> </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Inside a page/title/text XML wrapper, a wiki link with both an anchor and
// alternate text is expected to keep the "#anchor" in the href and display
// the alternate text.
public void WikipediaParser_Link_PageLinkAnchorAltText()
{
    const string pageXml = @" <page><title>Test Page</title><text> [[ Another Page#anchor | My Page ]] </text></page> ";
    const string expectedHtml = @"<p> <a href=""http://www.wikipedia.org/wiki/Another_Page#anchor"" target=""_blank"" rel=""nofollow"">My Page</a> </p> ";

    WikipediaParser wiki = new WikipediaParser(pageXml);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Numbered list mixing deeper levels, a return to level 1 and a level jump;
// the expected HTML nests ol elements accordingly. The input uses '&' as a
// placeholder that is swapped for '#' before parsing.
public void WikipediaParser_List_Complex()
{
    const string expectedHtml = @"<ol> <li>Start each line</li> <li>with a number sign. <ol> <li>More number signs gives deeper <ol> <li>and deeper</li> <li>levels.</li> </ol> </li> </ol> </li> <li>Line breaks don't break levels. <ol> <li> <ol> <li>But jumping levels creates empty space.</li> </ol> </li> </ol> </li> <li>Blank lines end the list</li> </ol> ";
    const string template = @" & Start each line & with a number sign. && More number signs gives deeper &&& and deeper &&& levels. & Line breaks don't break levels. &&& But jumping levels creates empty space. & Blank lines end the list ";

    var markup = template.Replace('&', '#');
    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// A flat "*" bullet list is expected to be rendered as a ul element with one
// li per item.
public void WikipediaParser_List_Bullet_OneLevel()
{
    const string expectedHtml = @"<ul> <li>one</li> <li>two</li> <li>three</li> </ul> ";
    const string markup = @" * one * two * three ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Bullet items that consist of a wiki link are expected to be rendered as li
// elements each containing the corresponding anchor.
public void WikipediaParser_List_BulletsWithLinks()
{
    const string expectedHtml = @"<ul> <li><a href=""http://www.wikipedia.org/wiki/Link1"" target=""_blank"" rel=""nofollow"">Link1</a></li> <li><a href=""http://www.wikipedia.org/wiki/Link2"" target=""_blank"" rel=""nofollow"">Link2</a></li> <li><a href=""http://www.wikipedia.org/wiki/Link3"" target=""_blank"" rel=""nofollow"">Link3</a></li> </ul> ";
    const string markup = @" * [[ Link1 ]] * [[ Link2 ]] * [[ Link3 ]] ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// The input text is expected to be rendered inside a single pre element.
public void WikipediaParser_Preformatted()
{
    const string markup = @" This is a test of the emergency broadcasting system. This is only a test. In the event of a real emergency we'd be pretty much screwed right now. ";
    const string expectedHtml = @"<pre> This is a test of the emergency broadcasting system. This is only a test. In the event of a real emergency we'd be pretty much screwed right now. </pre> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Several wiki links in the same text are each expected to be converted to
// their own anchor, with spaces in the target replaced by underscores.
public void WikipediaParser_Link_MultipleWiki()
{
    const string markup = @" Link1: [[ Test Link 1 ]] Link2: [[ Test Link 2 ]] Link3: [[ Test Link 3 ]] ";
    const string expectedHtml = @"<p> Link1: <a href=""http://www.wikipedia.org/wiki/Test_Link_1"" target=""_blank"" rel=""nofollow"">Test Link 1</a> Link2: <a href=""http://www.wikipedia.org/wiki/Test_Link_2"" target=""_blank"" rel=""nofollow"">Test Link 2</a> Link3: <a href=""http://www.wikipedia.org/wiki/Test_Link_3"" target=""_blank"" rel=""nofollow"">Test Link 3</a> </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// An empty wiki link ([[ ]]) is expected to be removed from the output.
public void WikipediaParser_Link_Empty()
{
    const string markup = @" Hello Link: [[ ]] World ";
    const string expectedHtml = @"<p> Hello Link: World </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Several bare http:// and https:// URLs in the same text are each expected
// to be converted to an anchor whose href and text are the URL.
public void WikipediaParser_Link_MultipleHttp()
{
    const string markup = @" Link1: http://www.lilltek.com Link2: http://www.google.com/test.aspx?hello=world Link3: https://microsoft.com/ ";
    const string expectedHtml = @"<p> Link1: <a href=""http://www.lilltek.com"" target=""_blank"" rel=""nofollow"">http://www.lilltek.com</a> Link2: <a href=""http://www.google.com/test.aspx?hello=world"" target=""_blank"" rel=""nofollow"">http://www.google.com/test.aspx?hello=world</a> Link3: <a href=""https://microsoft.com/"" target=""_blank"" rel=""nofollow"">https://microsoft.com/</a> </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Exercises WikipediaParser.StripXmlAndRedirect: the page/text XML wrapper and
// a leading #REDIRECT line (either case) are expected to be removed, leaving
// only the article text. Results for XML-wrapped inputs are trimmed before
// comparison.
public void WikipediaParser_StripXml()
{
    const string expected = "Hello World!";

    var fromInlineXml = WikipediaParser.StripXmlAndRedirect("<page><text>Hello World!</text></page>");
    Assert.AreEqual(expected, fromInlineXml.Trim());

    // Inputs without an XML wrapper: compared without trimming.
    string[] untrimmedInputs =
    {
        "Hello World!",
        "#REDIRECT [xxx]\nHello World!",
        "#redirect [xxx]\nHello World!",
    };

    foreach (string input in untrimmedInputs)
    {
        Assert.AreEqual(expected, WikipediaParser.StripXmlAndRedirect(input));
    }

    // XML-wrapped inputs: surrounding whitespace is trimmed before comparing.
    string[] trimmedInputs =
    {
        "<page><text>#REDIRECT [xxx]\nHello World!</text></page>",
        @"<page> <text> Hello World! </text> </page>",
        @"<page> <text foo=""bar""> Hello World! </text> </page>",
    };

    foreach (string input in trimmedInputs)
    {
        Assert.AreEqual(expected, WikipediaParser.StripXmlAndRedirect(input).Trim());
    }
}
// Bracketed external links: with a label, the label is expected to become the
// anchor text; without one, the expected output is a bracketed running number
// ([1], [2], ...) linking to the URL.
public void WikipediaParser_Link_External()
{
    const string markup = @" Link1: [ http://www.microsoft.com/ Microsoft ] Link2: [ http://google.com ] Link3: [ https://www.lilltek.com/ LillTek ] Link4: [ https://forbes.com ] ";
    const string expectedHtml = @"<p> Link1: <a href=""http://www.microsoft.com/"" target=""_blank"" rel=""nofollow"">Microsoft</a> Link2: [<a href=""http://google.com"" target=""_blank"" rel=""nofollow"">1</a>] Link3: <a href=""https://www.lilltek.com/"" target=""_blank"" rel=""nofollow"">LillTek</a> Link4: [<a href=""https://forbes.com"" target=""_blank"" rel=""nofollow"">2</a>] </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Parses a full page XML export and checks the level and title of the first
// three sections: the page title at level 1, followed by the two "==" headings
// at level 2.
public void WikipediaParser_ParsePageXml()
{
    const string pageXml = @" <page> <title>Lynnwood, Washington</title> <id>138213</id> <revision> <id>421566368</id> <timestamp>2011-03-30T23:40:34Z</timestamp> <contributor> <username>L5gcw0b</username> <id>14294239</id> </contributor> <minor /> <comment>/* Neighborhood parks */</comment> <text xml:space=""preserve"">{{redirect|Lynnwood}} Hello World! ==History== This is the history ==Geography== This is the geography. </text> </revision> </page>";

    // Parallel arrays: expected level and title of sections 0..2.
    int[] expectedLevels = { 1, 2, 2 };
    string[] expectedTitles = { "Lynnwood, Washington", "History", "Geography" };

    WikipediaParser wiki = new WikipediaParser(pageXml);

    for (int i = 0; i < expectedLevels.Length; i++)
    {
        Assert.AreEqual(expectedLevels[i], wiki.Sections[i].Level);
        Assert.AreEqual(expectedTitles[i], wiki.Sections[i].Title);
    }
}
// Make sure that links with other links nested within are removed, while a
// regular link in the same text is still rendered.
public void WikipediaParser_Link_Nested()
{
    const string markup = @" Hello Link1: [[ Test Link 1 | [[ Nested Link ]] [[ Another Nested ]] ]] Link2: [[ Test Link 2 ]] World ";
    const string expectedHtml = @"<p> Hello Link1: Link2: <a href=""http://www.wikipedia.org/wiki/Test_Link_2"" target=""_blank"" rel=""nofollow"">Test Link 2</a> World </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// Wiki links and bare http/https URLs interleaved in the same text are each
// expected to be converted to the matching anchor.
public void WikipediaParser_Link_MultipleMixed()
{
    const string markup = @" Link1: [[ Test Link 1 ]] Link2: http://www.google.com/test.aspx?hello=world Link3: [[ Test Link 3 ]] Link4: https://microsoft.com/ Link5: [[ Test Link 5 ]] ";
    const string expectedHtml = @"<p> Link1: <a href=""http://www.wikipedia.org/wiki/Test_Link_1"" target=""_blank"" rel=""nofollow"">Test Link 1</a> Link2: <a href=""http://www.google.com/test.aspx?hello=world"" target=""_blank"" rel=""nofollow"">http://www.google.com/test.aspx?hello=world</a> Link3: <a href=""http://www.wikipedia.org/wiki/Test_Link_3"" target=""_blank"" rel=""nofollow"">Test Link 3</a> Link4: <a href=""https://microsoft.com/"" target=""_blank"" rel=""nofollow"">https://microsoft.com/</a> Link5: <a href=""http://www.wikipedia.org/wiki/Test_Link_5"" target=""_blank"" rel=""nofollow"">Test Link 5</a> </p> ";

    WikipediaParser wiki = new WikipediaParser(markup);

    Assert.AreEqual(1, wiki.Sections.Count);

    var renderedHtml = wiki.Sections[0].Html;
    Assert.AreEqual(expectedHtml, renderedHtml);
}
// WikipediaParser.StripMagicWords is expected to remove the __NOTOC__,
// __NONEWSECTIONLINK__ and __NOINDEX__ markers and leave the rest of the text
// unchanged.
public void WikipediaParser_StripMagicWords()
{
    const string expected = "This is a test of the emergency broadcasting system.";
    const string withMagicWords = "This is a test__NOTOC__ of the__NONEWSECTIONLINK__ emergency__NOINDEX__ broadcasting system.";

    var stripped = WikipediaParser.StripMagicWords(withMagicWords);

    Assert.AreEqual(expected, stripped);
}