// One shared client for all lookups: creating and disposing an HttpClient per call
// leaves sockets lingering and can exhaust them under load.
private static readonly HttpClient s_wikiHttpClient = new HttpClient();

/// <summary>
/// Looks up <paramref name="searchQuery"/> with the English Wikipedia "opensearch" API
/// (limit 1) and returns the cleaned-up description and link of the result.
/// </summary>
/// <param name="searchQuery">Free-text search term; it is URL-escaped before being sent.
/// A null value is treated as an empty string.</param>
/// <returns>
/// A <c>WikipediaData</c> whose <c>Description</c> and <c>Link</c> are filled in. Both are set
/// to the empty string when the request, the JSON parsing or the clean-up fails for any reason.
/// </returns>
public static async Task <WikipediaData> GetWiki(string searchQuery)
{
    var wiki = new WikipediaData();

    try
    {
        // Escape the term so characters such as '&', '#', '+' or spaces cannot truncate
        // or alter the request's query string.
        string escapedQuery = System.Uri.EscapeDataString(searchQuery ?? string.Empty);

        var stringJson = await s_wikiHttpClient.GetStringAsync(
            $@"https://en.wikipedia.org/w/api.php?action=opensearch&search={escapedQuery}&limit=1&namespace=0&format=json");

        JToken token = JToken.Parse(stringJson);

        const string quote = "\"";
        // Removes "(...)" groups, CR/LF pairs, double quotes and backslashes.
        string patternDesc = $@"\([^()]*\)|\r\n|{quote}|\\";
        // Removes square brackets, apostrophes, CR/LF pairs, double quotes and backslashes,
        // i.e. the JSON punctuation left over from rendering the token as text.
        string patternBrackets = $@"[\[\]']|\r\n|{quote}|\\";

        // NOTE(review): elements 2 and 3 are presumably the description and URL arrays of the
        // opensearch response - confirm against the API documentation.
        string rawDescription = HtmlEntity.DeEntitize(token[2].ToString());

        // patternDesc is applied twice so that one level of nested parentheses is removed too.
        string withoutParentheses = Regex.Replace(rawDescription, patternDesc, "");
        withoutParentheses = Regex.Replace(withoutParentheses, patternDesc, "").Trim();

        wiki.Description = Regex.Replace(withoutParentheses, patternBrackets, "");
        wiki.Link = Regex.Replace(token[3].ToString(), patternBrackets, "").Trim();
    }
    catch (System.Exception)
    {
        // Deliberately best-effort: any failure yields an empty result instead of
        // propagating to the caller.
        wiki.Description = "";
        wiki.Link = "";
    }

    return wiki;
}
/// <summary>
/// Converts one header/value pair into zero or more plain-English statements about
/// <paramref name="subject"/> and appends each of them to <c>wiki_data.content</c>,
/// advancing <c>wiki_data.content_entries</c>. A header that matches no rule adds nothing.
/// </summary>
/// <remarks>
/// The rules are independent checks evaluated in a fixed order, so a header that satisfies
/// several of them produces several statements, in that order.
/// </remarks>
/// <param name="wiki_data">Receives the generated statements.</param>
/// <param name="subject">Text used as the subject of every statement.</param>
/// <param name="header">Header label. Most rules compare it case-sensitively.</param>
/// <param name="data">Value text that belongs to the header.</param>
/// <param name="probably_person">Not used by this method.</param>
private void AddHeaderQuestion(WikipediaData wiki_data, string subject, string header, string data, bool probably_person)
{
    string header_lower = header.ToLower();

    if ((header == "Carbohydrates") || (header == "Fat") || (header == "Protein"))
    {
        // "Carbohydrates" is reported in the singular; the others are simply lower-cased.
        string nutrient = (header == "Carbohydrates") ? "carbohydrate" : header_lower;
        AppendHeaderStatement(wiki_data, subject + " has a " + nutrient + " content of " + data);
    }

    if (header == "Birth name")
    {
        AppendHeaderStatement(wiki_data, subject + " was born named " + data);
    }

    if (header == "Conservation status")
    {
        AppendHeaderStatement(wiki_data, subject + " has the conservation status " + data);
    }

    if (header == "Location")
    {
        AppendHeaderStatement(wiki_data, subject + " is located in " + data);
    }

    if ((header == "Date") && ContainsMonth(data))
    {
        if (data.Contains("-"))
        {
            AppendHeaderStatement(wiki_data, BuildHeaderRangeStatement(subject, " occurred between ", data));
        }
        else
        {
            AppendHeaderStatement(wiki_data, subject + " occurred on " + data);
        }
    }

    if (header == "Binomial name")
    {
        // Only the text before the first comma is used.
        AppendHeaderStatement(wiki_data, subject + " has the latin name " + data.Split(',')[0]);
    }

    if (header == "Subspecies")
    {
        string first_item = data.Split(',')[0];
        // Values beginning with "see " are skipped.
        if (!first_item.ToLower().StartsWith("see "))
        {
            AppendHeaderStatement(wiki_data, subject + " belongs to the subspecies " + first_item);
        }
    }

    if ((header == "Birthplace") || (header == "Birth place"))
    {
        AppendHeaderStatement(wiki_data, subject + " was born in " + data);
    }

    if (header == "Industry")
    {
        AppendHeaderStatement(wiki_data, subject + " belongs to the industry " + data);
    }

    if (header == "Time zone")
    {
        AppendHeaderStatement(wiki_data, subject + " is in the time zone " + data);
    }

    if (header == "Currency")
    {
        AppendHeaderStatement(wiki_data, subject + " uses the currency " + data);
    }

    if ((header_lower == "ethnic group") || (header_lower == "ethnicity"))
    {
        AppendHeaderStatement(wiki_data, subject + " belongs to the ethnic group " + data);
    }

    if ((header == "Preceded by") || (header == "Precededby"))
    {
        AppendHeaderStatement(wiki_data, subject + " was preceded by " + data);
    }

    if (header.Contains("Previous mission"))
    {
        AppendHeaderStatement(wiki_data, subject + " was preceded by " + data);
    }

    if (header == "Memory")
    {
        AppendHeaderStatement(wiki_data, subject + " has a memory of " + data);
    }

    if ((header == "Reign") && data.Contains("-"))
    {
        AppendHeaderStatement(wiki_data, BuildHeaderRangeStatement(subject, " reigned between ", data));
    }

    if (header == "Coronation")
    {
        AppendHeaderStatement(wiki_data, subject + " was crowned on " + data);
    }

    if (header == "Religious stance")
    {
        AppendHeaderStatement(wiki_data, subject + " has the religious stance " + data);
    }

    if (header_lower.Contains("awards"))
    {
        AppendHeaderStatement(wiki_data, subject + " won awards " + data);
    }

    if (header_lower.Contains("advisor"))
    {
        AppendHeaderStatement(wiki_data, subject + " was advised by " + data);
    }

    if ((header == "Followed by") || (header == "Followedby") || (header == "Succeededby") || (header == "Succeeded by"))
    {
        AppendHeaderStatement(wiki_data, subject + " was followed by " + data);
    }

    if (header.Contains("Next mission"))
    {
        // NOTE(review): subject and data are swapped here compared with the other rules.
        // Kept exactly as found - confirm whether this is intentional.
        AppendHeaderStatement(wiki_data, data + " was followed by " + subject);
    }

    if (header.Contains("Launched"))
    {
        AppendHeaderStatement(wiki_data, subject + " was launched on " + data);
    }

    if (header.Contains("Orbital period"))
    {
        AppendHeaderStatement(wiki_data, subject + " has an orbital period of " + data);
    }

    if (header.Contains("Eccentricity"))
    {
        AppendHeaderStatement(wiki_data, subject + " has an orbital eccentricity of " + data);
    }

    if (header.Contains("Discovery date"))
    {
        AppendHeaderStatement(wiki_data, BuildHeaderDatedStatement(subject, " was discovered", data));
    }

    if (header.Contains("Publication date"))
    {
        AppendHeaderStatement(wiki_data, BuildHeaderDatedStatement(subject, " was published", data));
    }

    if (header.Contains("Publisher"))
    {
        AppendHeaderStatement(wiki_data, subject + " was published by " + data);
    }

    if (header.Contains("Media type"))
    {
        AppendHeaderStatement(wiki_data, subject + " has the media type " + data);
    }

    if (header.Contains("Author"))
    {
        AppendHeaderStatement(wiki_data, subject + " was written by " + data);
    }

    if (header.Contains("Patron saint"))
    {
        AppendHeaderStatement(wiki_data, subject + " has the patron saint " + data);
    }

    if (header.Contains("Discovered by"))
    {
        AppendHeaderStatement(wiki_data, subject + " was discovered by " + data);
    }

    if (header == "Death")
    {
        string death_statement = BuildHeaderLifeEventStatement(subject, data, " died on ", " died in ");
        if (death_statement != "")
        {
            AppendHeaderStatement(wiki_data, death_statement);
        }
    }

    if (header == "Birth")
    {
        string birth_statement = BuildHeaderLifeEventStatement(subject, data, " born on ", " born in ");
        if (birth_statement != "")
        {
            AppendHeaderStatement(wiki_data, birth_statement);
        }
    }

    if (header == "Also known as")
    {
        AppendHeaderStatementPerItem(wiki_data, data, subject + " is also known as ", "");
    }

    if (header == "School/tradition")
    {
        AppendHeaderStatementPerItem(wiki_data, data, subject + " follows the school of thought ", "");
    }

    if (header == "Main interests")
    {
        AppendHeaderStatementPerItem(wiki_data, data, subject + " is interested in ", "");
    }

    if (header == "Notable ideas")
    {
        AppendHeaderStatementPerItem(wiki_data, data, subject + " has ideas in ", "");
    }

    if (header == "Influenced")
    {
        string[] influenced = data.Split(',');
        for (int i = 0; i < influenced.Length; i++)
        {
            string person = influenced[i].Trim();
            // An entry equal to the subject itself is skipped.
            if (person != subject)
            {
                AppendHeaderStatement(wiki_data, subject + " influenced " + person);
            }
        }
    }

    if (header == "Government")
    {
        AppendHeaderStatementPerItem(wiki_data, data, subject + " has the government type ", "");
    }

    // The original tested "Politicalparty" twice; the second operand is now the spaced
    // spelling, matching the "Preceded by"/"Precededby" pairs used by the other rules.
    if ((header == "Politicalparty") || (header == "Political party"))
    {
        AppendHeaderStatementPerItem(wiki_data, data, subject + " was a member of the ", " party");
    }

    // "Alamater" (as originally written) is kept for compatibility; the correctly spelled
    // joined and spaced forms are accepted as well.
    if ((header == "Alamater") || (header == "Almamater") || (header == "Alma mater"))
    {
        AppendHeaderStatementPerItem(wiki_data, data, subject + " attended ", "");
    }

    if ((header_lower == "vicepresident") || (header_lower == "vice president"))
    {
        string[] deputies = data.Split(',');
        for (int i = 0; i < deputies.Length; i++)
        {
            string deputy = deputies[i].Trim();
            // Entries of three characters or fewer, and the placeholder "none", are ignored.
            if ((deputy.Length > 3) && (deputy.ToLower() != "none"))
            {
                AppendHeaderStatement(wiki_data, subject + " had the vice president " + deputy);
            }
        }
    }

    if ((header == "Official languages") || (header == "Official language(s)") || (header == "Official language"))
    {
        AppendHeaderStatementPerItem(wiki_data, data, subject + " has the official language ", "");
    }

    if (header == "Demonym")
    {
        string[] demonyms = data.Split(',');
        for (int i = 0; i < demonyms.Length; i++)
        {
            string demonym = demonyms[i].Trim();
            // Each demonym yields two statements, in this order.
            AppendHeaderStatement(wiki_data, demonym + "s live in " + subject);
            AppendHeaderStatement(wiki_data, demonym + " is a type of nationality");
        }
    }

    if ((header == "Occupation(s)") || (header == "Occupation"))
    {
        AppendHeaderStatementPerItem(wiki_data, data, subject + " has the occupation ", "");
    }

    if ((header == "Genre(s)") || (header == "Genre"))
    {
        AppendHeaderStatementPerItem(wiki_data, data, subject + " has the genre ", "");
    }

    if ((header == "Instrument(s)") || (header == "Instrument"))
    {
        string[] instruments = data.Split(',');
        for (int i = 0; i < instruments.Length; i++)
        {
            string instrument = instruments[i].Trim();
            if (instrument.ToLower() != "vocals")
            {
                AppendHeaderStatement(wiki_data, subject + " plays the instrument " + instrument);
            }
            else
            {
                AppendHeaderStatement(wiki_data, subject + " is a singer");
            }
        }
    }

    if (header.Contains("Associated act") || header.Contains("Related"))
    {
        AppendHeaderStatementPerItem(wiki_data, data, subject + " is associated with ", "");
    }
}

// Stores one statement in the next free slot of wiki_data.content.
private static void AppendHeaderStatement(WikipediaData wiki_data, string statement)
{
    wiki_data.content[wiki_data.content_entries++] = statement;
}

// Appends prefix + item + suffix for every comma-separated, trimmed item of data.
private static void AppendHeaderStatementPerItem(WikipediaData wiki_data, string data, string prefix, string suffix)
{
    string[] items = data.Split(',');
    for (int i = 0; i < items.Length; i++)
    {
        AppendHeaderStatement(wiki_data, prefix + items[i].Trim() + suffix);
    }
}

// Formats subject + verb + "A and B" from an "A - B" value; the caller ensures data contains '-'.
private static string BuildHeaderRangeStatement(string subject, string verb, string data)
{
    string[] ends = data.Split('-');
    return subject + verb + ends[0].Trim() + " and " + ends[1].Trim();
}

// Uses " in " when data has no spaces (a single token) and " on " otherwise.
private static string BuildHeaderDatedStatement(string subject, string verb, string data)
{
    string preposition = (data.Split(' ').Length == 1) ? " in " : " on ";
    return subject + verb + preposition + data;
}

// Builds the Birth/Death statement: a multi-word value without an era marker must contain a
// month (otherwise "" is returned); every other value uses the "in" phrasing.
private string BuildHeaderLifeEventStatement(string subject, string data, string on_phrase, string in_phrase)
{
    bool multi_word = data.Split(' ').Length > 1;
    bool has_era = data.Contains(" BC") || data.Contains(" B.C.") || data.Contains(" AD") || data.Contains(" A.D.");

    if (multi_word && !has_era)
    {
        return ContainsMonth(data) ? subject + on_phrase + data : "";
    }

    return subject + in_phrase + data;
}
/// <summary>
/// Generates yes/no questions from one entry of <c>wiki_data.content</c> and from the
/// article's categories, then passes the ones that survive the final filter to
/// <c>SaveQuestion</c>.
/// </summary>
/// <param name="mindpixel_filename">Forwarded unchanged to <c>SaveQuestion</c>.</param>
/// <param name="wiki_data">Supplies the title, the content entries and the categories.</param>
/// <param name="entry_number">Index into <c>wiki_data.content</c> of the entry to process.</param>
/// <param name="man_woman">Set to true once an "a man?"/"a woman?" question has been queued;
/// while it is true no further gender question is generated.</param>
private void AddMindpixel(string mindpixel_filename, WikipediaData wiki_data, int entry_number, ref bool man_woman)
{
    string entry = wiki_data.content[entry_number];

    // Slot 0 is the entry itself; slots 1 and 2 hold rewrites derived from a leading
    // ", ... ," clause when one is found.
    string[] variants = new string[] { entry, "", "" };
    ArrayList pending = new ArrayList();

    int first_comma = entry.IndexOf(", ");
    int second_comma = -1;
    if (first_comma > -1)
    {
        second_comma = entry.IndexOf(", ", first_comma + 1);
    }

    if ((first_comma > -1) && (first_comma < 50) && (second_comma > first_comma))
    {
        // Text after the first comma up to and including the second comma, trimmed.
        string clause = entry.Substring(first_comma + 1, second_comma - first_comma).Trim();
        string[] clause_words = clause.Split(' ');
        if (clause_words.Length > 0)
        {
            string lead_word = clause_words[0];
            string before_clause = entry.Substring(0, first_comma);
            string rewritten;

            if (lead_word.StartsWith("a") || lead_word.StartsWith("otherwise") || (lead_word == "the") || lead_word.EndsWith("ly"))
            {
                rewritten = before_clause + " is " + clause;
            }
            else if (lead_word.StartsWith("or") || (clause_words.Length == 1))
            {
                rewritten = before_clause + " is otherwise known as " + clause;
            }
            else
            {
                rewritten = entry.Substring(0, second_comma);
            }

            variants[1] = ReplaceWord(rewritten, ",", "");
            // The entry with the clause cut out.
            variants[2] = ReplaceWord(before_clause + entry.Substring(second_comma), ",", "");
        }
    }

    string[] question_noise = new string[] { "disambiguation", "cleanup", "wikipedia", "wikified", "article lacking", "article needing" };
    string[] category_noise = new string[] { "cleanup", "wikipedia", "wikified", "article lacking", "article needing", "article pending" };

    for (int v = 0; v < variants.Length; v++)
    {
        if (variants[v] == "")
        {
            continue;
        }

        string question = ParseAsQuestion(wiki_data.Title, variants[v]);
        current_question = question;

        string title = removeBrackets(wiki_data.Title.ToLower());
        if (title.Contains("list of "))
        {
            continue;
        }

        // Queue the parsed question unless it is empty or mentions a maintenance marker.
        bool question_usable = (question != "");
        for (int n = 0; question_usable && (n < question_noise.Length); n++)
        {
            if (question.Contains(question_noise[n]))
            {
                question_usable = false;
            }
        }
        if (question_usable && !ContainString(pending, question))
        {
            pending.Add(question);
        }

        bool possible_person = false;
        string[] title_words = title.Split(' ');

        for (int c = 0; c < wiki_data.categories.Count; c++)
        {
            string category = (string)wiki_data.categories[c];
            Console.WriteLine("CATEGORY: " + category);

            // These checks run on the category exactly as stored, before lower-casing.
            bool category_usable = (category != "") && (category != "disambiguation");
            for (int n = 0; category_usable && (n < category_noise.Length); n++)
            {
                if (category.Contains(category_noise[n]))
                {
                    category_usable = false;
                }
            }
            if (!category_usable)
            {
                continue;
            }

            category = category.ToLower();
            string[] category_words = category.Split(' ');
            string relation = "a type of";
            string opener = "Is ";

            // Each test sees the category as left by the previous one, so the order matters.
            if (category.Contains(" birth"))
            {
                opener = "Was ";
                relation = "born in";
                category = category_words[0];
                possible_person = true;
            }
            if (category.Contains(" death"))
            {
                opener = "Did ";
                relation = "die in";
                category = category_words[0];
                possible_person = true;
            }
            if (category.Contains("suicide"))
            {
                opener = "Did ";
                relation = "commit";
                category = category_words[0];
                possible_person = true;
            }
            if (category.Contains("people"))
            {
                possible_person = true;
            }
            if (category.Contains("living people"))
            {
                category = "";
            }
            if (category.Contains("list of "))
            {
                category = "";
            }

            if ((category == "") || (category == "stub") || category.Contains("disambiguation"))
            {
                continue;
            }

            question = opener + title + " " + relation + " " + category + "?";
            if (!ContainString(pending, question))
            {
                pending.Add(question);
            }
            if (current_question == "")
            {
                current_question = question;
            }
        }

        // A gender question is tried for two-word titles, or whenever a category suggested
        // a person, but only until the first one has been queued.
        bool try_gender = (title_words.Length == 2) || (possible_person && (title_words.Length > 0));
        if (!man_woman && try_gender)
        {
            if (names.IsMaleName(title_words[0]))
            {
                question = "Is " + title + " a man?";
                if (!ContainString(pending, question))
                {
                    pending.Add(question);
                }
                man_woman = true;
            }
            else if (names.IsFemaleName(title_words[0]))
            {
                question = "Is " + title + " a woman?";
                if (!ContainString(pending, question))
                {
                    pending.Add(question);
                }
                man_woman = true;
            }
        }
    }

    // Final filter: the last space-separated token must be longer than three characters and
    // the question must not end in one of the listed endings.
    string[] rejected_endings = new string[] { " born?", " and?", " usually?", " was?", " small?", " large?", " under?" };
    for (int q = 0; q < pending.Count; q++)
    {
        string candidate = (string)pending[q];
        string[] tokens = candidate.Split(' ');
        if (tokens[tokens.Length - 1].Length <= 3)
        {
            continue;
        }

        bool rejected = false;
        for (int e = 0; !rejected && (e < rejected_endings.Length); e++)
        {
            if (candidate.EndsWith(rejected_endings[e]))
            {
                rejected = true;
            }
        }

        if (!rejected)
        {
            SaveQuestion(mindpixel_filename, candidate);
        }
    }
}
/// <summary>
/// Processes one article page: extracts the title, the opening sentence(s) and the
/// categories, turns them into questions via <c>AddMindpixel</c>, then does the same for
/// the statements produced by <c>ExtractHeaderData</c> from the untouched page.
/// </summary>
/// <param name="html">Page markup. Null or empty input is ignored.</param>
/// <param name="mindpixel_filename">Forwarded unchanged to <c>AddMindpixel</c>.</param>
private void ProcessWikipediaEntry(string html, string mindpixel_filename)
{
    // Nothing can be extracted from an empty page, and the later scans would throw on it.
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    int pos, pos2, i;
    bool man_woman = false;
    string html_original = html;

    WikipediaData wiki_data = new WikipediaData();
    wiki_data.Title = "";
    wiki_data.content_entries = 0;
    wiki_data.categories.Clear();

    pos = html.IndexOf("firstHeading");
    // The length check keeps Substring from throwing when the marker sits at the very end.
    if ((pos > 0) && (pos + 14 <= html.Length))
    {
        // NOTE(review): the 14-character skip presumably covers the marker plus the rest of
        // the opening tag - confirm against real pages.
        html = html.Substring(pos + 14);
        pos2 = html.IndexOf("<");
        if (pos2 > 0)
        {
            // The title is everything up to the next tag.
            wiki_data.Title = html.Substring(0, pos2);
            wiki_data.Title = ReplaceWord(wiki_data.Title, "–", "-");
            wiki_data.Title = ReplaceWord(wiki_data.Title, "'", "'");
            html = html.Substring(pos2);

            pos = html.IndexOf("start content");
            if (pos > 0)
            {
                html = html.Substring(pos);
                int table_start = html.IndexOf("<table");
                pos = html.IndexOf("<p>");
                if ((table_start != -1) && (table_start < pos))
                {
                    // A table precedes the first paragraph: continue from the table's end.
                    // If the closing tag is missing the paragraph already found is used
                    // (previously Substring(-1) threw here).
                    int table_end = html.IndexOf("</table>");
                    if (table_end != -1)
                    {
                        html = html.Substring(table_end);
                        pos = html.IndexOf("<p>");
                    }
                }

                if (pos > 0)
                {
                    html = html.Substring(pos + 3);
                    pos2 = FindFirstSentenceEnd(html);
                    if (pos2 - 3 > 0)
                    {
                        string firstParagraph = RemoveFormatting(html.Substring(0, pos2));
                        string[] sentence = firstParagraph.Split('.');
                        // At most 1000 fragments are stored.
                        int max = (sentence.Length > 1000) ? 1000 : sentence.Length;
                        for (int s = 0; s < max; s++)
                        {
                            wiki_data.content[wiki_data.content_entries++] = sentence[s];
                        }
                    }
                }
            }

            // Collect the categories: each is introduced by a double quote followed by
            // "Category:" and runs up to the character before the next '>'.
            pos = 1;
            while (pos > 0)
            {
                pos = html.IndexOf("\"Category:");
                if (pos <= 0)
                {
                    break;
                }

                pos += 10;
                html = html.Substring(pos);
                pos2 = html.IndexOf(">");
                if (pos2 > 0)
                {
                    string category = html.Substring(0, pos2 - 1);
                    if (!category.Contains(" stub"))
                    {
                        wiki_data.categories.Add(SingularizeCategoryWords(category));
                        html = html.Substring(pos2);
                    }
                }
            }
        }
    }

    // Questions from the opening sentences. The categories are cleared after the first
    // call, so category questions are only generated once.
    for (i = 0; i < wiki_data.content_entries; i++)
    {
        AddMindpixel(mindpixel_filename, wiki_data, i, ref man_woman);
        wiki_data.categories.Clear();
    }

    // Questions from the header/value statements, extracted from the unmodified page.
    wiki_data.content_entries = 0;
    ExtractHeaderData(html_original, wiki_data, man_woman);
    for (i = 0; i < wiki_data.content_entries; i++)
    {
        AddMindpixel(mindpixel_filename, wiki_data, i, ref man_woman);
        wiki_data.categories.Clear();
    }
}

// Returns the index of the '.' that ends the opening sentence of text, or -1 when text
// contains no '.'. A ". " at or after offset 17, then 7, is preferred over a bare "."
// at the same offsets, and finally any "." at all.
private static int FindFirstSentenceEnd(string text)
{
    int end = -1;
    if (17 < text.Length)
    {
        end = text.IndexOf(". ", 17);
    }
    if ((end == -1) && (7 < text.Length))
    {
        end = text.IndexOf(". ", 7);
    }
    if ((end == -1) && (17 < text.Length))
    {
        end = text.IndexOf(".", 17);
    }
    if ((end == -1) && (7 < text.Length))
    {
        end = text.IndexOf(".", 7);
    }
    if (end == -1)
    {
        end = text.IndexOf(".");
    }
    return end;
}

// Crudely singularises every space-separated word ("...ies" becomes "...y" for words longer
// than four characters, then one trailing "s" is dropped) and re-joins the words with single
// spaces. Empty words are left alone (previously they caused an out-of-range Substring), and
// no trailing space is appended (previously one always was, because the separator test
// compared the index with the full word count).
private static string SingularizeCategoryWords(string category)
{
    string[] words = category.Split(' ');
    for (int w = 0; w < words.Length; w++)
    {
        string word = words[w];
        if ((word.Length > 4) && word.EndsWith("ies", StringComparison.Ordinal))
        {
            word = word.Substring(0, word.Length - 3) + "y";
        }
        if (word.EndsWith("s", StringComparison.Ordinal))
        {
            word = word.Substring(0, word.Length - 1);
        }
        words[w] = word;
    }
    return string.Join(" ", words);
}
/// <summary>
/// Scans <paramref name="html"/> for table header cells ("&lt;th ...&gt;...&lt;/th&gt;"), pairs each
/// cleaned header with the first non-empty data cell ("&lt;td ...&gt;...&lt;/td&gt;") found after it,
/// and passes every pair to <c>AddHeaderQuestion</c> with <c>wiki_data.Title</c> as the subject.
/// </summary>
/// <param name="html">Page markup to scan. Null or empty input is ignored.</param>
/// <param name="wiki_data">Supplies the title and receives the generated statements.</param>
/// <param name="probably_person">Forwarded unchanged to <c>AddHeaderQuestion</c>.</param>
private void ExtractHeaderData(string html, WikipediaData wiki_data, bool probably_person)
{
    // The first search starts at index 1, which IndexOf rejects with an
    // ArgumentOutOfRangeException when the document is empty.
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    int pos = 0;
    while (pos != -1)
    {
        pos = html.IndexOf("<th", pos + 1);
        if (pos == -1)
        {
            break;
        }

        int tag_end = html.IndexOf(">", pos);
        if (tag_end == -1)
        {
            continue;
        }

        int header_end = html.IndexOf("</th>", tag_end + 1);
        if (header_end == -1)
        {
            continue;
        }

        // NOTE(review): several literals below look like HTML entities that were decoded at
        // some point (for example the apostrophe replacement is a no-op as written) - confirm
        // them against the intended entity names.
        string header_str = RemoveFormatting(html.Substring(tag_end + 1, header_end - tag_end - 1));
        if (header_str == "E°")
        {
            header_str = "East";
        }
        if (header_str == "N°")
        {
            header_str = "Nort";
        }
        header_str = ReplaceWord(header_str, "&", "");
        // Only non-breaking spaces are stripped; it is written as an escape so it cannot be
        // mistaken for an ordinary space, which must survive because AddHeaderQuestion
        // matches multi-word headers such as "Birth name".
        header_str = ReplaceWord(header_str, "\u00A0", "");
        header_str = ReplaceWord(header_str, "–", "-");
        header_str = ReplaceWord(header_str, "'", "'");
        header_str = ReplaceWord(header_str, "°F", "degrees farenheit");
        header_str = header_str.Trim();
        if (header_str.StartsWith("-"))
        {
            header_str = header_str.Substring(1).Trim();
        }

        if (header_str.Contains("&") || (header_str == ""))
        {
            continue;
        }
        if (header_str.ToLower().Contains("scientific class"))
        {
            continue;
        }

        // Look through the following data cells until one yields non-empty text. The counter
        // starts at 3 and the loop runs while it is >= 0, so at most four cells are examined.
        string header_data = "";
        int search_from = header_end;
        int tries = 3;
        while ((header_data == "") && (tries >= 0))
        {
            tries--;

            int cell_start = html.IndexOf("<td", search_from);
            if (cell_start == -1)
            {
                // No further cells exist, so repeating the search cannot succeed.
                break;
            }

            int cell_tag_end = html.IndexOf(">", cell_start);
            if (cell_tag_end > -1)
            {
                int cell_end = html.IndexOf("</td>", cell_tag_end);
                if (cell_end > -1)
                {
                    header_data = RemoveFormatting(html.Substring(cell_tag_end + 1, cell_end - cell_tag_end - 1));
                    header_data = ReplaceWord(header_data, "&", "");
                    // Double-quote characters are removed (the literal was not a valid
                    // string as originally written).
                    header_data = ReplaceWord(header_data, "\"", "");
                    header_data = ReplaceWord(header_data, "\u00A0", "");
                    header_data = ReplaceWord(header_data, "–", "-");
                    header_data = ReplaceWord(header_data, "'", "'");
                    header_data = ReplaceWord(header_data, "²", "^2");
                    header_data = ReplaceWord(header_data, "°F", "degrees farenheit");
                    header_data = header_data.Trim();

                    // A lone "#" counts as empty, so the next cell is tried.
                    if (header_data == "#")
                    {
                        header_data = "";
                    }

                    if (!header_data.Contains("&") && (header_data != ""))
                    {
                        AddHeaderQuestion(wiki_data, wiki_data.Title, header_str, header_data, probably_person);
                    }
                }
            }

            search_from = cell_start + 1;
        }
    }
}