/// <summary>
/// Loads the HTML document for <paramref name="uri"/>, looks up its Wikipedia
/// content node and writes that node's inner text to <paramref name="filePath"/>.
/// </summary>
/// <param name="uri">Address of the page to load via <c>WKGE.GetHtmlDocumentFromUri</c>.</param>
/// <param name="filePath">Destination file that receives the content node's inner text.</param>
/// <remarks>
/// When <c>WKGE.GetWikipediaPageContentNode</c> returns <c>null</c>, no file is written.
/// </remarks>
public static void SaveUriPageContentToTxtFile(Uri uri, string filePath)
{
    HtmlDocument pageDocument = WKGE.GetHtmlDocumentFromUri(uri);
    HtmlNode contentNode = WKGE.GetWikipediaPageContentNode(pageDocument);

    // Nothing to save when the page has no recognizable content node.
    if (contentNode == null)
    {
        return;
    }

    string pageText = contentNode.InnerText;
    File.WriteAllText(filePath, pageText);
}
/// <summary>
/// Extracts the knowledge graph from <c>WKGE.WikipediaPagesToParse</c>, saves the two
/// Wikipedia list pages as HTML files, and then writes the page content of every graph
/// node that has a link to a text file, skipping files that already exist.
/// </summary>
/// <remarks>
/// Progress is reported on the console: one line per file written and one line per
/// file that was skipped because it was already present.
/// </remarks>
public static void DownloadAllPagesInKnGraph()
{
    KnowledgeGraph graph = WKGE.ExtractKnGraphFromUris(WKGE.WikipediaPagesToParse);

    // Save the raw HTML of the two list pages.
    SaveUriToHtmlFile(
        WKGE.GetWikipediaListOfAlgorithmsPageUri(),
        "../../../DownloadedHtmlPages/listOfAlgos.html");
    SaveUriToHtmlFile(
        WKGE.GetWikipediaListOfDataStructuresPageUri(),
        "../../../DownloadedHtmlPages/listOfDataStructures.html");

    // Save the page content behind every node that actually carries a link.
    var linkedNodes = graph.KnGraph.Where(x => x.LinkToPage != null);
    foreach (KnowledgeGraphNode linkedNode in linkedNodes)
    {
        string targetPath = GetFilePathFromNodeLinkTopage(linkedNode.LinkToPage);

        // Never overwrite a file left by a previous run; just report it.
        if (File.Exists(targetPath))
        {
            Console.WriteLine("!!! ALREADYEXISTS !!! " + targetPath);
            continue;
        }

        SaveUriPageContentToTxtFile(linkedNode.LinkToPage, targetPath);
        Console.WriteLine("Wrote file with name " + Path.GetFileName(targetPath));
    }
}
/// <summary>
/// Loads the HTML document for <paramref name="uri"/> and writes the outer HTML of
/// its document node to <paramref name="filePath"/>.
/// </summary>
/// <param name="uri">Address of the page to load via <c>WKGE.GetHtmlDocumentFromUri</c>.</param>
/// <param name="filePath">Destination file that receives the HTML markup.</param>
public static void SaveUriToHtmlFile(Uri uri, string filePath)
{
    HtmlDocument pageDocument = WKGE.GetHtmlDocumentFromUri(uri);
    string markup = pageDocument.DocumentNode.OuterHtml;
    File.WriteAllText(filePath, markup);
}