コード例 #1
0
        /// <summary>
        /// Builds the node-key information and the outgoing reference edges for the stored pages.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. read all page titles from the Mongo-backed storage;
        /// 2. build the node keys info from them and write it as JSON to <c>NodeKeysInfoOutputFileName</c>;
        /// 3. for every title in <c>nodeKeysInfo.Regular</c>, load the page content, extract its references,
        ///    map each one through <c>MapReference</c>, drop <c>null</c> results and duplicates;
        /// 4. write the resulting title-to-references map as JSON to <c>EdgesOutputFileName</c>.
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        private static async Task Main(string[] args)
        {
            var storage = new MongoWikiDownloadStorage(new MongoClient(MongoDbConnString));

            var allTitles = await storage.GetAllPageTitlesAsync();
            var keysInfo  = NodeKeysInfoCalculator.Build(allTitles);

            // The node keys file is written before the edge pass starts,
            // so it exists even if the loop below fails part-way through.
            var keysInfoJson = keysInfo.ToJson();
            File.WriteAllText(NodeKeysInfoOutputFileName, keysInfoJson);

            var edges = new Dictionary <string, string[]>();

            foreach (var pageTitle in keysInfo.Regular)
            {
                var pageContent = await storage.GetPageContent(pageTitle);

                var rawReferences    = ExtractReferences(pageContent);
                var mappedReferences = rawReferences.Select(reference => MapReference(keysInfo, reference));

                // Keep only references that mapped to something, each of them once.
                var targets = mappedReferences
                    .Where(mapped => mapped != null)
                    .Distinct()
                    .ToArray();

                edges[pageTitle] = targets;
            }

            var edgesJson = edges.ToJson();
            File.WriteAllText(EdgesOutputFileName, edgesJson);
        }
コード例 #2
0
ファイル: Program.cs プロジェクト: SWATOPLUS/InfoSearch
        /// <summary>
        /// Extracts the inner text of every article listed in <c>ArticleListFileName</c>.
        /// </summary>
        /// <remarks>
        /// Each line of the article list is treated as a link; underscores are replaced with spaces
        /// to obtain the title name used for the lookup. Links whose title is unknown, or whose page
        /// content is <c>null</c>, are collected in the error list and skipped. The error list is
        /// written as JSON to <c>ExtractInfoOutputFileName</c>, and the link-to-text map is written
        /// as JSON to <c>ExtractedPagesOutputFileName</c>.
        /// </remarks>
        private static async Task Main()
        {
            var mongoClient = new MongoClient(MongoDbConnString);
            var storage     = new MongoWikiDownloadStorage(mongoClient);

            var links = await File.ReadAllLinesAsync(ArticleListFileName);

            var titles = await storage.GetAllPageTitlesAsync();

            // ToDictionary throws if two titles share the same Name.
            var titlesDict = titles.ToDictionary(x => x.Name);

            // NOTE(review): refs is never populated in this method; it is kept only so the
            // serialized extract info keeps its "refs" property.
            var refs   = new List <string>();
            var errors = new List <string>();
            var pages  = new Dictionary <string, string>();

            foreach (var link in links)
            {
                var name = link.Replace("_", " ");

                // Single lookup instead of ContainsKey followed by the indexer.
                if (!titlesDict.TryGetValue(name, out var title))
                {
                    errors.Add(link);

                    continue;
                }

                // Content is fetched under ReferenceName when it is set, otherwise under the title's own name.
                var content = await storage.GetPageContent(title.ReferenceName ?? title.Name);

                if (content == null)
                {
                    errors.Add(link);

                    continue;
                }

                var document = new HtmlDocument();
                document.LoadHtml(content);

                // Pages are keyed by the original link text, not by the normalized name.
                pages[link] = document.DocumentNode.InnerText;
            }

            var extractInfo = new { errors, refs };

            await File.WriteAllTextAsync(ExtractInfoOutputFileName, JsonConvert.SerializeObject(extractInfo));

            await File.WriteAllTextAsync(ExtractedPagesOutputFileName, JsonConvert.SerializeObject(pages));
        }