Beispiel #1
0
        private static void WriteConstants(GeneralUris generalUris, string destDir, VDS.RDF.INamespaceMapper nsMapper)
        {
            ConsoleHelper.WriteMilestone("Writing constant definitions ...");
            using (var destWriter = new SequentialTurtleWriter(File.CreateText(Path.Combine(destDir, "_constants.ttl")), nsMapper)) {
                WriteCloseReasons(generalUris, destWriter);

                GlobalData.UpdateStats(destWriter);
            }
            Console.WriteLine(" done.");
        }
        private static void WriteAccountList(GeneralUris generalUris, string destDir, VDS.RDF.INamespaceMapper nsMapper)
        {
            ConsoleHelper.WriteMilestone("Writing account list ...");
            using (var destWriter = new SequentialTurtleWriter(File.CreateText(Path.Combine(destDir, "_users.ttl")), nsMapper)) {
                foreach (var id in GlobalData.AccountIds)
                {
                    destWriter.StartTriple(generalUris.CreateAccountUri(id));
                    destWriter.AddToTriple(generalUris.TypeProperty, generalUris.AccountType);
                }

                GlobalData.UpdateStats(destWriter);
            }
            Console.WriteLine(" done.");
        }
        private static void WriteOntology(GeneralUris generalUris, string destDir, VDS.RDF.INamespaceMapper nsMapper)
        {
            ConsoleHelper.WriteMilestone("Writing ontology ...");

            using (var tempNsMapper = new NamespaceMapper(false)) {
                tempNsMapper.Import(nsMapper);
                tempNsMapper.AddNamespace("owl", new Uri(NamespaceMapper.OWL));

                using (var destWriter = new SequentialTurtleWriter(File.CreateText(Path.Combine(destDir, "_ontology.ttl")), tempNsMapper)) {
                    WriteOntologyDefinitions(generalUris, destWriter);

                    GlobalData.UpdateStats(destWriter);
                }
            }
            Console.WriteLine(" done.");
        }
Beispiel #4
0
        /// <summary>
        /// Downloads the file to the local file system.
        /// </summary>
        /// <param name="baseUri">The base URI that the relative file name can be expanded from.</param>
        /// <param name="directory">The destination directory.</param>
        /// <returns>The path to the downloaded file.</returns>
        /// <exception cref="ArgumentNullException">Any of the arguments is <see langword="null"/>.</exception>
        /// <remarks>
        /// <para>This method downloads the file to the local file system.
        ///   After downloading, the MD5 hash of the file contents is verified.</para>
        /// </remarks>
        public string Download(Uri baseUri, string directory)
        {
            if (baseUri == null)
            {
                throw new ArgumentNullException("baseUri");
            }

            string fn = Path.Combine(directory, name);

            ConsoleHelper.WriteMilestone("Downloading {0} ...", name);
            using (var client = new WebClient()) {
                client.DownloadFile(new Uri(baseUri, name), fn);
            }
            System.Console.WriteLine(" done.");

            var fInfo = new FileInfo(fn);

            long fileSize = fInfo.Length;

            if (fileSize == this.size)
            {
                ConsoleHelper.WriteSuccessLine("File size of {0} bytes verified.", fileSize);
            }
            else
            {
                ConsoleHelper.WriteWarningLine("File size is {0} bytes, which differs from the expected size of {1} bytes.", fileSize, this.size);
            }

            byte[] hash;
            using (var algo = MD5.Create()) {
                using (var fs = fInfo.OpenRead()) {
                    hash = algo.ComputeHash(fs);
                }
            }
            string fileMD5 = string.Join("", hash.Select(b => string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:X2}", b))).ToLowerInvariant();

            if (fileMD5 == this.md5)
            {
                ConsoleHelper.WriteSuccessLine("MD5 hash ({0}) verified.", fileMD5);
            }
            else
            {
                ConsoleHelper.WriteWarningLine("MD5 hash of file ({0}) does not match the expected one ({1}).", fileMD5, this.md5);
            }

            return(fn);
        }
Beispiel #5
0
        private static void WriteBadgesLists(GeneralUris generalUris, string destDir, VDS.RDF.INamespaceMapper nsMapper)
        {
            ConsoleHelper.WriteMilestone("Writing lists of badges ...");
            using (var destWriter = new SequentialTurtleWriter(File.CreateText(Path.Combine(destDir, "_badges.ttl")), nsMapper)) {
                foreach (var siteBadges in GlobalData.GetBadgesPerSite())
                {
                    Uri      siteUri = generalUris.CreateSiteUri(siteBadges.Item1);
                    SiteUris uris    = new SiteUris(generalUris, siteBadges.Item1);
                    foreach (string badgeName in siteBadges.Item2)
                    {
                        WriteBadgeInfo(uris, badgeName, destWriter);
                    }
                }

                GlobalData.UpdateStats(destWriter);
            }
            Console.WriteLine(" done.");
        }
Beispiel #6
0
        private static void ConvertSiteList(GeneralUris generalUris, string tempDir, Uri baseUri, string destDir, VDS.RDF.INamespaceMapper nsMapper)
        {
            string srcFile = Path.Combine(tempDir, "Sites.xml");

            ConsoleHelper.WriteMilestone("Downloading site list ...");
            using (var client = new WebClient()) {
                client.DownloadFile(new Uri(baseUri, "Sites.xml"), srcFile);
            }
            Console.WriteLine(" done.");

            using (var destWriter = new SequentialTurtleWriter(File.CreateText(Path.Combine(destDir, "_sites.ttl")), nsMapper)) {
                using (var fs = File.OpenRead(srcFile)) {
                    using (var reader = XmlReader.Create(fs)) {
                        while (reader.NodeType != XmlNodeType.Element)
                        {
                            if (!reader.Read())
                            {
                                ConsoleHelper.WriteErrorLine("No contents found in file {0}.", srcFile);
                                return;
                            }
                        }

                        if (reader.LocalName == "sitelist")
                        {
                            ConvertSites(generalUris, reader, destWriter);
                        }
                        else
                        {
                            ConsoleHelper.WriteWarningLine("Unknown root element \"{0}\". Skipping document.", reader.LocalName);
                        }
                    }
                }

                GlobalData.UpdateStats(destWriter);
            }

            Console.WriteLine("Conversion of site list completed.");
        }
Beispiel #7
0
        /// <summary>
        /// Downloads and converts the data.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="filelist">The URL of a filelist Xml file.</param>
        /// <exception cref="ArgumentNullException">Any of the arguments is <see langword="null"/>.</exception>
        private static void RetrieveData(GeneralUris generalUris, Uri filelist)
        {
            if (generalUris == null)
            {
                throw new ArgumentNullException("generalUris");
            }
            if (filelist == null)
            {
                throw new ArgumentNullException("filelist");
            }

            string tempDir = Path.Combine(BaseDir, "tmp");
            string destDir = Path.Combine(BaseDir, "rdf");

            Directory.CreateDirectory(tempDir);
            Directory.CreateDirectory(destDir);

            DateTime startTime = DateTime.Now;

            ConsoleHelper.WriteInfoLine("Current time: {0:yyyy-MM-dd HH:mm:ss}", startTime);

            if (!GlobalData.Options.OntologyOnly)
            {
                ConsoleHelper.WriteMilestone("Downloading files list ...");
                using (var client = new WebClient()) {
                    client.DownloadFile(filelist, Path.Combine(tempDir, "files.xml"));
                }
                Console.WriteLine(" done.");

                var files = LoadFilesList(Path.Combine(tempDir, "files.xml")).OrderBy(f => f).ToArray();
                ConsoleHelper.WriteInfoLine("{0} file(s) in list, totalling to a compressed size of {1:F1} GB.",
                                            files.Length, (double)files.Sum(f => f.Size) / 1024 / 1024 / 1024);

                int processedFilesCount = 0;
                foreach (var f in files)
                {
                    SiteInfo siteInfo;
                    try {
                        siteInfo = f.RetrieveSiteInfo();
                    }
                    catch (ArgumentException ex) {
                        ConsoleHelper.WriteErrorLine("Skipping file {0}, as it cannot be associated with a website.\n{1}", f, ex);
                        continue;
                    }

                    if (IncludeSite(siteInfo))
                    {
                        GlobalData.Sites[siteInfo.Id] = siteInfo;

                        if (!GlobalData.Options.SiteListOnly)
                        {
                            string fn = f.Download(filelist, tempDir);

                            string[] rawFiles = null;
                            switch (Path.GetExtension(fn))
                            {
                            case ".7z":
                                rawFiles = ExtractSevenZipArchive(fn);
                                break;

                            default:
                                ConsoleHelper.WriteWarningLine("File {0} has an unknown file extension.", fn);
                                break;
                            }

                            if (rawFiles != null)
                            {
                                ConsoleHelper.WriteInfoLine("{0} file(s) extracted.", rawFiles.Length);

                                foreach (var rawFile in rawFiles)
                                {
                                    Converter.Convert(generalUris, rawFile, destDir, siteInfo);
                                }
                            }
                        }

                        processedFilesCount++;
                        if (processedFilesCount >= GlobalData.Options.MaxFileCount)
                        {
                            break;
                        }
                    }
                }
            }

            GlobalInformationConverter.Convert(generalUris, tempDir, filelist, destDir);

            if (!GlobalData.Options.KeepTemporaryFiles)
            {
                Console.Write("Removing temporary files ... ");
                try {
                    Directory.Delete(tempDir, true);
                    Console.WriteLine(" done.");
                }
                catch {
                    ConsoleHelper.WriteErrorLine("Please remove the directory {0} manually.", tempDir);
                }
            }

            Console.WriteLine();
            GlobalData.PrintStats();

            DateTime endTime = DateTime.Now;

            ConsoleHelper.WriteInfoLine("Current time: {0:yyyy-MM-dd HH:mm:ss}", endTime);
            ConsoleHelper.WriteInfoLine("Total duration: {0}", endTime - startTime);
        }