Beispiel #1
0
        /// <summary>
        /// Writes the site-independent output files: the ontology is always written; the site list
        /// is written unless <c>GlobalData.Options.OntologyOnly</c> is set; the account list, the
        /// badge lists and the constants are written unless <c>GlobalData.Options.SiteListOnly</c>
        /// is set as well.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="tempDir">The directory passed on to the site list conversion for temporary files.</param>
        /// <param name="baseUri">The base URI passed on to the site list conversion.</param>
        /// <param name="destDir">The directory passed on to all writers as the output location.</param>
        /// <exception cref="ArgumentNullException">Any of the arguments is <see langword="null"/>.</exception>
        public static void Convert(GeneralUris generalUris, string tempDir, Uri baseUri, string destDir)
        {
            // generalUris is dereferenced before any other argument, so it has to be validated
            // like the others instead of failing with a NullReferenceException.
            if (generalUris == null)
            {
                throw new ArgumentNullException("generalUris");
            }
            if (tempDir == null)
            {
                throw new ArgumentNullException("tempDir");
            }
            if (baseUri == null)
            {
                throw new ArgumentNullException("baseUri");
            }
            if (destDir == null)
            {
                throw new ArgumentNullException("destDir");
            }

            var nsMapper = generalUris.CreateNamespaceMapper();

            WriteOntology(generalUris, destDir, nsMapper);
            if (!GlobalData.Options.OntologyOnly)
            {
                ConvertSiteList(generalUris, tempDir, baseUri, destDir, nsMapper);
                if (!GlobalData.Options.SiteListOnly)
                {
                    WriteAccountList(generalUris, destDir, nsMapper);
                    WriteBadgesLists(generalUris, destDir, nsMapper);
                    WriteConstants(generalUris, destDir, nsMapper);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads nodes from <paramref name="r"/> until an end element (or the end of the input) is
        /// reached, passes every <c>row</c> element to <c>ConvertSite</c> and finally reports how
        /// many rows were skipped.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="r">The reader from which the rows are read.</param>
        /// <param name="w">The writer that receives the converted rows.</param>
        private static void ConvertSites(GeneralUris generalUris, XmlReader r, SequentialTurtleWriter w)
        {
            long skipped    = 0;
            bool endReached = false;

            while (!endReached && r.Read())
            {
                switch (r.NodeType)
                {
                case XmlNodeType.Element:
                    if (r.LocalName == "row")
                    {
                        // Hand a reader limited to this row to ConvertSite, so that it cannot
                        // consume anything beyond the row.
                        using (var subR = r.ReadSubtree()) {
                            subR.Read();
                            if (!ConvertSite(generalUris, subR, w))
                            {
                                skipped++;
                            }
                        }
                    }
                    break;

                case XmlNodeType.EndElement:
                    // Only leave the loop here. Returning from the method at this point would
                    // bypass the summary of skipped rows below.
                    endReached = true;
                    break;
                }
            }

            if (skipped > 0)
            {
                ConsoleHelper.WriteWarningLine("{0} items from the list of sites were skipped.", skipped);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Program entry point: switches the current thread to the en-US culture, parses the
        /// command line into <c>GlobalData.Options</c>, validates the options and starts the
        /// retrieval and conversion of the data.
        /// </summary>
        /// <param name="args">The command line arguments.</param>
        public static void Main(string[] args)
        {
            var culture = new System.Globalization.CultureInfo("en-US");

            System.Threading.Thread.CurrentThread.CurrentCulture   = culture;
            System.Threading.Thread.CurrentThread.CurrentUICulture = culture;

            try {
                using (var parser = new CommandLine.Parser()) {
                    if (!parser.ParseArguments(args, GlobalData.Options))
                    {
                        Console.WriteLine(GlobalData.Options.GetUsage());
                        return;
                    }
                }

                if (GlobalData.Options.MaxFileCount < 1)
                {
                    ConsoleHelper.WriteErrorLine("As the maximum file count is set to {0}, no files can be imported.",
                                                 GlobalData.Options.MaxFileCount);
                    return;
                }
                if (GlobalData.Options.Files.Count < 1)
                {
                    ConsoleHelper.WriteInfoLine("No input file specified. Using default URL {0}.",
                                                DefaultFileUrl);
                    GlobalData.Options.Files.Add(DefaultFileUrl);
                }

                if (!string.IsNullOrWhiteSpace(GlobalData.Options.SiteNamePattern))
                {
                    try {
                        GlobalData.SiteNamePattern = new Regex(GlobalData.Options.SiteNamePattern);
                    }
                    catch (ArgumentException) {
                        ConsoleHelper.WriteErrorLine("Invalid regular expression: \"{0}\"",
                                                     GlobalData.Options.SiteNamePattern);
                        return;
                    }
                }

                GeneralUris generalUris = new GeneralUris(GlobalData.Options.GeneralPrefix);

                // Only the parsing of the filelist URL is guarded by the UriFormatException
                // handler. If the handler also covered RetrieveData, every URI problem that
                // occurs later during the conversion would be reported as an invalid filelist URL.
                Uri filelist;
                try {
                    filelist = new Uri(GlobalData.Options.Files[0]);
                }
                catch (UriFormatException) {
                    Console.WriteLine("Invalid filelist URL:");
                    throw;   // the outer handler prints the exception details
                }

                RetrieveData(generalUris, filelist);
            }
            catch (Exception ex) {
                // Last-resort handler: print the exception instead of letting the process crash.
                Console.WriteLine(ex.ToString());
            }
        }
Beispiel #4
0
        /// <summary>
        /// Creates the file <c>_constants.ttl</c> in <paramref name="destDir"/>, writes the close
        /// reasons into it and updates the global statistics.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="destDir">The directory in which the file is created.</param>
        /// <param name="nsMapper">The namespace mapper handed to the Turtle writer.</param>
        private static void WriteConstants(GeneralUris generalUris, string destDir, VDS.RDF.INamespaceMapper nsMapper)
        {
            ConsoleHelper.WriteMilestone("Writing constant definitions ...");

            string targetPath = Path.Combine(destDir, "_constants.ttl");
            using (var output = new SequentialTurtleWriter(File.CreateText(targetPath), nsMapper))
            {
                WriteCloseReasons(generalUris, output);
                GlobalData.UpdateStats(output);
            }

            Console.WriteLine(" done.");
        }
Beispiel #5
0
        /// <summary>
        /// Converts a single site row: looks the site up in <c>GlobalData.Sites</c> by its
        /// <c>Address</c> attribute and writes the site's properties to <paramref name="w"/>.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="r">A reader positioned on the row element.</param>
        /// <param name="w">The writer that receives the triples.</param>
        /// <returns>
        /// <see langword="true"/> if the row was written; <see langword="false"/> if it has no
        /// <c>Address</c> attribute or its address is not contained in <c>GlobalData.Sites</c>.
        /// </returns>
        private static bool ConvertSite(GeneralUris generalUris, XmlReader r, SequentialTurtleWriter w)
        {
            // Guard: a row without an address cannot be converted.
            if (!r.MoveToAttribute("Address"))
            {
                r.MoveToElement();
                ConsoleHelper.WriteErrorLine("No Address attribute found on element {0}. Skipping element.", r.ReadOuterXml());
                return false;
            }

            string address = r.Value;

            // Guard: the address does not belong to a registered site.
            SiteInfo info;
            if (!GlobalData.Sites.TryGetValue(address, out info))
            {
                return false;
            }

            Uri subjectUri = generalUris.CreateSiteUri(info);
            w.StartTriple(subjectUri);
            w.AddToTriple(generalUris.IsMetaSiteProperty, info.IsMetaSite);
            w.AddToTriple(generalUris.LanguageProperty, info.IsEnglishSite ? "en" : info.Language);
            w.AddToTriple(generalUris.TypeProperty, generalUris.SiteInfoType);
            w.AddToTriple(generalUris.WebsiteProperty, new Uri("http://" + address));

            if (r.MoveToAttribute("Name"))
            {
                // The name serves as both the label and the title.
                string siteName = r.Value;
                w.AddToTriple(generalUris.LabelProperty, siteName);
                w.AddToTriple(generalUris.TitleProperty, siteName);
            }

            if (r.MoveToAttribute("Description"))
            {
                w.AddToTriple(generalUris.DescriptionProperty, r.Value);
            }

            if (r.MoveToAttribute("ParentAddress"))
            {
                string parentAddress = r.Value;

                SiteInfo parentInfo;
                if (!GlobalData.Sites.TryGetValue(parentAddress, out parentInfo))
                {
                    ConsoleHelper.WriteWarningLine("Unknown parent site {0}; skipping information.", parentAddress);
                }
                else
                {
                    w.AddToTriple(generalUris.ParentSiteProperty, generalUris.CreateSiteUri(parentInfo));
                }
            }

            return true;
        }
Beispiel #6
0
 /// <summary>
 /// Writes one close reason entry per close reason URI offered by
 /// <paramref name="generalUris"/>, each with its label.
 /// </summary>
 /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
 /// <param name="w">The writer that receives the entries.</param>
 private static void WriteCloseReasons(GeneralUris generalUris, SequentialTurtleWriter w)
 {
     // Binds the two arguments shared by all calls.
     Action<Uri, string> emit = (reason, label) => WriteCloseReason(generalUris, w, reason, label);

     emit(generalUris.DuplicateCloseReason, "Duplicate");
     emit(generalUris.OffTopicCloseReason, "Off-topic");
     emit(generalUris.SubjectiveCloseReason, "Opinion-based");
     emit(generalUris.NotAQuestionCloseReason, "Not a real question");
     emit(generalUris.TooLocalizedCloseReason, "Too localized");
     emit(generalUris.GeneralReferenceCloseReason, "General reference");
     emit(generalUris.NoiseCloseReason, "Pointless/Noise");
     emit(generalUris.UnclearCloseReason, "Unclear what you're asking");
     emit(generalUris.TooBroadCloseReason, "Too broad");
 }
        /// <summary>
        /// Creates the file <c>_users.ttl</c> in <paramref name="destDir"/> and writes a type
        /// statement for every id in <c>GlobalData.AccountIds</c>, then updates the global statistics.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="destDir">The directory in which the file is created.</param>
        /// <param name="nsMapper">The namespace mapper handed to the Turtle writer.</param>
        private static void WriteAccountList(GeneralUris generalUris, string destDir, VDS.RDF.INamespaceMapper nsMapper)
        {
            ConsoleHelper.WriteMilestone("Writing account list ...");

            string targetPath = Path.Combine(destDir, "_users.ttl");
            using (var output = new SequentialTurtleWriter(File.CreateText(targetPath), nsMapper))
            {
                foreach (var accountId in GlobalData.AccountIds)
                {
                    var accountUri = generalUris.CreateAccountUri(accountId);
                    output.StartTriple(accountUri);
                    output.AddToTriple(generalUris.TypeProperty, generalUris.AccountType);
                }

                GlobalData.UpdateStats(output);
            }

            Console.WriteLine(" done.");
        }
        /// <summary>
        /// Creates the file <c>_ontology.ttl</c> in <paramref name="destDir"/> and writes the
        /// ontology definitions into it, using a copy of <paramref name="nsMapper"/> that
        /// additionally declares the <c>owl</c> prefix.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="destDir">The directory in which the file is created.</param>
        /// <param name="nsMapper">The namespace mapper whose namespaces are imported.</param>
        private static void WriteOntology(GeneralUris generalUris, string destDir, VDS.RDF.INamespaceMapper nsMapper)
        {
            ConsoleHelper.WriteMilestone("Writing ontology ...");

            using (var ontologyNamespaces = new NamespaceMapper(false))
            {
                // Extend a private copy, so the caller's mapper is not given the owl prefix.
                ontologyNamespaces.Import(nsMapper);
                Uri owlNamespace = new Uri(NamespaceMapper.OWL);
                ontologyNamespaces.AddNamespace("owl", owlNamespace);

                string targetPath = Path.Combine(destDir, "_ontology.ttl");
                using (var output = new SequentialTurtleWriter(File.CreateText(targetPath), ontologyNamespaces))
                {
                    WriteOntologyDefinitions(generalUris, output);
                    GlobalData.UpdateStats(output);
                }
            }

            Console.WriteLine(" done.");
        }
Beispiel #9
0
        /// <summary>
        /// Creates the file <c>_badges.ttl</c> in <paramref name="destDir"/> and writes the badge
        /// information for every badge name of every site returned by
        /// <c>GlobalData.GetBadgesPerSite()</c>, then updates the global statistics.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="destDir">The directory in which the file is created.</param>
        /// <param name="nsMapper">The namespace mapper handed to the Turtle writer.</param>
        private static void WriteBadgesLists(GeneralUris generalUris, string destDir, VDS.RDF.INamespaceMapper nsMapper)
        {
            ConsoleHelper.WriteMilestone("Writing lists of badges ...");
            using (var destWriter = new SequentialTurtleWriter(File.CreateText(Path.Combine(destDir, "_badges.ttl")), nsMapper)) {
                foreach (var siteBadges in GlobalData.GetBadgesPerSite())
                {
                    // No separate site URI is created here: its result was never used, and the
                    // SiteUris constructor derives the site URI itself.
                    SiteUris uris = new SiteUris(generalUris, siteBadges.Item1);
                    foreach (string badgeName in siteBadges.Item2)
                    {
                        WriteBadgeInfo(uris, badgeName, destWriter);
                    }
                }

                GlobalData.UpdateStats(destWriter);
            }
            Console.WriteLine(" done.");
        }
Beispiel #10
0
        /// <summary>
        /// Initializes the URIs of one site: the site URI, the base URI of the site's data
        /// (the site name, followed by <c>-meta</c> for meta sites, below the site data prefix)
        /// and the tag and badge prefixes below that base URI.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="site">The site for which the URIs are created.</param>
        /// <exception cref="ArgumentNullException">Any of the arguments is <see langword="null"/>.</exception>
        public SiteUris(GeneralUris generalUris, SiteInfo site)
        {
            if (generalUris == null)
            {
                throw new ArgumentNullException("generalUris");
            }

            if (site == null)
            {
                throw new ArgumentNullException("site");
            }

            this.generalUris = generalUris;
            this.site        = site;

            stackExchangeSite = generalUris.CreateSiteUri(site);

            // Relative folder of the site's data, e.g. "<name>/" or "<name>-meta/".
            string siteFolder = site.Name;
            if (site.IsMetaSite)
            {
                siteFolder += "-meta";
            }
            siteFolder += "/";

            BaseUri     = new Uri(generalUris.SiteDataPrefix, siteFolder);
            tagPrefix   = new Uri(BaseUri, "tag/");
            badgePrefix = new Uri(BaseUri, "badge/");
        }
Beispiel #11
0
        /// <summary>
        /// Downloads <c>Sites.xml</c> (resolved against <paramref name="baseUri"/>) into
        /// <paramref name="tempDir"/> and converts it into the file <c>_sites.ttl</c> in
        /// <paramref name="destDir"/>.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="tempDir">The directory that receives the downloaded file.</param>
        /// <param name="baseUri">The URI against which <c>Sites.xml</c> is resolved.</param>
        /// <param name="destDir">The directory in which the output file is created.</param>
        /// <param name="nsMapper">The namespace mapper handed to the Turtle writer.</param>
        private static void ConvertSiteList(GeneralUris generalUris, string tempDir, Uri baseUri, string destDir, VDS.RDF.INamespaceMapper nsMapper)
        {
            string localCopy = Path.Combine(tempDir, "Sites.xml");

            ConsoleHelper.WriteMilestone("Downloading site list ...");
            using (var webClient = new WebClient())
            {
                Uri remoteFile = new Uri(baseUri, "Sites.xml");
                webClient.DownloadFile(remoteFile, localCopy);
            }
            Console.WriteLine(" done.");

            string targetPath = Path.Combine(destDir, "_sites.ttl");
            using (var output = new SequentialTurtleWriter(File.CreateText(targetPath), nsMapper))
            {
                using (var input = File.OpenRead(localCopy))
                using (var xml = XmlReader.Create(input))
                {
                    // Advance to the first element, i.e. the root of the document.
                    while (xml.NodeType != XmlNodeType.Element)
                    {
                        if (xml.Read())
                        {
                            continue;
                        }

                        ConsoleHelper.WriteErrorLine("No contents found in file {0}.", localCopy);
                        return;
                    }

                    string rootName = xml.LocalName;
                    if (rootName != "sitelist")
                    {
                        ConsoleHelper.WriteWarningLine("Unknown root element \"{0}\". Skipping document.", rootName);
                    }
                    else
                    {
                        ConvertSites(generalUris, xml, output);
                    }
                }

                GlobalData.UpdateStats(output);
            }

            Console.WriteLine("Conversion of site list completed.");
        }
        /// <summary>
        /// Writes the ontology description to <paramref name="w"/> in three parts: the metadata
        /// of the ontology itself, the class declarations and the property declarations.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="w">The writer that receives the statements.</param>
        private static void WriteOntologyDefinitions(GeneralUris generalUris, SequentialTurtleWriter w)
        {
            // ontology metadata

            string ontologyUri = generalUris.OntologyPrefix.AbsoluteUri ?? "";

            // The subject of the ontology metadata is the ontology prefix without one trailing
            // '/' or '#' character.
            if (ontologyUri.Length > 0)
            {
                switch (ontologyUri[ontologyUri.Length - 1])
                {
                case '/':
                case '#':
                    ontologyUri = ontologyUri.Substring(0, ontologyUri.Length - 1);
                    break;
                }
            }
            w.StartTriple(new Uri(ontologyUri));
            w.AddToTriple(generalUris.TypeProperty, new Uri(NamespaceMapper.OWL + "Ontology"));
            w.AddToTriple(generalUris.TitleProperty, "SE2Rdf Output");
            w.AddToTriple(generalUris.DateProperty, DateTime.Now);
            w.AddToTriple(new Uri(NamespaceMapper.OWL + "imports"), new Uri("http://purl.org/dc/elements/1.1"));             // TODO: is this correct/required?

            // types
            // NOTE(review): the AddToTriple calls that follow a WriteClassDecl call presumably
            // extend the declaration written by that call - confirm against WriteClassDecl.

            // The "Post" type is built from the ontology prefix here instead of being read from generalUris.
            Uri postType = new Uri(generalUris.OntologyPrefix.AbsoluteUri + "Post");

            WriteClassDecl(w, generalUris.QuestionType, "Question");
            w.AddToTriple(subClassOfUri, postType);
            // Second subClassOf statement whose object is an anonymous node carrying
            // owl:onProperty (the title property) and owl:cardinality 1.
            w.AddAnonymousToTriple(subClassOfUri);
            // TODO: does not seem to work yet in VOWL => test in Protege
            //w.AddToTriple(generalUris.TypeProperty, restrictionUri);
            w.AddToTriple(new Uri(NamespaceMapper.OWL + "onProperty"), generalUris.TitleProperty);
            w.AddToTriple(new Uri(NamespaceMapper.OWL + "cardinality"), 1);
            w.FinishAnonymousNode();

            WriteClassDecl(w, generalUris.AnswerType, "Answer");
            w.AddToTriple(subClassOfUri, postType);

            // "TagWiki" is only referenced as a superclass; no WriteClassDecl call is made for it here.
            Uri tagWikiType = new Uri(generalUris.OntologyPrefix.AbsoluteUri + "TagWiki");

            WriteClassDecl(w, generalUris.TagExcerptType, "Tag Excerpt");
            w.AddToTriple(subClassOfUri, tagWikiType);
            WriteClassDecl(w, generalUris.TagDescriptionType, "Tag Description");
            w.AddToTriple(subClassOfUri, tagWikiType);

            WriteClassDecl(w, generalUris.SiteInfoType, "Q&A Site");
            WriteClassDecl(w, generalUris.UserType, "Site-specific User");

            WriteClassDecl(w, generalUris.AccountType, "Account");
            w.AddToTriple(subClassOfUri, generalUris.PersonType);

            WriteClassDecl(w, generalUris.CommentType, "Comment");
            WriteClassDecl(w, generalUris.TagType, "Tag");
            WriteClassDecl(w, generalUris.AcceptanceType, "Acceptance");

            Uri voteType = new Uri(generalUris.OntologyPrefix.AbsoluteUri + "Vote");

            WriteClassDecl(w, voteType, "Vote");
            WriteClassDecl(w, generalUris.UpVoteType, "Upvote");
            w.AddToTriple(subClassOfUri, voteType);
            WriteClassDecl(w, generalUris.DownVoteType, "Downvote");
            w.AddToTriple(subClassOfUri, voteType);

            WriteClassDecl(w, generalUris.FavoriteType, "Favorite");
            WriteClassDecl(w, generalUris.BadgeType, "Badge");
            WriteClassDecl(w, generalUris.AssignedBadgeType, "Assigned Badge");

            // Common superclass of the post-related actions declared below.
            Uri postActionType = new Uri(generalUris.OntologyPrefix.AbsoluteUri + "PostAction");

            WriteClassDecl(w, postActionType, "Post Action");
            WriteClassDecl(w, generalUris.PostClosureType, "Closure");
            w.AddToTriple(subClassOfUri, postActionType);
            WriteClassDecl(w, generalUris.PostReopeningType, "Reopening");
            w.AddToTriple(subClassOfUri, postActionType);
            WriteClassDecl(w, generalUris.PostDeletionType, "Deletion");
            w.AddToTriple(subClassOfUri, postActionType);
            WriteClassDecl(w, generalUris.PostUndeletionType, "Undeletion");
            w.AddToTriple(subClassOfUri, postActionType);
            WriteClassDecl(w, generalUris.PostLockingType, "Locking");
            w.AddToTriple(subClassOfUri, postActionType);
            WriteClassDecl(w, generalUris.PostUnlockingType, "Unlocking");
            w.AddToTriple(subClassOfUri, postActionType);
            WriteClassDecl(w, generalUris.PostProtectionType, "Protection");
            w.AddToTriple(subClassOfUri, postActionType);
            WriteClassDecl(w, generalUris.PostUnprotectionType, "Unprotection");
            w.AddToTriple(subClassOfUri, postActionType);

            WriteClassDecl(w, generalUris.StartOfBountyType, "Start of Bounty");
            WriteClassDecl(w, generalUris.EndOfBountyType, "End of Bounty");
            WriteClassDecl(w, generalUris.CloseReasonType, "Close Reason");

            // properties
            // NOTE(review): in every call below the boolean argument is true where the second
            // array lists classes and false where it lists an XML Schema datatype - presumably it
            // selects object vs. datatype property, and the two arrays are presumably the domain
            // and the range; confirm against WritePropDecl.

            WritePropDecl(w, generalUris.StackExchangeWebsiteProperty, true,
                          new[] { postType, generalUris.TagType, generalUris.UserType, generalUris.BadgeType },
                          new[] { generalUris.SiteInfoType });
            WritePropDecl(w, generalUris.ScoreProperty, false,
                          new[] { generalUris.CommentType, postType },
                          new[] { new Uri(NamespaceMapper.XMLSCHEMA + "integer") });
            WritePropDecl(w, generalUris.OwnerProperty, true,
                          new[] { generalUris.CommentType, postType, tagWikiType },
                          new[] { generalUris.UserType });
            WritePropDecl(w, generalUris.CloseReasonProperty, true,
                          new[] { generalUris.PostClosureType },
                          new[] { generalUris.CloseReasonType });
            WritePropDecl(w, generalUris.ParticipantProperty, true,
                          new[] { postActionType },
                          new[] { generalUris.UserType });
            WritePropDecl(w, generalUris.CommentProperty, true,
                          new[] { postType },
                          new[] { generalUris.CommentType });
            WritePropDecl(w, generalUris.ViewCountProperty, false,
                          new[] { generalUris.QuestionType, generalUris.UserType, generalUris.AccountType },
                          new[] { new Uri(NamespaceMapper.XMLSCHEMA + "integer") });
            WritePropDecl(w, generalUris.TagProperty, true,
                          new[] { postType },             // TODO: verify!
                          new[] { generalUris.TagType });
            WritePropDecl(w, generalUris.AnswerProperty, true,
                          new[] { generalUris.QuestionType, generalUris.EndOfBountyType },
                          new[] { generalUris.AnswerType });

            WritePropDecl(w, generalUris.AcceptedAnswerProperty, true,
                          new[] { generalUris.QuestionType },
                          new[] { generalUris.AnswerType });
            // The accepted-answer property is additionally stated to be a sub-property of the answer property.
            w.AddToTriple(subPropertyOfUri, generalUris.AnswerProperty);

            WritePropDecl(w, generalUris.LastEditDateProperty, false,
                          new[] { postType, tagWikiType },
                          new[] { new Uri(NamespaceMapper.XMLSCHEMA + "dateTime") });
            WritePropDecl(w, generalUris.DuplicateProperty, true,
                          new[] { generalUris.QuestionType },
                          new[] { generalUris.QuestionType },
                          new Uri(NamespaceMapper.OWL + "IrreflexiveProperty"));
            WritePropDecl(w, generalUris.EventProperty, true,
                          new[] { postType },
                          new[] { postActionType });
            WritePropDecl(w, generalUris.TagExcerptProperty, true,
                          new[] { generalUris.TagType },
                          new[] { generalUris.TagExcerptType });
            WritePropDecl(w, generalUris.TagDescriptionProperty, true,
                          new[] { generalUris.TagType },
                          new[] { generalUris.TagDescriptionType });
            WritePropDecl(w, generalUris.BadgeProperty, true,
                          new[] { generalUris.UserType },
                          new[] { generalUris.BadgeType });
            WritePropDecl(w, generalUris.ReputationProperty, false,
                          new[] { generalUris.UserType },
                          new[] { new Uri(NamespaceMapper.XMLSCHEMA + "integer") });
            WritePropDecl(w, generalUris.UpVotesProperty, false,
                          new[] { generalUris.UserType },
                          new[] { new Uri(NamespaceMapper.XMLSCHEMA + "integer") });
            WritePropDecl(w, generalUris.DownVotesProperty, false,
                          new[] { generalUris.UserType },
                          new[] { new Uri(NamespaceMapper.XMLSCHEMA + "integer") });
            WritePropDecl(w, generalUris.AccountProperty, true,
                          new[] { generalUris.UserType },
                          new[] { generalUris.AccountType });
            // Adds the owl:FunctionalProperty type right after the account property declaration.
            w.AddToTriple(generalUris.TypeProperty, new Uri(NamespaceMapper.OWL + "FunctionalProperty"));
            WritePropDecl(w, generalUris.LastSeenProperty, false,
                          new[] { generalUris.UserType },
                          new[] { new Uri(NamespaceMapper.XMLSCHEMA + "dateTime") });
            WritePropDecl(w, generalUris.FavoriteProperty, true,
                          new[] { generalUris.UserType },
                          new[] { generalUris.QuestionType });
            WritePropDecl(w, generalUris.IsMetaSiteProperty, false,
                          new[] { generalUris.SiteInfoType },
                          new[] { new Uri(NamespaceMapper.XMLSCHEMA + "boolean") });
            WritePropDecl(w, generalUris.ParentSiteProperty, true,
                          new[] { generalUris.SiteInfoType },
                          new[] { generalUris.SiteInfoType },
                          new Uri(NamespaceMapper.OWL + "IrreflexiveProperty"));
            WritePropDecl(w, generalUris.PostProperty, true,
                          new[] { generalUris.StartOfBountyType },
                          new[] { generalUris.QuestionType });
            WritePropDecl(w, generalUris.DonorProperty, true,
                          new[] { generalUris.StartOfBountyType },
                          new[] { generalUris.UserType });
            WritePropDecl(w, generalUris.OfferedAmountProperty, false,
                          new[] { generalUris.StartOfBountyType },
                          new[] { new Uri(NamespaceMapper.XMLSCHEMA + "integer") });
            WritePropDecl(w, generalUris.TransferredAmountProperty, false,
                          new[] { generalUris.EndOfBountyType },
                          new[] { new Uri(NamespaceMapper.XMLSCHEMA + "integer") });
        }
        /// <summary>
        /// Converts one XML source file of a site into a Turtle file in
        /// <paramref name="destDir"/>. The converter is selected by the name of the root element;
        /// documents with an unknown root element are skipped with a warning.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="srcFile">The path of the XML file to convert.</param>
        /// <param name="destDir">The directory in which the output file is created.</param>
        /// <param name="website">The site the source file belongs to.</param>
        public static void Convert(GeneralUris generalUris, string srcFile, string destDir, SiteInfo website)
        {
            string shortName = Path.GetFileName(srcFile);
            Console.WriteLine("Processing {0} ...", shortName);

            var siteUris = new SiteUris(generalUris, website);
            var nsMapper = siteUris.CreateNamespaceMapper();

            // Output name: "<website>-<source name without extension>.ttl"
            string targetName = website + "-" + Path.GetFileNameWithoutExtension(srcFile) + ".ttl";
            string targetPath = Path.Combine(destDir, targetName);

            using (var output = new SequentialTurtleWriter(File.CreateText(targetPath), nsMapper))
            {
                using (var input = File.OpenRead(srcFile))
                using (var xml = XmlReader.Create(input))
                {
                    // Advance to the first element, i.e. the root of the document.
                    while (xml.NodeType != XmlNodeType.Element)
                    {
                        if (xml.Read())
                        {
                            continue;
                        }

                        ConsoleHelper.WriteErrorLine("No contents found in file {0}.", srcFile);
                        return;
                    }

                    string rootName = xml.LocalName;
                    switch (rootName)
                    {
                    case "badges":
                        ConsoleHelper.WriteInfoLine("List of badges identified.");
                        ConvertBadges(siteUris, xml, output);
                        break;

                    case "comments":
                        ConsoleHelper.WriteInfoLine("List of comments identified.");
                        ConvertComments(siteUris, xml, output);
                        break;

                    case "posthistory":
                        ConsoleHelper.WriteInfoLine("List of posthistory identified.");
                        ConvertPostHistory(siteUris, xml, output);
                        break;

                    case "postlinks":
                        ConsoleHelper.WriteInfoLine("List of postlinks identified.");
                        ConvertPostLinks(siteUris, xml, output);
                        break;

                    case "posts":
                        ConsoleHelper.WriteInfoLine("List of posts identified.");
                        ConvertPosts(siteUris, xml, output);
                        break;

                    case "tags":
                        ConsoleHelper.WriteInfoLine("List of tags identified.");
                        ConvertTags(siteUris, xml, output);
                        break;

                    case "users":
                        ConsoleHelper.WriteInfoLine("List of users identified.");
                        ConvertUsers(siteUris, xml, output);
                        break;

                    case "votes":
                        ConsoleHelper.WriteInfoLine("List of votes identified.");
                        ConvertVotes(siteUris, xml, output);
                        break;

                    default:
                        ConsoleHelper.WriteWarningLine("Unknown root element \"{0}\". Skipping document.", rootName);
                        break;
                    }
                }

                GlobalData.UpdateStats(output);
            }

            Console.WriteLine("Conversion of {0} completed.", shortName);
        }
Beispiel #14
0
        /// <summary>
        /// Retrieves the files named in the filelist, converts them and writes the global
        /// information. Temporary files are removed afterwards unless the options request to
        /// keep them; finally the statistics and the total duration are printed.
        /// </summary>
        /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
        /// <param name="filelist">The URL of a filelist Xml file.</param>
        /// <exception cref="ArgumentNullException">Any of the arguments is <see langword="null"/>.</exception>
        private static void RetrieveData(GeneralUris generalUris, Uri filelist)
        {
            if (generalUris == null)
            {
                throw new ArgumentNullException("generalUris");
            }

            if (filelist == null)
            {
                throw new ArgumentNullException("filelist");
            }

            string tempDir = Path.Combine(BaseDir, "tmp");
            string destDir = Path.Combine(BaseDir, "rdf");

            Directory.CreateDirectory(tempDir);
            Directory.CreateDirectory(destDir);

            DateTime startTime = DateTime.Now;
            ConsoleHelper.WriteInfoLine("Current time: {0:yyyy-MM-dd HH:mm:ss}", startTime);

            if (!GlobalData.Options.OntologyOnly)
            {
                ConsoleHelper.WriteMilestone("Downloading files list ...");
                string listPath = Path.Combine(tempDir, "files.xml");
                using (var webClient = new WebClient())
                {
                    webClient.DownloadFile(filelist, listPath);
                }
                Console.WriteLine(" done.");

                var files = LoadFilesList(listPath).OrderBy(entry => entry).ToArray();
                double totalGigabytes = (double)files.Sum(entry => entry.Size) / 1024 / 1024 / 1024;
                ConsoleHelper.WriteInfoLine("{0} file(s) in list, totalling to a compressed size of {1:F1} GB.",
                                            files.Length, totalGigabytes);

                int processedFilesCount = 0;
                foreach (var file in files)
                {
                    SiteInfo siteInfo;
                    try
                    {
                        siteInfo = file.RetrieveSiteInfo();
                    }
                    catch (ArgumentException ex)
                    {
                        ConsoleHelper.WriteErrorLine("Skipping file {0}, as it cannot be associated with a website.\n{1}", file, ex);
                        continue;
                    }

                    // Sites that are not included do not count towards the file limit either.
                    if (!IncludeSite(siteInfo))
                    {
                        continue;
                    }

                    GlobalData.Sites[siteInfo.Id] = siteInfo;

                    if (!GlobalData.Options.SiteListOnly)
                    {
                        string archive = file.Download(filelist, tempDir);

                        // Only 7z archives can be extracted; anything else is reported and skipped.
                        string[] rawFiles = null;
                        if (Path.GetExtension(archive) == ".7z")
                        {
                            rawFiles = ExtractSevenZipArchive(archive);
                        }
                        else
                        {
                            ConsoleHelper.WriteWarningLine("File {0} has an unknown file extension.", archive);
                        }

                        if (rawFiles != null)
                        {
                            ConsoleHelper.WriteInfoLine("{0} file(s) extracted.", rawFiles.Length);

                            foreach (var rawFile in rawFiles)
                            {
                                Converter.Convert(generalUris, rawFile, destDir, siteInfo);
                            }
                        }
                    }

                    processedFilesCount++;
                    if (processedFilesCount >= GlobalData.Options.MaxFileCount)
                    {
                        break;
                    }
                }
            }

            GlobalInformationConverter.Convert(generalUris, tempDir, filelist, destDir);

            if (!GlobalData.Options.KeepTemporaryFiles)
            {
                Console.Write("Removing temporary files ... ");
                try
                {
                    Directory.Delete(tempDir, true);
                    Console.WriteLine(" done.");
                }
                catch
                {
                    // Best effort only: a failed clean-up must not abort the run.
                    ConsoleHelper.WriteErrorLine("Please remove the directory {0} manually.", tempDir);
                }
            }

            Console.WriteLine();
            GlobalData.PrintStats();

            DateTime endTime = DateTime.Now;
            ConsoleHelper.WriteInfoLine("Current time: {0:yyyy-MM-dd HH:mm:ss}", endTime);
            ConsoleHelper.WriteInfoLine("Total duration: {0}", endTime - startTime);
        }
Beispiel #15
0
 /// <summary>
 /// Writes a single close reason: starts a triple for <paramref name="reason"/> and adds its
 /// type (the close reason type) and its label.
 /// </summary>
 /// <param name="generalUris">An object that provides general URIs used in the exported dataset.</param>
 /// <param name="w">The writer that receives the statements.</param>
 /// <param name="reason">The URI of the close reason.</param>
 /// <param name="name">The label of the close reason.</param>
 private static void WriteCloseReason(GeneralUris generalUris, SequentialTurtleWriter w, Uri reason, string name)
 {
     w.StartTriple(reason);

     var typePredicate    = generalUris.TypeProperty;
     var closeReasonClass = generalUris.CloseReasonType;
     w.AddToTriple(typePredicate, closeReasonClass);

     var labelPredicate = generalUris.LabelProperty;
     w.AddToTriple(labelPredicate, name);
 }