/// <summary>
/// Verifies that the given Turtle writer is the one stored as the owner of this blank node.
/// </summary>
/// <param name="expected">The writer that is expected to own this blank node.</param>
/// <exception cref="ArgumentException">The stored owner differs from <paramref name="expected"/>.</exception>
internal void CheckOwner(SequentialTurtleWriter expected)
{
    bool ownedByExpectedWriter = owner == expected;
    if (ownedByExpectedWriter)
    {
        return;
    }

    throw new ArgumentException("The blank node does not belong to the current Turtle writer.");
}
/// <summary>
/// Reads the JSON payload of the current element's <c>Text</c> attribute and adds one
/// participant statement per listed voter to the triple currently being written.
/// </summary>
/// <param name="uris">The URI factory of the site being converted.</param>
/// <param name="r">The XML reader positioned on the row; it is moved to the <c>Text</c> attribute.</param>
/// <param name="w">The Turtle writer that receives the participant statements.</param>
/// <remarks>
/// Voters with an id are written as user URIs; voters that only carry a display name are
/// written as plain string literals. Invalid JSON is reported as a warning and skipped.
/// </remarks>
private static void AddParticipants(SiteUris uris, XmlReader r, SequentialTurtleWriter w)
{
    if (!r.MoveToAttribute("Text"))
    {
        return;
    }

    EventInfo info;
    try
    {
        info = JsonConvert.DeserializeObject<EventInfo>(r.Value);
    }
    catch (Exception ex)
    {
        ConsoleHelper.WriteWarningLine("Invalid Json string: {0} ({2} message: {1}); skipping information.", r.Value, ex.Message, ex.GetType().FullName);
        return;
    }

    // The deserializer yields null for an empty or literal "null" payload; there is
    // nothing to add in that case, and dereferencing the result would throw.
    if (object.ReferenceEquals(info, null) || info.Voters == null)
    {
        return;
    }

    foreach (var voter in info.Voters)
    {
        // A JSON array may contain null entries; skip them instead of crashing.
        if (object.ReferenceEquals(voter, null))
        {
            continue;
        }

        if (!string.IsNullOrWhiteSpace(voter.Id))
        {
            w.AddToTriple(uris.GeneralUris.ParticipantProperty, uris.CreateUserUri(voter.Id));
        }
        else if (!string.IsNullOrWhiteSpace(voter.DisplayName))
        {
            w.AddToTriple(uris.GeneralUris.ParticipantProperty, voter.DisplayName);
        }
    }
}
/// <summary>
/// Converts every <c>row</c> element of the site list and reports how many rows were skipped.
/// </summary>
/// <param name="generalUris">The factory for site-independent URIs.</param>
/// <param name="r">The XML reader positioned on the root element of the site list.</param>
/// <param name="w">The Turtle writer that receives the site descriptions.</param>
private static void ConvertSites(GeneralUris generalUris, XmlReader r, SequentialTurtleWriter w)
{
    long skipped = 0;
    bool endOfList = false;

    while (!endOfList && r.Read())
    {
        switch (r.NodeType)
        {
            case XmlNodeType.Element:
                if (r.LocalName == "row")
                {
                    using (var subR = r.ReadSubtree())
                    {
                        subR.Read();
                        if (!ConvertSite(generalUris, subR, w))
                        {
                            skipped++;
                        }
                    }
                }
                break;

            case XmlNodeType.EndElement:
                // Leave the loop instead of returning, so that the summary warning
                // below is also emitted when the closing tag of the list is reached.
                endOfList = true;
                break;
        }
    }

    if (skipped > 0)
    {
        ConsoleHelper.WriteWarningLine("{0} items from the list of sites were skipped.", skipped);
    }
}
/// <summary>
/// Writes the statements for a single tag row: label, type, site link and, when present,
/// the links to the excerpt and wiki posts.
/// </summary>
/// <param name="uris">The URI factory of the site being converted.</param>
/// <param name="r">The XML reader positioned on the tag row.</param>
/// <param name="w">The Turtle writer that receives the statements.</param>
private static void ConvertTag(SiteUris uris, XmlReader r, SequentialTurtleWriter w)
{
    // Without a tag name there is no subject to describe.
    if (!r.MoveToAttribute("TagName"))
    {
        r.MoveToElement();
        ConsoleHelper.WriteErrorLine("No TagName attribute found on element {0}. Skipping element.", r.ReadOuterXml());
        return;
    }

    string tagName = r.Value;
    Uri subjectUri = uris.CreateTagUri(tagName);
    w.StartTriple(subjectUri);

    w.AddToTriple(uris.GeneralUris.LabelProperty, tagName);
    w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.TagType);
    uris.LinkToSite(w);

    bool hasExcerpt = r.MoveToAttribute("ExcerptPostId");
    if (hasExcerpt)
    {
        w.AddToTriple(uris.GeneralUris.TagExcerptProperty, uris.CreatePostUri(r.Value));
    }

    bool hasWiki = r.MoveToAttribute("WikiPostId");
    if (hasWiki)
    {
        w.AddToTriple(uris.GeneralUris.TagDescriptionProperty, uris.CreatePostUri(r.Value));
    }

    // TODO: Count
}
/// <summary>
/// Converts every <c>row</c> element of the post links document and, once the closing
/// element is reached, reports the link type ids that were not recognized.
/// </summary>
/// <param name="uris">The URI factory of the site being converted.</param>
/// <param name="r">The XML reader positioned on the root element of the document.</param>
/// <param name="w">The Turtle writer that receives the statements.</param>
private static void ConvertPostLinks(SiteUris uris, XmlReader r, SequentialTurtleWriter w)
{
    var unknownIds = new UnknownValueStore<string>();

    while (r.Read())
    {
        XmlNodeType nodeType = r.NodeType;

        if (nodeType == XmlNodeType.Element)
        {
            if (r.LocalName == "row")
            {
                using (var rowReader = r.ReadSubtree())
                {
                    rowReader.Read();
                    ConvertPostLink(uris, rowReader, w, unknownIds);
                }
            }
        }
        else if (nodeType == XmlNodeType.EndElement)
        {
            // End of the document: summarize unknown link types, then stop.
            long unknownCount = unknownIds.RegisteredValueCount;
            if (unknownCount > 0)
            {
                ConsoleHelper.WriteWarningLine("{0} unknown LinkTypeId value(s) found: {1}", unknownCount, unknownIds);
            }
            return;
        }
    }
}
/// <summary>
/// Writes the description of a badge: its type, its label and the link to its site.
/// </summary>
/// <param name="uris">The URI factory of the site the badge belongs to.</param>
/// <param name="badgeName">The name of the badge; used for both the URI and the label.</param>
/// <param name="w">The Turtle writer that receives the statements.</param>
private static void WriteBadgeInfo(SiteUris uris, string badgeName, SequentialTurtleWriter w)
{
    var badgeUri = uris.CreateBadgeUri(badgeName);
    w.StartTriple(badgeUri);

    var general = uris.GeneralUris;
    w.AddToTriple(general.TypeProperty, general.BadgeType);
    w.AddToTriple(general.LabelProperty, badgeName);

    uris.LinkToSite(w);
}
/// <summary>
/// Adds a statement to the triple currently being written that uses the Stack Exchange
/// website property as predicate and this instance's site as object.
/// </summary>
/// <param name="writer">The Turtle writer that receives the statement.</param>
/// <exception cref="ArgumentNullException"><paramref name="writer"/> is <see langword="null"/>.</exception>
public void LinkToSite(SequentialTurtleWriter writer)
{
    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    var siteProperty = GeneralUris.StackExchangeWebsiteProperty;
    writer.AddToTriple(siteProperty, stackExchangeSite);
}
/// <summary>
/// Creates <c>_constants.ttl</c> in the destination directory, writes the close reason
/// definitions into it and adds the result to the global statistics.
/// </summary>
/// <param name="generalUris">The factory for site-independent URIs.</param>
/// <param name="destDir">The directory that receives the output file.</param>
/// <param name="nsMapper">The namespace mapper handed to the Turtle writer.</param>
private static void WriteConstants(GeneralUris generalUris, string destDir, VDS.RDF.INamespaceMapper nsMapper)
{
    ConsoleHelper.WriteMilestone("Writing constant definitions ...");

    string constantsPath = Path.Combine(destDir, "_constants.ttl");
    using (var destWriter = new SequentialTurtleWriter(File.CreateText(constantsPath), nsMapper))
    {
        WriteCloseReasons(generalUris, destWriter);
        GlobalData.UpdateStats(destWriter);
    }

    Console.WriteLine(" done.");
}
/// <summary>
/// Initializes a blank node with the writer that owns it and its numeric id.
/// </summary>
/// <param name="owner">The Turtle writer the blank node belongs to.</param>
/// <param name="id">The id of the blank node.</param>
/// <exception cref="ArgumentNullException"><paramref name="owner"/> is <see langword="null"/>.</exception>
internal TurtleBlankNode(SequentialTurtleWriter owner, long id)
{
    if (owner == null)
    {
        throw new ArgumentNullException("owner");
    }

    this.id = id;
    this.owner = owner;
}
/// <summary>
/// Writes the description of a single site row from the site list.
/// </summary>
/// <param name="generalUris">The factory for site-independent URIs.</param>
/// <param name="r">The XML reader positioned on the site row.</param>
/// <param name="w">The Turtle writer that receives the statements.</param>
/// <returns>
/// <see langword="true"/> if the site was written; <see langword="false"/> if the row has no
/// <c>Address</c> attribute or the address is not contained in <c>GlobalData.Sites</c>.
/// </returns>
private static bool ConvertSite(GeneralUris generalUris, XmlReader r, SequentialTurtleWriter w)
{
    if (!r.MoveToAttribute("Address"))
    {
        r.MoveToElement();
        ConsoleHelper.WriteErrorLine("No Address attribute found on element {0}. Skipping element.", r.ReadOuterXml());
        return false;
    }

    // Rows for sites that are not registered are skipped silently.
    SiteInfo info;
    if (!GlobalData.Sites.TryGetValue(r.Value, out info))
    {
        return false;
    }

    string address = r.Value;
    Uri subjectUri = generalUris.CreateSiteUri(info);
    w.StartTriple(subjectUri);
    w.AddToTriple(generalUris.IsMetaSiteProperty, info.IsMetaSite);
    w.AddToTriple(generalUris.LanguageProperty, info.IsEnglishSite ? "en" : info.Language);
    w.AddToTriple(generalUris.TypeProperty, generalUris.SiteInfoType);
    w.AddToTriple(generalUris.WebsiteProperty, new Uri("http://" + address));

    if (r.MoveToAttribute("Name"))
    {
        // The site name serves as both label and title.
        string siteName = r.Value;
        w.AddToTriple(generalUris.LabelProperty, siteName);
        w.AddToTriple(generalUris.TitleProperty, siteName);
    }

    if (r.MoveToAttribute("Description"))
    {
        w.AddToTriple(generalUris.DescriptionProperty, r.Value);
    }

    if (r.MoveToAttribute("ParentAddress"))
    {
        SiteInfo parentInfo;
        bool parentKnown = GlobalData.Sites.TryGetValue(r.Value, out parentInfo);
        if (!parentKnown)
        {
            ConsoleHelper.WriteWarningLine("Unknown parent site {0}; skipping information.", r.Value);
        }
        else
        {
            w.AddToTriple(generalUris.ParentSiteProperty, generalUris.CreateSiteUri(parentInfo));
        }
    }

    return true;
}
/// <summary>
/// Writes the definition of every known close reason, each paired with its display label.
/// </summary>
/// <param name="generalUris">The factory for site-independent URIs that provides the close reasons.</param>
/// <param name="w">The Turtle writer that receives the definitions.</param>
private static void WriteCloseReasons(GeneralUris generalUris, SequentialTurtleWriter w)
{
    // One call per close reason; the order of the calls determines the order in the output.
    WriteCloseReason(
        generalUris, w, generalUris.DuplicateCloseReason, "Duplicate");
    WriteCloseReason(
        generalUris, w, generalUris.OffTopicCloseReason, "Off-topic");
    WriteCloseReason(
        generalUris, w, generalUris.SubjectiveCloseReason, "Opinion-based");
    WriteCloseReason(
        generalUris, w, generalUris.NotAQuestionCloseReason, "Not a real question");
    WriteCloseReason(
        generalUris, w, generalUris.TooLocalizedCloseReason, "Too localized");
    WriteCloseReason(
        generalUris, w, generalUris.GeneralReferenceCloseReason, "General reference");
    WriteCloseReason(
        generalUris, w, generalUris.NoiseCloseReason, "Pointless/Noise");
    WriteCloseReason(
        generalUris, w, generalUris.UnclearCloseReason, "Unclear what you're asking");
    WriteCloseReason(
        generalUris, w, generalUris.TooBroadCloseReason, "Too broad");
}
/// <summary>
/// Adds an event statement from the post named by the row's <c>PostId</c> attribute to the
/// given subject, then makes the subject the current triple subject again.
/// </summary>
/// <param name="uris">The URI factory of the site being converted.</param>
/// <param name="subjectUri">The URI of the post history item being written.</param>
/// <param name="r">The XML reader positioned on the row.</param>
/// <param name="w">The Turtle writer that receives the statements.</param>
private static void LinkToPost(SiteUris uris, Uri subjectUri, XmlReader r, SequentialTurtleWriter w)
{
    if (!r.MoveToAttribute("PostId"))
    {
        ConsoleHelper.WriteWarningLine("Orphaned post history item: {0}", subjectUri.AbsoluteUri);
        return;
    }

    var postUri = uris.CreatePostUri(r.Value);

    // Temporarily switch the subject to the post, then switch back.
    w.StartTriple(postUri);
    w.AddToTriple(uris.GeneralUris.EventProperty, subjectUri);
    w.StartTriple(subjectUri);
}
/// <summary>
/// Writes the declaration of a property: its type followed by its domain and range.
/// </summary>
/// <param name="w">The Turtle writer that receives the declaration.</param>
/// <param name="propUri">The URI of the property being declared.</param>
/// <param name="isObjectProperty">
/// Selects between <c>owl:ObjectProperty</c> and <c>owl:DatatypeProperty</c>; only used when
/// <paramref name="propTypeUri"/> is <see langword="null"/>.
/// </param>
/// <param name="domain">The classes forming the domain of the property.</param>
/// <param name="range">The classes or datatypes forming the range of the property.</param>
/// <param name="propTypeUri">An explicit property type that replaces the default one.</param>
private static void WritePropDecl(SequentialTurtleWriter w, Uri propUri, bool isObjectProperty, IList<Uri> domain, IList<Uri> range, Uri propTypeUri = null)
{
    w.StartTriple(propUri);

    Uri declaredType = propTypeUri
        ?? new Uri(NamespaceMapper.OWL + (isObjectProperty ? "ObjectProperty" : "DatatypeProperty"));
    w.AddToTriple(typeUri, declaredType);

    Uri domainProperty = new Uri(NamespaceMapper.RDFS + "domain");
    WriteUnionPropIfNecessary(w, domainProperty, domain);

    Uri rangeProperty = new Uri(NamespaceMapper.RDFS + "range");
    WriteUnionPropIfNecessary(w, rangeProperty, range);
}
/// <summary>
/// Creates <c>_users.ttl</c> in the destination directory, writes a type statement for every
/// id in <c>GlobalData.AccountIds</c> and adds the result to the global statistics.
/// </summary>
/// <param name="generalUris">The factory for site-independent URIs.</param>
/// <param name="destDir">The directory that receives the output file.</param>
/// <param name="nsMapper">The namespace mapper handed to the Turtle writer.</param>
private static void WriteAccountList(GeneralUris generalUris, string destDir, VDS.RDF.INamespaceMapper nsMapper)
{
    ConsoleHelper.WriteMilestone("Writing account list ...");

    string accountsPath = Path.Combine(destDir, "_users.ttl");
    using (var destWriter = new SequentialTurtleWriter(File.CreateText(accountsPath), nsMapper))
    {
        foreach (var accountId in GlobalData.AccountIds)
        {
            var accountUri = generalUris.CreateAccountUri(accountId);
            destWriter.StartTriple(accountUri);
            destWriter.AddToTriple(generalUris.TypeProperty, generalUris.AccountType);
        }

        GlobalData.UpdateStats(destWriter);
    }

    Console.WriteLine(" done.");
}
/// <summary>
/// Prints the size and triple count of a Turtle document and adds both values to the
/// running totals.
/// </summary>
/// <param name="writer">The Turtle writer.</param>
/// <exception cref="ArgumentNullException"><paramref name="writer"/> is <see langword="null"/>.</exception>
public static void UpdateStats(SequentialTurtleWriter writer)
{
    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    long writtenBytes = writer.GetCurrentStreamSize();
    long writtenTriples = writer.GetCurrentTripleCount();

    // The size is reported in megabytes.
    double megabytes = (double)writtenBytes / 1024 / 1024;
    ConsoleHelper.WriteSuccessLine("{0:F1} MB written; {1} triple(s) created.", megabytes, writtenTriples);

    byteCount += writtenBytes;
    tripleCount += writtenTriples;
}
/// <summary>
/// Converts every <c>row</c> element of the users document and, once the closing element is
/// reached, reports how many malformed website URLs were encountered.
/// </summary>
/// <param name="uris">The URI factory of the site being converted.</param>
/// <param name="r">The XML reader positioned on the root element of the document.</param>
/// <param name="w">The Turtle writer that receives the statements.</param>
private static void ConvertUsers(SiteUris uris, XmlReader r, SequentialTurtleWriter w)
{
    var malformedIris = new List<string>();
    long totalMalformedIriCount = 0;

    while (r.Read())
    {
        XmlNodeType nodeType = r.NodeType;

        if (nodeType == XmlNodeType.Element)
        {
            if (r.LocalName == "row")
            {
                using (var rowReader = r.ReadSubtree())
                {
                    rowReader.Read();
                    ConvertUser(uris, rowReader, w, malformedIris, ref totalMalformedIriCount);
                }
            }
            continue;
        }

        if (nodeType != XmlNodeType.EndElement)
        {
            continue;
        }

        // End of the document: summarize malformed URLs, then stop.
        if (totalMalformedIriCount > 0)
        {
            string example = "";
            if (malformedIris.Count > 0)
            {
                // Build " (e.g. a; b; c)" from the collected samples.
                var exampleBuilder = new System.Text.StringBuilder(" (e.g. ");
                string separator = "";
                foreach (string iri in malformedIris)
                {
                    exampleBuilder.Append(separator);
                    exampleBuilder.Append(iri);
                    separator = "; ";
                }
                exampleBuilder.Append(")");
                example = exampleBuilder.ToString();
            }

            ConsoleHelper.WriteWarningLine("{1} malformed URL(s) found{0}, treated as string literals.", example, totalMalformedIriCount);
        }
        return;
    }
}
/// <summary>
/// Creates <c>_ontology.ttl</c> in the destination directory, writes the ontology
/// definitions into it and adds the result to the global statistics.
/// </summary>
/// <param name="generalUris">The factory for site-independent URIs.</param>
/// <param name="destDir">The directory that receives the output file.</param>
/// <param name="nsMapper">The namespace mapper whose entries are imported for this file.</param>
private static void WriteOntology(GeneralUris generalUris, string destDir, VDS.RDF.INamespaceMapper nsMapper)
{
    ConsoleHelper.WriteMilestone("Writing ontology ...");

    // A temporary mapper is used so that the "owl" prefix is only added for this file.
    using (var tempNsMapper = new NamespaceMapper(false))
    {
        tempNsMapper.Import(nsMapper);
        tempNsMapper.AddNamespace("owl", new Uri(NamespaceMapper.OWL));

        string ontologyPath = Path.Combine(destDir, "_ontology.ttl");
        using (var destWriter = new SequentialTurtleWriter(File.CreateText(ontologyPath), tempNsMapper))
        {
            WriteOntologyDefinitions(generalUris, destWriter);
            GlobalData.UpdateStats(destWriter);
        }
    }

    Console.WriteLine(" done.");
}
/// <summary>
/// Writes the statements for a single badge row.
/// </summary>
/// <param name="uris">The URI factory of the site being converted.</param>
/// <param name="r">The XML reader positioned on the badge row.</param>
/// <param name="w">The Turtle writer that receives the statements.</param>
/// <remarks>
/// With <c>GlobalData.Options.FullTimeInfo</c> set, the assignment is written as its own
/// resource with owner, badge and date. Otherwise the badge is attached directly to the user.
/// In both modes the badge name is registered in <c>GlobalData</c>.
/// </remarks>
private static void ConvertBadge(SiteUris uris, XmlReader r, SequentialTurtleWriter w)
{
    if (!GlobalData.Options.FullTimeInfo)
    {
        // Compact mode: the user is the subject, the badge is attached directly.
        if (r.MoveToAttribute("UserId"))
        {
            w.StartTriple(uris.CreateUserUri(r.Value));
            if (r.MoveToAttribute("Name"))
            {
                string compactBadgeName = r.Value;
                w.AddToTriple(uris.GeneralUris.BadgeProperty, uris.CreateBadgeUri(compactBadgeName));
                GlobalData.RegisterBadge(uris.Site, compactBadgeName);
            }
        }
        return;
    }

    if (!r.MoveToAttribute("Id"))
    {
        r.MoveToElement();
        ConsoleHelper.WriteErrorLine("No Id attribute found on element {0}. Skipping element.", r.ReadOuterXml());
        return;
    }

    Uri subjectUri = uris.CreateAssignedBadgeUri(r.Value);
    w.StartTriple(subjectUri);
    w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.AssignedBadgeType);

    // link to site is not required as that information is expressed by the owner and badge
    if (r.MoveToAttribute("UserId"))
    {
        w.AddToTriple(uris.GeneralUris.OwnerProperty, uris.CreateUserUri(r.Value));
    }

    if (r.MoveToAttribute("Name"))
    {
        string badgeName = r.Value;
        w.AddToTriple(uris.GeneralUris.BadgeProperty, uris.CreateBadgeUri(badgeName));
        GlobalData.RegisterBadge(uris.Site, badgeName);
    }

    if (r.MoveToAttribute("Date"))
    {
        DateTime assignedOn = DateTime.Parse(r.Value, System.Globalization.CultureInfo.InvariantCulture);
        w.AddToTriple(uris.GeneralUris.DateProperty, assignedOn);
    }
}
/// <summary>
/// Adds a property whose object is either a single URI or, for several URIs, an anonymous
/// <c>owl:Class</c> node with an <c>owl:unionOf</c> list. Nothing is written for an empty list.
/// </summary>
/// <param name="w">The Turtle writer that receives the statements.</param>
/// <param name="propUri">The predicate to add.</param>
/// <param name="objects">The URIs to use as object(s).</param>
private static void WriteUnionPropIfNecessary(SequentialTurtleWriter w, Uri propUri, IList<Uri> objects)
{
    int objectCount = objects.Count;
    if (objectCount <= 0)
    {
        return;
    }

    if (objectCount == 1)
    {
        // A single object needs no union wrapper.
        w.AddToTriple(propUri, objects[0]);
        return;
    }

    Uri owlClass = new Uri(NamespaceMapper.OWL + "Class");
    Uri owlUnionOf = new Uri(NamespaceMapper.OWL + "unionOf");

    w.AddAnonymousToTriple(propUri);
    w.AddToTriple(typeUri, owlClass);
    w.AddToTriple(owlUnionOf, true, objects);
    w.FinishAnonymousNode();
}
/// <summary>
/// Writes the statements for a single comment row and links the comment to its post.
/// </summary>
/// <param name="uris">The URI factory of the site being converted.</param>
/// <param name="r">The XML reader positioned on the comment row.</param>
/// <param name="w">The Turtle writer that receives the statements.</param>
/// <remarks>
/// Rows without an <c>Id</c> attribute are reported and skipped. Comments without a
/// <c>PostId</c> attribute are still written but reported as orphaned.
/// </remarks>
private static void ConvertComment(SiteUris uris, XmlReader r, SequentialTurtleWriter w)
{
    Uri subjectUri;
    if (r.MoveToAttribute("Id"))
    {
        subjectUri = uris.CreateCommentUri(r.Value);
        w.StartTriple(subjectUri);
    }
    else
    {
        r.MoveToElement();
        ConsoleHelper.WriteErrorLine("No Id attribute found on element {0}. Skipping element.", r.ReadOuterXml());
        return;
    }

    w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.CommentType);
    uris.LinkToSite(w);

    if (r.MoveToAttribute("Score"))
    {
        // The dump is machine-readable data, so it is parsed with the invariant culture
        // (as the dates below are) rather than with the culture of the current thread.
        w.AddToTriple(uris.GeneralUris.ScoreProperty, long.Parse(r.Value, System.Globalization.CultureInfo.InvariantCulture));
    }

    if (r.MoveToAttribute("Text"))
    {
        w.AddToTriple(uris.GeneralUris.DescriptionProperty, r.Value);
    }

    if (r.MoveToAttribute("CreationDate"))
    {
        w.AddToTriple(uris.GeneralUris.DateProperty, DateTime.Parse(r.Value, System.Globalization.CultureInfo.InvariantCulture));
    }

    if (r.MoveToAttribute("UserId"))
    {
        w.AddToTriple(uris.GeneralUris.OwnerProperty, uris.CreateUserUri(r.Value));
    }

    if (r.MoveToAttribute("PostId"))
    {
        // Temporarily switch the subject to the post to add the link, then switch back.
        w.StartTriple(uris.CreatePostUri(r.Value));
        w.AddToTriple(uris.GeneralUris.CommentProperty, subjectUri);
        w.StartTriple(subjectUri);
    }
    else
    {
        ConsoleHelper.WriteWarningLine("Orphaned comment: {0}", subjectUri);
    }
}
/// <summary>
/// Creates <c>_badges.ttl</c> in the destination directory, writes the description of every
/// badge registered per site and adds the result to the global statistics.
/// </summary>
/// <param name="generalUris">The factory for site-independent URIs.</param>
/// <param name="destDir">The directory that receives the output file.</param>
/// <param name="nsMapper">The namespace mapper handed to the Turtle writer.</param>
private static void WriteBadgesLists(GeneralUris generalUris, string destDir, VDS.RDF.INamespaceMapper nsMapper)
{
    ConsoleHelper.WriteMilestone("Writing lists of badges ...");

    using (var destWriter = new SequentialTurtleWriter(File.CreateText(Path.Combine(destDir, "_badges.ttl")), nsMapper))
    {
        foreach (var siteBadges in GlobalData.GetBadgesPerSite())
        {
            // Item1 is the site, Item2 the names of the badges registered for it.
            // The site URI itself is not needed here; the badge statements link to
            // the site through the site-specific URI factory.
            SiteUris uris = new SiteUris(generalUris, siteBadges.Item1);
            foreach (string badgeName in siteBadges.Item2)
            {
                WriteBadgeInfo(uris, badgeName, destWriter);
            }
        }

        GlobalData.UpdateStats(destWriter);
    }

    Console.WriteLine(" done.");
}
/// <summary>
/// Writes the statement for a single post link row, depending on its link type.
/// </summary>
/// <param name="uris">The URI factory of the site being converted.</param>
/// <param name="r">The XML reader positioned on the post link row.</param>
/// <param name="w">The Turtle writer that receives the statements.</param>
/// <param name="unknownLinkTypeIds">The store that collects unrecognized link type ids.</param>
private static void ConvertPostLink(SiteUris uris, XmlReader r, SequentialTurtleWriter w, UnknownValueStore<string> unknownLinkTypeIds)
{
    if (!r.MoveToAttribute("LinkTypeId"))
    {
        r.MoveToElement();
        ConsoleHelper.WriteErrorLine("No LinkTypeId attribute found on element {0}. Skipping element.", r.ReadOuterXml());
        return;
    }

    string linkTypeId = r.Value;
    if (linkTypeId == "1")
    {
        // linked: PostId is the subject, RelatedPostId the object
        if (r.MoveToAttribute("PostId"))
        {
            w.StartTriple(uris.CreatePostUri(r.Value));
            if (r.MoveToAttribute("RelatedPostId"))
            {
                w.AddToTriple(uris.GeneralUris.LinkProperty, uris.CreatePostUri(r.Value));
            }
        }
    }
    else if (linkTypeId == "3")
    {
        // duplicate: RelatedPostId is the subject, PostId the object
        if (r.MoveToAttribute("RelatedPostId"))
        {
            w.StartTriple(uris.CreatePostUri(r.Value));
            if (r.MoveToAttribute("PostId"))
            {
                w.AddToTriple(uris.GeneralUris.DuplicateProperty, uris.CreatePostUri(r.Value));
            }
        }
    }
    else
    {
        unknownLinkTypeIds.RegisterUnknownValue(linkTypeId);
    }
}
/// <summary>
/// Downloads <c>Sites.xml</c> into the temporary directory and converts it to
/// <c>_sites.ttl</c> in the destination directory.
/// </summary>
/// <param name="generalUris">The factory for site-independent URIs.</param>
/// <param name="tempDir">The directory that receives the downloaded file.</param>
/// <param name="baseUri">The base URI that <c>Sites.xml</c> is resolved against.</param>
/// <param name="destDir">The directory that receives the output file.</param>
/// <param name="nsMapper">The namespace mapper handed to the Turtle writer.</param>
/// <remarks>
/// If the downloaded file contains no element at all, an error is reported and the method
/// returns without updating the statistics or printing the completion message.
/// </remarks>
private static void ConvertSiteList(GeneralUris generalUris, string tempDir, Uri baseUri, string destDir, VDS.RDF.INamespaceMapper nsMapper)
{
    string srcFile = Path.Combine(tempDir, "Sites.xml");

    ConsoleHelper.WriteMilestone("Downloading site list ...");
    using (var client = new WebClient())
    {
        Uri remoteFile = new Uri(baseUri, "Sites.xml");
        client.DownloadFile(remoteFile, srcFile);
    }
    Console.WriteLine(" done.");

    using (var destWriter = new SequentialTurtleWriter(File.CreateText(Path.Combine(destDir, "_sites.ttl")), nsMapper))
    {
        using (var fs = File.OpenRead(srcFile))
        using (var reader = XmlReader.Create(fs))
        {
            // Advance to the root element.
            while (reader.NodeType != XmlNodeType.Element)
            {
                bool moreNodes = reader.Read();
                if (!moreNodes)
                {
                    ConsoleHelper.WriteErrorLine("No contents found in file {0}.", srcFile);
                    return;
                }
            }

            if (reader.LocalName != "sitelist")
            {
                ConsoleHelper.WriteWarningLine("Unknown root element \"{0}\". Skipping document.", reader.LocalName);
            }
            else
            {
                ConvertSites(generalUris, reader, destWriter);
            }
        }

        GlobalData.UpdateStats(destWriter);
    }

    Console.WriteLine("Conversion of site list completed.");
}
/// <summary>
/// Converts every <c>row</c> element of the comments document and stops at the first
/// end element read by the outer reader.
/// </summary>
/// <param name="uris">The URI factory of the site being converted.</param>
/// <param name="r">The XML reader positioned on the root element of the document.</param>
/// <param name="w">The Turtle writer that receives the statements.</param>
private static void ConvertComments(SiteUris uris, XmlReader r, SequentialTurtleWriter w)
{
    while (r.Read())
    {
        XmlNodeType nodeType = r.NodeType;

        if (nodeType == XmlNodeType.EndElement)
        {
            return;
        }

        if (nodeType != XmlNodeType.Element || r.LocalName != "row")
        {
            continue;
        }

        using (var rowReader = r.ReadSubtree())
        {
            rowReader.Read();
            ConvertComment(uris, rowReader, w);
        }
    }
}
/// <summary>
/// Writes the ontology: its metadata, the class declarations and the property declarations.
/// </summary>
/// <param name="generalUris">The factory for site-independent URIs that provides the classes and properties.</param>
/// <param name="w">The Turtle writer that receives the ontology.</param>
/// <remarks>
/// The writer is sequential, so the order of the calls below determines the output. A
/// statement added right after a declaration belongs to the subject of that declaration.
/// </remarks>
private static void WriteOntologyDefinitions(GeneralUris generalUris, SequentialTurtleWriter w)
{
    // frequently used URIs
    Uri xsdInteger = new Uri(NamespaceMapper.XMLSCHEMA + "integer");
    Uri xsdDateTime = new Uri(NamespaceMapper.XMLSCHEMA + "dateTime");
    Uri xsdBoolean = new Uri(NamespaceMapper.XMLSCHEMA + "boolean");
    Uri owlIrreflexiveProperty = new Uri(NamespaceMapper.OWL + "IrreflexiveProperty");

    // ontology metadata
    string ontologyPrefix = generalUris.OntologyPrefix.AbsoluteUri;
    string ontologyUri = ontologyPrefix ?? "";
    if (ontologyUri.Length > 0)
    {
        // The ontology itself is identified by the prefix without its trailing separator.
        char lastChar = ontologyUri[ontologyUri.Length - 1];
        if (lastChar == '/' || lastChar == '#')
        {
            ontologyUri = ontologyUri.Substring(0, ontologyUri.Length - 1);
        }
    }
    w.StartTriple(new Uri(ontologyUri));
    w.AddToTriple(generalUris.TypeProperty, new Uri(NamespaceMapper.OWL + "Ontology"));
    w.AddToTriple(generalUris.TitleProperty, "SE2Rdf Output");
    w.AddToTriple(generalUris.DateProperty, DateTime.Now);
    // TODO: is this correct/required?
    w.AddToTriple(new Uri(NamespaceMapper.OWL + "imports"), new Uri("http://purl.org/dc/elements/1.1"));

    // types
    Uri postType = new Uri(ontologyPrefix + "Post");
    WriteClassDecl(w, generalUris.QuestionType, "Question");
    w.AddToTriple(subClassOfUri, postType);
    // TODO: does not seem to work yet in VOWL => test in Protege
    w.AddAnonymousToTriple(subClassOfUri);
    //w.AddToTriple(generalUris.TypeProperty, restrictionUri);
    w.AddToTriple(new Uri(NamespaceMapper.OWL + "onProperty"), generalUris.TitleProperty);
    w.AddToTriple(new Uri(NamespaceMapper.OWL + "cardinality"), 1);
    w.FinishAnonymousNode();
    WriteClassDecl(w, generalUris.AnswerType, "Answer");
    w.AddToTriple(subClassOfUri, postType);

    Uri tagWikiType = new Uri(ontologyPrefix + "TagWiki");
    WriteClassDecl(w, generalUris.TagExcerptType, "Tag Excerpt");
    w.AddToTriple(subClassOfUri, tagWikiType);
    WriteClassDecl(w, generalUris.TagDescriptionType, "Tag Description");
    w.AddToTriple(subClassOfUri, tagWikiType);

    WriteClassDecl(w, generalUris.SiteInfoType, "Q&A Site");
    WriteClassDecl(w, generalUris.UserType, "Site-specific User");
    WriteClassDecl(w, generalUris.AccountType, "Account");
    w.AddToTriple(subClassOfUri, generalUris.PersonType);
    WriteClassDecl(w, generalUris.CommentType, "Comment");
    WriteClassDecl(w, generalUris.TagType, "Tag");
    WriteClassDecl(w, generalUris.AcceptanceType, "Acceptance");

    Uri voteType = new Uri(ontologyPrefix + "Vote");
    WriteClassDecl(w, voteType, "Vote");
    WriteClassDecl(w, generalUris.UpVoteType, "Upvote");
    w.AddToTriple(subClassOfUri, voteType);
    WriteClassDecl(w, generalUris.DownVoteType, "Downvote");
    w.AddToTriple(subClassOfUri, voteType);

    WriteClassDecl(w, generalUris.FavoriteType, "Favorite");
    WriteClassDecl(w, generalUris.BadgeType, "Badge");
    WriteClassDecl(w, generalUris.AssignedBadgeType, "Assigned Badge");

    Uri postActionType = new Uri(ontologyPrefix + "PostAction");
    WriteClassDecl(w, postActionType, "Post Action");
    WriteClassDecl(w, generalUris.PostClosureType, "Closure");
    w.AddToTriple(subClassOfUri, postActionType);
    WriteClassDecl(w, generalUris.PostReopeningType, "Reopening");
    w.AddToTriple(subClassOfUri, postActionType);
    WriteClassDecl(w, generalUris.PostDeletionType, "Deletion");
    w.AddToTriple(subClassOfUri, postActionType);
    WriteClassDecl(w, generalUris.PostUndeletionType, "Undeletion");
    w.AddToTriple(subClassOfUri, postActionType);
    WriteClassDecl(w, generalUris.PostLockingType, "Locking");
    w.AddToTriple(subClassOfUri, postActionType);
    WriteClassDecl(w, generalUris.PostUnlockingType, "Unlocking");
    w.AddToTriple(subClassOfUri, postActionType);
    WriteClassDecl(w, generalUris.PostProtectionType, "Protection");
    w.AddToTriple(subClassOfUri, postActionType);
    WriteClassDecl(w, generalUris.PostUnprotectionType, "Unprotection");
    w.AddToTriple(subClassOfUri, postActionType);

    WriteClassDecl(w, generalUris.StartOfBountyType, "Start of Bounty");
    WriteClassDecl(w, generalUris.EndOfBountyType, "End of Bounty");
    WriteClassDecl(w, generalUris.CloseReasonType, "Close Reason");

    // properties
    WritePropDecl(w, generalUris.StackExchangeWebsiteProperty, true,
        new[] { postType, generalUris.TagType, generalUris.UserType, generalUris.BadgeType },
        new[] { generalUris.SiteInfoType });
    WritePropDecl(w, generalUris.ScoreProperty, false,
        new[] { generalUris.CommentType, postType },
        new[] { xsdInteger });
    WritePropDecl(w, generalUris.OwnerProperty, true,
        new[] { generalUris.CommentType, postType, tagWikiType },
        new[] { generalUris.UserType });
    WritePropDecl(w, generalUris.CloseReasonProperty, true,
        new[] { generalUris.PostClosureType },
        new[] { generalUris.CloseReasonType });
    WritePropDecl(w, generalUris.ParticipantProperty, true,
        new[] { postActionType },
        new[] { generalUris.UserType });
    WritePropDecl(w, generalUris.CommentProperty, true,
        new[] { postType },
        new[] { generalUris.CommentType });
    WritePropDecl(w, generalUris.ViewCountProperty, false,
        new[] { generalUris.QuestionType, generalUris.UserType, generalUris.AccountType },
        new[] { xsdInteger });
    WritePropDecl(w, generalUris.TagProperty, true,
        new[] { postType }, // TODO: verify!
        new[] { generalUris.TagType });
    WritePropDecl(w, generalUris.AnswerProperty, true,
        new[] { generalUris.QuestionType, generalUris.EndOfBountyType },
        new[] { generalUris.AnswerType });
    WritePropDecl(w, generalUris.AcceptedAnswerProperty, true,
        new[] { generalUris.QuestionType },
        new[] { generalUris.AnswerType });
    w.AddToTriple(subPropertyOfUri, generalUris.AnswerProperty);
    WritePropDecl(w, generalUris.LastEditDateProperty, false,
        new[] { postType, tagWikiType },
        new[] { xsdDateTime });
    WritePropDecl(w, generalUris.DuplicateProperty, true,
        new[] { generalUris.QuestionType },
        new[] { generalUris.QuestionType },
        owlIrreflexiveProperty);
    WritePropDecl(w, generalUris.EventProperty, true,
        new[] { postType },
        new[] { postActionType });
    WritePropDecl(w, generalUris.TagExcerptProperty, true,
        new[] { generalUris.TagType },
        new[] { generalUris.TagExcerptType });
    WritePropDecl(w, generalUris.TagDescriptionProperty, true,
        new[] { generalUris.TagType },
        new[] { generalUris.TagDescriptionType });
    WritePropDecl(w, generalUris.BadgeProperty, true,
        new[] { generalUris.UserType },
        new[] { generalUris.BadgeType });
    WritePropDecl(w, generalUris.ReputationProperty, false,
        new[] { generalUris.UserType },
        new[] { xsdInteger });
    WritePropDecl(w, generalUris.UpVotesProperty, false,
        new[] { generalUris.UserType },
        new[] { xsdInteger });
    WritePropDecl(w, generalUris.DownVotesProperty, false,
        new[] { generalUris.UserType },
        new[] { xsdInteger });
    WritePropDecl(w, generalUris.AccountProperty, true,
        new[] { generalUris.UserType },
        new[] { generalUris.AccountType });
    w.AddToTriple(generalUris.TypeProperty, new Uri(NamespaceMapper.OWL + "FunctionalProperty"));
    WritePropDecl(w, generalUris.LastSeenProperty, false,
        new[] { generalUris.UserType },
        new[] { xsdDateTime });
    WritePropDecl(w, generalUris.FavoriteProperty, true,
        new[] { generalUris.UserType },
        new[] { generalUris.QuestionType });
    WritePropDecl(w, generalUris.IsMetaSiteProperty, false,
        new[] { generalUris.SiteInfoType },
        new[] { xsdBoolean });
    WritePropDecl(w, generalUris.ParentSiteProperty, true,
        new[] { generalUris.SiteInfoType },
        new[] { generalUris.SiteInfoType },
        owlIrreflexiveProperty);
    WritePropDecl(w, generalUris.PostProperty, true,
        new[] { generalUris.StartOfBountyType },
        new[] { generalUris.QuestionType });
    WritePropDecl(w, generalUris.DonorProperty, true,
        new[] { generalUris.StartOfBountyType },
        new[] { generalUris.UserType });
    WritePropDecl(w, generalUris.OfferedAmountProperty, false,
        new[] { generalUris.StartOfBountyType },
        new[] { xsdInteger });
    WritePropDecl(w, generalUris.TransferredAmountProperty, false,
        new[] { generalUris.EndOfBountyType },
        new[] { xsdInteger });
}
/// <summary>
/// Writes the statements for a single vote row, depending on its vote type.
/// </summary>
/// <param name="uris">The URI factory of the site being converted.</param>
/// <param name="r">The XML reader positioned on the vote row.</param>
/// <param name="w">The Turtle writer that receives the statements.</param>
/// <param name="unknownVoteTypeIds">The store that collects unrecognized vote type ids.</param>
/// <remarks>
/// Acceptances, upvotes and downvotes are only written when
/// <c>GlobalData.Options.FullTimeInfo</c> is set. Favorites are written in a full or a
/// compact form depending on that option. Bounty starts and ends are always written.
/// NOTE(review): the subject is started before the vote type is inspected, so ignored vote
/// types leave a subject without statements — confirm the writer tolerates that.
/// </remarks>
private static void ConvertVote(SiteUris uris, XmlReader r, SequentialTurtleWriter w, UnknownValueStore<string> unknownVoteTypeIds)
{
    Uri subjectUri;
    if (r.MoveToAttribute("Id"))
    {
        subjectUri = uris.CreateVoteUri(r.Value);
        w.StartTriple(subjectUri);
    }
    else
    {
        r.MoveToElement();
        ConsoleHelper.WriteErrorLine("No Id attribute found on element {0}. Skipping element.", r.ReadOuterXml());
        return;
    }

    if (!r.MoveToAttribute("VoteTypeId"))
    {
        r.MoveToElement();
        ConsoleHelper.WriteErrorLine("No VoteTypeId attribute found on element {0}. Skipping element.", r.ReadOuterXml());
        return;
    }

    switch (r.Value)
    {
        case "1": // acceptance
            if (GlobalData.Options.FullTimeInfo)
            {
                w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.AcceptanceType);
                uris.LinkToSite(w);
                if (r.MoveToAttribute("PostId"))
                {
                    // Temporarily switch the subject to the post, then switch back.
                    w.StartTriple(uris.CreatePostUri(r.Value));
                    w.AddToTriple(uris.GeneralUris.AcceptanceProperty, subjectUri);
                    w.StartTriple(subjectUri);
                }
                AddVoteCreationDate(uris, r, w);
            }
            break;

        case "2": // upvote
            if (GlobalData.Options.FullTimeInfo)
            {
                WriteUpOrDownVote(uris, r, w, subjectUri, uris.GeneralUris.UpVoteType);
            }
            break;

        case "3": // downvote
            if (GlobalData.Options.FullTimeInfo)
            {
                WriteUpOrDownVote(uris, r, w, subjectUri, uris.GeneralUris.DownVoteType);
            }
            break;

        case "5": // favorite
            if (GlobalData.Options.FullTimeInfo)
            {
                w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.FavoriteType);
                uris.LinkToSite(w);
                AddVoteCreationDate(uris, r, w);
                if (r.MoveToAttribute("PostId"))
                {
                    w.AddToTriple(uris.GeneralUris.PostProperty, uris.CreatePostUri(r.Value));
                }
                if (r.MoveToAttribute("UserId"))
                {
                    w.StartTriple(uris.CreateUserUri(r.Value));
                    w.AddToTriple(uris.GeneralUris.FavoriteProperty, subjectUri);
                    w.StartTriple(subjectUri);
                }
            }
            else
            {
                // Compact form: the user points directly to the favorite post.
                if (r.MoveToAttribute("UserId"))
                {
                    w.StartTriple(uris.CreateUserUri(r.Value));
                    if (r.MoveToAttribute("PostId"))
                    {
                        w.AddToTriple(uris.GeneralUris.FavoriteProperty, uris.CreatePostUri(r.Value));
                    }
                    w.StartTriple(subjectUri);
                }
            }
            break;

        case "8": // bounty started
            w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.StartOfBountyType);
            if (r.MoveToAttribute("PostId"))
            {
                w.AddToTriple(uris.GeneralUris.PostProperty, uris.CreatePostUri(r.Value));
            }
            if (r.MoveToAttribute("UserId"))
            {
                w.AddToTriple(uris.GeneralUris.DonorProperty, uris.CreateUserUri(r.Value));
            }
            AddVoteCreationDate(uris, r, w);
            if (r.MoveToAttribute("BountyAmount"))
            {
                // Machine-readable data: parse independently of the thread's culture.
                w.AddToTriple(uris.GeneralUris.OfferedAmountProperty, long.Parse(r.Value, System.Globalization.CultureInfo.InvariantCulture));
            }
            break;

        case "9": // bounty closed
            w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.EndOfBountyType);
            if (r.MoveToAttribute("PostId"))
            {
                w.AddToTriple(uris.GeneralUris.AnswerProperty, uris.CreatePostUri(r.Value));
            }
            AddVoteCreationDate(uris, r, w);
            if (r.MoveToAttribute("BountyAmount"))
            {
                // Machine-readable data: parse independently of the thread's culture.
                w.AddToTriple(uris.GeneralUris.TransferredAmountProperty, long.Parse(r.Value, System.Globalization.CultureInfo.InvariantCulture));
            }
            break;

        case "4": // offensive
        case "6": // closed
        case "7": // reopened
        case "10": // deletion
        case "11": // undeletion
        case "12": // spam
        case "13": // moderator informed
        case "15": // under moderator review
        case "16": // approved edit suggestion
            // known vote types that are not converted
            break;

        default:
            unknownVoteTypeIds.RegisterUnknownValue(r.Value);
            break;
    }
}

/// <summary>
/// Adds the row's <c>CreationDate</c> attribute, if present, as the date of the current subject.
/// </summary>
private static void AddVoteCreationDate(SiteUris uris, XmlReader r, SequentialTurtleWriter w)
{
    if (r.MoveToAttribute("CreationDate"))
    {
        w.AddToTriple(uris.GeneralUris.DateProperty, DateTime.Parse(r.Value, System.Globalization.CultureInfo.InvariantCulture));
    }
}

/// <summary>
/// Writes type, site link and date of an upvote or downvote and links the vote from its post.
/// </summary>
private static void WriteUpOrDownVote(SiteUris uris, XmlReader r, SequentialTurtleWriter w, Uri subjectUri, Uri voteType)
{
    w.AddToTriple(uris.GeneralUris.TypeProperty, voteType);
    uris.LinkToSite(w);
    AddVoteCreationDate(uris, r, w);
    if (r.MoveToAttribute("PostId"))
    {
        // Temporarily switch the subject to the post, then switch back.
        w.StartTriple(uris.CreatePostUri(r.Value));
        w.AddToTriple(uris.GeneralUris.VoteProperty, subjectUri);
        w.StartTriple(subjectUri);
    }
}
/// <summary>
/// Writes the statements for a single post history row, depending on its history type.
/// </summary>
/// <param name="uris">The URI factory of the site being converted.</param>
/// <param name="r">The XML reader positioned on the post history row.</param>
/// <param name="w">The Turtle writer that receives the statements.</param>
/// <param name="unknownPostHistoryTypeIds">The store that collects unrecognized history type ids.</param>
/// <remarks>
/// Only closures, reopenings, deletions, undeletions, lockings, unlockings, protections and
/// unprotections are converted. All other known history types are ignored.
/// </remarks>
private static void ConvertPostHistoryItem(SiteUris uris, XmlReader r, SequentialTurtleWriter w, UnknownValueStore<string> unknownPostHistoryTypeIds)
{
    Uri subjectUri;
    if (r.MoveToAttribute("Id"))
    {
        subjectUri = uris.CreatePostHistoryUri(r.Value);
        w.StartTriple(subjectUri);
    }
    else
    {
        r.MoveToElement();
        ConsoleHelper.WriteErrorLine("No Id attribute found on element {0}. Skipping element.", r.ReadOuterXml());
        return;
    }

    if (!r.MoveToAttribute("PostHistoryTypeId"))
    {
        r.MoveToElement();
        ConsoleHelper.WriteErrorLine("No PostHistoryTypeId attribute found on element {0}. Skipping element.", r.ReadOuterXml());
        return;
    }

    switch (r.Value)
    {
        case "10": // post closed
            WritePostHistoryAction(uris, subjectUri, r, w, uris.GeneralUris.PostClosureType, true);
            break;
        case "11": // post reopened
            WritePostHistoryAction(uris, subjectUri, r, w, uris.GeneralUris.PostReopeningType, false);
            break;
        case "12": // post deleted
            WritePostHistoryAction(uris, subjectUri, r, w, uris.GeneralUris.PostDeletionType, false);
            break;
        case "13": // post undeleted
            WritePostHistoryAction(uris, subjectUri, r, w, uris.GeneralUris.PostUndeletionType, false);
            break;
        case "14": // post locked
            WritePostHistoryAction(uris, subjectUri, r, w, uris.GeneralUris.PostLockingType, false);
            break;
        case "15": // post unlocked
            WritePostHistoryAction(uris, subjectUri, r, w, uris.GeneralUris.PostUnlockingType, false);
            break;
        case "19": // question protected
            WritePostHistoryAction(uris, subjectUri, r, w, uris.GeneralUris.PostProtectionType, false);
            break;
        case "20": // question unprotected
            WritePostHistoryAction(uris, subjectUri, r, w, uris.GeneralUris.PostUnprotectionType, false);
            break;

        case "1": // initial title
        case "2": // initial body
        case "3": // initial tags
        case "4": // edit title
        case "5": // edit body
        case "6": // edit tags
        case "7": // rollback title
        case "8": // rollback body
        case "9": // rollback tags
        case "16": // community owned
        case "17": // post migrated superseded with 35/36
        case "18": // question merged
        case "21": // post disassociated
        case "22": // question unmerged
        case "24": // suggested edit applied
        case "25": // post tweeted
        case "31": // comment discussion moved to chat
        case "33": // post notice added
        case "34": // post notice removed
        case "35": // post migrated away replaces 17
        case "36": // post migrated here replaces 17
        case "37": // post merge source
        case "38": // post merge destination
            // known history types that are not converted
            break;

        default:
            unknownPostHistoryTypeIds.RegisterUnknownValue(r.Value);
            break;
    }
}

/// <summary>
/// Writes the statements shared by all converted post actions: type, optional close reason,
/// date, participants and the link from the affected post.
/// </summary>
private static void WritePostHistoryAction(SiteUris uris, Uri subjectUri, XmlReader r, SequentialTurtleWriter w, Uri actionType, bool withCloseReason)
{
    w.AddToTriple(uris.GeneralUris.TypeProperty, actionType);
    if (withCloseReason)
    {
        AddPostCloseReason(uris, r, w);
    }
    if (r.MoveToAttribute("CreationDate"))
    {
        w.AddToTriple(uris.GeneralUris.DateProperty, DateTime.Parse(r.Value, System.Globalization.CultureInfo.InvariantCulture));
    }
    AddParticipants(uris, r, w);
    LinkToPost(uris, subjectUri, r, w);
}

/// <summary>
/// Maps the close reason id in the row's <c>Comment</c> attribute, if present, to a close
/// reason statement; unknown ids are reported as a warning.
/// </summary>
private static void AddPostCloseReason(SiteUris uris, XmlReader r, SequentialTurtleWriter w)
{
    if (!r.MoveToAttribute("Comment"))
    {
        return;
    }

    switch (r.Value)
    {
        case "1": // Exact Duplicate
        case "101": // duplicate
            w.AddToTriple(uris.GeneralUris.CloseReasonProperty, uris.GeneralUris.DuplicateCloseReason);
            break;
        case "2": // Off-topic
        case "102": // Off-topic
            w.AddToTriple(uris.GeneralUris.CloseReasonProperty, uris.GeneralUris.OffTopicCloseReason);
            break;
        case "3": // Subjective and argumentative
        case "105": // Primarily opinion-based
            w.AddToTriple(uris.GeneralUris.CloseReasonProperty, uris.GeneralUris.SubjectiveCloseReason);
            break;
        case "4": // Not a real question
            w.AddToTriple(uris.GeneralUris.CloseReasonProperty, uris.GeneralUris.NotAQuestionCloseReason);
            break;
        case "7": // Too localized
            w.AddToTriple(uris.GeneralUris.CloseReasonProperty, uris.GeneralUris.TooLocalizedCloseReason);
            break;
        case "10": // General reference
            w.AddToTriple(uris.GeneralUris.CloseReasonProperty, uris.GeneralUris.GeneralReferenceCloseReason);
            break;
        case "20": // Noise or pointless
            w.AddToTriple(uris.GeneralUris.CloseReasonProperty, uris.GeneralUris.NoiseCloseReason);
            break;
        case "103": // Unclear what you're asking
            w.AddToTriple(uris.GeneralUris.CloseReasonProperty, uris.GeneralUris.UnclearCloseReason);
            break;
        case "104": // Too broad
            w.AddToTriple(uris.GeneralUris.CloseReasonProperty, uris.GeneralUris.TooBroadCloseReason);
            break;
        default:
            ConsoleHelper.WriteWarningLine("Unknown post close reason: {0}", r.Value);
            break;
    }
}
/// <summary>
/// Emits a class declaration: starts a new triple group for <paramref name="classUri"/>,
/// marks it as an OWL class and attaches <paramref name="className"/> as its RDFS label.
/// </summary>
/// <param name="w">Writer that receives the triples.</param>
/// <param name="classUri">Subject URI of the class being declared.</param>
/// <param name="className">Label written for the class.</param>
private static void WriteClassDecl(SequentialTurtleWriter w, Uri classUri, string className)
{
    w.StartTriple(classUri);

    // <classUri> <typeUri> owl:Class
    Uri owlClass = new Uri(NamespaceMapper.OWL + "Class");
    w.AddToTriple(typeUri, owlClass);

    // <classUri> rdfs:label "className"
    Uri rdfsLabel = new Uri(NamespaceMapper.RDFS + "label");
    w.AddToTriple(rdfsLabel, className);
}
/// <summary>
/// Writes the triples describing one user, taken from the attributes of the XML element
/// the reader is currently positioned on. If the element has no <c>Id</c> attribute an
/// error is reported and nothing is written.
/// </summary>
/// <param name="uris">Site-specific URI factory; also exposes the shared property and type URIs.</param>
/// <param name="r">Reader positioned on the user element whose attributes are read.</param>
/// <param name="w">Writer that receives the triples.</param>
/// <param name="malformedIris">
/// Receives website values that could not be used as IRIs, as long as it holds fewer than
/// <c>GlobalData.Options.MaxDisplayedMalformedIris</c> entries.
/// </param>
/// <param name="totalMalformedIriCount">
/// Incremented once for every malformed website value, including those that are not added
/// to <paramref name="malformedIris"/>.
/// </param>
/// <exception cref="FormatException">A date or numeric attribute is present but cannot be parsed.</exception>
/// <exception cref="OverflowException">A numeric attribute does not fit into a <see cref="long"/>.</exception>
private static void ConvertUser(SiteUris uris, XmlReader r, SequentialTurtleWriter w, ICollection<string> malformedIris, ref long totalMalformedIriCount)
{
    if (!r.MoveToAttribute("Id"))
    {
        r.MoveToElement();
        ConsoleHelper.WriteErrorLine("No Id attribute found on element {0}. Skipping element.", r.ReadOuterXml());
        return;
    }

    // The input is machine-generated, so every date and number is parsed with the
    // invariant culture; the result must not depend on the regional settings of the host.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    Uri subjectUri = uris.CreateUserUri(r.Value);
    w.StartTriple(subjectUri);
    w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.UserType);
    uris.LinkToSite(w);

    if (r.MoveToAttribute("DisplayName"))
    {
        // The display name doubles as the generic label and the user name.
        w.AddToTriple(uris.GeneralUris.LabelProperty, r.Value);
        w.AddToTriple(uris.GeneralUris.UserNameProperty, r.Value);
    }
    if (r.MoveToAttribute("CreationDate"))
    {
        w.AddToTriple(uris.GeneralUris.DateProperty, DateTime.Parse(r.Value, invariant));
    }
    if (r.MoveToAttribute("Reputation"))
    {
        w.AddToTriple(uris.GeneralUris.ReputationProperty, long.Parse(r.Value, invariant));
    }
    if (r.MoveToAttribute("Location"))
    {
        w.AddToTriple(uris.GeneralUris.LocationProperty, r.Value);
    }
    if (r.MoveToAttribute("Age"))
    {
        w.AddToTriple(uris.GeneralUris.AgeProperty, long.Parse(r.Value, invariant));
    }
    if (r.MoveToAttribute("Views"))
    {
        w.AddToTriple(uris.GeneralUris.ViewCountProperty, long.Parse(r.Value, invariant));
    }
    if (r.MoveToAttribute("UpVotes"))
    {
        w.AddToTriple(uris.GeneralUris.UpVotesProperty, long.Parse(r.Value, invariant));
    }
    if (r.MoveToAttribute("DownVotes"))
    {
        w.AddToTriple(uris.GeneralUris.DownVotesProperty, long.Parse(r.Value, invariant));
    }

    if (r.MoveToAttribute("WebsiteUrl"))
    {
        string websiteUrl = r.Value;
        if (!string.IsNullOrWhiteSpace(websiteUrl))
        {
            // Normalise the casing of a leading "http" (e.g. "HTTP://", "Http://").
            // The comparison is ordinal so that exactly four characters are replaced.
            if (websiteUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase))
            {
                websiteUrl = "http" + websiteUrl.Substring(4);
            }

            // Values matching emptyWebsiteRegex are dropped without being counted as malformed.
            if (!emptyWebsiteRegex.IsMatch(websiteUrl.ToLowerInvariant()))
            {
                Uri homepageUrl;
                if (websiteRegex.IsMatch(websiteUrl) && Uri.TryCreate(websiteUrl, UriKind.Absolute, out homepageUrl))
                {
                    w.AddToTriple(uris.GeneralUris.WebsiteProperty, homepageUrl);
                }
                else
                {
                    // Not usable as an IRI: count it, remember it for the report (up to the
                    // configured limit) and still emit the raw text via the string overload.
                    totalMalformedIriCount++;
                    if (malformedIris.Count < GlobalData.Options.MaxDisplayedMalformedIris)
                    {
                        malformedIris.Add(websiteUrl);
                    }
                    w.AddToTriple(uris.GeneralUris.WebsiteProperty, websiteUrl);
                }
            }
        }
    }

    if (r.MoveToAttribute("AccountId"))
    {
        w.AddToTriple(uris.GeneralUris.AccountProperty, uris.GeneralUris.CreateAccountUri(r.Value));
        // Record the account id in the global set as well.
        GlobalData.AccountIds.Add(r.Value);
    }
    if (r.MoveToAttribute("AboutMe"))
    {
        string desc = r.Value;
        if (!string.IsNullOrWhiteSpace(desc))
        {
            w.AddToTriple(uris.GeneralUris.DescriptionProperty, desc);
        }
    }
    if (r.MoveToAttribute("LastAccessDate"))
    {
        w.AddToTriple(uris.GeneralUris.LastSeenProperty, DateTime.Parse(r.Value, invariant));
    }
}
/// <summary>
/// Converts a single XML source file into a Turtle file named
/// "&lt;website&gt;-&lt;source file name without extension&gt;.ttl" inside <paramref name="destDir"/>.
/// The converter is chosen from the local name of the document's root element; an
/// unrecognised root element only produces a warning.
/// </summary>
/// <param name="generalUris">Shared URIs, combined with <paramref name="website"/> into the site-specific URI set.</param>
/// <param name="srcFile">Path of the XML file to read.</param>
/// <param name="destDir">Directory in which the Turtle file is created.</param>
/// <param name="website">Site the file belongs to; its string form prefixes the output file name.</param>
public static void Convert(GeneralUris generalUris, string srcFile, string destDir, SiteInfo website)
{
    string fileNameOnly = Path.GetFileName(srcFile);
    Console.WriteLine("Processing {0} ...", fileNameOnly);

    var siteUris = new SiteUris(generalUris, website);
    var nsMapper = siteUris.CreateNamespaceMapper();

    string targetName = website + "-" + Path.GetFileNameWithoutExtension(srcFile) + ".ttl";
    string targetPath = Path.Combine(destDir, targetName);

    using (var destWriter = new SequentialTurtleWriter(File.CreateText(targetPath), nsMapper))
    {
        using (var fs = File.OpenRead(srcFile))
        using (var reader = XmlReader.Create(fs))
        {
            // Advance to the first element of the document, i.e. the root element.
            bool atRoot = reader.NodeType == XmlNodeType.Element;
            while (!atRoot)
            {
                if (!reader.Read())
                {
                    // No element at all: report it and leave without updating the
                    // statistics or printing the completion message.
                    ConsoleHelper.WriteErrorLine("No contents found in file {0}.", srcFile);
                    return;
                }
                atRoot = reader.NodeType == XmlNodeType.Element;
            }

            string rootName = reader.LocalName;
            switch (rootName)
            {
                case "badges":
                    ConsoleHelper.WriteInfoLine("List of badges identified.");
                    ConvertBadges(siteUris, reader, destWriter);
                    break;
                case "comments":
                    ConsoleHelper.WriteInfoLine("List of comments identified.");
                    ConvertComments(siteUris, reader, destWriter);
                    break;
                case "posthistory":
                    ConsoleHelper.WriteInfoLine("List of posthistory identified.");
                    ConvertPostHistory(siteUris, reader, destWriter);
                    break;
                case "postlinks":
                    ConsoleHelper.WriteInfoLine("List of postlinks identified.");
                    ConvertPostLinks(siteUris, reader, destWriter);
                    break;
                case "posts":
                    ConsoleHelper.WriteInfoLine("List of posts identified.");
                    ConvertPosts(siteUris, reader, destWriter);
                    break;
                case "tags":
                    ConsoleHelper.WriteInfoLine("List of tags identified.");
                    ConvertTags(siteUris, reader, destWriter);
                    break;
                case "users":
                    ConsoleHelper.WriteInfoLine("List of users identified.");
                    ConvertUsers(siteUris, reader, destWriter);
                    break;
                case "votes":
                    ConsoleHelper.WriteInfoLine("List of votes identified.");
                    ConvertVotes(siteUris, reader, destWriter);
                    break;
                default:
                    ConsoleHelper.WriteWarningLine("Unknown root element \"{0}\". Skipping document.", rootName);
                    break;
            }
        }

        // The source reader and stream are closed at this point; the writer is still open.
        GlobalData.UpdateStats(destWriter);
    }

    Console.WriteLine("Conversion of {0} completed.", fileNameOnly);
}