Beispiel #1
0
 /// <summary>
 /// Writes the statements for a single badge: a type statement, a label statement
 /// carrying the badge name, and whatever <c>uris.LinkToSite</c> adds.
 /// </summary>
 /// <param name="uris">URI factory used to build the badge subject and to look up the property URIs.</param>
 /// <param name="badgeName">Badge name; used both to build the subject URI and as the label value.</param>
 /// <param name="w">Writer that receives the statements.</param>
 private static void WriteBadgeInfo(SiteUris uris, string badgeName, SequentialTurtleWriter w)
 {
     var badgeUri = uris.CreateBadgeUri(badgeName);
     w.StartTriple(badgeUri);

     var general = uris.GeneralUris;
     w.AddToTriple(general.TypeProperty, general.BadgeType);
     w.AddToTriple(general.LabelProperty, badgeName);

     uris.LinkToSite(w);
 }
        /// <summary>
        /// Writes the statements for the tag element the reader is currently positioned on.
        /// The <c>TagName</c> attribute is mandatory; when it is missing an error is reported,
        /// the element is consumed via <c>ReadOuterXml</c> and nothing is written.
        /// </summary>
        /// <param name="uris">URI factory used to build tag/post URIs and to look up property URIs.</param>
        /// <param name="r">Reader positioned on the tag element (or one of its attributes).</param>
        /// <param name="w">Writer that receives the statements.</param>
        private static void ConvertTag(SiteUris uris, XmlReader r, SequentialTurtleWriter w)
        {
            // Guard clause: without a tag name there is no subject to describe.
            if (!r.MoveToAttribute("TagName"))
            {
                r.MoveToElement();
                ConsoleHelper.WriteErrorLine("No TagName attribute found on element {0}. Skipping element.", r.ReadOuterXml());
                return;
            }

            // The tag name serves both as the source of the subject URI and as the label.
            string tagName = r.Value;
            w.StartTriple(uris.CreateTagUri(tagName));

            var general = uris.GeneralUris;
            w.AddToTriple(general.LabelProperty, tagName);
            w.AddToTriple(general.TypeProperty, general.TagType);
            uris.LinkToSite(w);

            // Optional references to the posts holding the tag's excerpt and wiki text.
            bool hasExcerpt = r.MoveToAttribute("ExcerptPostId");
            if (hasExcerpt)
            {
                w.AddToTriple(general.TagExcerptProperty, uris.CreatePostUri(r.Value));
            }

            bool hasWiki = r.MoveToAttribute("WikiPostId");
            if (hasWiki)
            {
                w.AddToTriple(general.TagDescriptionProperty, uris.CreatePostUri(r.Value));
            }
            // TODO: Count
        }
        /// <summary>
        /// Writes the statements for the comment element the reader is currently positioned on.
        /// The <c>Id</c> attribute is mandatory; when it is missing an error is reported,
        /// the element is consumed via <c>ReadOuterXml</c> and nothing is written.
        /// All other attributes are optional and are written only when present.
        /// </summary>
        /// <param name="uris">URI factory used to build comment/post/user URIs and to look up property URIs.</param>
        /// <param name="r">Reader positioned on the comment element (or one of its attributes).</param>
        /// <param name="w">Writer that receives the statements.</param>
        /// <exception cref="FormatException">The <c>Score</c> or <c>CreationDate</c> attribute cannot be parsed.</exception>
        private static void ConvertComment(SiteUris uris, XmlReader r, SequentialTurtleWriter w)
        {
            if (!r.MoveToAttribute("Id"))
            {
                r.MoveToElement();
                ConsoleHelper.WriteErrorLine("No Id attribute found on element {0}. Skipping element.", r.ReadOuterXml());
                return;
            }

            Uri subjectUri = uris.CreateCommentUri(r.Value);
            w.StartTriple(subjectUri);

            // The input is machine-written, so numbers and dates are parsed independently of
            // the culture of the machine running the conversion.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.CommentType);
            uris.LinkToSite(w);
            if (r.MoveToAttribute("Score"))
            {
                w.AddToTriple(uris.GeneralUris.ScoreProperty, long.Parse(r.Value, invariant));
            }
            if (r.MoveToAttribute("Text"))
            {
                w.AddToTriple(uris.GeneralUris.DescriptionProperty, r.Value);
            }
            if (r.MoveToAttribute("CreationDate"))
            {
                w.AddToTriple(uris.GeneralUris.DateProperty, DateTime.Parse(r.Value, invariant));
            }
            if (r.MoveToAttribute("UserId"))
            {
                w.AddToTriple(uris.GeneralUris.OwnerProperty, uris.CreateUserUri(r.Value));
            }

            if (r.MoveToAttribute("PostId"))
            {
                // The link is stated on the post (post -> comment), so the subject is switched
                // to the post for one statement and then switched back to the comment.
                w.StartTriple(uris.CreatePostUri(r.Value));
                w.AddToTriple(uris.GeneralUris.CommentProperty, subjectUri);
                w.StartTriple(subjectUri);
            }
            else
            {
                ConsoleHelper.WriteWarningLine("Orphaned comment: {0}", subjectUri);
            }
        }
        /// <summary>
        /// Writes the statements for the user element the reader is currently positioned on.
        /// The <c>Id</c> attribute is mandatory; when it is missing an error is reported,
        /// the element is consumed via <c>ReadOuterXml</c> and nothing is written.
        /// All other attributes are optional and are written only when present.
        /// </summary>
        /// <param name="uris">URI factory used to build user/account URIs and to look up property URIs.</param>
        /// <param name="r">Reader positioned on the user element (or one of its attributes).</param>
        /// <param name="w">Writer that receives the statements.</param>
        /// <param name="malformedIris">
        /// Receives website values that could not be turned into a URI, as long as the collection
        /// holds fewer than <c>GlobalData.Options.MaxDisplayedMalformedIris</c> entries.
        /// </param>
        /// <param name="totalMalformedIriCount">Incremented once for every malformed website value.</param>
        /// <exception cref="FormatException">A numeric or date attribute cannot be parsed.</exception>
        private static void ConvertUser(SiteUris uris, XmlReader r, SequentialTurtleWriter w, ICollection <string> malformedIris, ref long totalMalformedIriCount)
        {
            if (!r.MoveToAttribute("Id"))
            {
                r.MoveToElement();
                ConsoleHelper.WriteErrorLine("No Id attribute found on element {0}. Skipping element.", r.ReadOuterXml());
                return;
            }

            Uri subjectUri = uris.CreateUserUri(r.Value);
            w.StartTriple(subjectUri);

            // The input is machine-written, so numbers and dates are parsed independently of
            // the culture of the machine running the conversion.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            w.AddToTriple(uris.GeneralUris.TypeProperty,
                          uris.GeneralUris.UserType);
            uris.LinkToSite(w);
            if (r.MoveToAttribute("DisplayName"))
            {
                // The display name is written twice: as generic label and as user name.
                w.AddToTriple(uris.GeneralUris.LabelProperty, r.Value);
                w.AddToTriple(uris.GeneralUris.UserNameProperty, r.Value);
            }
            if (r.MoveToAttribute("CreationDate"))
            {
                w.AddToTriple(uris.GeneralUris.DateProperty, DateTime.Parse(r.Value, invariant));
            }
            if (r.MoveToAttribute("Reputation"))
            {
                w.AddToTriple(uris.GeneralUris.ReputationProperty, long.Parse(r.Value, invariant));
            }
            if (r.MoveToAttribute("Location"))
            {
                w.AddToTriple(uris.GeneralUris.LocationProperty, r.Value);
            }
            if (r.MoveToAttribute("Age"))
            {
                w.AddToTriple(uris.GeneralUris.AgeProperty, long.Parse(r.Value, invariant));
            }
            if (r.MoveToAttribute("Views"))
            {
                w.AddToTriple(uris.GeneralUris.ViewCountProperty, long.Parse(r.Value, invariant));
            }
            if (r.MoveToAttribute("UpVotes"))
            {
                w.AddToTriple(uris.GeneralUris.UpVotesProperty, long.Parse(r.Value, invariant));
            }
            if (r.MoveToAttribute("DownVotes"))
            {
                w.AddToTriple(uris.GeneralUris.DownVotesProperty, long.Parse(r.Value, invariant));
            }
            if (r.MoveToAttribute("WebsiteUrl"))
            {
                string websiteUrl = r.Value;
                if (!string.IsNullOrWhiteSpace(websiteUrl))
                {
                    // Normalize the casing of a leading "http" (e.g. "HTTP://", "Http://").
                    // The comparison is ordinal so that exactly the first four characters are
                    // matched; a culture-sensitive comparison may skip ignorable characters and
                    // would make Substring(4) cut the wrong part of the string.
                    if (websiteUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase))
                    {
                        websiteUrl = "http" + websiteUrl.Substring(4);
                    }
                    if (!emptyWebsiteRegex.IsMatch(websiteUrl.ToLowerInvariant()))
                    {
                        Uri homepageUrl;
                        if (websiteRegex.IsMatch(websiteUrl) && Uri.TryCreate(websiteUrl, UriKind.Absolute, out homepageUrl))
                        {
                            w.AddToTriple(uris.GeneralUris.WebsiteProperty, homepageUrl);
                        }
                        else
                        {
                            // Either the pattern check or URI parsing failed: count the value,
                            // remember it for reporting (up to the configured limit) and still
                            // write it, as a plain string instead of a URI.
                            totalMalformedIriCount++;
                            if (malformedIris.Count < GlobalData.Options.MaxDisplayedMalformedIris)
                            {
                                malformedIris.Add(websiteUrl);
                            }
                            w.AddToTriple(uris.GeneralUris.WebsiteProperty, websiteUrl);
                        }
                    }
                }
            }
            if (r.MoveToAttribute("AccountId"))
            {
                w.AddToTriple(uris.GeneralUris.AccountProperty, uris.GeneralUris.CreateAccountUri(r.Value));
                // The account id is also recorded globally.
                GlobalData.AccountIds.Add(r.Value);
            }
            if (r.MoveToAttribute("AboutMe"))
            {
                string desc = r.Value;
                if (!string.IsNullOrWhiteSpace(desc))
                {
                    w.AddToTriple(uris.GeneralUris.DescriptionProperty, desc);
                }
            }
            if (r.MoveToAttribute("LastAccessDate"))
            {
                w.AddToTriple(uris.GeneralUris.LastSeenProperty, DateTime.Parse(r.Value, invariant));
            }
        }
        /// <summary>
        /// Writes the statements for the vote element the reader is currently positioned on.
        /// The <c>Id</c> and <c>VoteTypeId</c> attributes are mandatory; when either is missing an
        /// error is reported and the element is consumed via <c>ReadOuterXml</c>.
        /// What is written depends on the vote type and, for acceptances, up/down votes and
        /// favorites, on <c>GlobalData.Options.FullTimeInfo</c>. Several vote types are
        /// recognized but produce no statements; unrecognized ones are registered in
        /// <paramref name="unknownVoteTypeIds"/>.
        /// </summary>
        /// <param name="uris">URI factory used to build vote/post/user URIs and to look up property URIs.</param>
        /// <param name="r">Reader positioned on the vote element (or one of its attributes).</param>
        /// <param name="w">Writer that receives the statements.</param>
        /// <param name="unknownVoteTypeIds">Store that collects vote type ids this method does not know.</param>
        /// <exception cref="FormatException">The <c>CreationDate</c> or <c>BountyAmount</c> attribute cannot be parsed.</exception>
        private static void ConvertVote(SiteUris uris, XmlReader r, SequentialTurtleWriter w, UnknownValueStore <string> unknownVoteTypeIds)
        {
            if (!r.MoveToAttribute("Id"))
            {
                r.MoveToElement();
                ConsoleHelper.WriteErrorLine("No Id attribute found on element {0}. Skipping element.", r.ReadOuterXml());
                return;
            }

            Uri subjectUri = uris.CreateVoteUri(r.Value);
            w.StartTriple(subjectUri);

            if (!r.MoveToAttribute("VoteTypeId"))
            {
                r.MoveToElement();
                ConsoleHelper.WriteErrorLine("No VoteTypeId attribute found on element {0}. Skipping element.", r.ReadOuterXml());
                return;
            }

            // The input is machine-written, so numbers are parsed independently of the
            // culture of the machine running the conversion.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Captured up front because the reader is moved to other attributes below.
            string voteTypeId = r.Value;
            switch (voteTypeId)
            {
                case "1": // acceptance
                    if (GlobalData.Options.FullTimeInfo)
                    {
                        w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.AcceptanceType);
                        uris.LinkToSite(w);
                        if (r.MoveToAttribute("PostId"))
                        {
                            // Stated on the post (post -> acceptance); afterwards the subject
                            // is switched back to the vote.
                            w.StartTriple(uris.CreatePostUri(r.Value));
                            w.AddToTriple(uris.GeneralUris.AcceptanceProperty, subjectUri);
                            w.StartTriple(subjectUri);
                        }
                        AddVoteCreationDate(uris, r, w);
                    }
                    break;

                case "2": // upvote
                    if (GlobalData.Options.FullTimeInfo)
                    {
                        w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.UpVoteType);
                        uris.LinkToSite(w);
                        AddVoteCreationDate(uris, r, w);
                        if (r.MoveToAttribute("PostId"))
                        {
                            w.StartTriple(uris.CreatePostUri(r.Value));
                            w.AddToTriple(uris.GeneralUris.VoteProperty, subjectUri);
                            w.StartTriple(subjectUri);
                        }
                    }
                    break;

                case "3": // downvote
                    if (GlobalData.Options.FullTimeInfo)
                    {
                        w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.DownVoteType);
                        uris.LinkToSite(w);
                        AddVoteCreationDate(uris, r, w);
                        if (r.MoveToAttribute("PostId"))
                        {
                            w.StartTriple(uris.CreatePostUri(r.Value));
                            w.AddToTriple(uris.GeneralUris.VoteProperty, subjectUri);
                            w.StartTriple(subjectUri);
                        }
                    }
                    break;

                case "5": // favorite
                    if (GlobalData.Options.FullTimeInfo)
                    {
                        // Full form: the favorite is its own resource that points to the post
                        // and is referenced from the user.
                        w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.FavoriteType);
                        uris.LinkToSite(w);
                        AddVoteCreationDate(uris, r, w);
                        if (r.MoveToAttribute("PostId"))
                        {
                            w.AddToTriple(uris.GeneralUris.PostProperty, uris.CreatePostUri(r.Value));
                        }
                        if (r.MoveToAttribute("UserId"))
                        {
                            w.StartTriple(uris.CreateUserUri(r.Value));
                            w.AddToTriple(uris.GeneralUris.FavoriteProperty, subjectUri);
                            w.StartTriple(subjectUri);
                        }
                    }
                    else
                    {
                        // Short form: the user is linked directly to the favorited post.
                        if (r.MoveToAttribute("UserId"))
                        {
                            w.StartTriple(uris.CreateUserUri(r.Value));
                            if (r.MoveToAttribute("PostId"))
                            {
                                w.AddToTriple(uris.GeneralUris.FavoriteProperty, uris.CreatePostUri(r.Value));
                            }
                            w.StartTriple(subjectUri);
                        }
                    }
                    break;

                case "8": // bounty started
                    w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.StartOfBountyType);
                    if (r.MoveToAttribute("PostId"))
                    {
                        w.AddToTriple(uris.GeneralUris.PostProperty, uris.CreatePostUri(r.Value));
                    }
                    if (r.MoveToAttribute("UserId"))
                    {
                        w.AddToTriple(uris.GeneralUris.DonorProperty, uris.CreateUserUri(r.Value));
                    }
                    AddVoteCreationDate(uris, r, w);
                    if (r.MoveToAttribute("BountyAmount"))
                    {
                        w.AddToTriple(uris.GeneralUris.OfferedAmountProperty, long.Parse(r.Value, invariant));
                    }
                    break;

                case "9": // bounty closed
                    w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.EndOfBountyType);
                    if (r.MoveToAttribute("PostId"))
                    {
                        w.AddToTriple(uris.GeneralUris.AnswerProperty, uris.CreatePostUri(r.Value));
                    }
                    AddVoteCreationDate(uris, r, w);
                    if (r.MoveToAttribute("BountyAmount"))
                    {
                        w.AddToTriple(uris.GeneralUris.TransferredAmountProperty, long.Parse(r.Value, invariant));
                    }
                    break;

                // Known vote types for which nothing is written.
                case "4":  // offensive
                case "6":  // closed
                case "7":  // reopened
                case "10": // deletion
                case "11": // undeletion
                case "12": // spam
                case "13": // moderator informed
                case "15": // under moderator review
                case "16": // approved edit suggestion
                    break;

                default:
                    unknownVoteTypeIds.RegisterUnknownValue(voteTypeId);
                    break;
            }
        }

        /// <summary>
        /// Adds the date statement for a vote when the current element has a
        /// <c>CreationDate</c> attribute; does nothing otherwise.
        /// </summary>
        private static void AddVoteCreationDate(SiteUris uris, XmlReader r, SequentialTurtleWriter w)
        {
            if (r.MoveToAttribute("CreationDate"))
            {
                w.AddToTriple(uris.GeneralUris.DateProperty, DateTime.Parse(r.Value, System.Globalization.CultureInfo.InvariantCulture));
            }
        }
        /// <summary>
        /// Writes the statements for the post element the reader is currently positioned on.
        /// The <c>Id</c> and <c>PostTypeId</c> attributes are mandatory; when either is missing an
        /// error is reported and the element is consumed via <c>ReadOuterXml</c>.
        /// Type-specific statements are written first (questions, answers, tag excerpts and tag
        /// descriptions); dates, owner, body and tags are then written for every post type,
        /// including unknown ones, which are additionally registered in
        /// <paramref name="unknownPostTypeIds"/>.
        /// </summary>
        /// <param name="uris">URI factory used to build post/user/tag URIs and to look up property URIs.</param>
        /// <param name="r">Reader positioned on the post element (or one of its attributes).</param>
        /// <param name="w">Writer that receives the statements.</param>
        /// <param name="unknownPostTypeIds">Store that collects post type ids this method does not know.</param>
        /// <exception cref="FormatException">A numeric or date attribute cannot be parsed.</exception>
        private static void ConvertPost(SiteUris uris, XmlReader r, SequentialTurtleWriter w, UnknownValueStore <string> unknownPostTypeIds)
        {
            if (!r.MoveToAttribute("Id"))
            {
                r.MoveToElement();
                ConsoleHelper.WriteErrorLine("No Id attribute found on element {0}. Skipping element.", r.ReadOuterXml());
                return;
            }

            Uri subjectUri = uris.CreatePostUri(r.Value);
            w.StartTriple(subjectUri);

            if (!r.MoveToAttribute("PostTypeId"))
            {
                r.MoveToElement();
                ConsoleHelper.WriteErrorLine("No PostTypeId attribute found on element {0}. Skipping element.", r.ReadOuterXml());
                return;
            }

            // The input is machine-written, so numbers and dates are parsed independently of
            // the culture of the machine running the conversion (scores may be negative, and
            // the negative sign is culture-specific).
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Captured up front because the reader is moved to other attributes below.
            string postTypeId = r.Value;
            switch (postTypeId)
            {
                case "1": // question
                    w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.QuestionType);
                    uris.LinkToSite(w);
                    if (r.MoveToAttribute("AcceptedAnswerId"))
                    {
                        w.AddToTriple(uris.GeneralUris.AcceptedAnswerProperty, uris.CreatePostUri(r.Value));
                    }
                    if (r.MoveToAttribute("ViewCount"))
                    {
                        w.AddToTriple(uris.GeneralUris.ViewCountProperty, long.Parse(r.Value, invariant));
                    }
                    if (r.MoveToAttribute("Title"))
                    {
                        // The title is written twice: as title and as generic label.
                        w.AddToTriple(uris.GeneralUris.TitleProperty, r.Value);
                        w.AddToTriple(uris.GeneralUris.LabelProperty, r.Value);
                    }
                    if (r.MoveToAttribute("Score"))
                    {
                        w.AddToTriple(uris.GeneralUris.ScoreProperty, long.Parse(r.Value, invariant));
                    }
                    break;

                case "2": // answer
                    w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.AnswerType);
                    uris.LinkToSite(w);
                    if (r.MoveToAttribute("ParentId"))
                    {
                        // Stated on the parent post (parent -> answer); afterwards the subject
                        // is switched back to the answer.
                        w.StartTriple(uris.CreatePostUri(r.Value));
                        w.AddToTriple(uris.GeneralUris.AnswerProperty, subjectUri);
                        w.StartTriple(subjectUri);
                    }
                    else
                    {
                        ConsoleHelper.WriteWarningLine("Orphaned answer: {0}", subjectUri);
                    }
                    if (r.MoveToAttribute("Score"))
                    {
                        w.AddToTriple(uris.GeneralUris.ScoreProperty, long.Parse(r.Value, invariant));
                    }
                    break;

                case "4": // tag info excerpt
                    w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.TagExcerptType);
                    break;

                case "5": // tag description
                    w.AddToTriple(uris.GeneralUris.TypeProperty, uris.GeneralUris.TagDescriptionType);
                    break;

                // Known post types for which no type statement is written.
                case "3": // orphaned tag wiki
                case "6": // moderator nomination
                case "7": // "Wiki placeholder" (seems to only be the election description)
                    break;

                default:
                    unknownPostTypeIds.RegisterUnknownValue(postTypeId);
                    break;
            }

            // Attributes handled the same way for every post type.
            if (r.MoveToAttribute("CreationDate"))
            {
                w.AddToTriple(uris.GeneralUris.DateProperty, DateTime.Parse(r.Value, invariant));
            }
            if (r.MoveToAttribute("LastEditDate"))
            {
                w.AddToTriple(uris.GeneralUris.LastEditDateProperty, DateTime.Parse(r.Value, invariant));
            }
            // The Stack Exchange data-dump schema names this attribute "LastActivityDate".
            // The previously checked name "LastActivity" is still accepted as a fallback.
            if (r.MoveToAttribute("LastActivityDate") || r.MoveToAttribute("LastActivity"))
            {
                w.AddToTriple(uris.GeneralUris.LastActivityDateProperty, DateTime.Parse(r.Value, invariant));
            }
            if (r.MoveToAttribute("OwnerUserId"))
            {
                w.AddToTriple(uris.GeneralUris.OwnerProperty, uris.CreateUserUri(r.Value));
            }
            // TODO: LastEditorUserId (given in post history)
            // TODO: FavoriteCount (linked to users?)
            if (r.MoveToAttribute("Body"))
            {
                w.AddToTriple(uris.GeneralUris.DescriptionProperty, r.Value);
            }
            if (r.MoveToAttribute("Tags"))
            {
                // Each match of tagRegex yields one tag name in its first capture group.
                w.AddToTriple(uris.GeneralUris.TagProperty,
                              tagRegex.Matches(r.Value).Cast <Match>().Select(m => uris.CreateTagUri(m.Groups[1].Value)));
            }
        }