/// <summary>
/// Builds a <see cref="PostMetaData"/> from the key/value pairs that ParseSettings
/// extracts from a settings string.
/// </summary>
/// <param name="settingsString">Raw settings text; it is handed to ParseSettings unchanged.</param>
/// <returns>
/// Meta data with the recognised keys ("author", "created", "title", "categories") applied.
/// Keys that are absent, or whose value is not a string, leave the matching property untouched.
/// </returns>
/// <exception cref="FormatException">The "created" value is a string that DateTime.Parse cannot read.</exception>
private PostMetaData GetMetaData(string settingsString)
{
    var parsed = ParseSettings(settingsString);
    var metaData = new PostMetaData();

    if (parsed.ContainsKey("author"))
    {
        metaData.Author = parsed["author"] as string;
    }

    if (parsed.ContainsKey("created"))
    {
        var strDate = parsed["created"] as string;

        // "as string" yields null for a null or non-string value; DateTime.Parse(null)
        // would throw ArgumentNullException, so skip the assignment exactly like the
        // "categories" branch below does.
        if (strDate != null)
        {
            metaData.CreatedDate = DateTime.Parse(strDate);
        }
    }

    if (parsed.ContainsKey("title"))
    {
        metaData.Title = parsed["title"] as string;
    }

    if (parsed.ContainsKey("categories"))
    {
        var categories = parsed["categories"] as string;
        if (categories != null)
        {
            // Categories are stored as a single comma-separated string.
            metaData.Categories = categories.Split(',').ToList();
        }
    }

    return metaData;
}
/// <summary>
/// Wraps the returned posts into Post objects. Uses JSON.net for deserialization.
/// </summary>
/// <param name="site">the site url. insert without http:// prefix</param>
/// <param name="type">the type of posts that shall be returned (post or page)</param>
/// <param name="status">the status of posts that shall be returned</param>
/// <param name="number">the number of posts that shall be returned</param>
/// <param name="offset">the 0-indexed offset for the request. Default value goes to 0. Use this parameter for pagination.</param>
/// <returns>
/// List of all posts matching the query. An empty <see cref="PostsList"/> (never null) is returned
/// when the request does not answer with HTTP 200 or the response body deserializes to nothing.
/// </returns>
public async Task<PostsList> GetPostList(string site, PostType type, PostStatus status, int? number = null, int? offset = null)
{
    PostsList post_list = new PostsList();

    var response = await getPosts(site, type, status, number, offset);

    // The body is needed on both the success and the error path.
    var responseString = await response.Content.ReadAsStringAsync();

    if (response.StatusCode != System.Net.HttpStatusCode.OK)
    {
        var error = JsonConvert.DeserializeObject<apiError>(responseString);

        // DeserializeObject returns null for an empty body; fall back to the raw text
        // instead of dereferencing a null error object.
        var errorMessage = error != null ? error.message : responseString;
        Debug.WriteLine(string.Format("ERROR on GetPostList: The site returned: {0}. JetPack not installed on WordPress or JetPack JSON API not active?", errorMessage));
        return post_list;
    }

    var deserialized = JsonConvert.DeserializeObject<PostsList>(responseString);
    if (deserialized == null)
    {
        // Keep the non-null contract: hand back the empty list created above.
        Debug.WriteLine("WARNING: GetPostList received an empty response body.");
        return post_list;
    }

    post_list = deserialized;

    if (post_list.posts_total_count == 0)
    {
        Debug.WriteLine("WARNING: GetPostList returned 0 results. Wrong parameters?");
    }
    else if (post_list.posts_list != null)
    {
        foreach (var item in post_list.posts_list)
        {
            // categories, tags, attachments and metadata are handled as object so that
            // deserializing the posts stays possible; each raw value is replaced here by
            // its converted form.

            // categories -> string
            if (item.categories != null)
            {
                item.categories = PostCategories.GetString(item.categories);
            }

            // tags -> string
            if (item.tags != null)
            {
                item.tags = PostTags.GetString(item.tags);
            }

            // attachments -> List
            if (item.attachments != null)
            {
                item.attachments = PostAttachments.GetList(item.attachments);
            }

            // metadata -> List
            if (item.metadata != null)
            {
                item.metadata = PostMetaData.GetList(item.metadata);
            }
        }
    }

    return post_list;
}
/// <summary>
/// Reads Hacker News posts from an XML dump and indexes them in batches of 1000.
///
/// Depends on hn_full_11-07-2010.xml which you can download from:
/// http://api.ihackernews.com/torrents/hn_full_11-07-2010.zip.torrent
///
/// When run from debug make sure to change the default debug arguments.
/// </summary>
/// <param name="args">Full filepath to hn_full_11-07-2010.xml</param>
static void Main(string[] args)
{
    if (args == null || args.Length == 0)
    {
        Console.Error.WriteLine("Usage: pass the full filepath to hn_full_11-07-2010.xml as the first argument.");
        return;
    }

    var filePath = args[0];

    // NOTE(review): the trailing dot in the host name is kept from the original; confirm it is intentional.
    var elasticSettings = new ConnectionSettings("127.0.0.1.", 9200)
        .SetDefaultIndex("mpdreamz")
        .SetMaximumAsyncConnections(50);
    var client = new ElasticClient(elasticSettings);

    ConnectionStatus connectionStatus;
    if (!client.TryConnect(out connectionStatus))
    {
        Console.Error.WriteLine("Could not connect to {0}:\r\n{1}", elasticSettings.Host, connectionStatus.Error.OriginalException.Message);
        Console.Read();
        return;
    }

    Post post = new Post();
    PostMetaData meta = new PostMetaData();
    int processed = 0, dropped = 0;
    var postQueue = new List<Post>();

    // Continuations of every batch that was sent, so the program can wait for them before reporting.
    var pending = new List<System.Threading.Tasks.Task>();

    // Sends one batch asynchronously; "dropped" counts failed batches (as before), not documents.
    Action<List<Post>> indexBatch = batch =>
    {
        var t = client.IndexAsync<Post>(batch);
        pending.Add(t.ContinueWith(c =>
        {
            // Reading c.Exception first observes a faulted task and keeps c.Result from rethrowing.
            if (c.Exception != null || c.IsCanceled || !c.Result.Success)
            {
                // Continuations may run concurrently on pool threads, so increment atomically.
                System.Threading.Interlocked.Increment(ref dropped);
            }
        }));
        processed += batch.Count;
    };

    Stopwatch sw = new Stopwatch();
    sw.Start();

    try
    {
        using (var reader = new XmlTextReader(filePath))
        {
            while (!reader.EOF)
            {
                if (reader.NodeType == XmlNodeType.Element)
                {
                    // ReadElementContentAs* already moves the reader past the element's end tag,
                    // so the loop must not call Read() again or the following node would be skipped.
                    var advanced = true;
                    switch (reader.Name)
                    {
                        case "ID":
                            post.Id = reader.ReadElementContentAsInt();
                            break;
                        case "ParentID":
                            post.ParentId = reader.ReadElementContentAsInt();
                            break;
                        case "Url":
                            post.Url = reader.ReadElementContentAsString();
                            break;
                        case "Title":
                            post.Title = reader.ReadElementContentAsString();
                            break;
                        case "Text":
                            post.Text = reader.ReadElementContentAsString();
                            break;
                        case "Username":
                            meta.Username = reader.ReadElementContentAsString();
                            break;
                        case "Points":
                            meta.Points = reader.ReadElementContentAsInt();
                            break;
                        case "Type":
                            meta.Type = reader.ReadElementContentAsInt();
                            break;
                        case "Timestamp":
                            meta.Created = reader.ReadElementContentAsDateTime();
                            break;
                        case "CommentCount":
                            meta.CommentsCount = reader.ReadElementContentAsInt();
                            break;
                        default:
                            // "HackerNews", "row" and any other element: just step into it.
                            advanced = false;
                            break;
                    }

                    if (advanced)
                    {
                        continue;
                    }
                }
                else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == "row")
                {
                    // A closing row completes one post.
                    post.Meta = meta;
                    postQueue.Add(post);

                    if (postQueue.Count == 1000)
                    {
                        indexBatch(postQueue);
                        postQueue = new List<Post>();
                    }

                    Console.Write("\rProcessed:{0}, Dropped:{2} in {1}", processed, sw.Elapsed, dropped);
                    post = new Post();
                    meta = new PostMetaData();
                }

                reader.Read();
            }
        }

        // Index the final, partially filled batch as well instead of only counting it.
        if (postQueue.Count > 0)
        {
            indexBatch(postQueue);
            postQueue = new List<Post>();
        }

        // Wait for all outstanding index requests so the counters and the timing are final.
        System.Threading.Tasks.Task.WaitAll(pending.ToArray());
        sw.Stop();

        Console.Write("\rProcessed:{0}, Dropped:{2} in {1}", processed, sw.Elapsed, dropped);
        Console.WriteLine("\nDone!");
        Console.WriteLine("{0} docs in {1} => {2} docs/s", processed, sw.Elapsed, processed / sw.Elapsed.TotalSeconds);
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
    }
}