/// <summary>
/// Fetches the API response for the thread described by <paramref name="ffp_data"/> and
/// converts it into a <see cref="ThreadContainer"/>.
/// </summary>
/// <param name="ffp_data">Parser data; its ThreadID selects the thread entry in the response.</param>
/// <returns>A container built from the "op" post, with one reply per entry of "posts".</returns>
private static ThreadContainer parse_ffuuka_json(FoolFuukaParserData ffp_data)
{
    string data = fetch_api(ffp_data);
    JsonObject response = JsonConvert.Import<JsonObject>(data);

    // The response is keyed by the thread ID.
    JsonObject threadObject = (JsonObject)response[ffp_data.ThreadID.ToString()];
    JsonObject opPost = (JsonObject)threadObject["op"];

    ThreadContainer tc = new ThreadContainer(parse_thread(opPost, ffp_data));

    // "posts" may be missing or not a JSON object (presumably when the thread has no
    // replies - TODO confirm against the API). In that case return the OP-only container
    // instead of failing with a NullReferenceException / InvalidCastException.
    JsonObject postsObject = threadObject["posts"] as JsonObject;
    if (postsObject == null)
    {
        return tc;
    }

    foreach (string reply_id in postsObject.Names.Cast<string>())
    {
        JsonObject replyObject = (JsonObject)postsObject[reply_id];
        tc.AddReply(parse_reply(replyObject, ffp_data));
    }

    return tc;
}
/* Disabled legacy implementation, kept commented out:
 *
 * public static CommentToken[] TokenizeComment(string comment)
 * {
 *     List<CommentToken> tokens = new List<CommentToken>();
 *
 *     HtmlDocument d = new HtmlDocument();
 *
 *     d.LoadHtml(comment);
 *
 *     foreach (HtmlNode node in d.DocumentNode.ChildNodes)
 *     {
 *         switch (node.Name)
 *         {
 *             case "#text":
 *                 tokens.Add(new CommentToken(CommentToken.TokenType.Text, HttpUtility.HtmlDecode(node.InnerText)));
 *                 break;
 *             case "a":
 *                 if (node.GetAttributeValue("class", "") == "quotelink")
 *                 {
 *                     string inner_text = HttpUtility.HtmlDecode(node.InnerText);
 *                     if (inner_text.StartsWith(">>>"))
 *                     {
 *                         //board redirect (sometimes with a post number)
 *                         int test_i = -1;
 *                         try
 *                         {
 *                             test_i = Convert.ToInt32(inner_text.Split('/').Last()); // The last should be a number or an empty string. I guess
 *
 *                             //if success, it's a board_thread_redirect OR it's a cross-thread link ( I don't know if 4chan handle both the same way )
 *
 *                             string board_letter = inner_text.Replace(">", "").Replace("/", "").Replace(test_i.ToString(), "");
 *
 *                             tokens.Add(new CommentToken(CommentToken.TokenType.BoardThreadRedirect, board_letter + "-" + test_i.ToString()));
 *
 *                         }
 *                         catch (Exception)
 *                         {
 *                             // it is a plain board redirect such as >>>/g/
 *                             tokens.Add(new CommentToken(CommentToken.TokenType.BoardRedirect, inner_text.Replace(">", "").Replace("/", ""))); // ex: >>>/g/ -> g
 *                         }
 *                     }
 *                     else if (inner_text.StartsWith(">>"))
 *                     {
 *                         int test_i = -1;
 *                         try
 *                         {
 *                             test_i = Convert.ToInt32(inner_text.Remove(0, 2));
 *                             //it's a post quote link
 *                             tokens.Add(new CommentToken(CommentToken.TokenType.Quote, inner_text.Remove(0, 2)));
 *                         }
 *                         catch (Exception)
 *                         {
 *                             throw;
 *                         }
 *                     }
 *                 }
 *                 else
 *                 {
 *                     //throw new Exception("Unsupported data type");
 *                 }
 *                 break;
 *             case "br":
 *                 tokens.Add(new CommentToken(CommentToken.TokenType.Newline, ""));
 *                 break;
 *             case "wbr":
 *                 //no action
 *                 break;
 *             case "span":
 *                 if (node.GetAttributeValue("class", "") == "quote")
 *                 {
 *                     tokens.Add(new CommentToken(CommentToken.TokenType.GreenText, HttpUtility.HtmlDecode(node.InnerText)));
 *                 }
 *                 else if (node.GetAttributeValue("class", "") == "deadlink")
 *                 {
 *                     //dead link
 *                     string inner_text = HttpUtility.HtmlDecode(node.InnerText);
 *                     int test_i = -1;
 *                     try
 *                     {
 *                         test_i = Convert.ToInt32(inner_text.Remove(0, 2));
 *                         //it's a post quote link
 *                         tokens.Add(new CommentToken(CommentToken.TokenType.DeadLink, inner_text.Remove(0, 2)));
 *                     }
 *                     catch (Exception)
 *                     {
 *                         throw;
 *                     }
 *
 *                 }
 *                 else if (node.GetAttributeValue("class", "") == "fortune")
 *                 {
 *                     string data = HttpUtility.HtmlDecode(node.InnerText);
 *                     string color = node.GetAttributeValue("style", "");
 *                     tokens.Add(new CommentToken(CommentToken.TokenType.ColoredFText, "#" + color.Split('#')[1] + "$" + data));
 *                 }
 *                 else
 *                 {
 *                     //throw new Exception("Unsupported data type");
 *                 }
 *                 break;
 *             case "pre":
 *                 if (node.GetAttributeValue("class", "") == "prettyprint")
 *                 {
 *                     StringBuilder sb = new StringBuilder();
 *                     foreach (HtmlNode prenode in node.ChildNodes)
 *                     {
 *                         if (prenode.Name == "br")
 *                         {
 *                             sb.AppendLine();
 *                         }
 *                         else
 *                         {
 *                             sb.Append(prenode.InnerText);
 *                         }
 *                     }
 *                     tokens.Add(new CommentToken(CommentToken.TokenType.CodeBlock, sb.ToString()));
 *                 }
 *                 break;
 *             case "s":
 *                 tokens.Add(new CommentToken(CommentToken.TokenType.SpoilerText, HttpUtility.HtmlDecode(node.InnerText)));
 *                 break;
 *             case "small":
 *                 //Oekaki Post
 *                 break;
 *             default:
 *                 //throw new Exception("Unsupported data type");
 *                 break;
 *         }
 *     }
 *     return tokens.ToArray();
 * }
 */

/// <summary>
/// Picks a display title for a post: the HTML-decoded subject when there is one, otherwise
/// the beginning of the comment's plain text, otherwise the post ID.
/// </summary>
/// <param name="t">Post whose Subject, Comment and ID are inspected.</param>
/// <returns>
/// The decoded subject; or the comment text (cut to its first 24 characters plus "..."
/// when it is longer than 25 characters); or the ID as a string when both are empty.
/// </returns>
public static string Guess_Post_Title(GenericPost t)
{
    // A subject always wins.
    if (!String.IsNullOrEmpty(t.Subject))
    {
        return HttpUtility.HtmlDecode(t.Subject);
    }

    // Neither subject nor comment: fall back to the post number.
    if (String.IsNullOrEmpty(t.Comment))
    {
        return t.ID.ToString();
    }

    // Strip the markup from the comment, then decode HTML entities.
    HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(t.Comment);
    string plainText = HttpUtility.HtmlDecode(document.DocumentNode.InnerText);

    return plainText.Length > 25
        ? plainText.Remove(24) + "..."
        : plainText;
}
/// <summary>
/// Builds a <see cref="GenericPost"/> from one post JSON object.
/// </summary>
/// <param name="data">Post object; "num" and "timestamp" are always read, every other key is optional.</param>
/// <param name="ffp_data">Parser data supplying the board and passed on to parse_file.</param>
/// <returns>The populated post. Properties whose key is absent are left untouched.</returns>
private static GenericPost parse_reply(JsonObject data, FoolFuukaParserData ffp_data)
{
    GenericPost post = new GenericPost();
    post.Board = ffp_data.BOARD;
    post.ID = Convert.ToInt32(data["num"]);

    object comment = data["comment_processed"];
    if (comment != null)
    {
        post.Comment = comment.ToString();
    }

    object email = data["email"];
    if (email != null)
    {
        post.Email = email.ToString();
    }

    object title = data["title"];
    if (title != null)
    {
        post.Subject = title.ToString();
    }

    if (data["media"] != null)
    {
        post.File = parse_file(data, ffp_data, post);
    }

    if (data["capcode"] != null)
    {
        // Every capcode value ("N" as well as anything else) maps to None here.
        post.Capcode = GenericPost.CapcodeEnum.None;
    }

    object name = data["name"];
    if (name != null)
    {
        post.Name = name.ToString();
    }

    object trip = data["trip"];
    if (trip != null)
    {
        post.Trip = trip.ToString();
    }

    post.Time = AniWrap.Common.ParseUTC_Stamp(Convert.ToInt32(data["timestamp"]));
    return post;
}
/// <summary>
/// Writes a post as a JSON file under post_files_dir/board/tid, unless that file already exists.
/// </summary>
/// <param name="board">Board name, used as a directory level.</param>
/// <param name="tid">Thread ID, used as the thread directory name.</param>
/// <param name="postId">Post ID, used as the file name for non-OP posts.</param>
/// <param name="post">Post to serialise; an OP post is stored as "op.json".</param>
public void savePost(string board, int tid, int postId, GenericPost post)
{
    string threadDir = Path.Combine(Program.post_files_dir, board, tid.ToString());
    Directory.CreateDirectory(threadDir);

    string fileName = post.IsOpPost ? "op.json" : postId.ToString() + ".json";
    string targetPath = Path.Combine(threadDir, fileName);

    // An existing file is never overwritten.
    if (File.Exists(targetPath))
    {
        return;
    }

    File.WriteAllText(targetPath, get_post_string(post));
}
/// <summary>
/// Background loop of the thread worker: while <c>running</c> is set it fetches the thread,
/// saves the OP and replies (subject to the board mode), dumps their files, adjusts the
/// update interval and sleeps until the next pass.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void worker_DoWork(object sender, DoWorkEventArgs e)
{
    Directory.CreateDirectory(Path.Combine(Program.post_files_dir, this.Board.Board, this.ID.ToString()));

    System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();

    while (running)
    {
        sw.Reset();
        try
        {
            sw.Start();
            log_worker_event(LogEntry.LogLevel.Info, "Updating thread...");

            var tc = Program.aw.GetThreadData(this.Board.Board, this.ID);
            this.ThreadTitle = tc.Title;

            if (!can_i_run(tc.Instance))
            {
                log_worker_event(LogEntry.LogLevel.Info, "ThreadWorker stopped because of a filter");
                running = false;
                ThreadStore.GetStorageEngine().DeleteThread(this.Board.Board, this.ID.ToString());
                break;
            }

            // OP post: in harvester mode an automatically added thread is only saved
            // when its OP carries a file with an allowed extension.
            if (this.AddedAutomatically && this.Board.Mode == BoardWatcher.BoardMode.Harvester)
            {
                if (tc.Instance.File != null && this.Board.IsFileAllowed(tc.Instance.File.ext))
                {
                    savePost(tc.Instance);
                    Program.dump_files(tc.Instance.File, this.ThumbOnly);
                }
            }
            else
            {
                savePost(tc.Instance);
                if (tc.Instance.File != null)
                {
                    Program.dump_files(tc.Instance.File, this.ThumbOnly);
                }
            }

            int count = tc.Replies.Count();
            int with_image = 0;

            for (int i = 0; i < count; i++)
            {
                GenericPost reply = tc.Replies[i];

                // Mode None: replies of automatically added threads are not stored.
                if (this.AddedAutomatically && this.Board.Mode == BoardWatcher.BoardMode.None)
                {
                    continue;
                }

                // Harvester: keep only replies that carry an allowed file.
                if (this.AddedAutomatically && this.Board.Mode == BoardWatcher.BoardMode.Harvester)
                {
                    if (reply.File == null || !this.Board.IsFileAllowed(reply.File.ext))
                    {
                        continue;
                    }
                }

                savePost(reply);

                if (reply.File != null)
                {
                    ++with_image;
                    Program.dump_files(reply.File, this.ThumbOnly);
                }
            }

            sw.Stop();

            this.ImageLimitReached = with_image >= this.ImageLimit;

            int new_rc = count - old_replies_count;

            // TimeSpan.Seconds is only the 0-59 component of the elapsed time; use the
            // whole number of total seconds so that long updates are reported correctly.
            int elapsed_seconds = (int)sw.Elapsed.TotalSeconds;

            log_worker_event(
                LogEntry.LogLevel.Success,
                string.Format("Updated in {0} seconds {1}", elapsed_seconds, new_rc > 0 ? ", + " + new_rc.ToString() + " new replies" : ""));

            old_replies_count = count;
            this.LastUpdated = DateTime.Now;

            if (count >= this.BumpLimit)
            {
                // auto-sage mode, we must archive faster
                this.AutoSage = true;
                if (this.Board.Speed == BoardWatcher.BoardSpeed.Fast)
                {
                    this.UpdateInterval = 0.16; // roughly every 10 sec
                }
                else if (this.Board.Speed == BoardWatcher.BoardSpeed.Normal)
                {
                    this.UpdateInterval = 1; // every 60 sec
                }
            }

            if (tc.Instance.IsSticky)
            {
                this.UpdateInterval = 5;
            }

            if (tc.Instance.IsArchived)
            {
                log_worker_event(LogEntry.LogLevel.Info, "Thread entered archived state.");
                ThreadStore.GetStorageEngine().OptimizeThread(this.Board.Board, this.ID);
                this.Stop();

                if (Settings.RemoveThreadsWhenTheyEnterArchivedState && !this.AddedAutomatically)
                {
                    // NOTE(review): this path falls through to the sleep below; leaving the
                    // loop afterwards relies on `running` (presumably cleared by Stop()) - confirm.
                    Thread404(this);
                }
                else
                {
                    break;
                }
            }

            if (this.Board.Mode == BoardWatcher.BoardMode.Harvester)
            {
                this.UpdateInterval = 2;
            }

            // UpdateInterval is expressed in minutes.
            System.Threading.Thread.Sleep(Convert.ToInt32(this.UpdateInterval * 60 * 1000));
        }
        catch (Exception ex)
        {
            if (ex.Message.Contains("404"))
            {
                log_worker_event(LogEntry.LogLevel.Info, "Optimizing thread data...");

                if (!(this.AddedAutomatically && this.Board.Mode == BoardWatcher.BoardMode.Harvester))
                {
                    ThreadStore.GetStorageEngine().OptimizeThread(this.Board.Board, this.ID);
                }

                this.Stop();
                Thread404(this);
                break;
            }

            log_worker_event(
                LogEntry.LogLevel.Fail,
                string.Format("An error occured '{0}' @ '{1}', retrying", ex.Message, ex.StackTrace));
            System.Threading.Thread.Sleep(1000);
        }
    }

    // Every exit path of the loop ends up here.
    log_worker_event(LogEntry.LogLevel.Success, "Stopped thread worker successfully");
}

/// <summary>
/// Logs a message with the sender "ThreadWorker" and a "/board/ - id" title for this worker.
/// </summary>
private void log_worker_event(LogEntry.LogLevel level, string message)
{
    log(new LogEntry()
    {
        Level = level,
        Message = message,
        Sender = "ThreadWorker",
        Title = string.Format("/{0}/ - {1}", this.Board.Board, this.ID)
    });
}
/// <summary>
/// No-op implementation: the body is empty, so nothing is stored for the given post.
/// </summary>
public void savePost(string board, int tid, int postId, GenericPost post) { }
/// <summary>
/// Builds the <see cref="PostFile"/> described by the "media" object of a post.
/// </summary>
/// <param name="data">Post JSON object that may contain a "media" entry.</param>
/// <param name="ffp_data">Parser data supplying the board.</param>
/// <param name="owner">Post that owns the file; stored in the result's owner field.</param>
/// <returns>
/// The parsed file, or null when there is no media, the media object is empty, it is
/// flagged as banned, its status is "not-available", or it has no media link.
/// </returns>
private static PostFile parse_file(JsonObject data, FoolFuukaParserData ffp_data, GenericPost owner)
{
    if (data["media"] == null)
    {
        return null;
    }

    JsonObject media = (JsonObject)data["media"];

    if (media.Count == 0)
    {
        return null;
    }

    if (media["banned"].ToString() != "0")
    {
        return null;
    }

    if (media["media_status"].ToString() == "not-available")
    {
        return null;
    }

    PostFile pf = new PostFile();
    pf.board = ffp_data.BOARD;

    // Split at the LAST dot so that a name such as "a.b.jpg" keeps "a.b" as its base name
    // (splitting on every dot and taking the first part truncated it to "a").
    string full_name = media["media_filename_processed"].ToString();
    int dot_index = full_name.LastIndexOf('.');
    if (dot_index >= 0)
    {
        pf.ext = full_name.Substring(dot_index + 1);
        pf.filename = full_name.Substring(0, dot_index);
    }
    else
    {
        // No dot at all: both fields keep the whole name, as before.
        pf.ext = full_name;
        pf.filename = full_name;
    }

    string thumb_link = media["thumb_link"].ToString();
    string media_link = media["media_link"].ToString();

    if (string.IsNullOrEmpty(media_link))
    {
        return null;
    }

    pf.OverrideFileLinks(thumb_link, media_link);

    pf.hash = media["media_hash"].ToString();
    pf.height = Convert.ToInt32(media["media_h"]);
    pf.width = Convert.ToInt32(media["media_w"]);

    if (media["spoiler"] != null)
    {
        pf.IsSpoiler = (media["spoiler"].ToString() != "0");
    }

    pf.thumbH = Convert.ToInt32(media["preview_h"]);
    pf.thumbW = Convert.ToInt32(media["preview_w"]);
    pf.size = Convert.ToInt32(media["media_size"]);

    // Part of the "media" value before its first dot.
    pf.thumbnail_tim = media["media"].ToString().Split('.')[0];
    pf.owner = owner;

    return pf;
}
/// <summary>
/// Serialises a post to a JSON string.
/// </summary>
/// <param name="gp">Post to serialise; when it is an OP post it is cast to a Thread to read the closed/sticky flags.</param>
/// <returns>
/// The JSON text. Board, ID, Name and Time are always written; the remaining keys are
/// written only when the corresponding value is present.
/// </returns>
private string get_post_string(GenericPost gp)
{
    JsonObject json = new JsonObject();

    if (gp.IsOpPost)
    {
        AniWrap.DataTypes.Thread thread = (AniWrap.DataTypes.Thread)gp;
        json.Put("Closed", thread.IsClosed);
        json.Put("Sticky", thread.IsSticky);
    }

    json.Put("Board", gp.Board);
    json.Put("ID", gp.ID);
    json.Put("Name", gp.Name);

    if (gp.Capcode != GenericPost.CapcodeEnum.None)
    {
        json.Put("Capcode", gp.Capcode.ToString());
    }

    if (!string.IsNullOrEmpty(gp.Comment))
    {
        // The comment goes through the word filter before it is stored.
        json.Put("RawComment", Wordfilter.Process(gp.Comment));
        // Legacy, disabled: dic.Add("FormattedComment", gp.CommentText);
    }

    // Flag stuffs
    put_unless_empty(json, "CountryFlag", gp.country_flag);
    put_unless_empty(json, "CountryName", gp.country_name);

    put_unless_empty(json, "Email", gp.Email);
    put_unless_empty(json, "Trip", gp.Trip);
    put_unless_empty(json, "Subject", gp.Subject);
    put_unless_empty(json, "PosterID", gp.PosterID);

    json.Put("Time", gp.Time.ToString());

    if (gp.File != null)
    {
        json.Put("FileHash", Program.base64tostring(gp.File.hash));
        json.Put("FileName", Wordfilter.Process(gp.File.filename) + "." + gp.File.ext);
        json.Put("ThumbTime", gp.File.thumbnail_tim);
        json.Put("FileHeight", gp.File.height);
        json.Put("FileWidth", gp.File.width);
        json.Put("FileSize", gp.File.size);
    }

    return json.ToString();
}

/// <summary>Writes <paramref name="value"/> under <paramref name="key"/> unless it is null or empty.</summary>
private static void put_unless_empty(JsonObject target, string key, string value)
{
    if (!string.IsNullOrEmpty(value))
    {
        target.Put(key, value);
    }
}
/// <summary>
/// Builds a <see cref="GenericPost"/> from one post JSON object.
/// </summary>
/// <param name="data">Post object; "no" and "time" are always read, text keys default to "" when absent.</param>
/// <param name="board">Board the post belongs to; also passed to ParseFile.</param>
/// <returns>The populated post; when a file was parsed, its owner is set to the post.</returns>
private GenericPost ParseReply(JsonObject data, string board)
{
    GenericPost reply = new GenericPost();
    reply.Board = board;

    // comment (kept as delivered, not HTML-decoded)
    reply.Comment = text_or_empty(data["com"]);

    // mail, poster name and subject are HTML-decoded
    reply.Email = decoded_text_or_empty(data["email"]);
    reply.Name = decoded_text_or_empty(data["name"]);
    reply.Subject = decoded_text_or_empty(data["sub"]);

    reply.Trip = text_or_empty(data["trip"]);
    reply.PosterID = text_or_empty(data["id"]);
    reply.country_flag = text_or_empty(data["country"]);
    reply.country_name = text_or_empty(data["country_name"]);

    // Capcode is only assigned when the key is present.
    if (data["capcode"] != null)
    {
        reply.Capcode = parse_capcode(Convert.ToString(data["capcode"]));
    }

    reply.File = ParseFile(data, board);
    if (reply.File != null)
    {
        reply.File.owner = reply;
    }

    reply.ID = Convert.ToInt32(data["no"]);
    reply.Time = Common.ParseUTC_Stamp(Convert.ToInt32(data["time"]));

    return reply;
}

/// <summary>Returns the value's string form, or "" when the value is null.</summary>
private static string text_or_empty(object value)
{
    return value == null ? "" : value.ToString();
}

/// <summary>Returns the HTML-decoded string form of the value, or "" when the value is null.</summary>
private static string decoded_text_or_empty(object value)
{
    return value == null ? "" : HttpUtility.HtmlDecode(value.ToString());
}