/// <summary>
/// Builds a Message from the two HTML nodes that describe it and appends it to Messages.
/// See the README for an explanation of the expected markup.
/// </summary>
/// <param name="messageHeader">The tag with class message_header; its first child holds the sender and its second child holds the timestamp.</param>
/// <param name="paragraphTag">The p tag whose inner text is the actual message.</param>
private void AddMessage(HtmlNode messageHeader, HtmlNode paragraphTag)
{
    // Number of trailing characters stripped from an e-mail style identifier to get the id.
    // NOTE(review): 13 matches the length of "@facebook.com" - confirm that is the only suffix seen.
    const int EmailSuffixLength = 13;

    HtmlNodeCollection attributes = messageHeader.ChildNodes;

    // First child: the sender, either as an e-mail style address or as a plain "First Last" name.
    string facebookIdentifier = Utilities.CleanEmailAddress(attributes[0].InnerText);

    // Second child: the timestamp. Everything except the last four characters goes in as the
    // first Moment argument, and the last three characters go in as the second.
    // NOTE(review): the trailing three characters are presumably a time-zone abbreviation - confirm against Moment.
    string rawTime = attributes[1].InnerText;
    string timeText = rawTime.Substring(0, rawTime.Length - 4);
    string timeSuffix = rawTime.Substring(rawTime.Length - 3);
    Moment time = new Moment(timeText, timeSuffix);

    string senderId;
    string senderName;
    if (facebookIdentifier.Contains("@"))
    {
        // E-mail style identifier: the id is whatever precedes the fixed-length suffix.
        senderId = facebookIdentifier.Substring(0, facebookIdentifier.Length - EmailSuffixLength);
        senderName = Lookup.GetName(senderId).Name;
    }
    else
    {
        // Plain first and last name.
        try
        {
            // Should have already been added at the beginning of the thread.
            senderId = Lookup.GetUID(facebookIdentifier);
        }
        catch (Exception)
        {
            // Deliberate best effort: any lookup failure falls through to the recovery below.
            if (Participants.Count - totalReal == 1)
            {
                // Exactly one participant has no real name yet, so this sender must be that one.
                // Update the participants list...
                Person unknown = Participants.First(p => p.Name.Contains("Unknown"));
                unknown.Name = facebookIdentifier;
                unknown.RealName = true;

                // ...and the lookup factory, so later messages resolve directly.
                Lookup.ChangeName(unknown.UID, new LookupResult(facebookIdentifier, true));
                senderId = unknown.UID;
            }
            else
            {
                // The sender cannot be matched to a participant; fall back to a random placeholder id.
                senderId = Math.Floor((random.NextDouble() * 100000)).ToString();
            }
        }

        senderName = facebookIdentifier;
    }

    // Fixes emojis coded in with hex values and those simply represented by plain text like :)
    string messageText = ReplacePlainTextEmojis(FixEmojiEncoding(paragraphTag.InnerText));
    Messages.Add(new Message(messageText, time, UID, senderId, senderName));
}
/// <summary>
/// Converts every thread HTML file under the configured "private" folder into a JSON file
/// and writes a manifest.json array describing them, then saves the id/name lookup back to CSV.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string privateLocation = ConfigurationManager.AppSettings["private"];
    string idNamesPath = privateLocation + @"\idNames.csv";
    string threadsDirectory = privateLocation + @"\threads";
    string jsonDirectory = privateLocation + @"\jsons\";
    string manifestPath = jsonDirectory + "manifest.json";

    IdLookupFactory factory = new IdLookupFactory(idNamesPath);

    // NOTE(review): the result is discarded; presumably a sanity check or warm-up lookup - confirm whether it is still needed.
    factory.GetUID("Ben Cooper");

    // The thread files are expected to exist already, named 1.html .. N.html, so the file count is the thread count.
    int totalThreads = Directory.GetFiles(threadsDirectory).Length;

    Console.WriteLine("Begin thread to JSON");

    // Start the manifest from scratch: appending here would leave a previous run's content
    // in front of the new array and produce invalid JSON.
    File.WriteAllText(manifestPath, "[");

    for (int i = 1; i <= totalThreads; i++)
    {
        HtmlDocument thread = new HtmlDocument();
        thread.Load(threadsDirectory + @"\" + i.ToString() + ".html");

        Thread current = new Thread(thread, i, factory);
        current.WriteMessageJsonToFile(jsonDirectory + i + ".json");

        // The last argument is true only for the final thread.
        current.WriteToManifest(manifestPath, true, i == totalThreads);

        if (i % 20 == 0) // just to document progress
        {
            Console.WriteLine((double)i / totalThreads * 100 + " % complete");
        }
    }

    File.AppendAllText(manifestPath, "]");
    factory.AddToCSV(idNamesPath);

    // Keep the console window open until a key is pressed.
    Console.Read();
}