/// <summary>
/// Builds a thread from a parsed HTML document: the participant list is read from the
/// text of the body's first child, and the body's non-text child nodes are consumed in
/// consecutive pairs, each pair being handed to AddMessage.
/// </summary>
/// <param name="thread">Parsed HTML document for a single thread.</param>
/// <param name="id">Value stored in UID.</param>
/// <param name="lookup">
/// Lookup factory stored in Lookup; when it is null, CreateListOfParticipants is told
/// not to look names up.
/// </param>
/// <exception cref="ArgumentNullException">thread is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The document has no body element.</exception>
public Thread(HtmlDocument thread, int id, IdLookupFactory lookup)
{
    if (thread == null)
    {
        throw new ArgumentNullException("thread");
    }

    UID = id;
    Lookup = lookup;
    random = new Random();

    // Names are only queried when a lookup factory was supplied.
    bool doLookup = Lookup != null;

    HtmlNode body = thread.DocumentNode.FirstChild.ChildNodes.FirstOrDefault(w => w.Name == "body");
    if (body == null)
    {
        // Same exception type the previous "[0]" indexer raised, but with a message that names the problem.
        throw new ArgumentOutOfRangeException("thread", "The thread document does not contain a body element.");
    }

    // Drop "#text" nodes (whitespace between elements) so only element nodes remain.
    List<HtmlNode> nodes = body.ChildNodes.Where(w => w.Name != "#text").ToList();

    Participants = CreateListOfParticipants(body.FirstChild.InnerText, doLookup);
    Messages = new List<Message>();

    // Nodes are consumed two at a time; the "i + 1" bound stops before a trailing
    // unpaired node instead of indexing past the end of the list.
    for (int i = 0; i + 1 < nodes.Count; i += 2)
    {
        // NOTE(review): the original author was unsure whether ChildNodes[0] or [1] is the right child here.
        AddMessage(nodes[i].ChildNodes[0], nodes[i + 1]);
    }

    // Per the original author's note, the export lists messages most recent first.
    Messages.Reverse();
}
/// <summary>
/// Appends the UID/name pairs held in idNames to the CSV file at <paramref name="path"/>,
/// one "uid,name,isReal" row per pair, skipping any UID for which a factory built from
/// that same path already reports ContainsUID.
/// </summary>
/// <param name="path">CSV file that is checked for existing UIDs and appended to.</param>
public void AddToCSV(string path)
{
    List<string> uids = idNames.GetFirstKeys();
    List<LookupResult> names = idNames.GetSecondKeys();

    // A second factory built from the same path is used only for the duplicate check.
    // This is an expensive way to detect duplicates, kept as-is from the original design.
    IdLookupFactory duplicateCheck = new IdLookupFactory(path);

    // Collect every new row first so the file is opened and written once,
    // rather than once per row.
    System.Text.StringBuilder newRows = new System.Text.StringBuilder();
    for (int i = 0; i < idNames.Count; i++)
    {
        if (duplicateCheck.ContainsUID(uids[i]))
        {
            continue;
        }

        LookupResult name = names[i];
        newRows.Append(uids[i] + "," + name.Name + "," + name.IsReal + Environment.NewLine);
    }

    // Leave the file untouched when there is nothing new to add.
    if (newRows.Length > 0)
    {
        File.AppendAllText(path, newRows.ToString());
    }
}
/// <summary>
/// Entry point. For every file counted in the "threads" folder under the configured
/// private location, loads the numbered thread HTML file, writes its messages to a
/// numbered JSON file, and adds an entry to manifest.json; finally appends the
/// collected id/name pairs to idNames.csv and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Only the disabled one-off step (load messages.htm, split it with SeparateThreads) used this document.
    HtmlDocument all = new HtmlDocument();

    string privateLocation = ConfigurationManager.AppSettings["private"];

    string idNamesPath = privateLocation + @"\idNames.csv";
    string threadsFolder = privateLocation + @"\threads";
    string jsonFolder = privateLocation + @"\jsons\";
    string manifestPath = jsonFolder + "manifest.json";

    IdLookupFactory factory = new IdLookupFactory(idNamesPath);
    factory.GetUID("Ben Cooper");

    // Thread files are addressed as 1.html .. N.html, where N is the number of files in the folder.
    int totalThreads = Directory.GetFiles(threadsFolder).Length;

    Console.WriteLine("Begin thread to JSON");

    // The manifest is wrapped in "[" ... "]" around the per-thread entries.
    File.AppendAllText(manifestPath, "[");

    int threadId = 1;
    while (threadId <= totalThreads)
    {
        HtmlDocument thread = new HtmlDocument();
        thread.Load(threadsFolder + @"\" + threadId.ToString() + ".html");

        Thread current = new Thread(thread, threadId, factory);
        current.WriteMessageJsonToFile(jsonFolder + threadId + ".json");

        // The final argument is true only for the last thread.
        bool isLastThread = threadId == totalThreads;
        current.WriteToManifest(manifestPath, true, isLastThread);

        // Report progress every 20 threads.
        if (threadId % 20 == 0)
        {
            double percentDone = (double)threadId / totalThreads * 100;
            Console.WriteLine(percentDone + " % complete");
        }

        threadId++;
    }

    File.AppendAllText(manifestPath, "]");

    factory.AddToCSV(idNamesPath);

    // Keep the console window open until a key is pressed.
    Console.Read();
}
/// <summary>
/// Creates a thread with an empty message list for a participant list that was
/// parsed elsewhere.
/// </summary>
/// <param name="participants">The list of participants (parsed elsewhere).</param>
/// <param name="lookup">Lookup factory stored in Lookup.</param>
public Thread(List<Person> participants, IdLookupFactory lookup)
{
    Messages = new List<Message>();
    Participants = participants;
    Lookup = lookup;

    // The HTML-based constructor initialises `random`; do the same here so the
    // field is not left unset when a thread is built through this constructor.
    random = new Random();
}