/**
 * Parses all the issues listed in issuesArray.
 *
 * For each entry the issue JSON is downloaded from the entry's "url", an Issue
 * (with its Batch, Title and a short Page list) is built from it, the Issue is
 * serialized to output3 + ".json", and ParseJPages is called to write one file
 * per page.
 *
 * Reads the class-level fields output2 / reloutput2 and overwrites output3 /
 * reloutput3 / file (all declared outside this method).
 *
 * issuesArray: issue summaries; each entry needs "url", "date_issued" and "title".
 * Returns the issues whose file was written; empty when issuesArray is null or empty.
 * An entry that fails to download, parse or write is reported on standard error
 * and skipped, so one bad issue does not stop the rest.
 */
private static List<Issue> ParseJIssues(JArray issuesArray)
{
    List<Issue> ret_list = new List<Issue>();
    if (issuesArray == null || issuesArray.Count == 0)
    {
        return ret_list;
    }

    // One client for the whole run, released when the loop is done.
    using (WebClient wc = new WebClient())
    {
        foreach (JObject obj in issuesArray)
        {
            try
            {
                // Download the full issue document; the response stream is
                // disposed as soon as the text has been read.
                string s;
                using (Stream data = wc.OpenRead((String)obj["url"]))
                using (StreamReader reader = new StreamReader(data))
                {
                    s = reader.ReadToEnd();
                }

                JObject jobj = JObject.Parse(s);
                JArray pagesArray = (JArray)jobj["pages"];

                Issue i = new Issue();

                JObject batchObj = (JObject)jobj["batch"];
                Batch b = new Batch();
                b.url = (String)batchObj["url"];
                b.name = (String)batchObj["name"];
                b.local_url = reloutput2 + ".json";
                i.batch = b;

                i.date_issued = (String)obj["date_issued"];
                i.volume = (String)jobj["volume"];
                i.edition = (int)jobj["edition"];
                i.number = (String)jobj["number"];

                Title t = new Title();
                JObject temp = (JObject)obj["title"];
                t.name = (String)temp["name"];
                t.url = (String)temp["url"];
                i.title = t;
                i.url = (String)obj["url"];

                // The last character of the title name is dropped before it is
                // used in the file name (presumably a trailing period - TODO confirm).
                String titleStem = t.name.Remove(t.name.Length - 1);
                output3 = output2 + "." + titleStem + "-" + i.date_issued;
                reloutput3 = reloutput2 + "." + titleStem + "-" + i.date_issued;
                i.local_url = reloutput3 + ".json";

                // Parsing only necessary fields here to avoid redundancy in data.
                i.pages = new List<Page>();
                foreach (JObject pobj in pagesArray)
                {
                    Page p = new Page();
                    p.sequence = (int)pobj["sequence"];
                    p.url = (String)pobj["url"];
                    p.local_url = reloutput3 + ".Page" + p.sequence + ".json";
                    i.pages.Add(p);
                }

                // Writing to the Issue file; the writer is closed even if the write fails.
                String output = JsonConvert.SerializeObject(i, Formatting.Indented);
                file = new System.IO.StreamWriter(output3 + ".json");
                try
                {
                    file.WriteLine(output);
                }
                finally
                {
                    file.Close();
                }

                ret_list.Add(i);
                ParseJPages(pagesArray, i);
            }
            catch (Exception e)
            {
                // Best effort: report the failure and move on to the next issue.
                Console.Error.WriteLine("ParseJIssues: skipping issue: " + e.Message);
            }
        }
    }

    return ret_list;
}
/**
 * Writes one JSON file per page of an issue.
 *
 * For each entry in pagesArray a Page is built whose jp2 / ocr / pdf / text
 * links are derived from the entry's "url" with everything from ".json"
 * onwards removed. The Page is serialized to output4 + ".json".
 *
 * Reads the class-level fields output3 / reloutput3 and overwrites output4 /
 * reloutput4 / file (all declared outside this method).
 *
 * pagesArray: page summaries; each entry needs "url" and "sequence".
 * i: the issue the pages belong to; its url, date_issued and title are copied
 *    onto every page.
 * Returns the pages that were written; empty when pagesArray is null or empty.
 * An entry whose "url" is missing or does not contain ".json" is reported on
 * standard error and skipped instead of aborting the remaining pages.
 */
private static List<Page> ParseJPages(JArray pagesArray, Issue i)
{
    List<Page> ret_list = new List<Page>();
    if (pagesArray == null || pagesArray.Count == 0)
    {
        return ret_list;
    }

    foreach (JObject obj in pagesArray)
    {
        String pageUrl = (String)obj["url"];

        // Ordinal search: ".json" is a literal suffix, not linguistic text.
        int suffixAt = pageUrl == null ? -1 : pageUrl.IndexOf(".json", StringComparison.Ordinal);
        if (suffixAt < 0)
        {
            // Without the ".json" marker no resource links can be derived.
            Console.Error.WriteLine("ParseJPages: skipping page with unexpected url: " + pageUrl);
            continue;
        }
        String partialPath = pageUrl.Remove(suffixAt);

        Page p = new Page();
        p.url = pageUrl;

        // Trimmed-down copy of the parent issue so the page file does not embed
        // the issue's full page list.
        Issue i1 = new Issue();
        i1.url = i.url;
        i1.date_issued = i.date_issued;
        i1.local_url = reloutput3 + ".json";
        p.issue = i1;

        p.jp2 = partialPath + ".jp2";
        p.ocr = partialPath + "/ocr.xml";
        p.pdf = partialPath + ".pdf";
        p.sequence = (int)obj["sequence"];
        p.text = partialPath + "/ocr.txt";
        p.title = i.title;

        // Writing the page file; the writer is closed even if the write fails.
        output4 = output3 + ".Page" + p.sequence;
        reloutput4 = reloutput3 + ".Page" + p.sequence;
        p.local_url = reloutput4 + ".json";

        String output = JsonConvert.SerializeObject(p, Formatting.Indented);
        file = new System.IO.StreamWriter(output4 + ".json");
        try
        {
            file.WriteLine(output);
        }
        finally
        {
            file.Close();
        }

        ret_list.Add(p);
    }

    return ret_list;
}
/**
 * Parses the data of every batch in parseArray and writes each batch to its own file.
 *
 * For each entry a Batch (with its Awardee, lccns and a short Issue list) is
 * built, the batch JSON is downloaded from the entry's "url" to obtain its
 * "issues", the Batch is serialized to output2 + ".json", and ParseJIssues is
 * called on the downloaded issues.
 *
 * Reads the class-level fields output1 / reloutput1 and overwrites output2 /
 * reloutput2 / output3 / reloutput3 / file (all declared outside this method).
 *
 * parseArray: batch summaries; each entry needs "awardee", "ingested", "lccns",
 *             "name", "page_count" and "url".
 * Returns the batches whose file was written; empty when parseArray is null or empty.
 * An entry that fails to download, parse or write is reported on standard error
 * and skipped, so one bad batch does not stop the rest.
 */
private static List<Batch> ParseJBatch(JArray parseArray)
{
    List<Batch> ret_list = new List<Batch>();
    if (parseArray == null || parseArray.Count == 0)
    {
        return ret_list;
    }

    // One client for the whole run, released when the loop is done.
    using (WebClient wc = new WebClient())
    {
        foreach (JObject obj in parseArray)
        {
            try
            {
                Batch b = new Batch();

                Awardee a = new Awardee();
                JObject temp = (JObject)obj["awardee"];
                a.name = (String)temp["name"];
                a.url = (String)temp["url"];
                b.awardee = a;

                b.ingested = (String)obj["ingested"];
                JArray temp2 = (JArray)obj["lccns"];
                b.lccns = temp2.Select(jv => (string)jv).ToList();
                b.name = (String)obj["name"];
                b.page_count = (int)obj["page_count"];
                b.url = (String)obj["url"];

                output2 = output1 + "." + (b.name);
                reloutput2 = reloutput1 + "." + (b.name);
                b.local_url = reloutput2 + ".json";

                // Download the full batch document; the response stream is
                // disposed as soon as the text has been read.
                string s;
                using (Stream data = wc.OpenRead((String)obj["url"]))
                using (StreamReader reader = new StreamReader(data))
                {
                    s = reader.ReadToEnd();
                }

                JObject jobj = JObject.Parse(s);
                JArray issuesArray = (JArray)jobj["issues"];

                // Parsing only necessary fields here to avoid redundancy in data.
                b.issues = new List<Issue>();
                foreach (JObject iobj in issuesArray)
                {
                    Issue i = new Issue();
                    i.date_issued = (String)iobj["date_issued"];

                    Title t = new Title();
                    JObject temp3 = (JObject)iobj["title"];
                    t.name = (String)temp3["name"];
                    t.url = (String)temp3["url"];
                    i.title = t;

                    // The issue's own URL, not the enclosing batch's URL.
                    i.url = (String)iobj["url"];

                    // The last character of the title name is dropped before it is
                    // used in the file name (presumably a trailing period - TODO confirm).
                    String titleStem = t.name.Remove(t.name.Length - 1);
                    output3 = output2 + "." + titleStem + "-" + i.date_issued;
                    reloutput3 = reloutput2 + "." + titleStem + "-" + i.date_issued;
                    i.local_url = reloutput3 + ".json";

                    b.issues.Add(i);
                }

                // Writing the batch file; the writer is closed even if the write fails.
                String output = JsonConvert.SerializeObject(b, Formatting.Indented);
                file = new System.IO.StreamWriter(output2 + ".json");
                try
                {
                    file.WriteLine(output);
                }
                finally
                {
                    file.Close();
                }

                ret_list.Add(b);
                ParseJIssues(issuesArray);
            }
            catch (Exception e)
            {
                // Best effort: report the failure and move on to the next batch.
                Console.Error.WriteLine("ParseJBatch: skipping batch: " + e.Message);
            }
        }
    }

    return ret_list;
}