/// <summary>
/// Reads every "*.txt" file found under <paramref name="dir"/> (all subdirectories included) into a
/// working folder named after the subscription, invoking <c>r.ParseReg()</c> after each file.
/// </summary>
/// <param name="r">Regulator whose <c>ParseReg()</c> is called once per file that was read.</param>
/// <param name="dir">Subscription directory to scan; its last path segment is used as the subscription name.</param>
/// <param name="to">Parent folder under which the working subdirectory is created.</param>
/// <param name="boundary">Passed through unchanged to <c>ReadFile</c>.</param>
public static void ReadDir(Regulator r, string dir, string to, string boundary)
{
    Globals.LOG.AppendLocation();

    // The working subdirectory takes the name of the last segment of the source directory.
    string subscription = new DirectoryInfo(dir).Name;
    string workingDir = to + "/" + subscription;

    string[] inputs = SortNumerically(Directory.GetFiles(dir, "*.txt", SearchOption.AllDirectories)).ToArray();
    if (inputs.Length == 0)
    {
        // Nothing to read: no working folder is created and nothing is logged.
        return;
    }

    Directory.CreateDirectory(workingDir);
    Console.WriteLine("subscription = " + subscription);
    Globals.LOG.AppendMessage("Begin read for subscription = " + subscription);

    // Read each file into the working folder, then parse it (the original comment notes that
    // ParseReg also purges the file from the working directory).
    for (int i = 0; i < inputs.Length; i++)
    {
        ReadFile(inputs[i], workingDir, boundary);

        // A failed parse stops processing of the remaining files.
        if (!r.ParseReg())
        {
            break;
        }
    }
}
/// <summary>
/// Entry point. Builds the per-subscription <c>Regulator</c> configurations, moves/cleans the
/// working folders, then records a job row plus one file row and one content row per parsed
/// regulation in the database.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
public static void Main(string[] args)
{
    Console.WriteLine(transferProcessed);

    // read inflow directory
    // NOTE(review): every ReadDir call is commented out, so this try block is currently empty and
    // the catch cannot fire. The calls also pass two arguments while ReadDir takes four; they need
    // updating before being re-enabled.
    try
    {
        //ReadDir(start_folder + "/" + CFR, end_folder);
        //ReadDir(start_folder + "/" + PUC, end_folder);
        //ReadDir(start_folder + "/" + NCS, end_folder);
        //ReadDir(start_folder + "/" + NSL, end_folder);
        //ReadDir(start_folder + "/" + NYCR, end_folder);
        //ReadDir(start_folder + "/" + NYSL, end_folder);
    }
    catch (Exception)
    {
        Console.WriteLine("Incorrect start_folder. Verify that subscription id exists.");
    }

    // configure nested table of contents [meta] data
    // NOTE(review): the Atom namespace below is spelled "wwww.w3.org" (four w's); the published Atom
    // namespace is "http://www.w3.org/2005/Atom". Left untouched because these are runtime keys --
    // confirm against what Regulator actually matches on.
    List<string> meta = new List<string>
    {
        "{http://wwww.w3.org/2005/Atom}title",      // 0 - Source / Regulation Body
        "{http://wwww.w3.org/2005/Atom}subtitle",   // 1 - File Name
        "{http://wwww.w3.org/2005/Atom}id",         // 2 - File Identifier
        "{http://wwww.w3.org/2005/Atom}updated",    // 3 - File Date Updated
        "{http://services.lexisnexis.com/interfaces/publish/lnpub/1/}publishType", // 4 - File Publish Type
        "{http://wwww.w3.org/2005/Atom}entry"       // pointer
    };
    List<string> metameta = new List<string>
    {
        "{http://wwww.w3.org/2005/Atom}title",      // 5 - Content Identifier
        "{http://wwww.w3.org/2005/Atom}updated",    // 6 - Content Date Updated
        "{http://services.lexisnexis.com/interfaces/publish/lnpub/1/}action" // 7 - Content Action
    };

    // configure content data
    // Each selector list has 13 positional slots (0-12); a null slot means that subscription
    // configures no selector for that position.
    Regulator c = new Regulator(end_folder, CFR, meta, metameta, new List<string>
    {
        "citations",                                        // 0 - Content Citation
        "jurisSystem[normalizedLongName]",                  // 1 - Content Jurisdiction
        "hierarchyLevel[levelType=@title]/heading",         // 2 - Content Title 1 [Title]
        "hierarchyLevel[levelType=@subtitle]/heading",      // 3 - Content Title 2 [Subtitle]
        "hierarchyLevel[levelType=@chapter]/heading",       // 4 - Content Topic 1 [Chapter]
        "hierarchyLevel[levelType=@subchapter]/heading",    // 5 - Content Topic 2 [Subchapter]
        "hierarchyLevel[levelType=@part]/heading",          // 6 - Content Section 1 [Part]
        "hierarchyLevel[levelType=@subpart]/heading",       // 7 - Content Section 2 [Subpart]
        "hierarchyLevel[levelType=@section]/heading",       // 8 - Content Section Description [Section]
        "hierarchyLevel[levelType=@subsection]/heading",    // 9 - Content Subsection 1 [Subsection]
        "hierarchyLevel[levelType=@unclassified]/heading",  //10 - Content Subsection 2 [Unclassified]
        "administrativeCode",                               //11 - Content Description [Description]
        "historyItem",                                      //12 - Content Reference Citation [Reference Citations]
    });
    Regulator p = new Regulator(end_folder, PUC, meta, metameta, new List<string>
    {
        "citations",
        "jurisSystem[normalizedLongName]",
        "governmentBodyName[normalizedLongName]",
        null,
        "fullCaseName",
        "docketNumber",
        null,
        "decisionDate",
        null,
        null,
        null,
        "administrativeDocBody",
        null,
    });
    Regulator s = new Regulator(end_folder, NCS, meta, metameta, new List<string>
    {
        "citations",
        "jurisSystem[normalizedLongName]",
        "hierarchyLevel[levelType=@title]/heading",
        "hierarchyLevel[levelType=@subtitle]/heading",
        "hierarchyLevel[levelType=@chapter]/heading",
        "hierarchyLevel[levelType=@subchapter]/heading",
        "hierarchyLevel[levelType=@part]/heading",
        "hierarchyLevel[levelType=@subpart]/heading",
        "hierarchyLevel[levelType=@section]/heading",
        "hierarchyLevel[levelType=@subsection]/heading",
        "hierarchyLevel[levelType=@unclassified]/heading",
        "legislativeDocBody",
        null,
    });
    Regulator statutes = new Regulator(end_folder, NSL, meta, metameta, new List<string>
    {
        "citations",
        "jurisSystem[normalizedLongName]",
        "hierarchyLevel[levelType=@topic]/heading",
        null,
        "hierarchyLevel[levelType=@article]/heading",
        "hierarchyLevel[levelType=@title]/heading",
        null,
        null,
        null,
        null,
        null,
        "legislativeDocBody",
        "history",
    });
    Regulator city_regs = new Regulator(end_folder, NYCR, meta, metameta, new List<string>
    {
        "citations",
        "jurisSystem[normalizedLongName]",
        "hierarchyLevel[levelType=@topic]/heading",
        "hierarchyLevel[levelType=@rule]/heading",
        "hierarchyLevel[levelType=@title]/heading",
        null,
        "hierarchyLevel[levelType=@chapter]/heading",
        null,
        "hierarchyLevel[levelType=@section]/heading",
        null,
        null,
        "legislativeDocBody",
        null,
    });
    Regulator state_legis = new Regulator(end_folder, NYSL, meta, metameta, new List<string>
    {
        "citations",
        "jurisSystem[normalizedLongName]",
        "hierarchyLevel[levelType=@title]/heading",
        "hierarchyLevel[levelType=@subtitle]/heading",
        "hierarchyLevel[levelType=@chapter]/heading",
        "hierarchyLevel[levelType=@subchapter]/heading",
        "hierarchyLevel[levelType=@part]/heading",
        "hierarchyLevel[levelType=@subpart]/heading",
        "hierarchyLevel[levelType=@section]/heading",
        "hierarchyLevel[levelType=@subsection]/heading",
        "hierarchyLevel[levelType=@unclassified]/heading",
        "legislativeDocBody",
        null,
    });

    // interpret / parse content data as regulations
    // All ParseReg calls are disabled, so regs is empty unless one of them is re-enabled.
    List<List<Regulation>> regs = new List<List<Regulation>>();
    //regs.Add(c.ParseReg());
    //regs.Add(p.ParseReg());
    //regs.Add(s.ParseReg());
    //regs.Add(statutes.ParseReg());
    //regs.Add(city_regs.ParseReg());
    //regs.Add(state_legis.ParseReg());

    Directory.CreateDirectory(processed + "/" + CFR); // create the processed directory if it does not exist
    if (transferProcessed)
    {
        MoveDir(start_folder + "/" + CFR, processed + "/" + CFR); // move processed content
    }

    // Best-effort cleanup. The second argument is "recursive": when keepRegulationContent is true
    // the delete is non-recursive, so a non-empty folder makes Directory.Delete throw and the
    // content is left in place. The failure is reported instead of silently discarded.
    try
    {
        Directory.Delete(end_folder, !keepRegulationContent);
    }
    catch (Exception e)
    {
        Console.WriteLine("Could not delete " + end_folder + ": " + e.Message);
    }

    // write regulation attributes to db
    try
    {
        cnn.Open();
        Guid g = Guid.NewGuid();

        using (SqlCommand jobInsert = new SqlCommand("INSERT INTO " + job_table + "(Job_Id, Job_Type, Start_Time) VALUES(@guid, @task, @start)", cnn))
        {
            AddSqlValue(jobInsert, "@guid", g);
            AddSqlValue(jobInsert, "@task", "Parsing XML");
            AddSqlValue(jobInsert, "@start", SqlTimestampNow());

            // A failed job insert is reported but does not stop the per-regulation writes.
            try
            {
                jobInsert.ExecuteNonQuery();
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }

        foreach (List<Regulation> body in regs)
        {
            foreach (Regulation reg in body)
            {
                WriteRegulationRows(reg);
            }
        }

        // Stamp the job's end time once, after every body has been written. Previously this
        // update lived inside the per-body loop, so it never ran when regs was empty.
        using (SqlCommand jobUpdate = new SqlCommand("UPDATE " + job_table + " SET End_Time=@end WHERE Job_Id=@guid", cnn))
        {
            AddSqlValue(jobUpdate, "@guid", g);
            AddSqlValue(jobUpdate, "@end", SqlTimestampNow());
            jobUpdate.ExecuteNonQuery();
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e);
    }
    finally
    {
        // Close the connection even when one of the statements above threw.
        if (cnn != null)
        {
            cnn.Close();
        }
    }
}

// Writes the file-tracking row, the content row and the file end-time update for one regulation.
// Exceptions propagate to the caller (Main catches and prints them).
private static void WriteRegulationRows(Regulation reg)
{
    Guid file_g = Guid.NewGuid();

    using (SqlCommand fileInsert = new SqlCommand("INSERT INTO " + file_table + "(Tbl_id, Subscription_id, File_name, Start_Time) VALUES(@guid, @sub, @file, @start)", cnn))
    {
        AddSqlValue(fileInsert, "@guid", file_g);
        AddSqlValue(fileInsert, "@sub", reg.getSubscription());
        AddSqlValue(fileInsert, "@file", reg.getMeta(1));
        AddSqlValue(fileInsert, "@start", SqlTimestampNow());
        fileInsert.ExecuteNonQuery();
    }

    // general configurations -- string manipulation on params should be localized here
    Console.WriteLine("Generating SQL Command.");
    using (SqlCommand contentInsert = new SqlCommand("INSERT INTO " + content_table + "(Subscription_Id, File_Name, Content_Id, "
        + "Publish_Type, Action, Updated, Jurisdiction, Citation, Regulation_Type, Body, Title1, "
        + "Title2, Topic1, Topic2, Section1, Section2, Section_Description, SubSection1, SubSection2, Description, RefCitation, "
        + "Date_Type, Actual_Date) VALUES(@sub, @file, @content, @pub, @action, @updated, @juris, "
        + "@citation, @type, @source, @title_1, @title_2, @topic_1, @topic_2, @sec_1, @sec_2, @secdescrip,"
        + "@subsec1, @subsec2, @descrip, @refcite, @dateType, @actualDate)", cnn))
    {
        // getMeta indices follow the meta/metameta lists built in Main (0-4 file level, 5-7 content level).
        AddSqlValue(contentInsert, "@sub", reg.getSubscription());
        AddSqlValue(contentInsert, "@file", reg.getMeta(1));
        AddSqlValue(contentInsert, "@content", ContentIdFromUrn(reg.getMeta(5)));
        AddSqlValue(contentInsert, "@pub", reg.getMeta(4));
        AddSqlValue(contentInsert, "@action", reg.getMeta(7));
        AddSqlValue(contentInsert, "@updated", reg.getMeta(6));
        AddSqlValue(contentInsert, "@juris", reg.getColumn(1));
        AddSqlValue(contentInsert, "@citation", reg.getColumn(0));
        AddSqlValue(contentInsert, "@type", "");
        AddSqlValue(contentInsert, "@source", reg.getMeta(0));
        AddSqlValue(contentInsert, "@title_1", reg.getColumn(2));
        AddSqlValue(contentInsert, "@title_2", reg.getColumn(3));
        AddSqlValue(contentInsert, "@topic_1", reg.getColumn(4));
        AddSqlValue(contentInsert, "@topic_2", reg.getColumn(5));
        AddSqlValue(contentInsert, "@sec_1", reg.getColumn(6));
        AddSqlValue(contentInsert, "@sec_2", reg.getColumn(7));
        // NOTE(review): the selector comments in Main label column 8 as Section Description,
        // 9 as Subsection 1 and 10 as Subsection 2, but the mapping below uses 10 / 8 / 9.
        // Kept as-is because changing it alters stored data -- confirm which one is intended.
        AddSqlValue(contentInsert, "@secdescrip", reg.getColumn(10));
        AddSqlValue(contentInsert, "@subsec1", reg.getColumn(8));
        AddSqlValue(contentInsert, "@subsec2", reg.getColumn(9));
        AddSqlValue(contentInsert, "@descrip", reg.getColumn(11));
        AddSqlValue(contentInsert, "@refcite", reg.getColumn(12));
        AddSqlValue(contentInsert, "@dateType", "");
        AddSqlValue(contentInsert, "@actualDate", "");
        contentInsert.ExecuteNonQuery();
    }

    using (SqlCommand fileUpdate = new SqlCommand("UPDATE " + file_table + " SET End_Time=@end, Content_Item=@content WHERE Tbl_id=@guid", cnn))
    {
        AddSqlValue(fileUpdate, "@guid", file_g);
        AddSqlValue(fileUpdate, "@content", reg.getMeta(2));
        AddSqlValue(fileUpdate, "@end", SqlTimestampNow());
        fileUpdate.ExecuteNonQuery();
    }
}

// Adds a parameter, substituting DBNull.Value for null: a null value makes ADO.NET treat the
// parameter as not supplied and the command fails (relevant if a getter returns null).
private static void AddSqlValue(SqlCommand command, string name, object value)
{
    command.Parameters.AddWithValue(name, value ?? DBNull.Value);
}

// Current local time in the "yyyy-MM-dd h:mm:ss tt" text form used for all time columns.
// Formatted with the invariant culture so the AM/PM designator does not vary by machine locale.
private static string SqlTimestampNow()
{
    return DateTime.Now.ToString("yyyy-MM-dd h:mm:ss tt", System.Globalization.CultureInfo.InvariantCulture);
}

// Returns the text following the "urn:contentItem:" marker. When the marker is absent the id is
// returned unchanged (a fixed +16 offset would otherwise cut into, or run past, the string).
private static string ContentIdFromUrn(string id)
{
    const string marker = "urn:contentItem:";
    if (id == null)
    {
        return null;
    }

    int at = id.IndexOf(marker, StringComparison.Ordinal);
    return at < 0 ? id : id.Substring(at + marker.Length);
}