/// <summary>
/// Walks every match produced by <c>retrieveMatchesFromIndex</c> and, for each match that
/// <c>isMatchCorrect</c> accepts, fetches the property page and runs the common parse
/// functions over it. Successful parses are appended to "&lt;desc&gt;.txt"; failures are
/// handed to <c>WriteNotParsedPropertyToFile</c>. The success / failure counters are
/// incremented accordingly and the thread sleeps after every processed property.
/// </summary>
/// <returns>
/// Always <c>false</c>: the force-stop exit, the date-check exit and normal completion
/// all return <c>false</c>.
/// </returns>
private bool ParseImpl()
{
    MatchCollection indexMatches;

    // Retrieve all properties on each index; nothing to do if that fails.
    if (!retrieveMatchesFromIndex(out indexMatches))
    {
        return false;
    }

    // Iterate through each property.
    foreach (Match indexMatch in indexMatches)
    {
        if (!bIsRunning)
        {
            bIsRunning = true;
        }

        // A pending stop request is consumed here and ends the run immediately.
        if (bForceStop)
        {
            bForceStop = false;
            bIsRunning = false;
            return false;
        }

        // Get the date and link of the property; skip matches that do not yield both.
        DateTime propertyDate;
        String propertyLink;
        if (!isMatchCorrect(indexMatch, out propertyDate, out propertyLink))
        {
            continue;
        }

        // Once the registry date check fails no more parsing is required, so exit here.
        if (!CheckDateTimeWithRegistry(ref propertyDate))
        {
            return false;
        }

        GetPropertyPage(propertyLink);

        string description = "";
        string matchedText = "";
        PropertyDetails details = new PropertyDetails();

        if (p_Parser.Iterate_Over_COMMON_Parse_Functions(ref description, ref matchedText, ref details))
        {
            FileAppend(description + ".txt", matchedText, false);
            ++successfully_parsed;
        }
        else
        {
            // Only changed dates are meant to be written out into the HTML.
            bool dateChanged = HasDateTimeChanged(ref propertyDate);
            WriteNotParsedPropertyToFile(dateChanged, ref propertyDate, propertyLink);
            ++unsuccessfully_parsed;
        }

        System.Threading.Thread.Sleep(SLEEP_TIME_OUT_AFTER_EVERY_PROPERTY);
    }

    return false;
}
/// <summary>
/// Tries one wide parse condition against the current property page, combined in turn
/// with each narrowing-down condition. The first combination whose match exposes more
/// than one group wins: <paramref name="desc"/> and <paramref name="found"/> are filled
/// in and the property details are extracted via <c>ParsePropertyDetail</c>.
/// </summary>
/// <param name="WIDE_PARSE_NAME">Name of the wide parse condition to use.</param>
/// <param name="desc">
/// On success, set to "&lt;n&gt;) &lt;narrow type&gt; - &lt;wide name&gt;", where n is the
/// 1-based position of the narrowing-down condition that matched.
/// </param>
/// <param name="found">On success, set to the full text of the match (group 0).</param>
/// <param name="pd">On success, populated by <c>ParsePropertyDetail</c>.</param>
/// <returns><c>true</c> if some narrowing-down condition matched; otherwise <c>false</c>.</returns>
private bool COMMON_Parse_Functions(string WIDE_PARSE_NAME, ref string desc, ref string found, ref PropertyDetails pd)
{
    int ordinal = 0;

    // Try narrowing down the wide search into specific types.
    foreach (string narrowType in COMMON_Get_NarrowingDown_ParseConditions())
    {
        ++ordinal;

        Regex wideRegex = new Regex(COMMON_Parse_RegularExpressionsWide(WIDE_PARSE_NAME) + COMMON_Parse_RegularExpressionsNarrowingDown(narrowType));
        Match wideMatch = wideRegex.Match(p_DailyReport.CURRENT_PROPERTY_PAGE);

        // Only a match that exposes more than group 0 counts as a hit.
        if (wideMatch.Groups.Count <= 1)
        {
            continue;
        }

        // The description is important: it identifies which combination matched.
        desc = ordinal.ToString() + ") " + narrowType + " - " + WIDE_PARSE_NAME;
        found = wideMatch.Groups[0].ToString();

        // Very good but not perfect match (a few inconsistencies remain).
        // NOTE(review): the result of this very-specific match is not used anywhere
        // in this method; the calls are kept so behaviour stays unchanged.
        Regex specificRegex = COMMON_Parse_RegularExpressionsVerySpecific(narrowType);
        specificRegex.Match(wideMatch.Groups["compl"].ToString());

        // Found using wide + narrowing down: now pull the actual data out of the match.
        ParsePropertyDetail(ref wideMatch, WIDE_PARSE_NAME, narrowType, ref pd);
        return true;
    }

    return false;
}
/// <summary>
/// Used only to extract the actual property values (address, city, zip code) from a
/// match that was already found with a wide + narrowing-down condition pair.
/// </summary>
/// <param name="tmatch">The match to read the "addr", "city" and "zip" groups from.</param>
/// <param name="WideCondition">Name of the wide condition that produced the match.</param>
/// <param name="NarrowCondition">Name of the narrowing-down condition that produced the match.</param>
/// <param name="pd">
/// Reset to defaults (State is always "Georgia") and then filled with whatever the
/// condition pair allows to be extracted.
/// </param>
public void ParsePropertyDetail(ref Match tmatch, string WideCondition, string NarrowCondition, ref PropertyDetails pd)
{
    // Start from a clean set of property details.
    pd.isApt = false;
    pd.City = "";
    pd.State = "Georgia";
    pd.Address = "";
    pd.AptNo = "";
    pd.ZipCode = 0;
    pd.SQLlink = 0;

    // All of them have an address field.
    pd.Address = p_Cleanup.AddressCleanup(tmatch.Groups["addr"].ToString());

    bool wideIsKnownAs = (WideCondition == "COMMON_KNOWN_AS");
    bool wideIsLocated = (WideCondition == "COMMON_IS_LOCATED");
    bool wideIsEncumbered = (WideCondition == "COMMON_ENCUMBERED_PR");

    switch (NarrowCondition)
    {
        case "ZipCode":
            // 1) Zip code specific parsing.
            if (wideIsKnownAs || wideIsLocated)
            {
                Group zipGroup = tmatch.Groups["zip"];
                if (p_DailyReport.isValidStr(zipGroup))
                {
                    pd.ZipCode = int.Parse(zipGroup.ToString());
                }
            }
            break;

        case "Georgia":
            // 2) Most matches fall in here via COMMON_KNOWN_AS; a few exceptions arrive
            //    via COMMON_IS_LOCATED or COMMON_ENCUMBERED_PR. All of them carry a city.
            if (wideIsKnownAs || wideIsLocated || wideIsEncumbered)
            {
                pd.City = p_Cleanup.CityCleanup(tmatch.Groups["city"].ToString(), 5, true);
            }
            break;

        case "GA":
            // 3) Same as the common Georgia case, but only for COMMON_KNOWN_AS.
            if (wideIsKnownAs)
            {
                pd.City = p_Cleanup.CityCleanup(tmatch.Groups["city"].ToString(), 5, true);
            }
            break;

        case "no Zip or State":
            // 4) Exception state: the city cannot be extracted, and the address info
            //    has to be trimmed manually instead.
            if (wideIsKnownAs)
            {
                p_Cleanup.isRemovedEverythingAfterStreetName(ref pd.Address);
            }
            break;

        default:
            break;
    }

    // Apartment assignment is currently disabled; the original logic is kept below
    // for reference.
    //string tno;
    // Some of them could have apartment info in the city, so check city first
    // (only in Georgia Condition) - that is where it actually occurs
    //if (NarrowCondition == "Georgia" && pd.City != "")
    //{
    //    pd.isApt = p_Cleanup.isApartment(tmatch.Groups["city"].ToString(), out tno);
    //}
    // Otherwise just check the address and assign it if necessary
    //if (!(pd.isApt) && (pd.isApt = p_Cleanup.isApartment(pd.Address, out tno)) && (tno != ""))
    //{
    //    pd.AptNo = tno;
    //}
}
// Main Parse Logic
#region Main Parse Logic
/// <summary>
/// Runs <c>COMMON_Parse_Functions</c> once for each wide parse condition, in the order
/// returned by <c>COMMON_Get_Wide_ParseConditions</c>, stopping at the first one that succeeds.
/// </summary>
/// <param name="desc">Passed through to <c>COMMON_Parse_Functions</c>.</param>
/// <param name="found">Passed through to <c>COMMON_Parse_Functions</c>.</param>
/// <param name="pd">Passed through to <c>COMMON_Parse_Functions</c>.</param>
/// <returns><c>true</c> if any wide condition parsed successfully; otherwise <c>false</c>.</returns>
public bool Iterate_Over_COMMON_Parse_Functions(ref string desc, ref string found, ref PropertyDetails pd)
{
    bool parsed = false;

    // Execute all parsing, one wide condition at a time, until one succeeds.
    foreach (string wideType in COMMON_Get_Wide_ParseConditions())
    {
        parsed = COMMON_Parse_Functions(wideType, ref desc, ref found, ref pd);
        if (parsed)
        {
            break;
        }
    }

    return parsed;
}
/// <summary>
/// Click handler for the Parse button. Loads the file selected in
/// <c>comboBoxFileSelect</c> (relative to <c>Base_Folder_Path</c>), runs a regular
/// expression over its whole content and shows one grid row per match, with one column
/// per named group plus a leading "LineNumber" column.
/// </summary>
/// <remarks>
/// The regular expression is either the built-in one looked up through
/// <c>p_DailyR.GetRESubExpression</c> (when <c>chkParseParam</c> is checked; the narrow
/// and wide condition names are parsed out of the selected file name) or the text typed
/// into <c>textBoxRE</c>. With the built-in expression, the "addr", "city" and "zip"
/// columns show the values produced by <c>ParsePropertyDetail</c> instead of the raw
/// group text. Nothing happens if the selected file does not exist. Any exception is
/// caught and reported in a message box.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void btnParse_Click(object sender, EventArgs e)
{
    try
    {
        string selectedName = comboBoxFileSelect.Items[comboBoxFileSelect.SelectedIndex].ToString();
        string FileToLoad = Base_Folder_Path + selectedName;
        if (!File.Exists(FileToLoad))
        {
            return;
        }

        bool useBuiltIn = chkParseParam.Checked;
        if (!useBuiltIn && textBoxRE.Text == "")
        {
            MessageBox.Show("Enter Regular Expression");
            return;
        }

        // Make sure that all the old data is gone before continuing.
        dataGridView1.Columns.Clear();

        // Read the whole file; the using block guarantees the file handle is released
        // even if reading (or anything later) throws.
        string ToParse;
        using (StreamReader sr = new StreamReader(FileToLoad, System.Text.Encoding.GetEncoding(1252)))
        {
            ToParse = sr.ReadToEnd();
        }

        Regex temprx;
        String WideCondition = "";
        String NarrowCondition = "";

        if (useBuiltIn)
        {
            // Use the built-in expression: first parse the condition names out of the
            // selected file name ("<n>) <narrow> - <wide>.txt").
            Regex nameRx = new Regex(@"\d\)\s(?<sename>.*)\s-\s(?<pname>.*).txt");
            Match nameMatch = nameRx.Match(selectedName);

            // Now assign the correct regular expression.
            NarrowCondition = nameMatch.Groups["sename"].ToString();
            WideCondition = nameMatch.Groups["pname"].ToString();
            temprx = p_DailyR.GetRESubExpression(NarrowCondition);
        }
        else
        {
            temprx = new Regex(textBoxRE.Text);
        }

        // Only named groups are shown; groups whose name starts with a digit are skipped.
        ArrayList grouplist = new ArrayList();
        foreach (string str in temprx.GetGroupNames())
        {
            if (!char.IsDigit(str, 0))
            {
                grouplist.Add(str);
            }
        }
        int noOfGroups = grouplist.Count;

        // Column headers: the running match number first, then one column per named group.
        System.Windows.Forms.DataGridViewTextBoxColumn dl = new System.Windows.Forms.DataGridViewTextBoxColumn();
        dl.HeaderText = "LineNumber";
        dl.Name = "LineNumber";
        dl.FillWeight = 25;
        this.dataGridView1.Columns.Add(dl);

        foreach (string str in grouplist)
        {
            System.Windows.Forms.DataGridViewTextBoxColumn dg = new System.Windows.Forms.DataGridViewTextBoxColumn();
            dg.HeaderText = str;
            dg.Name = str;
            dg.FillWeight = (100 / noOfGroups);
            this.dataGridView1.Columns.Add(dg);
        }

        // Now parse the text file and fill one row per match.
        MatchCollection tmatches = temprx.Matches(ToParse);
        string[] tstring = new string[(noOfGroups + 1)];
        DataGridViewRowCollection rows = this.dataGridView1.Rows;
        int it = 1;

        foreach (Match tmatch in tmatches)
        {
            tstring[0] = it.ToString();

            // The cleaned-up details depend only on the match and the condition names,
            // so compute them once per match rather than once per column. Skipped when
            // there are no named groups, as nothing would read the result.
            PropertyDetails pd = new PropertyDetails();
            if (useBuiltIn && noOfGroups > 0)
            {
                Parser pr = p_DailyR.get_Parser_pointer;
                Match t = tmatch;
                pr.ParsePropertyDetail(ref t, WideCondition, NarrowCondition, ref pd);
            }

            // Slot 0 holds the running number; named groups fill slots 1..noOfGroups.
            for (int i = 1; i < (noOfGroups + 1); ++i)
            {
                string groupname = grouplist[i - 1].ToString();

                if (useBuiltIn && groupname == "addr")
                {
                    tstring[i] = pd.Address;
                }
                else if (useBuiltIn && groupname == "city")
                {
                    tstring[i] = pd.City;
                }
                else if (useBuiltIn && groupname == "zip")
                {
                    tstring[i] = pd.ZipCode.ToString();
                }
                else
                {
                    tstring[i] = tmatch.Groups[groupname].ToString();
                }
            }

            ++it;
            rows.Add(tstring);
        }
    }
    catch (Exception eC)
    {
        MessageBox.Show("Regulare Expression Error - " + eC.Message);
    }
}