예제 #1
0
        /// <summary>
        /// Walks every property entry retrieved from the index and attempts to parse the
        /// page behind each one, recording successes and failures as it goes.
        /// </summary>
        /// <returns>
        /// Always <c>false</c> - every exit path of this method returns <c>false</c>.
        /// </returns>
        private bool ParseImpl()
        {
            MatchCollection indexEntries;

            // Nothing to iterate when the index could not be retrieved
            if (!retrieveMatchesFromIndex(out indexEntries))
            {
                return(false);
            }

            foreach (Match entry in indexEntries)
            {
                // Raise the running flag on the first entry that is processed
                if (!bIsRunning)
                {
                    bIsRunning = true;
                }

                // A pending stop request clears both flags and aborts the run
                if (bForceStop)
                {
                    bForceStop = false;
                    bIsRunning = false;
                    return(false);
                }

                DateTime entryDate;
                String   entryLink;

                // Get the date and link of the property; skip entries that do not provide them
                if (!isMatchCorrect(entry, out entryDate, out entryLink))
                {
                    continue;
                }

                // The first entry rejected by the registry date check ends the whole run:
                // no more parsing is required after it
                if (!CheckDateTimeWithRegistry(ref entryDate))
                {
                    return(false);
                }

                GetPropertyPage(entryLink);

                string          desc    = "";
                string          found   = "";
                PropertyDetails details = new PropertyDetails();

                if (p_Parser.Iterate_Over_COMMON_Parse_Functions(ref desc, ref found, ref details))
                {
                    // Parsed: append the matched text to the file named after the description
                    FileAppend((desc + ".txt"), found, false);
                    ++successfully_parsed;
                }
                else
                {
                    // Not parsed: we want to write out only changed dates into the HTML
                    WriteNotParsedPropertyToFile(HasDateTimeChanged(ref entryDate), ref entryDate, entryLink);
                    ++unsuccessfully_parsed;
                }

                // Pause after every processed property
                System.Threading.Thread.Sleep(SLEEP_TIME_OUT_AFTER_EVERY_PROPERTY);
            }

            return(false);
        }
예제 #2
0
        /// <summary>
        /// Tries the given wide expression combined with each narrowing-down expression against
        /// the current property page and, on the first hit, fills in the outputs.
        /// </summary>
        /// <param name="WIDE_PARSE_NAME">Name of the wide parse condition to use.</param>
        /// <param name="desc">
        /// On a hit, set to "&lt;n&gt;) &lt;narrowing type&gt; - &lt;wide name&gt;", where n is the
        /// 1-based position of the narrowing type that matched.
        /// </param>
        /// <param name="found">On a hit, set to the full text of the match (group 0).</param>
        /// <param name="pd">On a hit, populated through <c>ParsePropertyDetail</c>.</param>
        /// <returns><c>true</c> on the first hit; <c>false</c> when no narrowing type produced one.</returns>
        private bool COMMON_Parse_Functions(string WIDE_PARSE_NAME, ref string desc, ref string found, ref PropertyDetails pd)
        {
            int position = 0;

            // Try narrowing down the wide search into specific types
            foreach (string narrowType in COMMON_Get_NarrowingDown_ParseConditions())
            {
                ++position;

                string pattern   = COMMON_Parse_RegularExpressionsWide(WIDE_PARSE_NAME) + COMMON_Parse_RegularExpressionsNarrowingDown(narrowType);
                Match  wideMatch = new Regex(pattern).Match(p_DailyReport.CURRENT_PROPERTY_PAGE);

                // A hit is recognised by the match exposing more than one group.
                // NOTE(review): presumably a stand-in for wideMatch.Success - confirm.
                if (wideMatch.Groups.Count <= 1)
                {
                    continue;
                }

                // description is important
                desc  = position.ToString() + ") " + narrowType + " - " + WIDE_PARSE_NAME;
                found = wideMatch.Groups[0].ToString();

                // Very good but not perfect match (a few inconsistencies remain).
                // NOTE(review): the result of this specific match is not used anywhere below;
                // the call is kept so the sequence of helper invocations stays unchanged.
                Regex specificRegex = COMMON_Parse_RegularExpressionsVerySpecific(narrowType);
                specificRegex.Match(wideMatch.Groups["compl"].ToString());

                // Found using wide + narrowing down - now pull the actual data out of the match
                ParsePropertyDetail(ref wideMatch, WIDE_PARSE_NAME, narrowType, ref pd);

                return(true);
            }

            return(false);
        }
예제 #3
0
        /// <summary>
        /// Extracts the actual property values (address, city, zip code) from a match.
        /// Used only once a match has been found, to finally get the values out of it.
        /// </summary>
        /// <param name="tmatch">Match whose "addr", "city" and "zip" groups are read. It is not reassigned.</param>
        /// <param name="WideCondition">Name of the wide condition that produced the match.</param>
        /// <param name="NarrowCondition">Name of the narrowing-down condition that produced the match.</param>
        /// <param name="pd">
        /// Receives the result. Every field is reset first, so nothing from an earlier
        /// parse survives in it.
        /// </param>
        public void ParsePropertyDetail(ref Match tmatch, string WideCondition, string NarrowCondition, ref PropertyDetails pd)
        {
            // property details - start from a clean slate
            pd.isApt   = false;
            pd.City    = "";
            pd.State   = "Georgia";
            pd.Address = "";
            pd.AptNo   = "";
            pd.ZipCode = 0;
            pd.SQLlink = 0;

            // All of them have an address field
            pd.Address = p_Cleanup.AddressCleanup(tmatch.Groups["addr"].ToString());

            // 1) ZipCode Specific Parsing
            if ((NarrowCondition == "ZipCode") && (WideCondition == "COMMON_KNOWN_AS" || WideCondition == "COMMON_IS_LOCATED"))
            {
                if (p_DailyReport.isValidStr(tmatch.Groups["zip"]))
                {
                    // TryParse instead of Parse: a capture that is not a plain integer (or does
                    // not fit in an int) must not throw and abort the caller's whole run;
                    // ZipCode simply keeps its default of 0 in that case.
                    int zip;
                    if (int.TryParse(tmatch.Groups["zip"].ToString(), out zip))
                    {
                        pd.ZipCode = zip;
                    }
                }
            }
            // 2) 3) 4) Most others fall in here
            else if (((NarrowCondition == "Georgia") || (NarrowCondition == "GA") || (NarrowCondition == "no Zip or State")) &&
                     (WideCondition == "COMMON_KNOWN_AS"))
            {
                // exception state
                if (NarrowCondition == "no Zip or State")
                {
                    // Can't get city out + need to manually get out the Address Info
                    p_Cleanup.isRemovedEverythingAfterStreetName(ref pd.Address);
                }
                else
                {
                    pd.City = p_Cleanup.CityCleanup(tmatch.Groups["city"].ToString(), 5, true);
                }
            }
            // 2) exception a few fall in here
            else if (((NarrowCondition == "Georgia")) &&
                     ((WideCondition == "COMMON_IS_LOCATED") || (WideCondition == "COMMON_ENCUMBERED_PR")))
            {
                pd.City = p_Cleanup.CityCleanup(tmatch.Groups["city"].ToString(), 5, true);
            }

            // Apartment detection is currently disabled; pd.isApt stays false and pd.AptNo stays
            // empty. The disabled logic is kept below for reference (its local is commented out
            // with it so the method no longer declares an unused variable).
            //
            //string tno;
            //
            // Some of them could have city info in the city, so check city first
            // (only in Georgia Condition) - that is where it actually occurs
            //if (NarrowCondition == "Georgia" && pd.City != "")
            //{
            //    pd.isApt = p_Cleanup.isApartment(tmatch.Groups["city"].ToString(), out tno);
            //}
            // Otherwise just check the address and assign it if necessary
            //if (!(pd.isApt) && (pd.isApt = p_Cleanup.isApartment(pd.Address, out tno)) && (tno != ""))
            //{
            //    pd.AptNo = tno;
            //}
        }
예제 #4
0
        //Main Parse Logic
        #region Main Parse Logic
        /// <summary>
        /// Runs <c>COMMON_Parse_Functions</c> for each wide parse condition in turn and stops at
        /// the first one that succeeds.
        /// </summary>
        /// <param name="desc">Passed through to <c>COMMON_Parse_Functions</c>.</param>
        /// <param name="found">Passed through to <c>COMMON_Parse_Functions</c>.</param>
        /// <param name="pd">Passed through to <c>COMMON_Parse_Functions</c>.</param>
        /// <returns><c>true</c> as soon as one wide condition parses; otherwise <c>false</c>.</returns>
        public bool Iterate_Over_COMMON_Parse_Functions(ref string desc, ref string found, ref PropertyDetails pd)
        {
            bool parsed = false;

            // Execute all parsing, one wide condition at a time
            foreach (string wideType in COMMON_Get_Wide_ParseConditions())
            {
                parsed = COMMON_Parse_Functions(wideType, ref desc, ref found, ref pd);
                if (parsed)
                {
                    // First success wins; the remaining conditions are not tried
                    break;
                }
            }

            return(parsed);
        }
예제 #5
0
        /// <summary>
        /// Click handler: runs a regular expression over the file selected in
        /// <c>comboBoxFileSelect</c> and lists every match in <c>dataGridView1</c> - one row per
        /// match, a "LineNumber" column followed by one column per named group.
        /// </summary>
        /// <remarks>
        /// When <c>chkParseParam</c> is checked, the narrow and wide condition names are taken
        /// from the selected file name ("n) narrow - wide.txt"), the expression comes from
        /// <c>p_DailyR.GetRESubExpression</c>, and the "addr", "city" and "zip" cells show the
        /// values produced by <c>ParsePropertyDetail</c>. Otherwise the expression typed into
        /// <c>textBoxRE</c> is used and the cells show the raw captures.
        /// Nothing happens when the file does not exist. Any exception raised along the way is
        /// reported in a message box.
        /// </remarks>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnParse_Click(object sender, EventArgs e)
        {
            try
            {
                string selectedName = comboBoxFileSelect.Items[comboBoxFileSelect.SelectedIndex].ToString();
                string FileToLoad   = Base_Folder_Path + selectedName;

                if (!File.Exists(FileToLoad))
                {
                    return;
                }

                bool useBuiltIn = chkParseParam.Checked;

                if (!useBuiltIn && textBoxRE.Text == "")
                {
                    MessageBox.Show("Enter Regular Expression");
                    return;
                }

                // make sure that all the data is gone before continuing
                dataGridView1.Columns.Clear();

                // Dispose the reader as soon as the text is loaded so the file handle is
                // released deterministically instead of waiting for the garbage collector
                string ToParse;
                using (StreamReader sr = new StreamReader(FileToLoad, System.Text.Encoding.GetEncoding(1252)))
                {
                    ToParse = sr.ReadToEnd();
                }

                Regex  temprx;
                String WideCondition   = "";
                String NarrowCondition = "";

                if (useBuiltIn)
                {
                    // Use the built-in expression: first parse out what we need from the
                    // selected file name. The dot before "txt" is escaped so it only matches
                    // a literal '.'.
                    Regex nameRx    = new Regex(@"\d\)\s(?<sename>.*)\s-\s(?<pname>.*)\.txt");
                    Match nameMatch = nameRx.Match(selectedName);

                    // Now assign the correct RE
                    NarrowCondition = nameMatch.Groups["sename"].ToString();
                    WideCondition   = nameMatch.Groups["pname"].ToString();

                    temprx = p_DailyR.GetRESubExpression(NarrowCondition);
                }
                else
                {
                    temprx = new Regex(textBoxRE.Text);
                }

                // Collect the named groups; groups whose name starts with a digit are the
                // numbered ones and are not shown
                ArrayList grouplist = new ArrayList();
                foreach (string name in temprx.GetGroupNames())
                {
                    if (!char.IsDigit(name, 0))
                    {
                        grouplist.Add(name);
                    }
                }
                int noOfGroups = grouplist.Count;

                DataGridViewTextBoxColumn dl = new DataGridViewTextBoxColumn();
                dl.HeaderText = "LineNumber";
                dl.Name       = "LineNumber";
                dl.FillWeight = 25;
                this.dataGridView1.Columns.Add(dl);

                // One column per named group, sharing the width equally
                foreach (string name in grouplist)
                {
                    DataGridViewTextBoxColumn dg = new DataGridViewTextBoxColumn();
                    dg.HeaderText = name;
                    dg.Name       = name;
                    dg.FillWeight = (100 / noOfGroups);
                    this.dataGridView1.Columns.Add(dg);
                }

                // Now parse the text file
                MatchCollection tmatches = temprx.Matches(ToParse);

                // Cell 0 is the running line number, cells 1..n the named groups
                string[] tstring = new string[(noOfGroups + 1)];
                DataGridViewRowCollection rows = this.dataGridView1.Rows;
                int it = 1;

                foreach (Match tmatch in tmatches)
                {
                    tstring[0] = it.ToString();

                    if (useBuiltIn && noOfGroups > 0)
                    {
                        // The property details depend only on the match, so they are computed
                        // once per match rather than once per column
                        Parser          pr = p_DailyR.get_Parser_pointer;
                        PropertyDetails pd = new PropertyDetails();
                        Match           t  = tmatch;    // a foreach variable cannot be passed by ref
                        pr.ParsePropertyDetail(ref t, WideCondition, NarrowCondition, ref pd);

                        for (int i = 0; i < noOfGroups; ++i)
                        {
                            string groupname = grouplist[i].ToString();

                            if (groupname == "addr")
                            {
                                tstring[i + 1] = pd.Address;
                            }
                            else if (groupname == "city")
                            {
                                tstring[i + 1] = pd.City;
                            }
                            else if (groupname == "zip")
                            {
                                tstring[i + 1] = pd.ZipCode.ToString();
                            }
                            else
                            {
                                tstring[i + 1] = tmatch.Groups[groupname].ToString();
                            }
                        }
                    }
                    else
                    {
                        for (int i = 0; i < noOfGroups; ++i)
                        {
                            tstring[i + 1] = tmatch.Groups[grouplist[i].ToString()].ToString();
                        }
                    }

                    ++it;
                    rows.Add(tstring);
                }
            }
            catch (Exception eC)
            {
                // NOTE(review): this also catches non-regex failures (file access, no item
                // selected) and reports them under the same caption.
                MessageBox.Show("Regulare Expression Error - " + eC.Message);
            }
        }