/// <summary>
/// Walks every element on the results page whose content contains "viewImage",
/// opens it, drives the save dialog with mouse/keyboard automation to store the
/// document as a PDF under the configured pdfstore folder, runs OCR on the saved
/// file and appends the resulting lead to <c>_leads</c>.
/// </summary>
/// <remarks>
/// Any exception raised while handling one image is recorded as an error message
/// on that image's lead; the lead is still added to <c>_leads</c>.
/// NOTE(review): the Federal/State type is only detected for the first image and
/// then reused for all later ones - confirm a results page never mixes types.
/// </remarks>
private void ProcessResultsPage()
{
    IList<Element> images = _find.AllByCustom(e => e.Content.Contains("viewImage"));
    var first = true;
    var type = LeadType.State;

    // One generator for the whole page instead of a fresh instance per image.
    var rand = new Random();

    foreach (var element in images)
    {
        var lead = new Lead();
        try
        {
            _main.Actions.Click(element);

            if (first)
            {
                // The image viewer is slow the first time it loads, so it gets its own wait.
                type = RecursiveParentTextSearch("FEDERAL TAX", element, 5) ? LeadType.Federal : LeadType.State;
                Thread.Sleep(1000 * _config.Performancetweaks["imageviewerload"]);
                first = false;
            }
            else
            {
                // Viewer already loaded: only wait for the image itself.
                Thread.Sleep(1000 * _config.Performancetweaks["imagewaitafterload"]);
            }

            _main.WaitUntilReady();

            // The save button is clicked by screen position relative to the browser window.
            var saveButton = _config.Positionals["SavePdf"];
            int x = _manager.ActiveBrowser.Window.Location.X + saveButton.X;
            int y = _manager.ActiveBrowser.Window.Location.Y + saveButton.Y;

            if (!_manager.ActiveBrowser.Window.IsVisible)
            {
                _manager.ActiveBrowser.Window.SetFocus();
            }

            _manager.Desktop.Mouse.Click(MouseClickType.LeftClick, x, y);
            Thread.Sleep(200);

            // Five tabs move the focus to the control that is then activated with Space.
            for (var i = 0; i < 5; i++)
            {
                _manager.Desktop.KeyBoard.KeyPress(Keys.Tab);
            }

            var path = System.IO.Path.Combine(
                Properties.Settings.Default.pdfstore,
                rand.Next(9999999) + "_monroe" + (type == LeadType.Federal ? "_federal" : "_state") + ".pdf");
            lead.Document.Disklocation = path;

            _manager.Desktop.KeyBoard.KeyPress(Keys.Space);
            Thread.Sleep(1000 * _config.Performancetweaks["textdialogueload"]);
            _manager.Desktop.KeyBoard.TypeText(path, 10);
            _manager.Desktop.KeyBoard.KeyPress(Keys.Tab);
            _manager.Desktop.KeyBoard.KeyPress(Keys.Tab);
            _manager.Desktop.KeyBoard.KeyPress(Keys.Enter); // handle save pop up

            // handle file download pop up
            _manager.DialogMonitor.Stop();

            if (!lead.Document.Disklocation.IsEmpty())
            {
                PerformOCR(ref lead, type);
            }
        }
        catch (Exception ex)
        {
            lead.Messages.Add(new Message() { Content = ex.Message, Messagetype = MessageType.Error });
        }

        _leads.Add(lead);
    }
}
/// <summary>
/// Replaces <paramref name="lead"/> with the lead produced by OCR of the file at
/// <c>lead.Document.Disklocation</c>.
/// </summary>
/// <param name="lead">Lead whose document location is read; overwritten with the OCR result.</param>
/// <param name="type">Federal leads use the <c>Federal</c> parser, everything else uses <c>MonroeOcr</c>.</param>
protected override void PerformOCR(ref Lead lead, LeadType type)
{
    if (type != LeadType.Federal)
    {
        var stateParser = new MonroeOcr(lead.Document.Disklocation, "Monroe");
        lead = stateParser.GetLeadFromOcr();
        return;
    }

    var federalParser = new Federal(lead.Document.Disklocation, "Monroe");
    lead = federalParser.GetLeadFromOcr();
}
/// <summary>
/// Placeholder for document retrieval; not implemented.
/// </summary>
/// <exception cref="NotImplementedException">Always thrown.</exception>
private bool GetDocument(Element element, ref Lead lead)
{
    // No implementation yet: every call fails.
    throw new NotImplementedException();
}
/// <summary>
/// Each sample, parsed with the "names have commas" flag set, must end up in
/// <c>Businessname</c> and leave <c>First</c> and <c>Last</c> empty.
/// </summary>
public void Should_Parse_Business_Names()
{
    var samples = new List<string>
    {
        "Larry's Garage Service",
        "Henry Plumbing",
        "Holland & Hart",
        "Patent Offices of Rick Sanchez, P.C",
        "Steve Wall, Attorney",
        "FRVP",
        "Dave's beef"
    };

    foreach (var sample in samples)
    {
        var parsed = new Lead();

        NameHelper.ParseName(ref parsed, sample, true);

        Assert.IsFalse(parsed.Businessname.IsEmpty(), sample + " didn't parse to business name");
        Assert.IsTrue(parsed.Last.IsEmpty() && parsed.First.IsEmpty(), sample + " parsed to incorrect place");
    }
}
/// <summary>
/// "Last, First Middle" input must populate the three person fields and leave
/// the business name empty.
/// </summary>
public void Should_Parse_Comma_Style_With_Full_Name()
{
    var parsed = new Lead();

    NameHelper.ParseName(ref parsed, "Allen, Rob Milton", false);

    // Checked one condition at a time, in the same order as a single && chain.
    Assert.IsTrue(parsed.Businessname.IsEmpty());
    Assert.IsTrue(parsed.First == "Rob");
    Assert.IsTrue(parsed.Last == "Allen");
    Assert.IsTrue(parsed.Middle == "Milton");
}
/// <summary>
/// Logging hook for a lead. The body is empty, so calling it currently does nothing.
/// </summary>
/// <param name="arg">Lead that would be logged; not used.</param>
public static void WriteLog(Lead arg)
{
    // No implementation yet.
}
/// <summary>
/// "First Middle Last" input without commas must populate the three person
/// fields and leave the business name empty.
/// </summary>
public void Should_Parse_3_Token_Name()
{
    var parsed = new Lead();

    NameHelper.ParseName(ref parsed, "Robert Milton Allen", false);

    // Checked one condition at a time, in the same order as a single && chain.
    Assert.IsTrue(parsed.Businessname.IsEmpty());
    Assert.IsTrue(parsed.First == "Robert");
    Assert.IsTrue(parsed.Last == "Allen");
    Assert.IsTrue(parsed.Middle == "Milton");
}
/// <summary>
/// All-caps input must come back in proper case ("ROBERT" becomes "Robert") in
/// the person fields, with the business name left empty.
/// </summary>
public void Should_Parse_Upper_Case_Name_ToProper()
{
    var parsed = new Lead();

    NameHelper.ParseName(ref parsed, "ROBERT MILTON ALLEN", false);

    // Checked one condition at a time, in the same order as a single && chain.
    Assert.IsTrue(parsed.Businessname.IsEmpty());
    Assert.IsTrue(parsed.First == "Robert");
    Assert.IsTrue(parsed.Last == "Allen");
    Assert.IsTrue(parsed.Middle == "Milton");
}
/// <summary>
/// "Last, First" input must populate first and last name and leave the business
/// name empty.
/// </summary>
public void Should_Last_Name_First_With_Comma_And_Just_First()
{
    var parsed = new Lead();

    NameHelper.ParseName(ref parsed, "Allen, Rob", false);

    // Checked one condition at a time, in the same order as a single && chain.
    Assert.IsTrue(parsed.Businessname.IsEmpty());
    Assert.IsTrue(parsed.First == "Rob");
    Assert.IsTrue(parsed.Last == "Allen");
}
/// <summary>
/// Builds a lead from the detail view shown in <paramref name="detailbrowser"/>:
/// reads the labelled values (filed date, consideration amount, document, book
/// and page numbers, page count) and then the party name from the detail tables.
/// </summary>
/// <param name="detailbrowser">Browser showing the detail view.</param>
/// <returns>
/// The populated lead. If anything throws, the lead is returned with
/// <c>LeadStatus.Error</c> and the exception message attached.
/// </returns>
private Lead ProcessDetailView(Browser detailbrowser)
{
    var lead = new Lead();
    try
    {
        lead.Recordeddate = GetNextSiblingText(detailbrowser, e => e.TextContent.Contains("Filed Date"));
        lead.Debt = GetNextSiblingText(detailbrowser, e => e.TextContent.Contains("Consideration Amt"));
        lead.Id = GetNextSiblingText(detailbrowser, e => e.TextContent.Contains("Document Number"));
        lead.Book = GetNextSiblingText(detailbrowser, e => e.TextContent.Contains("Book Number"));
        lead.Page = GetNextSiblingText(detailbrowser, e => e.TextContent.Contains("Page Number"));

        // # of Pages: keep the existing value when the text is not a valid integer.
        var docpages = GetNextSibling(detailbrowser, e => e.TextContent.Contains("# of Pages"));
        if (docpages != null)
        {
            int pages;
            if (int.TryParse(docpages.TextContent, out pages))
            {
                lead.Document.Pages = pages;
            }
        }

        var headers = detailbrowser.Find.AllByAttributes("class=clsDetailSubHead");
        foreach (var headertable in headers.Select(header => GetFirstParentByTagName(header, "table")))
        {
            if (headertable.InnerText.Contains("Property Address"))
            {
                // haven't seen one with a property address yet.
                continue;
            }

            // Only tables whose InnerText is empty are treated as name tables.
            if (!headertable.InnerText.IsEmpty())
            {
                continue;
            }

            var htmlnametable = new HtmlControl(headertable);
            var namecells = htmlnametable.Find.AllByAttributes("class=clsdetaildata");
            if (namecells.Count == 0)
            {
                continue;
            }

            var name = namecells[0].InnerText.UnEscapeXml();
            if (name.Contains(","))
            {
                // "Last, First": trim so the space after the comma is not stored in the name.
                var names = name.Split(',');
                lead.First = names[1].Trim();
                lead.Last = names[0].Trim();
            }
            else
            {
                lead.Businessname = name;
            }
        }
    }
    catch (Exception e)
    {
        lead.Status = LeadStatus.Error;
        lead.Messages.Add(new Message() { Content = e.Message, Messagetype = MessageType.Error });
    }

    return lead;
}
/// <summary>
/// OCR hook for derived classes. The base implementation does nothing and
/// leaves <paramref name="lead"/> untouched.
/// </summary>
/// <param name="lead">Lead to enrich; unchanged here.</param>
/// <param name="type">Lead type; unused here.</param>
protected virtual void PerformOCR(ref Lead lead, LeadType type)
{
    // Nothing to do in the base class.
}
/// <summary>
/// Opens the image viewer for the table row containing <paramref name="element"/>
/// and, when the lead reports at least one page, drives the save dialog with
/// mouse/keyboard automation to store the document under the pdfstore folder.
/// All browsers other than <c>_main</c> are closed afterwards.
/// </summary>
/// <param name="element">Element inside the result row; its enclosing "tr" is searched for the image link.</param>
/// <param name="lead">Lead whose <c>Document.Url</c> is set and which receives error messages.</param>
/// <param name="viewerloaded">
/// When true the "imageviewerload" wait is used, otherwise "imagewaitafterload".
/// NOTE(review): the visible caller passes its first-iteration flag here, so the
/// parameter name reads inverted - confirm before renaming.
/// </param>
/// <returns>True when the save sequence was performed; false otherwise or on any exception.</returns>
/// <remarks>
/// NOTE(review): the path typed into the save dialog is not stored anywhere in
/// this method, while the visible caller reads <c>_imagelocation</c> afterwards -
/// verify something else (e.g. <c>RemoveListener</c>) assigns it.
/// </remarks>
private bool GetDocument(Element element, ref Lead lead, bool viewerloaded)
{
    try
    {
        var detailrow = GetFirstParentByTagName(element, "tr");
        var tablerow = new HtmlTableRow(detailrow);
        var imageclick = tablerow.Find.ByCustom(e => e.Content.Contains("viewImageFrames.asp"));
        lead.Document.Url = GetUrlFromJavaPopHref(imageclick.Content);

        _main.Actions.Click(imageclick);

        // just wait for it to finish loading (config values are in seconds)
        if (viewerloaded)
        {
            Thread.Sleep(1000 * _config.Performancetweaks["imageviewerload"]);
        }
        else
        {
            Thread.Sleep(1000 * _config.Performancetweaks["imagewaitafterload"]);
        }

        var documentdound = false;
        if (lead.Document.Pages > 0)
        {
            // try and use the save button, located by position relative to the window
            int x = _manager.ActiveBrowser.Window.Location.X + _config.Positionals["SavePdf"].X;
            int y = _manager.ActiveBrowser.Window.Location.Y + _config.Positionals["SavePdf"].Y;

            if (!_manager.ActiveBrowser.Window.IsVisible)
            {
                _manager.ActiveBrowser.Window.SetFocus();
            }

            _manager.Desktop.Mouse.Click(MouseClickType.LeftClick, x, y);
            Thread.Sleep(200);

            // Five tabs move the focus to the control that is then activated with Space.
            for (var i = 0; i < 5; i++)
            {
                _manager.Desktop.KeyBoard.KeyPress(Keys.Tab);
            }

            var path = Path.Combine(Properties.Settings.Default.pdfstore, lead.GetHashCode() + ".tif");

            _manager.Desktop.KeyBoard.KeyPress(Keys.Space);
            Thread.Sleep(_config.Performancetweaks["textdialogueload"] * 1000);
            _manager.Desktop.KeyBoard.TypeText(path, 100);
            _manager.Desktop.KeyBoard.KeyPress(Keys.Tab);
            _manager.Desktop.KeyBoard.KeyPress(Keys.Tab);
            _manager.Desktop.KeyBoard.KeyPress(Keys.Enter); // handle save pop up
            documentdound = true;

            // handle file download pop up
            _manager.DialogMonitor.Stop();
        }

        var test = _manager.ActiveBrowser.Url;
        Console.WriteLine(test);

        // Snapshot first: closing a browser while enumerating the live sequence
        // can invalidate the enumerator if the manager drops closed browsers.
        var popups = _manager.Browsers.Where(browser => browser.ClientId != _main.ClientId).ToList();
        foreach (var browser in popups)
        {
            browser.Close();
        }

        return documentdound;
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
        lead.Messages.Add(new Message() { Content = e.Message, Messagetype = MessageType.Error });
    }
    finally
    {
        RemoveListener();
    }

    return false;
}
/// <summary>
/// Processes every element whose content contains "Detail.asp": opens the detail
/// pop-up and reads the lead from it; when that lead is not valid, falls back to
/// downloading the document image and running OCR on it.
/// </summary>
/// <returns>The accumulated <c>_leads</c> list.</returns>
/// <remarks>
/// An exception while handling one element is recorded on that element's lead,
/// which is still added. All browsers are closed before returning.
/// </remarks>
public IList<Lead> ProcessMultiple()
{
    // first scroll through and try to get the data from the ViewDetails pop up
    IList<Element> details = _find.AllByCustom(e => e.Content.Contains("Detail.asp"));
    var first = true;

    foreach (var element in details)
    {
        var lead = new Lead();
        try
        {
            _main.Actions.Click(element);
            _main.WaitUntilReady();

            var detailbrowser = _manager.ActiveBrowser;
            if (detailbrowser == null)
            {
                continue;
            }

            try
            {
                detailbrowser.RefreshDomTree();
                detailbrowser.WaitUntilReady();
            }
            catch (Exception refreshError)
            {
                // Best effort: a failed refresh must not stop parsing, but leave a trace.
                Console.WriteLine(refreshError.Message);
            }

            lead = ProcessDetailView(detailbrowser);

            // we can leave open the details window for later use
            if (lead.IsValid())
            {
                lead.Status = LeadStatus.Complete;
                _leads.Add(lead);
                continue;
            }

            if (GetDocument(element, ref lead, first))
            {
                lead.Document.Disklocation = _imagelocation;
                PerformOCR(ref lead, LeadType.State);
            }

            first = false;
        }
        catch (Exception ex)
        {
            lead.Messages.Add(new Message() { Content = ex.Message, Messagetype = MessageType.Error });
        }

        _leads.Add(lead);
    }

    // Snapshot first: closing a browser while enumerating the live collection
    // can invalidate the enumerator if the manager drops closed browsers.
    foreach (var browse in _manager.Browsers.ToList())
    {
        browse.Close();
    }

    return _leads;
}
/// <summary>
/// Classifies <paramref name="name"/> as a business or a person and writes the
/// result into <paramref name="lead"/>: either <c>Businessname</c> (upper-cased
/// input) or <c>First</c>/<c>Middle</c>/<c>Last</c> (Pascal-cased tokens).
/// </summary>
/// <param name="lead">Lead that receives the parsed name parts.</param>
/// <param name="name">Raw name text; XML-unescaped and upper-cased before parsing.</param>
/// <param name="nameshavecommas">
/// When true, a name without any comma is treated as a business.
/// </param>
public static void ParseName(ref Lead lead, string name, bool nameshavecommas)
{
    // NOTE(review): as it appears here both Replace arguments are a single plain
    // space, which makes the call a no-op - presumably one side was a double or
    // non-breaking space originally; confirm against the real file.
    name = name.UnEscapeXml().Replace(" ", " ").ToUpper();

    bool looksLikeBusiness = false;
    bool looksLikePerson = false;
    var tokens = new List<string>();

    if (!name.Contains(","))
    {
        // if there is no comma then it is business (only when commas are expected)
        if (nameshavecommas)
        {
            looksLikeBusiness = true;
        }
    }
    else
    {
        string[] commaParts = name.Split(',');

        // Candidate surname: everything before the first comma minus prefixes/suffixes.
        string surname = "";
        foreach (string word in commaParts[0].Split(' '))
        {
            if (nameprefix.Contains(word) || namesuffix.Contains(word))
            {
                continue;
            }
            surname += word + " ";
        }
        surname = surname.Trim();

        if (surname.Split(' ').Length <= 1 && !businesstokens.Contains(surname))
        {
            lead.Last = surname.ToPascalCase();
            looksLikePerson = true;

            // Collect the remaining usable tokens from every comma-separated part.
            foreach (string part in commaParts)
            {
                foreach (string piece in part.Split(' '))
                {
                    if (nameprefix.Contains(piece) || namesuffix.Contains(piece))
                    {
                        continue;
                    }
                    if (piece.IsEmpty() || piece == surname)
                    {
                        continue;
                    }
                    tokens.Add(piece);
                }
            }
        }
    }

    if (name.Contains('\''))
    {
        looksLikeBusiness = true;
    }

    // this means it is not standard and we are defaulting to a person and not business
    if (!looksLikePerson && !looksLikeBusiness)
    {
        string[] segments = name.Split(',');
        foreach (string rawSegment in segments)
        {
            string[] words = rawSegment.Trim().Split(' ');
            foreach (string w in words)
            {
                if (nameprefix.Contains(w) || namesuffix.Contains(w))
                {
                    looksLikePerson = true;
                }
                else
                {
                    tokens.Add(w);
                }
            }

            if (looksLikePerson)
            {
                continue;
            }

            if (IsInBusinessList(words) || MatchesBusinessLengths(words.Length, segments.Length > 1))
            {
                looksLikeBusiness = true;
                break;
            }
        }

        if (!looksLikePerson && MatchesBusinessLengths(tokens.Count, false))
        {
            looksLikeBusiness = true;
        }
    }

    if (looksLikeBusiness)
    {
        lead.Businessname = name;
        return;
    }

    // Whether the surname was already fixed decides how the tokens are distributed;
    // this is evaluated once, before any token is assigned.
    bool surnameKnown = !lead.Last.IsEmpty();
    foreach (string tok in tokens)
    {
        if (nameprefix.Contains(tok) || namesuffix.Contains(tok))
        {
            continue;
        }

        if (lead.First.IsEmpty())
        {
            lead.First = tok.ToPascalCase();
        }
        else if (lead.Middle.IsEmpty() && (surnameKnown || tokens.Count > 2))
        {
            lead.Middle = tok.ToPascalCase();
        }
        else if (!surnameKnown && lead.Last.IsEmpty())
        {
            lead.Last = tok.ToPascalCase();
        }
    }
}
/// <summary>
/// Clicks <paramref name="element"/> to open the document page, downloads the
/// image whose src starts with the tmpdocs URL and saves it under the pdfstore
/// folder; the saved path is kept in <c>_imagelocation</c>.
/// </summary>
/// <param name="element">Link that navigates to the document image page.</param>
/// <param name="lead">Lead used for the file name (hash code) and for error messages.</param>
/// <returns>True when the image was saved; false when it was not found or an exception occurred.</returns>
/// <remarks>The browser is always navigated back once, whether or not the download succeeded.</remarks>
private bool GetDocument(Element element, ref Lead lead)
{
    try
    {
        _main.Actions.Click(element); // moves forward once

        var imageel = _find.ByAttributes("src=~https://www.sos.state.co.us/tmpdocs");
        if (imageel == null)
        {
            // Check the element that was searched for; a constructed wrapper is never null.
            lead.Messages.Add(new Message() { Content = "Document image element was not found.", Messagetype = MessageType.Error });
            return false;
        }

        var image = new HtmlImage(imageel);
        _main.Actions.ScrollToVisible(imageel);
        _imagelocation = Path.Combine(Properties.Settings.Default.pdfstore, lead.GetHashCode() + Path.GetExtension(image.Src));

        // Dispose the response, its stream and the decoded image so the connection
        // and image handles are released after each download.
        WebRequest req = WebRequest.Create(image.Src);
        using (WebResponse response = req.GetResponse())
        using (var stream = response.GetResponseStream())
        using (var downloaded = Image.FromStream(stream))
        {
            downloaded.Save(_imagelocation);
        }

        return true;
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
        lead.Messages.Add(new Message() { Content = e.Message, Messagetype = MessageType.Error });
    }
    finally
    {
        _main.GoBack();
    }

    return false;
}
/// <summary>
/// "First Last" input without commas must populate first and last name and
/// leave the business name empty.
/// </summary>
public void Should_Parse_2_token_name()
{
    var parsed = new Lead();

    NameHelper.ParseName(ref parsed, "Rob Allen", false);

    // Checked one condition at a time, in the same order as a single && chain.
    Assert.IsTrue(parsed.Businessname.IsEmpty());
    Assert.IsTrue(parsed.First == "Rob");
    Assert.IsTrue(parsed.Last == "Allen");
}
/// <summary>
/// Processes every "goNextSubmit('sr" link on the results page: navigates two
/// pages forward to the filing detail, reads the filing number, recorded date,
/// debtor name and address, downloads the document image and runs OCR on it.
/// </summary>
/// <returns>The accumulated <c>_leads</c> list.</returns>
/// <remarks>
/// An exception while handling one link is recorded on that link's lead. The
/// browser is navigated back by as many pages as were actually opened, even
/// when parsing failed, so the next link starts from the results page.
/// NOTE(review): name and address are taken from lower-cased markup, so they are
/// stored in lower case - confirm that is intended.
/// </remarks>
public IList<Lead> ProcessMultiple()
{
    IList<Element> details = _find.AllByAttributes("href=~javascript:goNextSubmit('sr");

    foreach (var element in details)
    {
        var lead = new Lead();
        var pagesForward = 0;
        try
        {
            _main.Actions.Click(element); // moves browser forward 1
            pagesForward++;
            _main.Actions.InvokeScript(@"goNextSubmit('sr',1)"); // moves browser forward 1
            pagesForward++;

            var filingelement = _find.ByTagIndex("table", 7);
            var htmlfilingelement = new HtmlControl(filingelement);

            var fnumber = htmlfilingelement.Find.ByXPath("//tr[2]/td[1]");
            if (fnumber != null)
            {
                // The id is the text before the first <br>; fall back to the whole
                // text when there is no <br> or the offset does not fit the text.
                var idText = fnumber.InnerText;
                var breakAt = fnumber.InnerMarkup.ToLower().IndexOf("<br>");
                lead.Id = breakAt >= 0 && breakAt <= idText.Length
                    ? idText.Substring(0, breakAt)
                    : idText;
            }

            var date = htmlfilingelement.Find.ByXPath("//tr[2]/td[3]");
            if (date != null)
            {
                lead.Recordeddate = date.InnerText;
            }

            var imagelink = _find.ByAttributes("href=~javascript:goNextSubmit('sr");

            // get name
            var nameelement = _find.ByXPath("//html/body/table[2]/tbody/tr[2]/td/table/tbody/tr[2]/td/table/tbody/tr[4]/td[2]/form/div/table/tbody/tr[5]/td/table/tbody/tr[3]/td/table/tbody/tr[2]/td");
            if (nameelement != null)
            {
                // Expected layout: name <br> street <br> "city, co zip"
                string[] split = nameelement.InnerMarkup.ToLower().Split(new[] { "<br>" }, StringSplitOptions.None);
                if (split.Length == 3)
                {
                    var name = split[0];
                    var names = name.Split(',');

                    // "last, first" with a single-word last name is a person; anything else is a business.
                    if (names.Length > 1 && names[0].Split(' ').Length <= 1)
                    {
                        lead.First = names[1].Trim();
                        lead.Last = names[0].Trim();
                    }
                    else
                    {
                        lead.Businessname = name;
                    }

                    lead.Address.Streetaddress1 = split[1];

                    // city, co zip
                    var s2 = split[2].Split(',');
                    if (s2.Length == 2)
                    {
                        lead.Address.City = s2[0];

                        // Ignore the blank left by the space after the comma, otherwise
                        // " co zip" splits into three parts and state/zip are never set.
                        var s3 = s2[1].Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                        if (s3.Length == 2)
                        {
                            lead.Address.State = s3[0];
                            lead.Address.Zip = s3[1];
                        }
                    }
                }
            }

            if (imagelink != null)
            {
                lead.Document.Url = imagelink.InnerText;
                if (GetDocument(imagelink, ref lead))
                {
                    lead.Document.Disklocation = _imagelocation;
                    PerformOCR(ref lead, LeadType.Federal);
                }
            }
        }
        catch (Exception ex)
        {
            lead.Messages.Add(new Message() { Content = ex.Message, Messagetype = MessageType.Error });
        }

        // Undo exactly the navigation that happened, even after a failure above.
        try
        {
            for (var i = 0; i < pagesForward; i++)
            {
                _main.GoBack();
            }
        }
        catch (Exception ex)
        {
            lead.Messages.Add(new Message() { Content = ex.Message, Messagetype = MessageType.Error });
        }

        _leads.Add(lead);
    }

    return _leads;
}
/// <summary>
/// Passes <paramref name="filename"/> to the base class, then creates an empty
/// lead whose address county is set to <paramref name="county"/>.
/// </summary>
/// <param name="filename">Forwarded unchanged to the base constructor.</param>
/// <param name="county">County stored on the new lead's address.</param>
protected BaseOcrParser(string filename, string county)
    : base(filename)
{
    // Assign the field first, then stamp the county on it.
    _lead = new Lead();
    _lead.Address.County = county;
}