/// <summary>
/// Extracts the styled text of page 1 of a PDF located on the current user's Desktop and rebuilds
/// <c>CompareAddresses</c> with one <c>SinglePdfLine</c> per match of a
/// <c>&lt;span ... text ... span&gt;</c> pattern in that text.
/// </summary>
/// <param name="extractedText">
/// Text to search for. It is treated as literal text: regex metacharacters are escaped before the
/// text is embedded in the search pattern.
/// </param>
/// <param name="text">PDF file name, combined with the Desktop folder path.</param>
/// <remarks>
/// Only <see cref="ArgumentException"/> is handled (reported through a message box). A match whose
/// font-size or coordinates cannot be parsed by <c>float.Parse</c>, or that has fewer than four
/// coordinate values, raises an exception that propagates to the caller.
/// </remarks>
private void FindFont(string extractedText, string text)
{
    CompareAddresses = new List<SinglePdfLine>();

    string XmlDocument;
    PdfReader reader = new PdfReader(System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), text));
    try
    {
        TextWithFontExtractionStategy S = new TextWithFontExtractionStategy();
        XmlDocument = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, 1, S);
    }
    finally
    {
        // The reader is only needed for the extraction; release the file as soon as it is done.
        reader.Close();
    }

    try
    {
        // Escape the search text so characters such as '(' or '+' are matched literally
        // instead of being interpreted as regex syntax.
        string pattern = "(?<data><span.*?" + Regex.Escape(extractedText ?? string.Empty) + ".*?span>)";

        foreach (Match x in Regex.Matches(XmlDocument, pattern))
        {
            SinglePdfLine adobj = new ItextSharp.SinglePdfLine();
            string data = x.Groups["data"].Value;

            // "NOTBOLD" must be tested first because it contains "BOLD".
            if (data.Contains("NOTBOLD"))
            {
                adobj.Bold = false;
                data = data.Replace("NOTBOLD", "");
            }
            else
            {
                adobj.Bold = true;
                data = data.Replace("BOLD", "");
            }

            // Pull the individual style attributes out of the matched markup.
            adobj.Color = Regex.Match(data, "<span style=.*?color=(?<data>.*?)\">").Groups["data"].Value;
            adobj.FontFamily = Regex.Match(data, "<span style=\"font-family:(?<data>.*?);").Groups["data"].Value;
            string FontSize = Regex.Match(data, "<span style=.*?font-size:(?<data>.*?);.*?>").Groups["data"].Value;
            string coordinates = Regex.Match(data, "<span style=.*?coordinates:(?<data>.*?);.*?>").Groups["data"].Value;
            adobj.Address = Regex.Match(data, "<span.*?\">(?<data>.*?)<.span>").Groups["data"].Value;

            // The coordinates attribute is read as "URX,URY,LLX,LLY".
            string[] splits = coordinates.Split(',');
            adobj.URX = float.Parse(splits[0]);
            adobj.URY = float.Parse(splits[1]);
            adobj.LLX = float.Parse(splits[2]);
            adobj.LLY = float.Parse(splits[3]);
            adobj.FontSize = float.Parse(FontSize);

            CompareAddresses.Add(adobj);
        }
    }
    catch (ArgumentException ex)
    {
        MessageBox.Show("Argument Exception in Form1 Find Font , " + ex.Message);
    }
}
/// <summary>
/// Click handler that converts every row of <c>listView1</c> into a <c>SinglePdfLine</c>, collects
/// the lines in a new <c>FullAdress</c>, and stores that object in the <c>ThemeList</c> of the log
/// obtained from <c>FullAddressesLog.Create</c>, keyed by the document type text.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
{
    // Nothing to save while the list is empty.
    if (listView1.Items.Count == 0)
    {
        MessageBox.Show("Kindly Find the adresses in PDF");
        return;
    }

    FullAddressesLog log = FullAddressesLog.Create;
    FullAdress theme = new FullAdress();

    foreach (ListViewItem row in listView1.Items)
    {
        // Tempadress always refers to the line currently being built.
        var line = new ItextSharp.SinglePdfLine();
        Tempadress = line;

        var cells = row.SubItems;
        line.Address = cells[0].Text;
        line.FontSize = float.Parse(cells[1].Text);
        line.FontFamily = cells[2].Text;
        line.Bold = bool.Parse(cells[3].Text);
        line.Italic = bool.Parse(cells[4].Text);
        line.Color = cells[5].Text;
        line.DocumentType = textBoxDocumentType.Text;
        line.PageNo = (int)numericUpDown1.Value;

        if (row.Selected)
        {
            // A selected row takes its rectangle from the numeric boxes (integer part only).
            line.LLX = (int)numericUpDown4.Value;
            line.LLY = (int)numericUpDown5.Value;
            line.URX = (int)numericUpDown2.Value;
            line.URY = (int)numericUpDown3.Value;
        }
        else
        {
            // Every other row keeps the coordinates stored in its own cells.
            line.URX = float.Parse(cells[6].Text);
            line.URY = float.Parse(cells[7].Text);
            line.LLX = float.Parse(cells[8].Text);
            line.LLY = float.Parse(cells[9].Text);
        }

        theme.AdressLines.Add(line);
    }

    // The document type of the last processed row becomes the key.
    theme.FullAdressID = Tempadress.DocumentType;

    // Replace any entry previously saved under the same key.
    if (log.ThemeList.ContainsKey(theme.FullAdressID))
    {
        log.ThemeList.Remove(theme.FullAdressID);
    }
    log.ThemeList.Add(theme.FullAdressID, theme);

    MessageBox.Show("Saved Successfully");
}
/// <summary>
/// Searches the styled text extracted from one PDF page for every line of
/// <paramref name="extractedText"/> and adds one row to <c>listView1</c> per match, holding the
/// searched text, font size, font family, bold flag, italic flag, colour and the four coordinates.
/// The PDF is the file named in <c>textBox1</c> on the current user's Desktop; the page number comes
/// from <c>numericUpDown1</c>.
/// </summary>
/// <param name="extractedText">
/// Newline-separated text. Each line is searched for separately as literal text (regex
/// metacharacters are escaped); a space in the line matches any single whitespace character.
/// </param>
/// <remarks>
/// <c>Numeric_KeyPad_Lock</c> is set while the method runs and is always cleared on exit.
/// Only <see cref="ArgumentException"/> is handled (reported through a message box). A match whose
/// font-size or coordinates cannot be parsed by <c>float.Parse</c>, or that has fewer than four
/// coordinate values, raises an exception that propagates to the caller.
/// </remarks>
private void FindFont(string extractedText)
{
    string[] ExtractedTexts = extractedText.Split('\n');
    Numeric_KeyPad_Lock = true;
    try
    {
        string XmlDocument;
        PdfReader reader = new PdfReader(System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), textBox1.Text));
        try
        {
            TextWithFontExtractionStategy S = new TextWithFontExtractionStategy();
            XmlDocument = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, (int)numericUpDown1.Value, S);
        }
        finally
        {
            // The reader is only needed for the extraction; release the file as soon as it is done.
            reader.Close();
        }

        foreach (var extractedtext in ExtractedTexts)
        {
            try
            {
                // Escape the line so it is matched literally. Regex.Escape writes a space as "\ ";
                // that is turned into "\s" so a space still matches any single whitespace character.
                string pattern = "(?<data><span.*?" + Regex.Escape(extractedtext).Replace(@"\ ", @"\s") + ".*?span>)";

                foreach (Match x in Regex.Matches(XmlDocument, pattern))
                {
                    SinglePdfLine adobj = new ItextSharp.SinglePdfLine();
                    string data = x.Groups["data"].Value;

                    // "NOTBOLD" must be tested first because it contains "BOLD".
                    if (data.Contains("NOTBOLD"))
                    {
                        adobj.Bold = false;
                        data = data.Replace("NOTBOLD", "");
                    }
                    else
                    {
                        adobj.Bold = true;
                        data = data.Replace("BOLD", "");
                    }

                    // Pull the individual style attributes out of the matched markup.
                    adobj.Color = Regex.Match(data, "<span style=.*?color=(?<data>.*?);.*?>").Groups["data"].Value;
                    adobj.FontFamily = Regex.Match(data, "<span style=\"font-family:(?<data>.*?);.*?").Groups["data"].Value;
                    string FontSize = Regex.Match(data, "<span style=.*?font-size:(?<data>.*?);.*?>").Groups["data"].Value;
                    string coordinates = Regex.Match(data, "<span style=.*?coordinates:(?<data>.*?);.*?>").Groups["data"].Value;

                    // The coordinates attribute is read as "URX,URY,LLX,LLY".
                    string[] splits = coordinates.Split(',');
                    adobj.URX = float.Parse(splits[0]);
                    adobj.URY = float.Parse(splits[1]);
                    adobj.LLX = float.Parse(splits[2]);
                    adobj.LLY = float.Parse(splits[3]);
                    adobj.FontSize = float.Parse(FontSize);

                    // NOTE(review): a new ListViewItem already owns SubItems[0], so these ten Add
                    // calls yield eleven cells and the last one keeps its "LLY" placeholder.
                    // Left unchanged in case other code depends on the cell count.
                    ListViewItem item = new ListViewItem();
                    foreach (string placeholder in new[] { "Address", "Size", "Font", "Bold", "Italic", "Color", "URX", "URY", "LLX", "LLY" })
                    {
                        item.SubItems.Add(placeholder);
                    }

                    item.SubItems[0].Text = extractedtext;
                    item.SubItems[2].Text = adobj.FontFamily;
                    item.SubItems[1].Text = adobj.FontSize.ToString();
                    item.SubItems[3].Text = adobj.Bold.ToString();
                    item.SubItems[4].Text = adobj.Italic.ToString();
                    item.SubItems[5].Text = adobj.Color.ToString();

                    // Fill the coordinate cells before touching the numeric boxes: if a box rejects
                    // a value, the row must still hold parseable numbers rather than placeholders.
                    item.SubItems[6].Text = adobj.URX.ToString();
                    item.SubItems[7].Text = adobj.URY.ToString();
                    item.SubItems[8].Text = adobj.LLX.ToString();
                    item.SubItems[9].Text = adobj.LLY.ToString();

                    Tempadress = new SinglePdfLine();

                    // Mirror each coordinate (integer part) into its numeric box; a value the box
                    // rejects is reported and the remaining coordinates are still processed.
                    try
                    {
                        numericUpDown2.Value = (int)adobj.URX;
                        Tempadress.URX = adobj.URX;
                    }
                    catch (ArgumentException)
                    {
                        MessageBox.Show("Argument Exception while Getting numeric box ");
                    }

                    try
                    {
                        numericUpDown3.Value = (int)adobj.URY;
                        Tempadress.URY = adobj.URY;
                    }
                    catch (ArgumentException)
                    {
                        MessageBox.Show("Argument Exception while Getting numeric box ");
                    }

                    try
                    {
                        numericUpDown4.Value = (int)adobj.LLX;
                        Tempadress.LLX = adobj.LLX;
                    }
                    catch (ArgumentException)
                    {
                        MessageBox.Show("Argument Exception while Getting numeric box ");
                    }

                    try
                    {
                        numericUpDown5.Value = (int)adobj.LLY;
                        Tempadress.LLY = adobj.LLY;
                    }
                    catch (ArgumentException)
                    {
                        MessageBox.Show("Argument Exception while Getting numeric box ");
                    }

                    adobj.DocumentType = textBoxDocumentType.Text;
                    listView1.Items.Add(item);
                    listView1.Update();

                    // The lock is released before radioButton3 is checked, keeping the original order.
                    Numeric_KeyPad_Lock = false;
                    radioButton3.Checked = true;
                }
            }
            catch (ArgumentException)
            {
                MessageBox.Show("Input String contains invalid characters");
            }
        }
    }
    finally
    {
        // Previously the lock stayed set when nothing matched or an exception escaped.
        Numeric_KeyPad_Lock = false;
    }
}