/// <summary>
/// Loads "2.pdf", extracts its text with <c>PDFTextStripper</c>, and pulls a fixed
/// set of labelled fields out of that text with regular expressions.
/// The raw text is written to <c>textBox4</c>; the extracted values, joined with
/// " / ", are written to <c>textBox3</c>.
/// </summary>
/// <remarks>
/// A field whose label is not found contributes an empty string, because
/// <c>Regex.Match</c> returns an unsuccessful match whose groups have an empty value.
/// </remarks>
private void parsePDF()
{
    string text;

    // The document holds the underlying file open, so close it even when
    // text extraction throws.
    PDDocument doc = PDDocument.load("2.pdf");
    try
    {
        PDFTextStripper stripper = new PDFTextStripper();
        text = stripper.getText(doc);
    }
    finally
    {
        doc.close();
    }

    // The organisation name uses its own pattern: the value is taken greedily
    // up to the last double quote on the same line.
    Regex orgNameRegex = new Regex(
        "Сокращенное наименование (.*)\"",
        RegexOptions.Multiline | RegexOptions.IgnoreCase);
    string orgName = orgNameRegex.Match(text).Groups[1].Value;

    // Labels are plain text here; ExtractField escapes them, so the parentheses
    // and dots in the street/house labels are matched literally instead of
    // being interpreted as a capture group and wildcards.
    string[] labels =
    {
        "ИНН",
        "КПП",
        "Должность",
        "Фамилия",
        "Имя",
        "Отчество",
        "ОГРН",
        "Почтовый индекс",
        "Субъект Российской Федерации",
        "Улица (проспект, переулок и т.д.)",
        "Дом (владение и т.п.)"
    };

    // Output order: organisation name first, then the fields in label order.
    string[] values = new string[labels.Length + 1];
    values[0] = orgName;
    for (int i = 0; i < labels.Length; i++)
    {
        values[i + 1] = ExtractField(text, labels[i]);
    }

    textBox4.Text = text;
    textBox3.Text = string.Join(" / ", values);
}

/// <summary>
/// Returns the text that follows "<paramref name="label"/> " on the first line of
/// <paramref name="text"/> where that label occurs (case-insensitively).
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="label">Literal label text; it is regex-escaped before use.</param>
/// <returns>The captured value, or an empty string when the label is not found.</returns>
private static string ExtractField(string text, string label)
{
    // NOTE: the pattern requires a carriage return after the value, so it only
    // matches when the extracted text uses CR LF line endings.
    // NOTE(review): when the following line starts with a non-digit, the
    // optional "(\n[^0-9])?" group lets the greedy capture keep the trailing
    // '\r' of the value line - confirm whether callers want that trimmed.
    Regex regex = new Regex(
        Regex.Escape(label) + " (.*)(\n[^0-9])?.*\r",
        RegexOptions.Multiline | RegexOptions.IgnoreCase);
    return regex.Match(text).Groups[1].Value;
}