Example #1
0
        /// <summary>
        /// Breaks the extracted text into sentence-sized lines, grouped by paragraph.
        /// </summary>
        /// <param name="pdfContents">Extraction result whose <c>Text</c> is reformatted.</param>
        /// <returns>
        /// One entry per non-empty fragment between '.' characters (hyphens removed, "." appended),
        /// followed by a "\r\n" entry after each paragraph.
        /// </returns>
        private string[] FormatLines(TextExtractionResult pdfContents)
        {
            // Mark double line breaks with a placeholder, then drop "hyphen + line break" sequences.
            var marked = pdfContents.Text
                .Replace("\r\n\r\n", "@@NEWPARAGRAPH@@")
                .Replace("-\r\n", "");

            // Placeholders become single '\n' separators; doubled separators are removed entirely.
            var normalised = marked
                .Replace("@@NEWPARAGRAPH@@", "\n")
                .Replace("\n\n", "");

            var lines = new List<string>();

            foreach (var block in normalised.Split('\n'))
            {
                foreach (var fragment in block.Split('.'))
                {
                    var cleaned = fragment.Replace("-", "");

                    // Empty fragments (e.g. after a trailing '.') produce no line.
                    if (cleaned.Length == 0)
                    {
                        continue;
                    }

                    lines.Add(cleaned + ".");
                }

                // Paragraph terminator entry.
                lines.Add("\r\n");
            }

            return lines.ToArray();
        }
Example #2
0
        /// <summary>
        /// Extracts the distribution list from the raw text and stores it in the report under
        /// <c>KnownReportParts.PART_DISTRIBUTION</c>.
        /// </summary>
        /// <param name="report">Report parts collected so far; receives the distribution entry.</param>
        /// <param name="rawInputBody">Extraction result whose <c>Text</c> is searched.</param>
        /// <exception cref="PartNotFoundException">
        /// The distribution pattern did not match and this part is required.
        /// </exception>
        public void Build(Dictionary <string, string> report, TextExtractionResult rawInputBody)
        {
            var rslt = _regex.Match(rawInputBody.Text);

            if (!rslt.Success)
            {
                if (_required)
                {
                    throw new PartNotFoundException("Distribution was not found.");
                }

                return;
            }

            var tmp   = rslt.Groups[1].Value.ToUpper().Trim("\r\n ".ToCharArray());
            var dists = tmp.Split(@"/\; ".ToCharArray());

            // Join the entries with ';'. The abbreviation replacements are applied to the joined
            // string rather than inside the Aggregate lambda: Aggregate without a seed never invokes
            // the lambda for a single entry, which previously left such an entry un-normalised.
            var distribution = dists.Aggregate((left, right) => left + ";" + right.Trim())
                                    .Replace("DIII", "D3")
                                    .Replace("DII", "D2")
                                    .Replace("DV", "D5");

            // Cut everything from the "FILE" marker onwards. When the marker is absent, keep the
            // whole string instead of letting Substring throw on a negative length.
            var fileIndex = distribution.IndexOf("FILE", System.StringComparison.Ordinal);
            if (fileIndex >= 0)
            {
                distribution = distribution.Substring(0, fileIndex);
            }

            distribution = distribution.Trim("; ".ToCharArray());
            distribution = _redundantColon.Replace(distribution, ";");
            report.Add(KnownReportParts.PART_DISTRIBUTION, RemoveSpaces(distribution));
        }
Example #3
0
        /// <summary>
        /// Determines the evaluation of the report and stores it under
        /// <c>KnownReportParts.PART_EVALUATION</c>.
        /// </summary>
        /// <param name="report">Report parts collected so far; its CNR entry is read.</param>
        /// <param name="rawInputBody">Extraction result whose <c>Text</c> is searched.</param>
        /// <exception cref="PartNotFoundException">
        /// No evaluation could be determined and this part is required.
        /// </exception>
        public void Build(Dictionary <string, string> report, TextExtractionResult rawInputBody)
        {
            // A CNR ending in one of the DocEval suffixes is evaluated as "DOC" without looking at the text.
            var cnrMarksDoc = DocEval.Any(suffix => report[KnownReportParts.PART_CNR].ToUpper().EndsWith(suffix));
            if (cnrMarksDoc)
            {
                report.Add(KnownReportParts.PART_EVALUATION, "DOC");
                return;
            }

            var primary = _regex.Match(rawInputBody.Text);
            if (primary.Success)
            {
                var captured = primary.Groups[1].Value.ToUpper();

                if (captured.Contains("DOC"))
                {
                    report.Add(KnownReportParts.PART_EVALUATION, "DOC");
                    return;
                }

                // Otherwise the evaluation is the concatenation of the two groups of the second pattern.
                var secondary = _regex2.Match(captured);
                if (secondary.Success)
                {
                    var evaluation = secondary.Groups[1].Value + secondary.Groups[2].Value;
                    report.Add(KnownReportParts.PART_EVALUATION, evaluation);
                    return;
                }
            }

            if (!_required)
            {
                return;
            }

            throw new PartNotFoundException("Evaluation was not found");
        }
Example #4
0
        /// <summary>
        /// Extracts the date of the report from the raw text and stores it under
        /// <c>KnownReportParts.PART_DATEOFREPORT_STR</c> (month-day-year) and
        /// <c>KnownReportParts.PART_DATEOFREPORT_UTC</c> (round-trip "O" format).
        /// </summary>
        /// <param name="report">Report parts collected so far; its CNR entry selects the date pattern.</param>
        /// <param name="textExtractionResult">Extraction result whose <c>Text</c> is searched.</param>
        /// <exception cref="PartNotFoundException">
        /// The date pattern did not match and this part is required, or the matched text could not
        /// be turned into a date.
        /// </exception>
        public void Build(Dictionary <string, string> report, TextExtractionResult textExtractionResult)
        {
            try
            {
                // CNRs ending in ".SDDP" use a dedicated date pattern.
                var isSddp = report[KnownReportParts.PART_CNR].ToUpper().EndsWith(".SDDP");
                var match  = isSddp
                    ? _regexSddp.Match(textExtractionResult.Text)
                    : _regex.Match(textExtractionResult.Text);

                if (!match.Success)
                {
                    if (_required)
                    {
                        throw new PartNotFoundException("Date of report not found");
                    }

                    // Optional part: stop here instead of parsing the empty groups of a failed match.
                    return;
                }

                AddDateOfReport(report, match.Groups[1].Value, match.Groups[2].Value, match.Groups[3].Value);
            }
            catch (PartNotFoundException)
            {
                // Already the exception type callers expect; keep its original stack trace.
                throw;
            }
            catch (Exception)
            {
                // Any lookup/parsing failure is reported as a missing part.
                throw new PartNotFoundException("Date of report not found");
            }
        }

        /// <summary>
        /// Converts the captured day, month name and year into the two date entries of the report.
        /// </summary>
        /// <param name="report">Receives the two date entries.</param>
        /// <param name="dayText">Captured day of month.</param>
        /// <param name="monthText">Captured month name; its first three letters are looked up in <c>_lookup</c>.</param>
        /// <param name="yearText">Captured year; a two-digit year has 2000 added.</param>
        private void AddDateOfReport(Dictionary <string, string> report, string dayText, string monthText, string yearText)
        {
            var monthKey = monthText.Trim().ToLower().Substring(0, 3);
            var monthNum = _lookup[monthKey];
            var yearStr  = yearText.Trim();
            var day      = int.Parse(dayText.Trim());

            if (yearStr.Length == 2)
            {
                yearStr = (int.Parse(yearStr) + 2000).ToString();
            }

            // Compute both values before touching the report so that a parsing failure does not
            // leave only one of the two entries behind.
            var dateStr = $"{monthNum}-{day:00}-{yearStr}";
            var dateUtc = DateTime.Parse($"{yearStr}-{monthNum}-{day:00}").ToString("O");

            report.Add(KnownReportParts.PART_DATEOFREPORT_STR, dateStr);
            report.Add(KnownReportParts.PART_DATEOFREPORT_UTC, dateUtc);
        }
 /// <summary>
 /// Handles a file drop on the text box: extracts the text of the first dropped file
 /// and shows it in the text box.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Drag-and-drop event data carrying the dropped file paths.</param>
 private void textBox1_DragDrop(object sender, DragEventArgs e)
 {
     var droppedPaths = (string[])e.Data.GetData(DataFormats.FileDrop);

     // Nothing usable was dropped.
     if (droppedPaths == null || droppedPaths.Length == 0)
     {
         return;
     }

     // Only the first dropped file is processed.
     var extraction = _textExtractor.Extract(droppedPaths[0]);
     textBox1.Text = extraction.Text;
 }
Example #6
0
        /// <summary>
        /// Stores the report subject under <c>KnownReportParts.PART_SUBJECT</c>, choosing its source
        /// from the CNR suffix and report type already present in <paramref name="report"/>.
        /// </summary>
        /// <param name="report">Report parts collected so far; CNR and report type entries are read.</param>
        /// <param name="rawInputBody">Extraction result providing the text and metadata.</param>
        /// <exception cref="PartNotFoundException">
        /// The report has no subject entry after processing and this part is required.
        /// </exception>
        public void Build(Dictionary <string, string> report, TextExtractionResult rawInputBody)
        {
            var cnrHasOpnlSuffix = OPNLS.Any(suffix => report[KnownReportParts.PART_CNR].ToUpper().EndsWith(suffix));

            if (cnrHasOpnlSuffix)
            {
                // Subject is composed of the upper-cased report type and the quoted CNR.
                var typeText = report[KnownReportParts.REPORTTYPE].ToUpper();
                var cnrText  = report[KnownReportParts.PART_CNR];
                report.Add(KnownReportParts.PART_SUBJECT, $"{typeText} '{cnrText}'");
            }
            else if (report[KnownReportParts.PART_CNR].ToUpper().EndsWith(".ESR"))
            {
                var esrMatch = _regex1.Match(rawInputBody.Text);
                if (esrMatch.Success)
                {
                    var cleaned = Cleanup(esrMatch.Groups[1].Value);
                    report.Add(KnownReportParts.PART_SUBJECT, _regex3.Replace(cleaned, ""));
                }
            }
            else
            {
                var reportTypeUpper = report[KnownReportParts.REPORTTYPE].ToUpper();
                var subjectFromMetadata = reportTypeUpper == "AFTER MEETING REPORT"
                                          || reportTypeUpper == "AFTER ACTIVITY REPORT"
                                          || report[KnownReportParts.PART_CNR].ToUpper().EndsWith(".SDDP");

                if (subjectFromMetadata)
                {
                    // Prefer a non-blank "subject" metadata entry; otherwise fall back to the report type.
                    var metadata   = rawInputBody.Metadata;
                    var hasSubject = metadata.ContainsKey("subject") && !string.IsNullOrWhiteSpace(metadata["subject"]);
                    var rawSubject = hasSubject ? metadata["subject"].ToUpper() : reportTypeUpper;

                    report.Add(KnownReportParts.PART_SUBJECT, RemoveRedundantSpaces(rawSubject));
                }
                else
                {
                    var textMatch = _regex.Match(rawInputBody.Text);
                    if (textMatch.Success)
                    {
                        var captured = textMatch.Groups[2].Value.Trim("\r\n\t ".ToCharArray()).ToUpper();
                        report.Add(KnownReportParts.PART_SUBJECT, RemoveInBetweenWhiteSpaces(captured));
                    }
                }
            }

            var subjectMissing = !report.ContainsKey(KnownReportParts.PART_SUBJECT);
            if (subjectMissing && _required)
            {
                throw new PartNotFoundException("Subject was not found.");
            }
        }
Example #7
0
        /// <summary>
        /// Derives the CNR from the file name already stored in the report and writes it to
        /// <c>KnownReportParts.PART_CNR</c> (trimmed, upper-cased).
        /// </summary>
        /// <param name="report">Report parts collected so far; must contain the file name entry.</param>
        /// <param name="rawInputBody">Extraction result (not used by this builder).</param>
        /// <exception cref="PartNotFoundException">
        /// The report has no file name entry, or the file name does not match the CNR pattern and
        /// this part is required.
        /// </exception>
        public void Build(Dictionary <string, string> report, TextExtractionResult rawInputBody)
        {
            string fileName;
            if (!report.TryGetValue(KnownReportParts.PART_FILENAME, out fileName))
            {
                throw new PartNotFoundException("A filename is needed in order to parse the CNR of report.");
            }

            var cnrMatch = _regex.Match(fileName);
            if (cnrMatch.Success)
            {
                // The whole match (group 0) is the CNR; an existing entry is overwritten.
                report[KnownReportParts.PART_CNR] = cnrMatch.Value.Trim().ToUpper();
                return;
            }

            if (_required)
            {
                throw new PartNotFoundException("CNR was not found.");
            }
        }
        /// <summary>
        /// Extracts text and metadata from the given binary content.
        /// </summary>
        /// <param name="data">Raw bytes handed to the text extractor.</param>
        /// <param name="onError">Invoked with any exception raised during extraction.</param>
        /// <param name="MetaData">
        /// Receives the extracted metadata, or an empty dictionary when extraction failed or the
        /// extracted text was blank.
        /// </param>
        /// <returns>The extracted text, or an empty string when extraction failed or the text was blank.</returns>
        public string ParseMediaText(byte[] data, Action <Exception> onError, out Dictionary <string, string> MetaData)
        {
            var extractor         = new TextExtractor();
            var extractedMetadata = new Dictionary<string, string>();
            var extractedText     = string.Empty;

            try
            {
                var extraction = extractor.Extract(data);

                // Blank results keep the empty defaults.
                if (!string.IsNullOrWhiteSpace(extraction.Text))
                {
                    extractedMetadata = (Dictionary<string, string>)extraction.Metadata;
                    extractedText     = extraction.Text;
                }
            }
            catch (Exception ex)
            {
                // Failures are reported through the callback; the defaults are returned.
                onError(ex);
            }

            MetaData = extractedMetadata;
            return extractedText;
        }
Example #9
0
        /// <summary>
        /// Stores the report body under <c>KnownReportParts.PART_BODY</c>.
        /// </summary>
        /// <param name="report">Report parts collected so far; its CNR entry is read.</param>
        /// <param name="rawInputBody">Extraction result whose <c>Text</c> supplies the body.</param>
        /// <exception cref="PartNotFoundException">
        /// Neither body pattern matched and the text is null, empty or whitespace.
        /// </exception>
        public void Build(Dictionary <string, string> report, TextExtractionResult rawInputBody)
        {
            var cnrUpper = report[KnownReportParts.PART_CNR].ToUpper();

            // For these CNR suffixes the whole text is the body.
            var wholeTextIsBody = cnrUpper.EndsWith(".AMR")
                                  || cnrUpper.EndsWith(".AAR")
                                  || cnrUpper.EndsWith(".SDDP");
            if (wholeTextIsBody)
            {
                report.Add(KnownReportParts.PART_BODY, RemoveInBetweenWhiteSpaces(rawInputBody.Text));
                return;
            }

            // Try the body patterns in order; the first match wins.
            var trimChars = "\r\n\t ".ToCharArray();
            foreach (var pattern in new[] { _regex, _regex2 })
            {
                var bodyMatch = pattern.Match(rawInputBody.Text);
                if (!bodyMatch.Success)
                {
                    continue;
                }

                var captured = bodyMatch.Groups[1].Value.Trim(trimChars);
                report.Add(KnownReportParts.PART_BODY, RemoveInBetweenWhiteSpaces(captured));
                return;
            }

            if (string.IsNullOrWhiteSpace(rawInputBody.Text))
            {
                throw new PartNotFoundException("Report body not found");
            }

            // No pattern matched: fall back to the cleaned-up full text.
            report.Add(KnownReportParts.PART_BODY, Cleanup(rawInputBody.Text));
        }
Example #10
0
        /// <summary>
        /// Determines the report type from the CNR already stored in the report and writes it to
        /// <c>KnownReportParts.REPORTTYPE</c>.
        /// </summary>
        /// <param name="report">Report parts collected so far; must contain the CNR entry.</param>
        /// <param name="rawInputBody">Extraction result (not used by this builder).</param>
        /// <exception cref="PartNotFoundException">
        /// The report has no CNR entry, or no report type could be derived from the CNR and this
        /// part is required.
        /// </exception>
        public void Build(Dictionary <string, string> report, TextExtractionResult rawInputBody)
        {
            if (!report.ContainsKey(KnownReportParts.PART_CNR))
            {
                throw new PartNotFoundException("CNR must exist to determine report type");
            }

            var cnr   = report[KnownReportParts.PART_CNR];
            var match = _regex.Match(cnr);

            if (match.Success)
            {
                var key = match.Groups[1].Value.Trim().ToUpper();

                // A type code missing from the lookup is treated like a failed match, so it follows
                // the same required/optional handling instead of surfacing a KeyNotFoundException.
                if (_reportTypes.ContainsKey(key))
                {
                    report.Add(KnownReportParts.REPORTTYPE, RemoveRedundantSpaces(_reportTypes[key]));
                    return;
                }
            }

            if (_required)
            {
                throw new PartNotFoundException("Report Type not found.");
            }
        }