Exemple #1
0
        /// <summary>
        /// Extracts the cell text of every worksheet in an Open XML spreadsheet.
        /// </summary>
        /// <param name="file">Stream containing the workbook; it is opened read-only.</param>
        /// <returns>
        /// One line per row, with each cell value trimmed, wrapped in double quotes and
        /// separated from the next by a tab. If parsing fails, the failure is recorded through
        /// <c>ParsersInfra.RecordParsingFailure</c> and the text gathered so far is returned.
        /// </returns>
        public static string Parse(FileStream file)
        {
            // Declared outside the try block so rows collected before a failure are still returned.
            StringBuilder output = new StringBuilder();

            try
            {
                using (SpreadsheetDocument excelDoc = SpreadsheetDocument.Open(file, false))
                {
                    WorkbookPart workbookPart = excelDoc.WorkbookPart;

                    // The shared string table belongs to the workbook, not to a sheet, so it is read once.
                    // A workbook without any text cells may have no such part at all.
                    SharedStringTablePart sharedStringPart =
                        workbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
                    SharedStringItem[] items =
                        (sharedStringPart == null || sharedStringPart.SharedStringTable == null)
                            ? new SharedStringItem[0]
                            : sharedStringPart.SharedStringTable.Elements<SharedStringItem>().ToArray();

                    foreach (Sheet sheet in workbookPart.Workbook.Descendants<Sheet>())
                    {
                        // Sheets that are not backed by a worksheet part (e.g. chart sheets) carry no cells.
                        WorksheetPart worksheetPart = workbookPart.GetPartById(sheet.Id) as WorksheetPart;
                        if (worksheetPart == null)
                        {
                            continue;
                        }

                        foreach (Row row in worksheetPart.Worksheet.Descendants<Row>())
                        {
                            bool firstCell = true;

                            // Elements<Cell>() skips any non-cell children of the row instead of failing a cast.
                            foreach (Cell cell in row.Elements<Cell>())
                            {
                                string value = string.Empty;
                                if (cell.CellValue != null)
                                {
                                    if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
                                    {
                                        // Shared-string cells store an index into the shared string table.
                                        // An unparsable or out-of-range index leaves the cell empty.
                                        int index;
                                        if (int.TryParse(
                                                cell.CellValue.Text,
                                                System.Globalization.NumberStyles.Integer,
                                                System.Globalization.CultureInfo.InvariantCulture,
                                                out index) &&
                                            index >= 0 && index < items.Length)
                                        {
                                            value = items[index].InnerText;
                                        }
                                    }
                                    else
                                    {
                                        value = cell.CellValue.Text;
                                    }
                                }

                                // Tabs go between cells only, so the row never ends with a dangling separator.
                                if (!firstCell)
                                {
                                    output.Append('\t');
                                }
                                firstCell = false;

                                // To be safe, always use double quotes.
                                output.Append('"').Append(value.Trim()).Append('"');
                            }

                            output.Append(System.Environment.NewLine);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                ParsersInfra.RecordParsingFailure(Log, ex, file);
            }

            return output.ToString();
        }
Exemple #2
0
        /// <summary>
        /// Extracts headers and body text from an e-mail file, chosen by the ending of
        /// <c>file.Name</c>: "eml" is read as a MIME message, "msg" as an Outlook message.
        /// </summary>
        /// <param name="file">Stream of the e-mail file; its <c>Name</c> selects the format.</param>
        /// <returns>
        /// The header lines followed by the text body. For "msg" files each file attachment is
        /// also written to <c>_tmpFilesFolder</c> and the result of <c>FileUtils.ParseSync</c>
        /// on it is appended. Any other file name yields an empty string. If parsing fails, the
        /// failure is recorded through <c>ParsersInfra.RecordParsingFailure</c> and the text
        /// gathered so far is returned.
        /// </returns>
        public static string Parse(FileStream file)
        {
            // Declared outside the try block so text collected before a failure is still returned.
            StringBuilder output = new StringBuilder();

            try
            {
                if (file.Name.EndsWith("eml", StringComparison.OrdinalIgnoreCase))
                {
                    using (MemoryStream ms = new MemoryStream())
                    {
                        file.CopyTo(ms);
                        MsgReader.Mime.Message msg = new MsgReader.Mime.Message(ms.ToArray());

                        output.Append("From: ")
                              .Append(msg.Headers.From == null ? "" : msg.Headers.From.DisplayName)
                              .Append(Environment.NewLine);
                        output.Append("To: ").Append(ParseAddresses(msg.Headers.To)).Append(Environment.NewLine);
                        output.Append("Cc: ").Append(ParseAddresses(msg.Headers.Cc)).Append(Environment.NewLine);
                        output.Append("Bcc: ").Append(ParseAddresses(msg.Headers.Bcc)).Append(Environment.NewLine);
                        output.Append("Subject: ").Append(msg.Headers.Subject).Append(Environment.NewLine);

                        // A message without a plain-text part (e.g. HTML only) has no TextBody.
                        if (msg.TextBody != null)
                        {
                            output.Append(msg.TextBody.GetBodyAsText());
                        }
                    }
                }
                else if (file.Name.EndsWith("msg", StringComparison.OrdinalIgnoreCase))
                {
                    // NOTE(review): Storage.Message looks disposable, but whether disposing it also closes
                    // the caller's stream is not visible from here - confirm before wrapping it in a using.
                    MsgReader.Outlook.Storage.Message msg = new MsgReader.Outlook.Storage.Message(file, FileAccess.Read);

                    output.Append("From: ")
                          .Append(msg.Sender == null ? "" : msg.Sender.DisplayName + " " + msg.Sender.Email)
                          .Append(Environment.NewLine);

                    foreach (MsgReader.Outlook.Storage.Recipient rec in msg.Recipients)
                    {
                        output.Append("To: ").Append(rec.DisplayName + " " + rec.Email).Append(Environment.NewLine);
                    }

                    output.Append("Subject: ").Append(msg.Subject).Append(Environment.NewLine);
                    output.Append(msg.BodyText);

                    foreach (object item in msg.Attachments)
                    {
                        // The attachment list can also hold embedded messages; only file attachments
                        // are handled here, so anything else is skipped rather than failing a cast.
                        MsgReader.Outlook.Storage.Attachment a = item as MsgReader.Outlook.Storage.Attachment;
                        if (a == null || a.Data == null)
                        {
                            continue;
                        }

                        // The attachment name comes from the message itself, so directory components are
                        // stripped to keep the temporary file inside _tmpFilesFolder.
                        string fileName = DateTime.Now.Ticks.ToString() + Path.GetFileName(a.FileName);
                        string tempPath = Path.Combine(_tmpFilesFolder, fileName);

                        File.WriteAllBytes(tempPath, a.Data);

                        // NOTE(review): the temporary file is never deleted; confirm that ParseSync is done
                        // with the file when it returns, then remove it here.
                        output.Append(fileName + ": " + FileUtils.ParseSync(new FileInfo(tempPath)));
                    }
                }
            }
            catch (Exception ex)
            {
                ParsersInfra.RecordParsingFailure(Log, ex, file);
            }

            return output.ToString();
        }
Exemple #3
0
        /// <summary>
        /// Loads the stream into a <c>Spire.Doc.Document</c> and returns the result of its
        /// <c>GetText()</c> call.
        /// </summary>
        /// <param name="file">Stream containing the document.</param>
        /// <returns>
        /// The document text, or an empty string when loading or extraction throws; in that
        /// case the failure is recorded through <c>ParsersInfra.RecordParsingFailure</c>.
        /// </returns>
        public static string Parse(FileStream file)
        {
            try
            {
                Spire.Doc.Document document = new Spire.Doc.Document(file);
                return document.GetText();
            }
            catch (Exception ex)
            {
                ParsersInfra.RecordParsingFailure(Log, ex, file);
                return "";
            }
        }
Exemple #4
0
        /// <summary>
        /// Returns the inner text of the main document body of an Open XML word-processing document.
        /// </summary>
        /// <param name="file">Stream containing the document; it is opened read-only.</param>
        /// <returns>
        /// The body's <c>InnerText</c>, or an empty string when the document cannot be read; in
        /// that case the failure is recorded through <c>ParsersInfra.RecordParsingFailure</c>.
        /// </returns>
        public static string Parse(FileStream file)
        {
            string retVal = "";

            try
            {
                // The package is disposed as soon as the text has been copied out.
                using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(file, false))
                {
                    retVal = wordDoc.MainDocumentPart.Document.Body.InnerText;
                }
            }
            catch (Exception ex)
            {
                ParsersInfra.RecordParsingFailure(Log, ex, file);
            }

            return retVal;
        }
Exemple #5
0
        /// <summary>
        /// Builds an <c>HSSFWorkbook</c> from the stream and returns the <c>Text</c> of an
        /// <c>ExcelExtractor</c> created over it.
        /// </summary>
        /// <param name="file">Stream containing the workbook.</param>
        /// <returns>
        /// The extractor's text, or an empty string when reading throws; in that case the
        /// failure is recorded through <c>ParsersInfra.RecordParsingFailure</c>.
        /// </returns>
        public static string Parse(FileStream file)
        {
            try
            {
                HSSFWorkbook workbook = new HSSFWorkbook(file);
                return new ExcelExtractor(workbook).Text;
            }
            catch (Exception ex)
            {
                ParsersInfra.RecordParsingFailure(Log, ex, file);
                return "";
            }
        }
Exemple #6
0
        /// <summary>
        /// Collects the text of every slide in a presentation, one slide per line.
        /// </summary>
        /// <param name="file">Stream containing the presentation; passed to <c>CountSlides</c>.</param>
        /// <returns>
        /// The text returned by <c>GetSlideIdAndText</c> for each slide index, each followed by a
        /// line break. If parsing fails, the failure is recorded through
        /// <c>ParsersInfra.RecordParsingFailure</c> and the text gathered so far is returned.
        /// </returns>
        public static string Parse(FileStream file)
        {
            // Declared outside the try block so slides collected before a failure are still returned.
            StringBuilder output = new StringBuilder();

            try
            {
                // Initialised to null so the finally block can tell whether CountSlides handed back a document.
                PresentationDocument doc = null;
                try
                {
                    int numberOfSlides = CountSlides(out doc, file);
                    for (int i = 0; i < numberOfSlides; i++)
                    {
                        string slideText;
                        GetSlideIdAndText(out slideText, doc, i);
                        output.Append(slideText).Append(Environment.NewLine);
                    }
                }
                finally
                {
                    // The document handed back through the out parameter is otherwise never released.
                    if (doc != null)
                    {
                        doc.Dispose();
                    }
                }
            }
            catch (Exception ex)
            {
                ParsersInfra.RecordParsingFailure(Log, ex, file);
            }

            return output.ToString();
        }
Exemple #7
0
        /// <summary>
        /// Extracts the text of every page of a PDF and concatenates it in page order.
        /// </summary>
        /// <param name="file">Stream containing the PDF.</param>
        /// <returns>
        /// The page texts joined without any separator, or an empty string when any page fails
        /// to parse; in that case the failure is recorded through
        /// <c>ParsersInfra.RecordParsingFailure</c>.
        /// </returns>
        public static string Parse(FileStream file)
        {
            string retVal = "";

            try
            {
                var reader = new PdfReader(file);
                try
                {
                    var sb            = new StringBuilder();
                    var numberOfPages = reader.NumberOfPages;

                    // The extractor is called with page numbers starting at 1.
                    for (var currentPageIndex = 1; currentPageIndex <= numberOfPages; currentPageIndex++)
                    {
                        sb.Append(PdfTextExtractor.GetTextFromPage(reader, currentPageIndex));
                    }

                    // Assigned only after every page succeeded, so a failure yields an empty result.
                    retVal = sb.ToString();
                }
                finally
                {
                    // Release the reader whether or not extraction succeeded.
                    reader.Close();
                }
            }
            catch (Exception ex)
            {
                ParsersInfra.RecordParsingFailure(Log, ex, file);
            }

            return retVal;
        }