/// <summary>
/// Extracts the cell text of every sheet in an Open XML spreadsheet.
/// Each worksheet row becomes one output line; each cell is written as its
/// trimmed text wrapped in double quotes and followed by a tab.
/// </summary>
/// <param name="file">Stream over the workbook; the document is opened read-only.</param>
/// <returns>
/// The extracted text. If an exception occurs it is reported through
/// <c>ParsersInfra.RecordParsingFailure</c> and whatever was extracted before
/// the failure (possibly an empty string) is returned.
/// </returns>
public static string Parse(FileStream file)
{
    // Declared outside the try block so partial output survives a mid-file failure.
    StringBuilder output = new StringBuilder();
    try
    {
        // Dispose the package when done; the original never released it.
        using (SpreadsheetDocument excelDoc = SpreadsheetDocument.Open(file, false))
        {
            WorkbookPart workbookPart = excelDoc.WorkbookPart;

            // The shared-string table belongs to the workbook, so read it once rather
            // than once per sheet. A workbook without string cells has no such part,
            // so FirstOrDefault is used instead of First (which would throw and
            // discard the whole file).
            SharedStringTablePart sharedStringPart =
                workbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
            SharedStringItem[] sharedStrings =
                sharedStringPart != null && sharedStringPart.SharedStringTable != null
                    ? sharedStringPart.SharedStringTable.Elements<SharedStringItem>().ToArray()
                    : new SharedStringItem[0];

            foreach (Sheet sheet in workbookPart.Workbook.Descendants<Sheet>())
            {
                WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id);

                foreach (Row row in worksheetPart.Worksheet.Descendants<Row>())
                {
                    // Elements<Cell>() skips any non-cell children instead of failing
                    // on an invalid cast.
                    foreach (Cell cell in row.Elements<Cell>())
                    {
                        string value = string.Empty;
                        if (cell.CellValue != null)
                        {
                            string raw = cell.CellValue.Text;
                            if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
                            {
                                // Shared-string cells store an index into the table.
                                // A dangling or malformed index yields empty text
                                // rather than aborting the rest of the workbook.
                                int index;
                                if (int.TryParse(raw, out index) && index >= 0 && index < sharedStrings.Length)
                                {
                                    value = sharedStrings[index].InnerText;
                                }
                            }
                            else
                            {
                                value = raw;
                            }
                        }

                        // To be safe, always use double quotes.
                        output.Append('"').Append(value.Trim()).Append("\"\t");
                    }

                    // The original called TrimEnd(',') here, which never matched because
                    // the separator is a tab; the trailing tab is kept so the output
                    // format is unchanged.
                    output.Append(System.Environment.NewLine);
                }
            }
        }
    }
    catch (Exception ex)
    {
        ParsersInfra.RecordParsingFailure(Log, ex, file);
    }

    return output.ToString();
}
/// <summary>
/// Extracts headers and body text from an e-mail file. Files whose name ends in
/// "eml" are read as MIME messages; files whose name ends in "msg" are read as
/// Outlook messages, and each of their file attachments is written to the
/// temporary folder, parsed through <c>FileUtils.ParseSync</c> and appended.
/// Any other file name produces an empty string.
/// </summary>
/// <param name="file">Stream over the e-mail file; its <c>Name</c> selects the format.</param>
/// <returns>
/// The extracted text. If an exception occurs it is reported through
/// <c>ParsersInfra.RecordParsingFailure</c> and whatever was extracted before
/// the failure (possibly an empty string) is returned.
/// </returns>
public static string Parse(FileStream file)
{
    string retVal = "";
    try
    {
        // Ordinal, case-insensitive suffix match instead of a culture-sensitive ToLower().
        if (file.Name.EndsWith("eml", StringComparison.OrdinalIgnoreCase))
        {
            byte[] rawMessage;
            using (MemoryStream ms = new MemoryStream())
            {
                file.CopyTo(ms);
                rawMessage = ms.ToArray();
            }

            MsgReader.Mime.Message msg = new MsgReader.Mime.Message(rawMessage);
            retVal += "From: " + (msg.Headers.From == null ? "" : msg.Headers.From.DisplayName) + Environment.NewLine;
            retVal += "To: " + ParseAddresses(msg.Headers.To) + Environment.NewLine;
            retVal += "Cc: " + ParseAddresses(msg.Headers.Cc) + Environment.NewLine;
            retVal += "Bcc: " + ParseAddresses(msg.Headers.Bcc) + Environment.NewLine;
            retVal += "Subject: " + msg.Headers.Subject + Environment.NewLine;

            // A message without a plain-text part is not a parsing failure;
            // keep the headers and skip the body.
            if (msg.TextBody != null)
            {
                retVal += msg.TextBody.GetBodyAsText();
            }
        }
        else if (file.Name.EndsWith("msg", StringComparison.OrdinalIgnoreCase))
        {
            // Dispose the storage when done; the original never released it.
            // NOTE(review): disposing may also close the underlying stream - confirm
            // that callers do not read from 'file' after Parse returns.
            using (MsgReader.Outlook.Storage.Message msg = new MsgReader.Outlook.Storage.Message(file, FileAccess.Read))
            {
                retVal += "From: " + (msg.Sender == null ? "" : msg.Sender.DisplayName + " " + msg.Sender.Email) + Environment.NewLine;
                foreach (MsgReader.Outlook.Storage.Recipient rec in msg.Recipients)
                {
                    retVal += "To: " + rec.DisplayName + " " + rec.Email + Environment.NewLine;
                }

                retVal += "Subject: " + msg.Subject + Environment.NewLine;
                retVal += msg.BodyText;

                foreach (object item in msg.Attachments)
                {
                    // Skip entries that are not file attachments instead of failing
                    // on an invalid cast and losing the remaining attachments.
                    MsgReader.Outlook.Storage.Attachment a = item as MsgReader.Outlook.Storage.Attachment;
                    if (a == null)
                    {
                        continue;
                    }

                    string stamp = DateTime.Now.Ticks.ToString();
                    string fileName = stamp + a.FileName;

                    // The attachment name comes from the e-mail and is untrusted: keep only
                    // its file-name component so it cannot escape the temporary folder.
                    string tempPath = Path.Combine(_tmpFilesFolder, stamp + Path.GetFileName(a.FileName));
                    try
                    {
                        File.WriteAllBytes(tempPath, a.Data);
                        retVal += fileName + ": " + FileUtils.ParseSync(new FileInfo(tempPath));
                    }
                    finally
                    {
                        // Remove the temporary copy; the original left every attachment on disk.
                        // NOTE(review): assumes ParseSync has finished with the file when it returns.
                        try
                        {
                            File.Delete(tempPath);
                        }
                        catch (IOException)
                        {
                            // Best-effort cleanup: a locked temp file must not fail the parse.
                        }
                        catch (UnauthorizedAccessException)
                        {
                            // Best-effort cleanup, as above.
                        }
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        ParsersInfra.RecordParsingFailure(Log, ex, file);
    }

    return retVal;
}
/// <summary>
/// Loads the stream into a <c>Spire.Doc.Document</c> and returns the result of
/// its <c>GetText()</c> call.
/// </summary>
/// <param name="file">Stream over the document to read.</param>
/// <returns>
/// The document text, or an empty string when an exception occurs (the
/// exception is reported through <c>ParsersInfra.RecordParsingFailure</c>).
/// </returns>
public static string Parse(FileStream file)
{
    try
    {
        // NOTE(review): the document object is never closed or disposed here - confirm
        // whether the Spire.Doc version in use requires it.
        Spire.Doc.Document document = new Spire.Doc.Document(file);
        return document.GetText();
    }
    catch (Exception parseError)
    {
        ParsersInfra.RecordParsingFailure(Log, parseError, file);
        return "";
    }
}
/// <summary>
/// Opens the stream read-only as an Open XML word-processing document and
/// returns the inner text of the main document body.
/// </summary>
/// <param name="file">Stream over the document to read.</param>
/// <returns>
/// The body text, or an empty string when an exception occurs (the exception
/// is reported through <c>ParsersInfra.RecordParsingFailure</c>).
/// </returns>
public static string Parse(FileStream file)
{
    try
    {
        // Dispose the package when done; the original never released it.
        using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(file, false))
        {
            return wordDoc.MainDocumentPart.Document.Body.InnerText;
        }
    }
    catch (Exception ex)
    {
        ParsersInfra.RecordParsingFailure(Log, ex, file);
        return "";
    }
}
/// <summary>
/// Loads the stream into an <c>HSSFWorkbook</c> and returns the <c>Text</c>
/// produced by an <c>ExcelExtractor</c> built over it.
/// </summary>
/// <param name="file">Stream over the workbook to read.</param>
/// <returns>
/// The extracted text, or an empty string when an exception occurs (the
/// exception is reported through <c>ParsersInfra.RecordParsingFailure</c>).
/// </returns>
public static string Parse(FileStream file)
{
    try
    {
        HSSFWorkbook workbook = new HSSFWorkbook(file);
        return new ExcelExtractor(workbook).Text;
    }
    catch (Exception parseError)
    {
        ParsersInfra.RecordParsingFailure(Log, parseError, file);
        return "";
    }
}
/// <summary>
/// Collects the text of every slide in a presentation, one slide per line.
/// The slide count and the document handle come from <c>CountSlides</c>; the
/// text of each slide comes from <c>GetSlideIdAndText</c>.
/// </summary>
/// <param name="file">Stream over the presentation to read.</param>
/// <returns>
/// The extracted text. If an exception occurs it is reported through
/// <c>ParsersInfra.RecordParsingFailure</c> and whatever was extracted before
/// the failure (possibly an empty string) is returned.
/// </returns>
public static string Parse(FileStream file)
{
    // Declared outside the try block so partial output survives a mid-file failure.
    // Fully qualified so no additional using directive is required.
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    try
    {
        PresentationDocument doc = null;
        try
        {
            int numberOfSlides = CountSlides(out doc, file);
            for (int i = 0; i < numberOfSlides; i++)
            {
                string slideText;
                GetSlideIdAndText(out slideText, doc, i);
                text.Append(slideText).Append(Environment.NewLine);
            }
        }
        finally
        {
            // Release the package handed back by CountSlides; the original never did.
            // NOTE(review): assumes CountSlides transfers ownership of 'doc' to this method.
            if (doc != null)
            {
                doc.Dispose();
            }
        }
    }
    catch (Exception ex)
    {
        ParsersInfra.RecordParsingFailure(Log, ex, file);
    }

    return text.ToString();
}
/// <summary>
/// Extracts the text of every page of a PDF, in page order, concatenated
/// without any separator.
/// </summary>
/// <param name="file">Stream over the PDF to read.</param>
/// <returns>
/// The extracted text, or an empty string when an exception occurs (the
/// exception is reported through <c>ParsersInfra.RecordParsingFailure</c>;
/// text from pages read before the failure is discarded, as before).
/// </returns>
public static string Parse(FileStream file)
{
    try
    {
        PdfReader reader = new PdfReader(file);
        try
        {
            StringBuilder pages = new StringBuilder();
            int numberOfPages = reader.NumberOfPages;

            // Page numbers are 1-based.
            for (int pageNumber = 1; pageNumber <= numberOfPages; pageNumber++)
            {
                pages.Append(PdfTextExtractor.GetTextFromPage(reader, pageNumber));
            }

            return pages.ToString();
        }
        finally
        {
            // Release the reader; the original never closed it.
            // NOTE(review): if the referenced library version exposes IDisposable on
            // PdfReader, a using block would be equivalent.
            reader.Close();
        }
    }
    catch (Exception ex)
    {
        ParsersInfra.RecordParsingFailure(Log, ex, file);
        return "";
    }
}