Beispiel #1
0
        /// <summary>
        /// Opens a .docx file for parsing. The source file is copied to a temporary
        /// location and the copy is opened in editable mode, so the original file is
        /// never opened by this constructor.
        /// </summary>
        /// <param name="fileName">Path of the .docx file to copy and parse.</param>
        public DocxParser(string fileName)
        {
            // Path.Combine avoids the doubled separator: GetTempPath() already ends with one.
            path = Path.Combine(Path.GetTempPath(), "tmp-" + DateTime.Now.Ticks.ToString());
            File.Copy(fileName, path);

            try
            {
                doc = WordprocessingDocument.Open(path, true);

                // Serialize the document once and reuse it for both the raw and the parsed content.
                string documentXml = doc.MainDocumentPart.Document.InnerXml;

                parsedFile = new ContentFile()
                {
                    contentRaw = documentXml,
                    contentText = ParseFileContent(XElement.Parse(documentXml))
                };

                SetMetadata(MetadataType.Author, doc.PackageProperties.Creator);
                SetMetadata(MetadataType.Description, doc.PackageProperties.Description);
                SetMetadata(MetadataType.Language, doc.PackageProperties.Language);
                SetMetadata(MetadataType.Subject, doc.PackageProperties.Subject);
                SetMetadata(MetadataType.Title, doc.PackageProperties.Title);
                SetMetadata(MetadataType.Type, doc.PackageProperties.ContentType);

                // The creation date is optional in the package properties.
                if (doc.PackageProperties.Created.HasValue)
                    SetMetadata(MetadataType.PublishDate, doc.PackageProperties.Created.Value.ToString());

                LoadAllMetadata();
            }
            catch
            {
                // A failed constructor never hands the instance to a caller, so the
                // open package and the temporary copy must be released here.
                if (doc != null)
                    doc.Dispose();

                try
                {
                    File.Delete(path);
                }
                catch (IOException)
                {
                    // Best effort only: keep the original failure as the reported error.
                }
                catch (UnauthorizedAccessException)
                {
                    // Best effort only: keep the original failure as the reported error.
                }

                throw;
            }
        }
Beispiel #2
0
        //PdfWriter pdfWriter;
        /// <summary>
        /// Creates a parser for the given PDF file: a <c>PdfReader</c> is created for it
        /// and whatever <c>GetFileContent()</c> returns is stored as the parsed text.
        /// </summary>
        /// <param name="fileName">Value handed to the <c>PdfReader</c> constructor.</param>
        public PdfParser(string fileName)
        {
            pdfReader = new PdfReader(fileName);

            // Populate the content object first; parsedFile is assigned only once it is complete.
            ContentFile content = new ContentFile();
            content.contentText = GetFileContent();
            parsedFile = content;
        }
        /// <summary>
        /// Reads a plain-text file and stores its entire content as the parsed text.
        /// </summary>
        /// <param name="filename">Path of the text file to read.</param>
        public PlainTextParser(string filename)
        {
            // File.ReadAllText opens the file with read-only access. The previous
            // FileStream(path, FileMode.Open) overload requested read/write access,
            // which fails for read-only files even though nothing is written.
            parsedFile = new ContentFile()
            {
                contentText = File.ReadAllText(filename)
            };
        }
Beispiel #4
0
 /// <summary>
 /// Starts a Word automation instance, opens the given document through it and
 /// captures the document's WordOpenXML plus the result of <c>GetFileContent()</c>.
 /// </summary>
 /// <param name="fileName">
 /// Document to open; typed as <c>object</c> because the interop call takes it by reference.
 /// </param>
 public DocParser(object fileName)
 {
     WordApp = new Word.Application();
     try
     {
         doc = WordApp.Documents.Open(ref fileName, ref missing, ref readOnly,
                                      ref missing, ref missing, ref missing,
                                      ref missing, ref missing, ref missing,
                                      ref missing, ref missing, ref isVisible,
                                      ref missing, ref missing, ref missing,
                                      ref missing);
         parsedFile = new ContentFile()
         {
             contentRaw = doc.WordOpenXML,
             contentText = GetFileContent(),
         };
     }
     catch
     {
         // A failed constructor never hands the instance to a caller, so the Word
         // process started above would otherwise be left running with no owner.
         object doNotSave = Word.WdSaveOptions.wdDoNotSaveChanges;
         try
         {
             WordApp.Quit(ref doNotSave, ref missing, ref missing);
         }
         catch (System.Runtime.InteropServices.COMException)
         {
             // Word is already gone or unreachable; keep the original failure.
         }

         throw;
     }
 }
Beispiel #5
0
        /// <summary>
        /// Creates a new .docx package in a temporary file from an already parsed
        /// <c>ContentFile</c>: the text becomes a single paragraph in the document body
        /// and the metadata entries are forwarded through <c>SetMetadata</c>.
        /// </summary>
        /// <param name="file">Parsed content whose text and metadata are used.</param>
        /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
        public DocxParser(ContentFile file)
        {
            if (file == null)
                throw new ArgumentNullException("file");

            parsedFile = file;
            // Path.Combine avoids the doubled separator: GetTempPath() already ends with one.
            path = Path.Combine(Path.GetTempPath(), "tmp-" + DateTime.Now.Ticks.ToString());

            doc = WordprocessingDocument.Create(path, DocumentFormat.OpenXml.WordprocessingDocumentType.Document);

            var mainPart = doc.AddMainDocumentPart();
            var paragraph = new Paragraph(new Run(new Text(file.contentText)));

            // Attach the paragraph to the part. Previously it was built and then dropped,
            // leaving the main part without a document, so the text never reached the file.
            mainPart.Document = new DocumentFormat.OpenXml.Wordprocessing.Document(
                new DocumentFormat.OpenXml.Wordprocessing.Body(paragraph));

            // Each field reads its own metadata key (all of them used to read Author).
            SetMetadata(MetadataType.Author, file.Metadata[MetadataType.Author]);
            SetMetadata(MetadataType.Description, file.Metadata[MetadataType.Description]);
            SetMetadata(MetadataType.Language, file.Metadata[MetadataType.Language]);
            SetMetadata(MetadataType.Subject, file.Metadata[MetadataType.Subject]);
            SetMetadata(MetadataType.Title, file.Metadata[MetadataType.Title]);
            SetMetadata(MetadataType.Type, file.Metadata[MetadataType.Type]);
        }