Пример #1
0
        /// <summary>
        /// Creates a parser for an existing .docx file. The file is copied to a
        /// uniquely named temporary file and that copy is opened in editable mode,
        /// so the file passed in is never opened by this constructor.
        /// The document XML, its extracted text and the package properties
        /// (author, description, language, subject, title, content type, creation
        /// date) are then captured.
        /// </summary>
        /// <param name="fileName">Path of the .docx file to parse.</param>
        public DocxParser(string fileName)
        {
            // Path.GetTempPath() normally already ends with a directory separator, so
            // Path.Combine is used instead of manual concatenation. A GUID-based name
            // avoids collisions between parsers created within the same clock tick,
            // which would make File.Copy throw because the target already exists.
            path = Path.Combine(Path.GetTempPath(), "tmp-" + Guid.NewGuid().ToString("N"));
            File.Copy(fileName, path);

            try
            {
                doc = WordprocessingDocument.Open(path, true);

                // Serialize the document XML once and reuse it for both fields.
                string documentXml = doc.MainDocumentPart.Document.InnerXml;

                parsedFile = new ContentFile()
                {
                    contentRaw = documentXml,
                    contentText = ParseFileContent(XElement.Parse(documentXml))
                };

                SetMetadata(MetadataType.Author, doc.PackageProperties.Creator);
                SetMetadata(MetadataType.Description, doc.PackageProperties.Description);
                SetMetadata(MetadataType.Language, doc.PackageProperties.Language);
                SetMetadata(MetadataType.Subject, doc.PackageProperties.Subject);
                SetMetadata(MetadataType.Title, doc.PackageProperties.Title);
                SetMetadata(MetadataType.Type, doc.PackageProperties.ContentType);

                // The creation date is optional in the package properties.
                if (doc.PackageProperties.Created.HasValue)
                {
                    SetMetadata(MetadataType.PublishDate, doc.PackageProperties.Created.Value.ToString());
                }

                LoadAllMetadata();
            }
            catch
            {
                // The caller never receives an instance when the constructor throws,
                // so release the package and remove the temporary copy here.
                if (doc != null)
                {
                    doc.Dispose();
                }

                File.Delete(path);
                throw;
            }
        }
Пример #2
0
        //PdfWriter pdfWriter;
        /// <summary>
        /// Creates a parser for a PDF file: opens a <c>PdfReader</c> on the given
        /// file and stores the result of <c>GetFileContent()</c> as the parsed text.
        /// </summary>
        /// <param name="fileName">Path of the PDF file handed to the reader.</param>
        public PdfParser(string fileName)
        {
            // The reader is assigned before GetFileContent() is invoked.
            this.pdfReader = new PdfReader(fileName);

            this.parsedFile = new ContentFile { contentText = this.GetFileContent() };
        }
Пример #3
0
        /// <summary>
        /// Creates a parser for a plain-text file by reading the whole file into
        /// the parsed content.
        /// </summary>
        /// <param name="filename">Path of the text file to read.</param>
        public PlainTextParser(string filename)
        {
            // File.ReadAllText opens the file for reading only. The previous
            // new FileStream(filename, FileMode.Open) asked for read/write access,
            // which fails on read-only files even though nothing is written.
            // Encoding handling is unchanged: UTF-8 by default with byte-order-mark
            // detection.
            parsedFile = new ContentFile()
            {
                contentText = File.ReadAllText(filename)
            };
        }
Пример #4
0
 /// <summary>
 /// Creates a parser for a Word document through Word automation: starts a
 /// Word application, opens the document with the class's <c>readOnly</c> and
 /// <c>isVisible</c> settings, and captures its Open XML and the result of
 /// <c>GetFileContent()</c>.
 /// </summary>
 /// <param name="fileName">
 /// File name passed to <c>Documents.Open</c>; typed as <c>object</c> because
 /// the call takes it by reference.
 /// </param>
 public DocParser(object fileName)
 {
     WordApp = new Word.Application();
     try
     {
         doc = WordApp.Documents.Open(ref fileName, ref missing, ref readOnly,
                                      ref missing, ref missing, ref missing,
                                      ref missing, ref missing, ref missing,
                                      ref missing, ref missing, ref isVisible,
                                      ref missing, ref missing, ref missing,
                                      ref missing);
         parsedFile = new ContentFile()
         {
             contentRaw = doc.WordOpenXML,
             contentText = GetFileContent(),
         };
     }
     catch
     {
         // The caller gets no instance when the constructor throws, so the Word
         // application started above would otherwise be left running.
         try
         {
             object doNotSave = Word.WdSaveOptions.wdDoNotSaveChanges;
             WordApp.Quit(ref doNotSave, ref missing, ref missing);
         }
         catch
         {
             // Best-effort cleanup only: a failure to quit must not hide the
             // original exception, which is rethrown below.
         }

         throw;
     }
 }
Пример #5
0
        /// <summary>
        /// Creates a parser around already parsed content by building a new .docx
        /// package in a uniquely named temporary file. The package receives a main
        /// document part containing the content text as a single paragraph, and the
        /// metadata entries of <paramref name="file"/> are applied via
        /// <c>SetMetadata</c>.
        /// </summary>
        /// <param name="file">Parsed content whose text and metadata seed the new document.</param>
        public DocxParser(ContentFile file)
        {
            parsedFile = file;

            // Path.GetTempPath() normally already ends with a directory separator, so
            // Path.Combine is used; the GUID keeps concurrently created files apart.
            path = Path.Combine(Path.GetTempPath(), "tmp-" + Guid.NewGuid().ToString("N"));

            doc = WordprocessingDocument.Create(path, DocumentFormat.OpenXml.WordprocessingDocumentType.Document);

            var mainPart = doc.AddMainDocumentPart();
            var paragraph = new Paragraph(new Run(new Text(file.contentText)));

            // Attach the paragraph to the part. Previously it was built but never
            // added, which left the main part without a Document element.
            mainPart.Document = new DocumentFormat.OpenXml.Wordprocessing.Document(
                new DocumentFormat.OpenXml.Wordprocessing.Body(paragraph));

            // Each field is read from its own key; previously every field was
            // filled from MetadataType.Author.
            // NOTE(review): assumes file.Metadata has an entry for every key below - confirm.
            SetMetadata(MetadataType.Author, file.Metadata[MetadataType.Author]);
            SetMetadata(MetadataType.Description, file.Metadata[MetadataType.Description]);
            SetMetadata(MetadataType.Language, file.Metadata[MetadataType.Language]);
            SetMetadata(MetadataType.Subject, file.Metadata[MetadataType.Subject]);
            SetMetadata(MetadataType.Title, file.Metadata[MetadataType.Title]);
            SetMetadata(MetadataType.Type, file.Metadata[MetadataType.Type]);
        }