using System.Runtime.InteropServices;
using Microsoft.Office.Interop.Word;

/// <summary>
/// Opens a Word document in a Word automation instance and reads its text through
/// the loaded <c>IFilter</c> implementation whose type name is <c>FilterReader</c>.
/// </summary>
/// <param name="filePath">Path of the Word document to read.</param>
/// <returns>
/// The concatenation of every chunk value the filter yields, or an empty string when
/// no <c>FilterReader</c> type is found in the assemblies loaded into the current
/// application domain.
/// </returns>
public string ExtractTextFromWordFile(string filePath)
{
    var text = new System.Text.StringBuilder();

    var app = new Application();
    try
    {
        var doc = app.Documents.Open(filePath);
        try
        {
            // Look through every loaded assembly for the IFilter implementation to use.
            var wordFilter = AppDomain.CurrentDomain.GetAssemblies()
                .SelectMany(assembly => assembly.GetTypes())
                .Where(type => typeof(IFilter).IsAssignableFrom(type))
                .FirstOrDefault(type => type.Name == "FilterReader");

            if (wordFilter != null)
            {
                // The using statement disposes the filter; no explicit Dispose call is needed.
                using (var filter = (IFilter)Activator.CreateInstance(wordFilter))
                {
                    filter.Load(IFilterInitFlags.NONE, filePath, null);
                    while (filter.Read(out var chunk))
                    {
                        text.Append(chunk.Value);
                    }
                }
            }
        }
        finally
        {
            // Close the document even when the filter throws.
            doc.Close();
            Marshal.ReleaseComObject(doc);
        }
    }
    finally
    {
        // Always quit Word so a failure does not leave the automation instance running.
        app.Quit();
        Marshal.ReleaseComObject(app);
    }

    return text.ToString();
}
using System.Runtime.InteropServices;
using System.Runtime.InteropServices.ComTypes;

/// <summary>
/// Writes the metadata chunks of a PDF file to the debug output using the Adobe PDF
/// <c>IFilter</c>.
/// </summary>
/// <remarks>
/// This code retrieves the Adobe PDF IFilter from the loaded assemblies (a type whose
/// full name starts with <c>Adobe.PDF.IFilter</c> or <c>AdobeXMP.IFilter</c>) and loads
/// it with the PDF file path. It then iterates through the metadata chunks in the file
/// and retrieves each attribute and its value. The output is written to the debug
/// console. Nothing happens when no matching filter type is loaded.
/// </remarks>
/// <param name="filePath">Path of the PDF file to inspect.</param>
public void ExtractPdfMetadata(string filePath)
{
    // Ordinal comparison: these are type-name prefixes, not linguistic text.
    var adobeFilter = AppDomain.CurrentDomain.GetAssemblies()
        .SelectMany(assembly => assembly.GetTypes())
        .Where(type => typeof(IFilter).IsAssignableFrom(type))
        .FirstOrDefault(type =>
            type.FullName != null &&
            (type.FullName.StartsWith("Adobe.PDF.IFilter", StringComparison.Ordinal) ||
             type.FullName.StartsWith("AdobeXMP.IFilter", StringComparison.Ordinal)));

    if (adobeFilter == null)
    {
        return;
    }

    // The using statement disposes the filter; no explicit Dispose call is needed.
    using (var filter = (IFilter)Activator.CreateInstance(adobeFilter))
    {
        filter.Load(IFilterInitFlags.NONE, filePath, null);

        // NOTE(review): the standard COM IFilter::GetChunk returns 0 (S_OK) on success.
        // If this IFilter declaration follows that convention, the condition below is
        // inverted - confirm against the IFilter definition before relying on it.
        while (filter.GetChunk(out STAT_CHUNK stat) != 0)
        {
            System.Diagnostics.Debug.WriteLine(stat.attribute.psProperty.ulKind.ToString());
            System.Diagnostics.Debug.WriteLine(filter.GetValue().ToString());
        }
    }
}