/// <summary>
/// Processes the file named by <c>mFile</c>: loads an IFilter for it, appends the
/// text of every text chunk to <c>mDocumentText</c> and stores string-valued
/// properties in <c>mDocumentProperties</c>.
/// </summary>
/// <remarks>
/// No exception escapes this method. Every failure, including the
/// <see cref="TextFilterException"/> raised when the filter cannot be created or
/// initialised, is caught and its message is stored in <c>mErrorMessage</c>.
/// </remarks>
internal void ProcessInSTAThread()
{
    // Exceptions in a thread are not automatically thrown in the main thread,
    // so everything is caught here and reported through mErrorMessage instead.
    try
    {
        const int TextBufferChars = 65536;
        const int PropVariantBufferBytes = 10000;

        IFilter iflt = null;
        IUnknown iunk = null;
        IntPtr propvariantPtr = IntPtr.Zero;

        try
        {
            // Try to load the corresponding IFilter
            int i = LoadIFilter(mFile, ref iunk, ref iflt);
            if (i != (int)IFilterReturnCodes.S_OK)
            {
                throw new TextFilterException(
                    String.Format("IFilter instance not found for file {0}", mFile));
            }

            // Results are reset only once a filter has been found
            mDocumentText = new StringBuilder();
            mDocumentProperties = new ListDictionary(
                CaseInsensitiveComparer.Default);

            // Try to initialize the IFilter
            int attr = 0;
            IFILTER_FLAGS flagsSet = 0;
            IFilterReturnCodes scode = iflt.Init(mFlags, attr, IntPtr.Zero, ref flagsSet);
            if (scode != IFilterReturnCodes.S_OK)
            {
                throw new TextFilterException(
                    String.Format("IFilter initialisation failed: {0}",
                        Enum.GetName(scode.GetType(), scode)));
            }

            // Capacity equals the maximum capacity so the buffer never grows
            StringBuilder buffer = new StringBuilder(TextBufferChars, TextBufferChars);

            // Allocate memory for the propvariant; released in the finally block
            propvariantPtr = Marshal.AllocCoTaskMem(PropVariantBufferBytes);

            // Get all chunks from the filter; stop at the first non-S_OK code
            STAT_CHUNK chunkStatus = new STAT_CHUNK();
            while ((scode = iflt.GetChunk(ref chunkStatus)) == IFilterReturnCodes.S_OK)
            {
                if (chunkStatus.flags == CHUNKSTATE.CHUNK_TEXT)
                {
                    // A chunk can hold more text than fits in one buffer, so keep
                    // reading while the filter reports that more text follows.
                    IFilterReturnCodes scodeText;
                    do
                    {
                        int bufferSize = TextBufferChars;
                        scodeText = iflt.GetText(ref bufferSize, buffer);

                        bool gotText = scodeText == IFilterReturnCodes.S_OK
                            || scodeText == IFilterReturnCodes.FILTER_S_LAST_TEXT;
                        if (gotText && bufferSize > 0)
                        {
                            // The reported count can exceed what was marshalled
                            // back; clamp so ToString cannot go out of range.
                            int length = Math.Min(bufferSize, buffer.Length);
                            mDocumentText.Append(buffer.ToString(0, length));
                        }
                    }
                    while (scodeText == IFilterReturnCodes.S_OK);
                }
                else if (chunkStatus.flags == CHUNKSTATE.CHUNK_VALUE)
                {
                    // Get property id
                    PROPID propId = (PROPID)((int)chunkStatus.attribute.psProperty.propid);

                    // Get the value
                    // NOTE(review): the pointer is passed by ref, so the filter may
                    // replace it; confirm who owns the buffer against the interop
                    // declaration of GetValue.
                    IFilterReturnCodes scodeGetValue = iflt.GetValue(ref propvariantPtr);

                    if (scodeGetValue == IFilterReturnCodes.S_OK
                        || scodeGetValue == IFilterReturnCodes.FILTER_S_LAST_VALUES)
                    {
                        // Get the prop variant
                        PROPVARIANT propvariant = (PROPVARIANT)Marshal.PtrToStructure(
                            propvariantPtr, typeof(PROPVARIANT));

                        // Only string properties are kept; decode with the
                        // encoding that matches the variant type.
                        string propValue = null;
                        bool isString = false;
                        if (propvariant.vt == (int)VariantTypes.VT_LPSTR)
                        {
                            propValue = Marshal.PtrToStringAnsi(propvariant.data);
                            isString = true;
                        }
                        else if (propvariant.vt == (int)VariantTypes.VT_LPWSTR)
                        {
                            propValue = Marshal.PtrToStringUni(propvariant.data);
                            isString = true;
                        }

                        if (isString)
                        {
                            // Drop the first four characters of the enum name and
                            // lower-case the rest, independent of the thread culture.
                            string propName = propId.ToString();
                            if (propName.Length > 4)
                            {
                                propName = propName.Substring(4).ToLowerInvariant();
                            }

                            mDocumentProperties[propName] = propValue;
                        }

                        // Free referenced memory
                        Marshal.DestroyStructure(propvariantPtr, typeof(PROPVARIANT));
                    }
                }
            }
        }
        finally
        {
            // Runs on success and on failure, so nothing leaks when a step throws
            if (propvariantPtr != IntPtr.Zero)
            {
                Marshal.FreeCoTaskMem(propvariantPtr);
            }

            if (iflt != null && Marshal.IsComObject(iflt))
            {
                Marshal.ReleaseComObject(iflt);
            }
        }
    }
    catch (Exception exception)
    {
        mErrorMessage = "TextFilter error: " + exception.Message;
    }
}
/// <summary>
/// Extracts the text of the file at <paramref name="path"/> through the IFilter
/// returned by <c>loadIFilter</c>.
/// </summary>
/// <param name="path">Path of the file to extract text from.</param>
/// <returns>
/// The text of all text chunks, with a single space appended after each retrieved
/// segment, or an empty string when no filter is available for the file.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The filter was loaded but its initialisation did not return S_OK.
/// </exception>
public static string Extract(string path)
{
    StringBuilder sb = new StringBuilder();
    IFilter filter = null;

    try
    {
        filter = loadIFilter(path);
        if (filter == null)
        {
            return String.Empty;
        }

        IFILTER_INIT iflags =
            IFILTER_INIT.CANON_HYPHENS |
            IFILTER_INIT.CANON_PARAGRAPHS |
            IFILTER_INIT.CANON_SPACES |
            IFILTER_INIT.APPLY_CRAWL_ATTRIBUTES |
            IFILTER_INIT.APPLY_INDEX_ATTRIBUTES |
            IFILTER_INIT.APPLY_OTHER_ATTRIBUTES |
            IFILTER_INIT.HARD_LINE_BREAKS |
            IFILTER_INIT.SEARCH_LINKS |
            IFILTER_INIT.FILTER_OWNED_VALUE_OK;

        uint i = 0;
        if (filter.Init(iflags, 0, null, ref i) != (int)IFilterReturnCodes.S_OK)
        {
            // A specific exception type; callers catching Exception still catch it
            throw new InvalidOperationException(
                "Problem initializing an IFilter for:\n" + path + " \n\n");
        }

        STAT_CHUNK ps = new STAT_CHUNK();
        while (filter.GetChunk(out ps) == (int)(IFilterReturnCodes.S_OK))
        {
            // Only text chunks contribute to the result
            if (ps.flags != CHUNKSTATE.CHUNK_TEXT)
            {
                continue;
            }

            // Read the chunk's text until GetText returns a code other than
            // S_OK or FILTER_S_LAST_TEXT.
            IFilterReturnCodes scode = IFilterReturnCodes.S_OK;
            while (scode == IFilterReturnCodes.S_OK
                || scode == IFilterReturnCodes.FILTER_S_LAST_TEXT)
            {
                uint pcwcBuffer = 65536;
                StringBuilder sbBuffer = new StringBuilder((int)pcwcBuffer);

                scode = (IFilterReturnCodes)filter.GetText(ref pcwcBuffer, sbBuffer);

                if (pcwcBuffer > 0 && sbBuffer.Length > 0)
                {
                    // Should never happen, but it happens: the reported count can
                    // exceed the marshalled length, so clamp before slicing.
                    if (sbBuffer.Length < pcwcBuffer)
                    {
                        pcwcBuffer = (uint)sbBuffer.Length;
                    }

                    sb.Append(sbBuffer.ToString(0, (int)pcwcBuffer));
                    sb.Append(" "); // "\r\n"
                }
            }
        }
    }
    finally
    {
        if (filter != null)
        {
            Marshal.ReleaseComObject(filter);

            // NOTE(review): forced collection kept from the original; presumably it
            // makes the filter let go of the file promptly. Confirm before removing.
            System.GC.Collect();
            System.GC.WaitForPendingFinalizers();
        }
    }

    return sb.ToString();
}