Beispiel #1
0
        /// <summary>
        /// Processes the file named by <c>mFile</c>: loads the matching IFilter, collects the
        /// text of every text chunk into <c>mDocumentText</c> and every string-valued
        /// property into <c>mDocumentProperties</c>.
        /// </summary>
        /// <remarks>
        /// Nothing is thrown to the caller. Exceptions raised on a worker thread are not
        /// propagated to the thread that started it, so every failure (including the
        /// <c>TextFilterException</c> raised when the filter cannot be loaded or
        /// initialised) is caught here and its message is stored in <c>mErrorMessage</c>.
        /// </remarks>
        internal void ProcessInSTAThread()
        {
            // Number of characters requested from the filter per GetText call
            const int TextBufferChars = 65536;

            // Declared outside the try block so the finally block can release them
            IFilter iflt = null;
            IUnknown iunk = null;
            IntPtr scratchPtr = IntPtr.Zero;

            // Exceptions in a thread are not automatically thrown in the main thread
            try
            {
                // Try to load the corresponding IFilter
                int hr = LoadIFilter(mFile, ref iunk, ref iflt);
                if (hr != (int)IFilterReturnCodes.S_OK)
                {
                    throw new TextFilterException(
                        String.Format("IFilter instance not found for file {0}", mFile));
                }

                // Result containers; property names are looked up case-insensitively
                mDocumentText = new StringBuilder();
                mDocumentProperties = new ListDictionary(CaseInsensitiveComparer.Default);

                // Try to initialize the IFilter
                int attr = 0;
                IFILTER_FLAGS flagsSet = 0;
                IFilterReturnCodes scode = iflt.Init(mFlags, attr, IntPtr.Zero, ref flagsSet);
                if (scode != IFilterReturnCodes.S_OK)
                {
                    throw new TextFilterException(
                        String.Format("IFilter initialisation failed: {0}",
                        Enum.GetName(scode.GetType(), scode)));
                }

                // Reusable text buffer, capped at the size announced to the filter
                StringBuilder buffer = new StringBuilder(TextBufferChars, TextBufferChars);

                // Scratch memory handed to GetValue; released in the finally block
                scratchPtr = Marshal.AllocCoTaskMem(10000);

                // Get all chunks from the filter
                STAT_CHUNK chunkStatus = new STAT_CHUNK();
                while (scode == IFilterReturnCodes.S_OK)
                {
                    scode = iflt.GetChunk(ref chunkStatus);
                    if (scode != IFilterReturnCodes.S_OK)
                    {
                        break;
                    }

                    if (chunkStatus.flags == CHUNKSTATE.CHUNK_TEXT)
                    {
                        // A chunk can hold more text than one buffer; keep reading
                        // while the filter reports S_OK so nothing is truncated.
                        IFilterReturnCodes scodeText;
                        do
                        {
                            int bufferSize = TextBufferChars;

                            // Clear stale content so a call that writes nothing
                            // cannot re-append text from the previous call.
                            buffer.Length = 0;
                            scodeText = iflt.GetText(ref bufferSize, buffer);

                            // Never read past what the buffer actually holds: the
                            // reported count may be left untouched on failure.
                            int charCount = Math.Min(bufferSize, buffer.Length);
                            if (charCount > 0)
                            {
                                mDocumentText.Append(buffer.ToString(0, charCount));
                            }
                        }
                        while (scodeText == IFilterReturnCodes.S_OK);
                    }
                    else if (chunkStatus.flags == CHUNKSTATE.CHUNK_VALUE)
                    {
                        // Get property id
                        PROPID propId = (PROPID)((int)chunkStatus.attribute.psProperty.propid);

                        // GetValue receives the pointer by reference and may replace it
                        // with a PROPVARIANT allocated by the filter. Work on a copy so
                        // the scratch block is never lost and a replaced pointer can be
                        // freed as soon as the value has been read.
                        IntPtr valuePtr = scratchPtr;
                        IFilterReturnCodes scodeGetValue = iflt.GetValue(ref valuePtr);
                        try
                        {
                            bool hasValue =
                                scodeGetValue == IFilterReturnCodes.S_OK ||
                                scodeGetValue == IFilterReturnCodes.FILTER_S_LAST_VALUES;

                            if (hasValue && valuePtr != IntPtr.Zero)
                            {
                                // Get the prop variant
                                PROPVARIANT propvariant = (PROPVARIANT)Marshal.PtrToStructure(
                                    valuePtr, typeof(PROPVARIANT));

                                bool isAnsi = propvariant.vt == (int)VariantTypes.VT_LPSTR;
                                bool isWide = propvariant.vt == (int)VariantTypes.VT_LPWSTR;

                                // Only string-valued properties are recorded
                                if (isAnsi || isWide)
                                {
                                    // Drop the first four characters of the enum member name
                                    // (presumably a fixed prefix - confirm against PROPID).
                                    string propName = propId.ToString();
                                    if (propName.Length > 4)
                                    {
                                        propName = propName.Substring(4).ToLowerInvariant();
                                    }

                                    // Decode with the encoding the variant type announces:
                                    // VT_LPSTR is an ANSI string, VT_LPWSTR a UTF-16 string.
                                    mDocumentProperties[propName] = isAnsi
                                        ? Marshal.PtrToStringAnsi(propvariant.data)
                                        : Marshal.PtrToStringUni(propvariant.data);
                                }

                                // Free referenced memory
                                Marshal.DestroyStructure(valuePtr, typeof(PROPVARIANT));
                            }
                        }
                        finally
                        {
                            // Free a filter-allocated block; the scratch block is reused
                            if (valuePtr != IntPtr.Zero && valuePtr != scratchPtr)
                            {
                                Marshal.FreeCoTaskMem(valuePtr);
                            }
                        }
                    }
                }
            }
            catch (Exception exception)
            {
                mErrorMessage = "TextFilter error: " + exception.Message;
            }
            finally
            {
                // Deallocate memory even when extraction failed part-way
                if (scratchPtr != IntPtr.Zero)
                {
                    Marshal.FreeCoTaskMem(scratchPtr);
                }

                // Release the filter deterministically instead of waiting for the GC
                if (iflt != null && Marshal.IsComObject(iflt))
                {
                    Marshal.ReleaseComObject(iflt);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Extracts the plain text of a file through the IFilter returned by
        /// <c>loadIFilter</c>.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>
        /// The text of every text chunk that yielded text, each followed by a single
        /// space, or <see cref="String.Empty"/> when no filter is available for the file.
        /// </returns>
        /// <exception cref="Exception">The filter could not be initialised.</exception>
        public static string Extract(string path)
        {
            // Number of characters requested from the filter per GetText call
            const uint BufferChars = 65536;

            StringBuilder sb = new StringBuilder();
            IFilter filter = null;

            try
            {
                filter = loadIFilter(path);

                if (filter == null)
                {
                    return String.Empty;
                }

                uint i = 0;
                STAT_CHUNK ps = new STAT_CHUNK();

                IFILTER_INIT iflags =
                    IFILTER_INIT.CANON_HYPHENS |
                    IFILTER_INIT.CANON_PARAGRAPHS |
                    IFILTER_INIT.CANON_SPACES |
                    IFILTER_INIT.APPLY_CRAWL_ATTRIBUTES |
                    IFILTER_INIT.APPLY_INDEX_ATTRIBUTES |
                    IFILTER_INIT.APPLY_OTHER_ATTRIBUTES |
                    IFILTER_INIT.HARD_LINE_BREAKS |
                    IFILTER_INIT.SEARCH_LINKS |
                    IFILTER_INIT.FILTER_OWNED_VALUE_OK;

                if (filter.Init(iflags, 0, null, ref i) != (int)IFilterReturnCodes.S_OK)
                {
                    throw new Exception("Problem initializing an IFilter for:\n" + path + " \n\n");
                }

                // One buffer for the whole run instead of a fresh 64K-character
                // allocation per GetText call; it is cleared before every call.
                StringBuilder sbBuffer = new StringBuilder((int)BufferChars);

                while (filter.GetChunk(out ps) == (int)(IFilterReturnCodes.S_OK))
                {
                    // Only text chunks contribute to the result
                    if (ps.flags != CHUNKSTATE.CHUNK_TEXT)
                    {
                        continue;
                    }

                    bool appendedText = false;
                    IFilterReturnCodes scode = IFilterReturnCodes.S_OK;

                    // Keep reading until the filter stops reporting a success code
                    while (scode == IFilterReturnCodes.S_OK || scode == IFilterReturnCodes.FILTER_S_LAST_TEXT)
                    {
                        uint pcwcBuffer = BufferChars;
                        sbBuffer.Length = 0;

                        scode = (IFilterReturnCodes)filter.GetText(ref pcwcBuffer, sbBuffer);

                        // A failure code carries no text; do not touch the buffer
                        if (scode != IFilterReturnCodes.S_OK && scode != IFilterReturnCodes.FILTER_S_LAST_TEXT)
                        {
                            break;
                        }

                        if (pcwcBuffer > 0 && sbBuffer.Length > 0)
                        {
                            // The reported count can exceed what was marshalled back
                            if (sbBuffer.Length < pcwcBuffer)
                            {
                                pcwcBuffer = (uint)sbBuffer.Length;
                            }

                            sb.Append(sbBuffer.ToString(0, (int)pcwcBuffer));
                            appendedText = true;
                        }
                    }

                    // Separate chunks, not individual GetText calls: text delivered in
                    // several pieces is contiguous and must not be split by a space.
                    if (appendedText)
                    {
                        sb.Append(" ");
                    }
                }
            }
            finally
            {
                if (filter != null)
                {
                    Marshal.ReleaseComObject(filter);

                    // NOTE(review): the forced collection presumably makes any remaining
                    // COM wrappers let go of the file - confirm before removing it.
                    System.GC.Collect();
                    System.GC.WaitForPendingFinalizers();
                }
            }

            return sb.ToString();
        }