示例#1
0
        public static string Extract(string path)
        {
            StringBuilder sb     = new StringBuilder();
            IFilter       filter = null;

            try
            {
                string sciezka = Directory.GetCurrentDirectory();
                filter = loadIFilter(path);

                if (filter == null)
                {
                    return(String.Empty);
                }

                uint       i  = 0;
                STAT_CHUNK ps = new STAT_CHUNK();

                IFILTER_INIT iflags =
                    IFILTER_INIT.CANON_HYPHENS |
                    IFILTER_INIT.CANON_PARAGRAPHS |
                    IFILTER_INIT.CANON_SPACES |                                                 // to chyba wsadza wszystkie spacje zamiast zpecialnych znakow
                    IFILTER_INIT.HARD_LINE_BREAKS |
                    IFILTER_INIT.APPLY_CRAWL_ATTRIBUTES |
                    IFILTER_INIT.APPLY_INDEX_ATTRIBUTES |
                    IFILTER_INIT.APPLY_OTHER_ATTRIBUTES |
                    IFILTER_INIT.SEARCH_LINKS |
                    IFILTER_INIT.FILTER_OWNED_VALUE_OK;

                if (filter.Init(iflags, 0, null, ref i) != (int)IFilterReturnCodes.S_OK)
                {
                    throw new Exception("Problem initializing an IFilter for:\n" + path + " \n\n");
                }

                while (filter.GetChunk(out ps) == (int)(IFilterReturnCodes.S_OK))
                {
                    if (ps.flags == CHUNKSTATE.CHUNK_TEXT)
                    {
                        IFilterReturnCodes scode = 0;
                        while (scode == IFilterReturnCodes.S_OK || scode == IFilterReturnCodes.FILTER_S_LAST_TEXT)
                        {
                            uint pcwcBuffer = 65536;
                            System.Text.StringBuilder sbBuffer = new System.Text.StringBuilder((int)pcwcBuffer);

                            scode = (IFilterReturnCodes)filter.GetText(ref pcwcBuffer, sbBuffer);

                            // Kamil
                            // scode = (IFilterReturnCodes)filter.GetChunk( (ref pcwcBuffer, sbBuffer);

                            if (pcwcBuffer > 0 && sbBuffer.Length > 0)
                            {
                                if (sbBuffer.Length < pcwcBuffer) // Should never happen, but it happens !
                                {
                                    pcwcBuffer = (uint)sbBuffer.Length;
                                }

                                sb.Append(sbBuffer.ToString(0, (int)pcwcBuffer));
                                sb.Append(" "); // "\r\n"
                            }
                        }
                    }
                }
            }
            finally
            {
                if (filter != null)
                {
                    Marshal.ReleaseComObject(filter);
                    System.GC.Collect();
                    System.GC.WaitForPendingFinalizers();
                }
            }

            return(sb.ToString());
        }
示例#2
0
        public static string Extract(string path)
        {
            StringBuilder sb = new StringBuilder();
            IFilter filter = null;

            try
            {
                filter = loadIFilter(path);

                if (filter == null)
                    return String.Empty;

                uint i = 0;
                STAT_CHUNK ps = new STAT_CHUNK();

                IFILTER_INIT iflags =
                    IFILTER_INIT.CANON_HYPHENS |
                    IFILTER_INIT.CANON_PARAGRAPHS |
                    IFILTER_INIT.CANON_SPACES |
                    IFILTER_INIT.APPLY_CRAWL_ATTRIBUTES |
                    IFILTER_INIT.APPLY_INDEX_ATTRIBUTES |
                    IFILTER_INIT.APPLY_OTHER_ATTRIBUTES |
                    IFILTER_INIT.HARD_LINE_BREAKS |
                    IFILTER_INIT.SEARCH_LINKS |
                    IFILTER_INIT.FILTER_OWNED_VALUE_OK;

                if (filter.Init(iflags, 0, null, ref i) != (int)IFilterReturnCodes.S_OK)
                    throw new Exception("Problem initializing an IFilter for:\n" + path + " \n\n");

                while (filter.GetChunk(out ps) == (int)(IFilterReturnCodes.S_OK))
                {
                    if (ps.flags == CHUNKSTATE.CHUNK_TEXT)
                    {
                        IFilterReturnCodes scode = 0;
                        while (scode == IFilterReturnCodes.S_OK || scode == IFilterReturnCodes.FILTER_S_LAST_TEXT)
                        {
                            uint pcwcBuffer = 65536;
                            System.Text.StringBuilder sbBuffer = new System.Text.StringBuilder((int)pcwcBuffer);

                            scode = (IFilterReturnCodes)filter.GetText(ref pcwcBuffer, sbBuffer);

                            if (pcwcBuffer > 0 && sbBuffer.Length > 0)
                            {
                                if (sbBuffer.Length < pcwcBuffer) // Should never happen, but it happens !
                                    pcwcBuffer = (uint)sbBuffer.Length;

                                sb.Append(sbBuffer.ToString(0, (int)pcwcBuffer));
                                sb.Append(" "); // "\r\n"
                            }

                        }
                    }

                }
            }
            finally
            {
                if (filter != null)
                {
                    Marshal.ReleaseComObject(filter);
                    System.GC.Collect();
                    System.GC.WaitForPendingFinalizers();
                }
            }

            return sb.ToString();
        }