Exemple #1
0
 /// <summary>
 /// Feeds the text of an HTML file resource to <paramref name="consumer"/>.
 /// </summary>
 /// <param name="resource">Resource whose stream is parsed.</param>
 /// <param name="consumer">Receiver of the extracted headings and fragments.</param>
 /// <returns>Always <c>true</c>.</returns>
 /// <remarks>
 /// When the resource has a "Source" link whose charset property is empty, the charset is detected
 /// from the stream, stored on the source asynchronously, and the stream is rewound before parsing.
 /// An <see cref="ObjectDisposedException"/> is answered by calling
 /// <c>Core.TextIndexManager.QueryIndexing</c> for the resource (presumably re-queuing it - confirm).
 /// </remarks>
 bool IResourceTextProvider.ProcessResourceText(IResource resource, IResourceTextConsumer consumer)
 {
     try
     {
         // A using statement over a null reference is a no-op, so the null test can live inside it.
         using (StreamReader reader = Core.FileResourceManager.GetStreamReader(resource))
         {
             if (reader != null)
             {
                 // for weblinks, detect & set charset if it is not set
                 IResource source = resource.GetLinkProp("Source");
                 if (source != null)
                 {
                     string charset = source.GetPropText(Core.FileResourceManager.PropCharset);
                     if (charset.Length == 0)
                     {
                         charset = HtmlTools.DetectCharset(reader);
                         new ResourceProxy(source).SetPropAsync(Core.FileResourceManager.PropCharset, charset);

                         // Rewind for the real parse. The reader keeps its own buffer, so it has to be
                         // dropped as well; otherwise already-buffered data is returned first and the
                         // rewound stream content follows it.
                         reader.BaseStream.Position = 0;
                         reader.DiscardBufferedData();
                     }
                 }
                 ProcessResourceStream(resource, source, reader, consumer);
             }
         }
     }
     catch (ObjectDisposedException)
     {
         Core.TextIndexManager.QueryIndexing(resource.Id);
     }
     return(true);
 }
Exemple #2
0
 /// <summary>
 /// Passes the long body of <paramref name="res"/> to the consumer, followed by the text of its
 /// "from" link property, which goes to the source section.
 /// </summary>
 /// <returns>Always <c>true</c>.</returns>
 public bool ProcessResourceText(IResource res, IResourceTextConsumer consumer)
 {
     int    bodyOwner = res.Id;
     string bodyText  = res.GetPropText(Core.Props.LongBody);
     consumer.AddDocumentFragment(bodyOwner, bodyText);

     int    fromOwner = res.Id;
     string fromText  = res.GetPropText(Core.ContactManager.Props.LinkFrom);
     consumer.AddDocumentFragment(fromOwner, fromText, DocumentSection.SourceSection);

     return true;
 }
Exemple #3
0
        //---------------------------------------------------------------------
        /// <summary>
        /// Converts a PDF file to plain text with the external <c>pdftotext.exe</c> tool and passes the
        /// produced text to <paramref name="consumer"/> as one fragment.
        /// </summary>
        /// <param name="ID">ID of the resource the text is reported for.</param>
        /// <param name="FileName">Path of the PDF file to convert.</param>
        /// <param name="consumer">Receiver of the extracted text.</param>
        /// <remarks>
        /// Any failure is written to <see cref="Debug"/> output only; no exception leaves this method.
        /// The intermediate output file always has the same name in the temp directory.
        /// NOTE(review): concurrent calls would therefore share that file - confirm callers are serialized.
        /// </remarks>
        protected static void    ProcessPDFFile(int ID, string FileName, IResourceTextConsumer consumer)
        {
            const string converterExe = "pdftotext.exe";

            Debug.WriteLine("Starting indexing: " + FileName);
            string workPath  = Path.GetTempPath();
            string outFile   = Path.Combine(workPath, "pdf2text.out");
            string arguments = string.Empty;

            try
            {
                // Both paths are quoted: the temp directory may contain spaces,
                // which would otherwise split the output path into several arguments.
                arguments = " -lowprio " + Utils.QuotedString(FileName) + " " + Utils.QuotedString(outFile);

                try
                {
                    using (Process process = new Process())
                    {
                        process.StartInfo.FileName         = converterExe;
                        process.StartInfo.Arguments        = arguments;
                        process.StartInfo.WorkingDirectory = workPath;
                        process.StartInfo.CreateNoWindow   = true;
                        process.StartInfo.UseShellExecute  = false;
                        if (!process.Start())
                        {
                            throw(new Exception("Aplication did not managed to call Start for the process with filename: " + FileName));
                        }
                        process.WaitForExit();
                    }

                    string Buffer;
                    using (StreamReader reader = new StreamReader(outFile))
                    {
                        Buffer = Utils.StreamReaderReadToEnd(reader);
                    }

                    consumer.AddDocumentFragment(ID, Buffer);
                }
                finally
                {
                    // Remove the intermediate file on failure too; File.Delete is a no-op when it is absent.
                    File.Delete(outFile);
                }
            }
            catch (Exception exc_)
            {
                Debug.WriteLine("Can not start process [" + converterExe + arguments + "] with reason " + exc_.Message);
            }
        }
Exemple #4
0
 /// <summary>
 /// Sends the text of a note to the consumer: the long body is run through an HTML parser and reported
 /// fragment by fragment (headings separately), then offset counting is restarted and the subject is
 /// reported as a heading.
 /// </summary>
 /// <returns>Always <c>true</c>; resources that are null or of another type are ignored.</returns>
 bool IResourceTextProvider.ProcessResourceText(IResource res, IResourceTextConsumer consumer)
 {
     if (res == null)
     {
         return true;
     }

     int noteId = res.Id;
     if (res.Type != _Note)
     {
         return true;
     }

     StringReader bodyReader = new StringReader(res.GetPropText(Core.Props.LongBody));
     using (HTMLParser html = new HTMLParser(bodyReader, true))
     {
         while (!html.Finished)
         {
             string piece = html.ReadNextFragment();
             if (piece.Length == 0)
             {
                 continue;   // nothing to report for an empty fragment
             }

             if (html.InHeading)
             {
                 consumer.AddDocumentHeading(res.Id, piece);
             }
             else
             {
                 consumer.AddDocumentFragment(res.Id, piece);
             }
         }
     }

     // The subject is reported after the body, with offsets counted from the start again.
     consumer.RestartOffsetCounting();
     consumer.AddDocumentHeading(noteId, res.GetPropText(Core.Props.Subject));
     return true;
 }
Exemple #5
0
        /// <summary>
        /// Reads <paramref name="reader"/> character by character and hands the text to
        /// <paramref name="consumer"/> in chunks.
        /// </summary>
        /// <remarks>
        /// A CR or LF that immediately follows a CR in the input is dropped. A chunk is flushed once it is
        /// longer than 4000 characters and the character just appended is whitespace or punctuation;
        /// whatever remains at the end of the stream is flushed last.
        /// </remarks>
        private void ProcessResourceStream(IResource resource, StreamReader reader, IResourceTextConsumer consumer)
        {
            const int carriageReturn = 0x0d;
            const int lineFeed       = 0x0a;

            StringBuilder chunk    = new StringBuilder();
            int           previous = 0;
            int           current;

            while ((current = reader.Read()) != -1)
            {
                bool isLineBreak   = current == lineFeed || current == carriageReturn;
                bool followsReturn = previous == carriageReturn;
                previous = current;

                if (isLineBreak && followsReturn)
                {
                    continue;   // second half of a line-break pair: skipped
                }

                char symbol = (char)current;
                chunk.Append(symbol);

                if (chunk.Length <= 4000)
                {
                    continue;
                }
                if (Char.IsWhiteSpace(symbol) || Char.IsPunctuation(symbol))
                {
                    consumer.AddDocumentFragment(resource.Id, chunk.ToString());
                    chunk.Length = 0;
                }
            }

            if (chunk.Length > 0)
            {
                consumer.AddDocumentFragment(resource.Id, chunk.ToString());
            }
        }
Exemple #6
0
        /// <summary>
        /// Delegates text extraction for e-mail and e-mail file resources to
        /// <c>ProcessResourceTextImpl</c>; resources of any other type are accepted without producing text.
        /// </summary>
        /// <returns>The result of <c>ProcessResourceTextImpl</c> for mail resources, otherwise <c>true</c>.</returns>
        bool IResourceTextProvider.ProcessResourceText(IResource res, IResourceTextConsumer consumer)
        {
            #region Preconditions
            Guard.NullArgument(res, "res");
            Guard.NullArgument(consumer, "consumer");
            #endregion Preconditions

            bool isMail = res.Type == STR.Email || res.Type == STR.EmailFile;

/*
 *          Disabled attachment handling, kept for reference:
 *
 *          IResource mail = res.GetLinkProp( PROP.Attachment );
 *          if( mail != null && mail.Type == STR.Email )
 *          {
 *              consumer.AddDocumentHeading( res.Id, res.GetPropText( Core.Props.Name ) );
 *              IResource resPerson = res.GetLinkProp(PROP.From);
 *              if (resPerson != null)
 *              {
 *                  consumer.AddDocumentFragment( res.Id, resPerson.DisplayName, DocumentSection.SourceSection );
 *              }
 *          }
 */
            // Non-mail resources succeed trivially; the implementation is only invoked for mail.
            return !isMail || ProcessResourceTextImpl(res, consumer);
        }
Exemple #7
0
 /// <summary>
 /// Reports the "Annotation" property of <paramref name="res"/>, when present, to the annotation
 /// section after restarting offset counting.
 /// </summary>
 /// <returns>Always <c>true</c>.</returns>
 public bool ProcessResourceText(IResource res, IResourceTextConsumer consumer)
 {
     if (!res.HasProp("Annotation"))
     {
         return true;
     }

     string annotation = res.GetStringProp("Annotation");
     consumer.RestartOffsetCounting();
     consumer.AddDocumentFragment(res.Id, annotation, DocumentSection.AnnotationSection);
     return true;
 }
Exemple #8
0
        /// <summary>
        /// Converts the RTF long body of <paramref name="res"/> to plain text through the shared converter
        /// text box and passes the result to <paramref name="consumer"/>.
        /// </summary>
        /// <remarks>
        /// The assignment to the converter and the read-back happen under a lock on this instance.
        /// NOTE(review): locking on <c>this</c> lets outside code contend for the same monitor; a private
        /// lock object would be safer but needs a field declared outside this method.
        /// </remarks>
        private void ProcessRTFFragment(IResource res, IResourceTextConsumer consumer)
        {
            string richBody = res.GetPropText(Core.Props.LongBody);

            lock (this)
            {
                _converterTextBox.RichText = richBody;

                int    ownerId   = res.Id;
                string plainBody = _converterTextBox.PlainText;
                consumer.AddDocumentFragment(ownerId, plainBody);
            }
        }
Exemple #9
0
 /// <summary>
 /// For every resource linked to <paramref name="res"/> through the Miranda account property, reports
 /// its nick name, screen name, Jabber ID and Yahoo ID (in that order) as fragments of <paramref name="res"/>.
 /// </summary>
 /// <returns>Always <c>true</c>.</returns>
 public bool ProcessResourceText(IResource res, IResourceTextConsumer consumer)
 {
     foreach (IResource account in res.GetLinksOfType(null, Props.MirandaAcct))
     {
         int[] identityProps = { Props.NickName, Props.ScreenName, Props.JabberId, Props.YahooId };
         for (int i = 0; i < identityProps.Length; i++)
         {
             string identity = account.GetStringProp(identityProps[i]);
             consumer.AddDocumentFragment(res.Id, identity);
         }
     }
     return true;
 }
Exemple #10
0
 /// <summary>
 /// Routes <paramref name="res"/> to the RTF handler when it carries the "long body is RTF" property
 /// and to the HTML handler otherwise.
 /// </summary>
 /// <returns>Always <c>true</c>.</returns>
 public bool ProcessResourceText(IResource res, IResourceTextConsumer consumer)
 {
     bool bodyIsRtf = res.HasProp(Core.Props.LongBodyIsRTF);
     if (!bodyIsRtf)
     {
         ProcessHTMLFragment(res, consumer);
         return true;
     }

     ProcessRTFFragment(res, consumer);
     return true;
 }
Exemple #11
0
 /// <summary>
 /// For contact resources that have a linked ICQ account, reports the account's nick name as a
 /// fragment of the contact.
 /// </summary>
 /// <returns>Always <c>true</c>.</returns>
 public bool ProcessResourceText(IResource res, IResourceTextConsumer consumer)
 {
     if (res.Type != _contactResName)
     {
         return true;
     }

     IResource icqAccount = res.GetLinkProp(_propICQAcct);
     if (icqAccount == null)
     {
         return true;
     }

     consumer.AddDocumentFragment(res.Id, icqAccount.GetPropText(_propNickName));
     return true;
 }
Exemple #12
0
        /// <summary>
        /// Builds the contact body text through <c>ContactBO</c> and, when it is not null, passes it to
        /// <paramref name="consumer"/> while holding a lock on the consumer.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        bool IResourceTextProvider.ProcessResourceText(IResource res, IResourceTextConsumer consumer)
        {
            ContactBO contact     = new ContactBO(res);
            string    contactText = contact.ContactBody;
            if (contactText == null)
            {
                return true;
            }

            lock ( consumer )
            {
                consumer.AddDocumentFragment(res.Id, contactText);
            }
            return true;
        }
Exemple #13
0
        /// <summary>
        /// When <paramref name="res"/> has a "Source" link of type "Weblink" with a non-empty name,
        /// reports that name as a heading of <paramref name="res"/>.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        public bool ProcessResourceText(IResource res, IResourceTextConsumer consumer)
        {
            IResource origin = res.GetLinkProp("Source");
            if (origin == null || origin.Type != "Weblink")
            {
                return true;
            }

            string linkName = origin.GetPropText(Core.Props.Name);
            if (linkName.Length != 0)
            {
                consumer.AddDocumentHeading(res.Id, linkName);
            }
            return true;
        }
Exemple #14
0
        /// <summary>
        /// Reports the name of <paramref name="res"/> as a heading and, when a "from" contact is linked,
        /// that contact's display name to the source section.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        public bool ProcessResourceText(IResource res, IResourceTextConsumer consumer)
        {
            consumer.AddDocumentHeading(res.Id, res.GetPropText(Core.Props.Name));

            IResource sender = res.GetLinkProp(Core.ContactManager.Props.LinkFrom);
            if (sender == null)
            {
                return true;
            }

            consumer.AddDocumentFragment(res.Id, sender.DisplayName, DocumentSection.SourceSection);
            return true;
        }
Exemple #15
0
        /// <summary>
        /// Produces the indexable text for news resources.
        /// </summary>
        /// <remarks>
        /// <para>For NNTP article types: the long body is reported (or, when it is blank, the HTML content
        /// is indexed into the body section), offset counting is restarted, the subject is reported as a
        /// heading, and the author (plus account) and the newsgroup names go to the source section.</para>
        /// <para>For other resources: when the resource is attached to an NNTP article, its name is
        /// reported as a heading.</para>
        /// </remarks>
        /// <returns>Always <c>true</c>; a null resource is ignored.</returns>
        bool IResourceTextProvider.ProcessResourceText(IResource res, IResourceTextConsumer consumer)
        {
            if (res == null)
            {
                return true;
            }

            int id = res.Id;

            if (!NntpPlugin.IsNntpType(res.Type))
            {
                // Not an article itself: it may be an attachment of one.
                IResource owner = res.GetLinkProp(NntpPlugin._propAttachment);
                if (owner != null && NntpPlugin.IsNntpType(owner.Type))
                {
                    consumer.AddDocumentHeading(id, res.GetPropText(Core.Props.Name));
                }
                return true;
            }

            string body = res.GetPropText(Core.Props.LongBody);
            if (body.Trim().Length == 0)
            {
                HtmlIndexer.IndexHtml(res, res.GetPropText(NntpPlugin._propHtmlContent), consumer, DocumentSection.BodySection);
            }
            else
            {
                consumer.AddDocumentFragment(id, body);
            }

            consumer.RestartOffsetCounting();
            consumer.AddDocumentHeading(id, res.GetPropText(Core.Props.Subject));

            IResource sender        = res.GetLinkProp(Core.ContactManager.Props.LinkFrom);
            IResource senderAccount = res.GetLinkProp(Core.ContactManager.Props.LinkEmailAcctFrom);
            if (sender != null)
            {
                //  The [From] section is the contact name, optionally followed by its account
                string from = senderAccount == null
                    ? sender.DisplayName
                    : sender.DisplayName + " " + senderAccount.DisplayName;
                consumer.AddDocumentFragment(id, from + " ", DocumentSection.SourceSection);
            }

            IResourceList newsgroups = res.GetLinksOfType(NntpPlugin._newsGroup, NntpPlugin._propTo);
            foreach (IResource newsgroup in newsgroups)
            {
                string groupName = newsgroup.GetPropText(Core.Props.Name);
                consumer.AddDocumentFragment(id, groupName + " ", DocumentSection.SourceSection);
            }
            return true;
        }
Exemple #16
0
 /// <summary>
 /// Obtains the mail body descriptor of <paramref name="res"/> through the Outlook processor and
 /// reports sender, subject and body to <paramref name="consumer"/>.
 /// </summary>
 /// <returns>
 /// <c>false</c> when an <c>OutlookThreadTimeoutException</c> occurred; <c>true</c> otherwise, including
 /// the cases where no processor or descriptor is available or the core is shutting down.
 /// </returns>
 static bool ProcessResourceTextImpl(IResource res, IResourceTextConsumer consumer)
 {
     OutlookProcessor.CheckState();
     try
     {
         OutlookProcessor outlook = OutlookSession.OutlookProcessor;
         if (outlook == null)
         {
             return true;
         }

         MailBodyDescriptorDelegate describeBody = CreateMailBodyDescriptor;
         MailBodyDescriptor         descriptor   = (MailBodyDescriptor)outlook.RunUniqueJob(describeBody, res);
         if (descriptor == null || Core.State == CoreState.ShuttingDown)
         {
             return true;
         }

         //  Sections are emitted in this order: Source, Subject, Body.
         IResource sender        = res.GetLinkProp(Core.ContactManager.Props.LinkFrom);
         IResource senderAccount = res.GetLinkProp(PROP.EmailAccountFrom);
         if (sender != null)
         {
             //  The [From] section is the contact name, optionally followed by its account
             string from = senderAccount == null
                 ? sender.DisplayName
                 : sender.DisplayName + " " + senderAccount.DisplayName;
             consumer.AddDocumentFragment(res.Id, from, DocumentSection.SourceSection);
         }

         consumer.AddDocumentHeading(res.Id, descriptor.Subject);
         consumer.RestartOffsetCounting();

         if (!descriptor.IsHTML)
         {
             consumer.AddDocumentFragment(res.Id, descriptor.Body.Replace("\r\n", "\n"));
         }
         else
         {
             HtmlIndexer.IndexHtml(res, descriptor.Body, consumer, DocumentSection.BodySection);
         }
         return true;
     }
     catch (OutlookThreadTimeoutException)
     {
         // When the request was made for indexing, ask for the mail to be indexed again later.
         if (consumer.Purpose == TextRequestPurpose.Indexing)
         {
             Guard.QueryIndexingWithCheckId(res);
         }
         return false;
     }
 }
Exemple #17
0
            /// <summary>
            /// Reports a book resource: its "Name" to the subject section, the display name of every linked
            /// author to the source section, and finally its ISBN as a plain fragment.
            /// </summary>
            /// <returns>Always <c>true</c>.</returns>
            public bool ProcessResourceText(IResource res, IResourceTextConsumer consumer)
            {
                string bookName = res.GetPropText("Name");
                consumer.AddDocumentFragment(res.Id, bookName, DocumentSection.SubjectSection);

                foreach (IResource writer in res.GetLinksOfType(null, PropTypes.BookAuthor))
                {
                    string writerName = writer.DisplayName;
                    consumer.AddDocumentFragment(res.Id, writerName, DocumentSection.SourceSection);
                }

                string isbn = res.GetPropText(PropTypes.Isbn);
                consumer.AddDocumentFragment(res.Id, isbn);

                return true;
            }
Exemple #18
0
        /// <summary>
        /// Reports a task: a non-empty subject becomes a heading, a non-empty description becomes a fragment.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        bool IResourceTextProvider.ProcessResourceText(IResource task, IResourceTextConsumer consumer)
        {
            string heading = task.GetPropText(Core.Props.Subject);
            if (heading.Length != 0)
            {
                consumer.AddDocumentHeading(task.Id, heading);
            }

            string details = task.GetPropText(_propDescription);
            if (details.Length != 0)
            {
                consumer.AddDocumentFragment(task.Id, details);
            }

            return true;
        }
Exemple #19
0
        /// <summary>
        /// Extracts the text of a PDF file resource by obtaining its source file, running
        /// <c>ProcessPDFFile</c> on it and then cleaning the source file up.
        /// </summary>
        /// <returns>
        /// <c>false</c> when the request is for context extraction and the "Size" property exceeds
        /// <c>MaxFileSize</c>; <c>true</c> otherwise.
        /// </returns>
        bool IResourceTextProvider.ProcessResourceText(IResource resource, IResourceTextConsumer consumer)
        {
            Debug.Assert(resource.Type == "PdfFile", "PDFPlugin doesn't process resources of type " + resource.Type);

            if (consumer.Purpose == TextRequestPurpose.ContextExtraction)
            {
                // The size is only consulted for context extraction requests.
                if (resource.GetIntProp("Size") > MaxFileSize)
                {
                    return false;
                }
            }

            string pdfPath = Core.FileResourceManager.GetSourceFile(resource);
            if (pdfPath == null)
            {
                return true;
            }

            ProcessPDFFile(resource.Id, pdfPath, consumer);
            Core.FileResourceManager.CleanupSourceFile(resource, pdfPath);
            return true;
        }
Exemple #20
0
 /// <summary>
 /// Opens a stream reader for <paramref name="resource"/> and, when one is available, passes it to
 /// <c>ProcessResourceStream</c>; the reader is disposed afterwards.
 /// </summary>
 /// <remarks>
 /// An <see cref="ObjectDisposedException"/> is answered by calling
 /// <c>Core.TextIndexManager.QueryIndexing</c> with the resource ID.
 /// </remarks>
 /// <returns>Always <c>true</c>.</returns>
 bool IResourceTextProvider.ProcessResourceText(IResource resource, IResourceTextConsumer consumer)
 {
     try
     {
         // A using statement over a null reference is a no-op, so the null test can live inside it.
         using (StreamReader text = Core.FileResourceManager.GetStreamReader(resource))
         {
             if (text != null)
             {
                 ProcessResourceStream(resource, text, consumer);
             }
         }
     }
     catch (ObjectDisposedException)
     {
         Core.TextIndexManager.QueryIndexing(resource.Id);
     }
     return true;
 }
Exemple #21
0
        /// <summary>
        /// Extracts the text of an Excel document by running the external converter on its source file.
        /// </summary>
        /// <param name="res">Resource describing the document.</param>
        /// <param name="consumer">Receiver of the extracted text.</param>
        /// <returns>
        /// <c>false</c> when the request is for context extraction or the source file does not exist;
        /// <c>true</c> otherwise - including conversion failures, which are recorded in the
        /// <c>LastError</c> property of the resource instead of being thrown.
        /// </returns>
        bool IResourceTextProvider.ProcessResourceText(IResource res, IResourceTextConsumer consumer)
        {
            //  Forbid extraction of the excel document body in the context
            //  of "search context extraction" because it is too slow and
            //  can cause indefinite memory consumption.
            if (consumer.Purpose == TextRequestPurpose.ContextExtraction)
            {
                return(false);
            }

            try
            {
                string fileName = Core.FileResourceManager.GetSourceFile(res);
                if (fileName != null)
                {
                    if (!File.Exists(fileName))
                    {
                        return(false);
                    }
                    Trace.WriteLine("Indexing Excel document file " + fileName);
                    try
                    {
                        RunConverter(fileName, consumer, res);
                    }
                    finally
                    {
                        //  Release the source file even when the converter fails; otherwise
                        //  every failed conversion leaves the file behind.
                        Core.FileResourceManager.CleanupSourceFile(res, fileName);
                    }

                    //  If we managed to successfully retrieve the text from the
                    //  doc file (e.g. using newer version of the convertor) -
                    //  clean the [possibly] assigned error sign.
                    new ResourceProxy(res).DeleteProp(Core.Props.LastError);
                }
            }
            catch (Exception e)
            {
                //  If the conversion process failed (whatever the reason is) -
                //  remember the error text and show it in the DisplayPane
                //  instead of the actual content.
                new ResourceProxy(res).SetProp(Core.Props.LastError, e.Message);
                Trace.WriteLine("LastError for id=" + res.Id + " - " + e.Message);
            }
            return(true);
        }
Exemple #22
0
 /// <summary>
 /// Indexes a conversation as a sequence of fragments, one per message: the XML produced by
 /// <c>ToString(convs)</c> is scanned and the "body" attribute of every element that has one is
 /// reported to <paramref name="consumer"/>.
 /// </summary>
 /// <param name="convs">Conversation resource.</param>
 /// <param name="consumer">Receiver of the message bodies.</param>
 /// <returns>Always <c>true</c>; malformed XML ends the scan silently.</returns>
 public bool ProcessResourceText(IResource convs, IResourceTextConsumer consumer)
 {
     try
     {
         // Disposing the XML reader also closes the underlying string reader.
         using (XmlTextReader reader = new XmlTextReader(new StringReader(ToString(convs))))
         {
             while (reader.Read())
             {
                 if (reader.NodeType == XmlNodeType.Element)
                 {
                     if (reader.MoveToAttribute("body"))
                     {
                         consumer.AddDocumentFragment(convs.Id, reader.Value);
                     }
                     // Return from the attribute to its element before the next Read().
                     reader.MoveToElement();
                 }
             }
         }
     }
     catch (XmlException)
     {
         // Malformed conversation XML: fragments reported so far are kept, the rest is skipped.
     }
     return(true);
 }
Exemple #23
0
        /// <summary>
        /// Runs the external converter on <paramref name="fileName"/>, reads its standard output as UTF-8
        /// HTML and indexes that HTML into the body section of <paramref name="res"/>.
        /// </summary>
        /// <param name="fileName">Path of the document to convert.</param>
        /// <param name="consumer">Receiver of the indexed text.</param>
        /// <param name="res">Resource the text belongs to.</param>
        /// <exception cref="Exception">
        /// The converter exited with a non-zero exit code; the message contains its standard error output.
        /// </exception>
        /// <remarks>
        /// A converter that cannot be started is ignored silently.
        /// NOTE(review): standard output is read to the end before standard error; a converter that
        /// writes a lot to standard error first could block - confirm whether that can happen.
        /// </remarks>
        internal static void RunConverter(string fileName, IResourceTextConsumer consumer, IResource res)
        {
            // The process object owns OS handles and redirected streams, so release it on every path.
            using (Process process = CreateConverterProcess(fileName, true))
            {
                try
                {
                    if (!process.Start())
                    {
                        throw new Exception();
                    }
                }
                catch
                {
                    return;
                }
                try
                {
                    Encoding     utf8         = new UTF8Encoding(false, false);
                    StreamReader outputReader = new StreamReader(process.StandardOutput.BaseStream, utf8);

                    string content = Utils.StreamReaderReadToEnd(outputReader);

                    consumer.RestartOffsetCounting();                       // Just in case ;)
                    HtmlIndexer.IndexHtml(res, content, consumer, DocumentSection.BodySection);
                }
                finally
                {
                    // Runs even when indexing failed, so a converter error takes precedence over it.
                    string stderr = Utils.StreamReaderReadToEnd(process.StandardError);
                    process.WaitForExit();
                    if (process.ExitCode != 0)
                    {
                        throw new Exception(_converterName + " (Excel-to-HTML) has performed an invalid operation while converting \"" + fileName + "\". " + stderr);
                    }
                }
            }
        }
Exemple #24
0
        /// <summary>
        /// Performs indexing of an HTML text for the specified resource, providing that the offsets stored in the text index correspond to the offsets in the source HTML representation.
        /// </summary>
        /// <param name="resourceId">ID of the resource for which the indexing is being performed.</param>
        /// <param name="html">Html text to be indexed.</param>
        /// <param name="consumer">Consumer that would receive the tokens for indexing.</param>
        /// <param name="section">Document section to which the content being indexed belongs, see <see cref="DocumentSection"/> for some possible values. Passing <c>null</c> implies the <see cref="DocumentSection.BodySection"/>.</param>
        /// <remarks>
        /// <para>The indexer extracts plaintext contents from the HTML data and passes to the consumer, ensuring that offsets in the indexed content correspond to the offsets in the HTML text.</para>
        /// <para>If you have indexed other sections before, you should restart the offsets counting by calling the <see cref="IResourceTextConsumer.RestartOffsetCounting"/> manually. This function does not assume that offsets should be reset.</para>
        /// <para>When the text lacks an <c>&lt;html&gt;</c> or a <c>&lt;body</c> tag it is wrapped into <c>&lt;html&gt;&lt;body&gt;…&lt;/body&gt;&lt;/html&gt;</c> before parsing, and the length of the prepended part is compensated for in the reported offsets.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="html"/> is <c>null</c>.</exception>
        public static void IndexHtml(int resourceId, string html, IResourceTextConsumer consumer, string section)
        {
            if (html == null)
            {
                throw new ArgumentNullException("html", "HTML body must not be null.");
            }

            int nPrependedChars = 0; // Number of characters added to the content by this method

            // Check the section
            if (section == null)
            {
                section = DocumentSection.BodySection;
            }

            // Add a body tag if it's absent, because it's needed for the HTML parser to mark content as body part content
            if (Utils.IndexOf(html, "<html>", true) < 0 || Utils.IndexOf(html, "<body", true) < 0) // Case-insensitive check
            {
                html             = "<html><body>" + html + "</body></html>";                       // Add this stuff. The problem is that we cannot process correctly the HTML fragments that are not equipped with a <body/> tag
                nPrependedChars += "<html><body>".Length;
            }

            using (HTMLParser parser = new HTMLParser(new StringReader(html)))
            {
                // Breaking fragments into words provides that for each word the offset is guaranteed to be valid
                // Otherwise, after the first entity-reference within the block it would have been shifted from the proper value
                parser.BreakWords = true;

                // The parameter is already declared as IResourceTextConsumer, so this cast yields null only when consumer itself is null
                IResourceTextConsumer consumer2 = consumer as IResourceTextConsumer;
                Debug.Assert(consumer2 != null);          // We should succeed (more or less) even if the consumer passed in does not implement the needed interface
                int    nBeforeHtmlWord;                   // Positioned before the current HTML word in the HTML stream
                int    nAfterHtmlWord  = nPrependedChars; // Positioned after the current HTML word in the HTML stream. Seed by positioning after the prepended content
                int    nWordDifference = 0;               // Difference in the length of the HTML and text representation of the current word, given by nAfterHtmlWord - nBeforeHtmlWord - fragment.Length
                string fragment;
                while (!parser.Finished)
                {
                    fragment = parser.ReadNextFragment(out nBeforeHtmlWord);
                    if (fragment.Length > 0)  // Zero-length fragments are completely ignored
                    {
                        // Adjust the offset
                        if
                        (
                            (consumer2 != null) // The consumer is capable of increasing the offset
                            &&
                            (                   // Increment offsets for indexing and context extraction only
                                (consumer.Purpose == TextRequestPurpose.Indexing) ||
                                (consumer.Purpose == TextRequestPurpose.ContextExtraction)
                            ) &&
                            (nBeforeHtmlWord - nAfterHtmlWord + nWordDifference != 0)           // Prevent from making dummy calls
                        )
                        {
                            consumer2.IncrementOffset(nBeforeHtmlWord - nAfterHtmlWord + nWordDifference);   // For nBeforeHtmlWord, we use the current value (for the current word), nAfterHtmlWord and nWordDifference are taken from the previous step and provide for calculating the introduced difference between the text and HTML representations caused by both entities substitution in the word (nWordDifference) and HTML tags skipped in between (nBeforeHtmlWord - nAfterHtmlWord)
                        }
                        // Process next word
                        consumer.AddDocumentFragment(resourceId, fragment, section);

                        // Adjust pointers: remember where this word ended and how much longer its HTML form was than its text form, for use on the next iteration
                        nAfterHtmlWord  = parser.Position;
                        nWordDifference = nAfterHtmlWord - nBeforeHtmlWord - fragment.Length;
                    }
                }
            }
        }
Exemple #25
0
 /// <summary>
 /// Indexes the long body of <paramref name="res"/> as HTML; the section argument is passed as
 /// <c>null</c>.
 /// </summary>
 private static void ProcessHTMLFragment(IResource res, IResourceTextConsumer consumer)
 {
     string htmlBody = res.GetPropText(Core.Props.LongBody);
     HtmlIndexer.IndexHtml(res, htmlBody, consumer, null);
 }
Exemple #26
0
 /// <summary>
 /// Parses HTML from <paramref name="reader"/> and reports its headings and fragments for
 /// <paramref name="resource"/>; afterwards, a "Weblink" <paramref name="source"/> whose name is empty
 /// or is a prefix of its sanitized URL is renamed to the title of the parsed page.
 /// </summary>
 /// <remarks>
 /// The reader is left open (the parser's <c>CloseReader</c> is switched off). The currently indexed
 /// resource is tracked in <c>_currentIndexedRes</c> for the duration of the call.
 /// </remarks>
 private void ProcessResourceStream(IResource resource, IResource source, TextReader reader,
                                    IResourceTextConsumer consumer)
 {
     _currentIndexedRes = resource;
     try
     {
         using (HTMLParser html = new HTMLParser(reader))
         {
             html.CloseReader = false;
             html.AddTagHandler("link", LinkHandler);

             int resourceId = resource.Id;
             while (!html.Finished)
             {
                 string text = html.ReadNextFragment();
                 if (text.Length == 0)
                 {
                     continue;
                 }

                 if (html.InHeading)
                 {
                     consumer.AddDocumentHeading(resourceId, text);
                 }
                 else
                 {
                     consumer.AddDocumentFragment(resourceId, text);
                 }
             }

             // Everything below only concerns weblink sources handled by the bookmark service.
             if (source == null || source.Type != "Weblink")
             {
                 return;
             }

             IBookmarkService bookmarks = (IBookmarkService)Core.PluginLoader.GetPluginService(typeof(IBookmarkService));
             if (bookmarks == null)
             {
                 return;
             }

             string name = source.GetPropText(Core.Props.Name);

             // Build the URL without its scheme prefix ("http://" and "file://" have the same length).
             string url = string.Empty;
             if (Core.ResourceStore.PropTypes.Exist("URL"))
             {
                 url = source.GetPropText("URL");
                 if (url.StartsWith("http://") || url.StartsWith("file://"))
                 {
                     url = url.Substring("http://".Length);
                 }
                 else if (url.StartsWith("ftp://"))
                 {
                     url = url.Substring("ftp://".Length);
                 }
             }

             // Characters that are invalid in paths are replaced by dashes.
             char[] invalidChars = Path.GetInvalidPathChars();
             if (url.IndexOfAny(invalidChars) >= 0)
             {
                 foreach (char invalidChar in invalidChars)
                 {
                     url = url.Replace(invalidChar, '-');
                 }
             }

             // A non-empty name that does not start the URL is left untouched.
             if (name.Length != 0 && !url.StartsWith(name))
             {
                 return;
             }

             string title = html.Title.Trim();
             if (title.Length == 0)
             {
                 return;
             }

             IBookmarkProfile profile = bookmarks.GetOwnerProfile(source);
             string           error;
             if (profile != null && profile.CanRename(source, out error))
             {
                 profile.Rename(source, title);
                 bookmarks.SetName(source, title);
             }
         }
     }
     finally
     {
         _currentIndexedRes = null;
     }
 }
Exemple #27
0
        /// <summary>
        /// Extracts the text of a Word or RTF document: RTF files are parsed in-process, anything else is
        /// converted through <c>RunWvWare</c>.
        /// </summary>
        /// <param name="res">Resource describing the document.</param>
        /// <param name="consumer">Receiver of the extracted text.</param>
        /// <returns>
        /// <c>false</c> when the request is for context extraction, when the file type cannot be probed
        /// (<see cref="IOException"/> / <see cref="UnauthorizedAccessException"/>) or when the RTF file
        /// cannot be opened; <c>true</c> otherwise - including conversion failures, which are recorded in
        /// the <c>LastError</c> property of the resource instead of being thrown.
        /// </returns>
        bool IResourceTextProvider.ProcessResourceText(IResource res, IResourceTextConsumer consumer)
        {
            //  Forbid extraction of the word document body in the context
            //  of "search context extraction" because it is too slow and
            //  can cause indefinite memory consumption.
            if (consumer.Purpose == TextRequestPurpose.ContextExtraction)
            {
                return(false);
            }

            string fileName = Core.FileResourceManager.GetSourceFile(res);

            if (!string.IsNullOrEmpty(fileName))
            {
                bool isRtf;
                try
                {
                    isRtf = IsRtfFile(fileName);
                }
                catch (IOException)                   // see #4335
                {
                    return(false);
                }
                catch (UnauthorizedAccessException)                   // see #6126
                {
                    return(false);
                }

                try
                {
                    //  Opening the RTF file stays outside the cleanup scope below so that
                    //  an unopenable file is reported exactly as before (return false).
                    FileStream fileStream = null;
                    if (isRtf)
                    {
                        Trace.WriteLine("Indexing rich text file " + fileName);
                        try
                        {
                            fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                        }
                        catch (IOException ex)                               // OM-7843
                        {
                            Trace.WriteLine("Error opening RTF file for indexing: " + ex.Message);
                            return(false);
                        }
                    }

                    try
                    {
                        if (isRtf)
                        {
                            string content;
                            //  The stream is guarded separately so that it is closed even if
                            //  constructing the reader fails.
                            using (fileStream)
                            using (StreamReader reader = new StreamReader(fileStream, Encoding.Default))
                            {
                                RTFParser parser = new RTFParser();
                                try
                                {
                                    content = parser.Parse(reader);
                                }
                                catch (Exception ex)
                                {
                                    throw new Exception("Error parsing RTF file " + fileName, ex);
                                }
                            }
                            consumer.AddDocumentFragment(res.Id, content);
                        }
                        else
                        {
                            Trace.WriteLine("Indexing Word document file " + fileName);
                            string text = RunWvWare(fileName, "wvText.xml", true);
                            consumer.AddDocumentFragment(res.Id, text);
                        }
                    }
                    finally
                    {
                        //  Release the source file even when the conversion fails; otherwise
                        //  every failed conversion leaves the file behind.
                        Core.FileResourceManager.CleanupSourceFile(res, fileName);
                    }

                    //  If we managed to successfully retrieve the text from the
                    //  doc file (e.g. using newer version of the convertor) -
                    //  clean the [possibly] assigned error sign.
                    new ResourceProxy(res).DeleteProp(Core.Props.LastError);
                }
                catch (Exception e)
                {
                    //  If the conversion process failed (whatever the reason is) -
                    //  remember the error text and show it in the DisplayPane
                    //  instead of the actual content.
                    new ResourceProxy(res).SetProp(Core.Props.LastError, e.Message);
                    Trace.WriteLine("LastError for id=" + res.Id + " - " + e.Message);
                }
            }
            return(true);
        }
Exemple #28
0
        /// <summary>
        /// Runs the text providers stored for the type of <paramref name="res"/>, followed by the
        /// generic text providers, passing each of them the given <paramref name="consumer"/>.
        /// </summary>
        /// <param name="res">Resource whose text is to be processed.</param>
        /// <param name="consumer">Consumer handed to every provider that is invoked.</param>
        /// <remarks>
        /// <para>Before any provider is invoked, every provider (type-specific and generic) that also
        /// implements <see cref="IResourceTextIndexingPermitter"/> is queried; if any of them refuses
        /// the resource, the method returns without invoking a provider and without rejecting the result.</para>
        /// <para>As soon as one provider returns <c>false</c>, the remaining providers are not invoked
        /// and <c>consumer.RejectResult()</c> is called at the end.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="res"/> or <paramref name="consumer"/> is <c>null</c>.
        /// </exception>
        public void InvokeResourceTextProviders(IResource res, IResourceTextConsumer consumer)
        {
            #region Preconditions
            //  The (paramName, message) overload is used on purpose: the single-string
            //  constructor treats its argument as the parameter name, not as the message.
            if (res == null)
            {
                throw new ArgumentNullException("res", "PluginLoader -- Resource is null.");
            }

            if (consumer == null)
            {
                throw new ArgumentNullException("consumer", "PluginLoader -- IResourceTextConsumer is null.");
            }
            #endregion Preconditions

            bool      isSuccess    = true;
            ArrayList providerList = (ArrayList)_resourceTextProviders [res.Type];

            //  Pass 1: give every permitter a chance to veto processing of this resource.
            if (providerList != null)
            {
                lock ( providerList )
                {
                    foreach (IResourceTextProvider provider in providerList)
                    {
                        IResourceTextIndexingPermitter permitter = provider as IResourceTextIndexingPermitter;
                        if (permitter != null && !permitter.CanIndexResource(res))
                        {
                            return;
                        }
                    }
                }
            }
            lock ( _genericResourceTextProviders )
            {
                foreach (IResourceTextProvider provider in _genericResourceTextProviders)
                {
                    IResourceTextIndexingPermitter permitter = provider as IResourceTextIndexingPermitter;
                    if (permitter != null && !permitter.CanIndexResource(res))
                    {
                        return;
                    }
                }
            }

            //  Pass 2: run the providers. Because of the short-circuiting "&&", a provider
            //  is only invoked while all the previous ones have reported success.
            if (providerList != null)
            {
                lock ( providerList )
                {
                    foreach (IResourceTextProvider provider in providerList)
                    {
                        isSuccess = isSuccess && provider.ProcessResourceText(res, consumer);
                    }
                }
            }
            lock ( _genericResourceTextProviders )
            {
                foreach (IResourceTextProvider provider in _genericResourceTextProviders)
                {
                    isSuccess = isSuccess && provider.ProcessResourceText(res, consumer);
                }
            }

            //  At least one provider reported failure - tell the consumer to reject the result.
            if (!isSuccess)
            {
                consumer.RejectResult();
            }
        }
Exemple #29
0
 /// <summary>
 /// Performs indexing of an HTML text for the specified resource, provided that the offsets stored in the text index correspond to the offsets in the source HTML representation.
 /// </summary>
 /// <param name="res">Resource for which the indexing is being performed; only its <c>Id</c> is used by this overload.</param>
 /// <param name="html">HTML text to be indexed.</param>
 /// <param name="consumer">Consumer that would receive the tokens for indexing.</param>
 /// <param name="section">Document section to which the content being indexed belongs, see <see cref="DocumentSection"/> for some possible values. Passing <c>null</c> implies the <see cref="DocumentSection.BodySection"/>.</param>
 /// <remarks>
 /// <para>The indexer extracts plaintext contents from the HTML data and passes them to the consumer, ensuring that offsets in the indexed content correspond to the offsets in the HTML text.</para>
 /// <para>If you have indexed other sections before, you should restart the offset counting by calling <see cref="IResourceTextConsumer.RestartOffsetCounting"/> manually. This function does not assume that offsets should be reset.</para>
 /// </remarks>
 public static void IndexHtml(IResource res, string html, IResourceTextConsumer consumer, string section)
 {
     // Convenience overload: forwards to the overload that takes the resource ID.
     IndexHtml(res.Id, html, consumer, section);
 }
Exemple #30
0
 /// <summary>
 /// Passes the result of <c>GetResourceText</c> for the resource to the consumer
 /// as a single document fragment tagged with the resource's ID.
 /// </summary>
 /// <param name="resource">Resource whose text is supplied to the consumer.</param>
 /// <param name="consumer">Consumer that receives the document fragment.</param>
 /// <returns>Always <c>true</c>.</returns>
 bool IResourceTextProvider.ProcessResourceText(IResource resource, IResourceTextConsumer consumer)
 {
     consumer.AddDocumentFragment(resource.Id, GetResourceText(resource));

     // This provider never reports a failure.
     return true;
 }