Esempio n. 1
0
 /// <summary>
 /// Extracts the text of a document, selecting the document handler from the file extension.
 /// </summary>
 /// <param name="filename">File name passed unchanged to the selected handler's <c>GetText</c>.</param>
 /// <param name="extension">
 /// File extension, with or without one leading dot. It is matched case-insensitively by prefix:
 /// "htm" selects <c>NTidyHTMLHandler</c>, "txt" selects <c>TextHandler</c>, "pdf" selects <c>PdfHandler</c>.
 /// </param>
 /// <returns>
 /// The text returned by the handler; an empty string when <paramref name="extension"/> is null or
 /// not recognized, or when an exception is raised (the exception is logged, not rethrown).
 /// </returns>
 public static string GetText(string filename, string extension)
 {
     try
     {
         // A missing extension cannot select a handler; answer directly instead of
         // relying on a NullReferenceException being caught and logged below.
         if (extension == null)
         {
             return string.Empty;
         }

         // Accept both "pdf" and ".pdf": drop a single leading dot, then compare prefixes.
         string ext = extension.StartsWith(".", StringComparison.Ordinal)
             ? extension.Substring(1)
             : extension;

         IDocumentHandler docHandler = null;
         if (ext.StartsWith("htm", StringComparison.OrdinalIgnoreCase))
         {
             docHandler = new NTidyHTMLHandler();
         }
         else if (ext.StartsWith("txt", StringComparison.OrdinalIgnoreCase))
         {
             docHandler = new TextHandler();
         }
         else if (ext.StartsWith("pdf", StringComparison.OrdinalIgnoreCase))
         {
             docHandler = new PdfHandler();
         }

         if (docHandler == null)
         {
             // Unsupported extension: nothing to extract.
             return string.Empty;
         }

         return docHandler.GetText(filename);
     }
     catch (Exception ex)
     {
         // Best effort: any handler failure is logged and reported as "no text".
         GXLogging.Error(log, "GetText error", ex);
         return string.Empty;
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Builds a preview of an object's text with the terms matching <paramref name="query"/>
        /// wrapped in <paramref name="preTag"/> / <paramref name="postTag"/>.
        /// </summary>
        /// <param name="obj">
        /// Source of the text. A <c>GxSilentTrnSdt</c> contributes <c>Transaction.ToString()</c>;
        /// a <c>GxFile</c> is read through <c>DocumentHandler.GetText</c> using its absolute name and
        /// extension; any other object contributes <c>ToString()</c>, passed through
        /// <c>NTidyHTMLHandler.GetTextFromString</c> when <paramref name="textType"/> starts with "htm".
        /// </param>
        /// <param name="query">Query string to highlight. When null or empty the text is returned as is.</param>
        /// <param name="textType">Type of the text held by a plain object; a case-insensitive "htm" prefix means HTML. Null is treated as non-HTML.</param>
        /// <param name="preTag">Markup placed before each highlighted term.</param>
        /// <param name="postTag">Markup placed after each highlighted term.</param>
        /// <param name="fragmentSize">Size passed to the <c>SimpleFragmenter</c>.</param>
        /// <param name="maxNumFragments">Maximum number of fragments requested from the highlighter.</param>
        /// <returns>
        /// The best fragments joined with "...", or the unmodified text when either the query or the
        /// text is null or empty.
        /// </returns>
        public static string HtmlPreview(Object obj, string query, string textType, string preTag, string postTag, int fragmentSize, int maxNumFragments)
        {
            string text;
            GxSilentTrnSdt silent = obj as GxSilentTrnSdt;
            GxFile file = obj as GxFile;

            if (silent != null)
            {
                text = silent.Transaction.ToString();
            }
            else if (file != null)
            {
                string absoluteName = file.GetAbsoluteName();
                text = DocumentHandler.GetText(absoluteName, System.IO.Path.GetExtension(absoluteName));
            }
            else if (textType != null && textType.StartsWith("htm", StringComparison.OrdinalIgnoreCase))
            {
                text = new NTidyHTMLHandler().GetTextFromString(obj.ToString());
            }
            else
            {
                text = obj.ToString();
            }

            // Nothing to highlight: hand the text back untouched.
            if (string.IsNullOrEmpty(query) || string.IsNullOrEmpty(text))
            {
                return text;
            }

            // NOTE(review): qp, reader and queries are shared fields used here without
            // synchronization - confirm that callers serialize access.
            if (qp == null)
            {
                qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_24, IndexRecord.CONTENTFIELD, Indexer.CreateAnalyzer());
                qp.DefaultOperator = QueryParser.Operator.AND;
                qp.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
            }

            Query parsedQuery = qp.Parse(query);

            // Fallback used when the rewritten query cannot be obtained. It must not be
            // handed to TryGetValue as the out argument: a cache miss would reset it to
            // null and a subsequent rewrite failure would leave the scorer with no query.
            Query q = parsedQuery;
            try
            {
                if (reader == null)
                {
                    reader = Indexer.Reader;
                }

                Query cachedQuery;
                if (queries.TryGetValue(query, out cachedQuery))
                {
                    q = cachedQuery;
                }
                else
                {
                    // Required to expand search terms (for the usage of highlighting with wildcards).
                    q = parsedQuery.Rewrite(reader);

                    if (queries.Count == int.MaxValue)
                    {
                        queries.Clear();
                    }
                    queries[query] = q;
                }
            }
            catch (Exception ex)
            {
                // Keep going with whatever q holds (the parsed query unless the rewrite succeeded).
                GXLogging.Error(log, "HTMLPreview error", ex);
            }

            QueryScorer scorer = new QueryScorer(q);
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(preTag, postTag);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.TextFragmenter = new SimpleFragmenter(fragmentSize);

            TokenStream tokenStream = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_24).TokenStream("Content", new StringReader(text));

            return highlighter.GetBestFragments(tokenStream, text, maxNumFragments, "...");
        }