/// <summary>
/// Extracts the text of a document by choosing a handler from its extension.
/// </summary>
/// <param name="filename">Path of the document, passed unchanged to the selected handler.</param>
/// <param name="extension">
/// Extension used to pick the handler, with or without a single leading dot.
/// Matching is by prefix and ignores case, so "htm", ".HTML", "txt" and ".pdf" are all recognised.
/// </param>
/// <returns>
/// The text returned by the handler, or an empty string when the extension is
/// null, empty or not recognised, or when the handler throws.
/// </returns>
public static string GetText(string filename, string extension)
{
    try
    {
        IDocumentHandler docHandler = null;
        if (HasExtensionPrefix(extension, "htm"))
        {
            docHandler = new NTidyHTMLHandler();
        }
        else if (HasExtensionPrefix(extension, "txt"))
        {
            docHandler = new TextHandler();
        }
        else if (HasExtensionPrefix(extension, "pdf"))
        {
            docHandler = new PdfHandler();
        }

        if (docHandler == null)
        {
            // Unsupported (or missing) extension: nothing to extract.
            return "";
        }

        return docHandler.GetText(filename);
    }
    catch (Exception ex)
    {
        // Best effort: a failing handler is logged and reported as "no text".
        GXLogging.Error(log, "GetText error", ex);
        return "";
    }
}

/// <summary>
/// Tells whether <paramref name="extension"/> starts with <paramref name="prefix"/>,
/// optionally preceded by one dot, using an ordinal case-insensitive comparison.
/// </summary>
private static bool HasExtensionPrefix(string extension, string prefix)
{
    if (string.IsNullOrEmpty(extension))
    {
        return false;
    }

    // Ordinal comparison: extensions are identifiers, not linguistic text, so the
    // current culture must not influence which handler gets selected.
    return extension.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)
        || extension.StartsWith("." + prefix, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Builds a preview of an object's text in which the fragments that best match
/// <paramref name="query"/> are wrapped in the given highlight tags.
/// </summary>
/// <param name="obj">
/// Source of the text: a <c>GxSilentTrnSdt</c> (its <c>Transaction.ToString()</c> is used),
/// a <c>GxFile</c> (its content is read through <c>DocumentHandler.GetText</c>),
/// or any other object (its <c>ToString()</c> is used). A null value is treated as empty text.
/// </param>
/// <param name="query">Query string to highlight. When null or empty, the plain text is returned.</param>
/// <param name="textType">
/// When it starts with "htm" (case-insensitive) and <paramref name="obj"/> is neither a
/// <c>GxSilentTrnSdt</c> nor a <c>GxFile</c>, the object's string is first passed through
/// <c>NTidyHTMLHandler.GetTextFromString</c>. May be null.
/// </param>
/// <param name="preTag">Markup inserted before each highlighted term.</param>
/// <param name="postTag">Markup inserted after each highlighted term.</param>
/// <param name="fragmentSize">Fragment size passed to <c>SimpleFragmenter</c>.</param>
/// <param name="maxNumFragments">Maximum number of fragments requested from the highlighter.</param>
/// <returns>
/// The best fragments joined with "...", or the unmodified text when either the query or
/// the text is null or empty.
/// </returns>
/// <remarks>
/// Exceptions thrown while parsing <paramref name="query"/> are not caught here and reach the caller.
/// NOTE(review): <c>qp</c>, <c>reader</c> and <c>queries</c> are declared outside this method and are
/// accessed here without synchronization — confirm thread-safety expectations with the callers.
/// </remarks>
public static string HtmlPreview(Object obj, string query, string textType, string preTag, string postTag, int fragmentSize, int maxNumFragments)
{
    string text;
    GxSilentTrnSdt silent = obj as GxSilentTrnSdt;
    GxFile file = obj as GxFile;
    if (obj == null)
    {
        // Nothing to preview; avoids a NullReferenceException on obj.ToString() below.
        text = string.Empty;
    }
    else if (silent != null)
    {
        text = silent.Transaction.ToString();
    }
    else if (file != null)
    {
        string absoluteName = file.GetAbsoluteName();
        text = DocumentHandler.GetText(absoluteName, System.IO.Path.GetExtension(absoluteName));
    }
    else if (textType != null && textType.StartsWith("htm", StringComparison.OrdinalIgnoreCase))
    {
        text = new NTidyHTMLHandler().GetTextFromString(obj.ToString());
    }
    else
    {
        text = obj.ToString();
    }

    if (string.IsNullOrEmpty(query) || string.IsNullOrEmpty(text))
    {
        return text;
    }

    if (qp == null)
    {
        qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_24, IndexRecord.CONTENTFIELD, Indexer.CreateAnalyzer());
        qp.DefaultOperator = QueryParser.Operator.AND;
        qp.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
    }

    Query unrewrittenQuery = qp.Parse(query);

    // Default to the parsed query so highlighting still works if rewriting fails.
    Query q = unrewrittenQuery;
    try
    {
        if (reader == null)
        {
            reader = Indexer.Reader;
        }

        // A dedicated out-variable is used on purpose: TryGetValue resets its out
        // argument to null on a miss, which would otherwise discard the fallback above
        // when Rewrite throws.
        Query cachedQuery;
        if (queries.TryGetValue(query, out cachedQuery) && cachedQuery != null)
        {
            q = cachedQuery;
        }
        else
        {
            // Rewriting is required to expand search terms (for highlighting with wildcards).
            q = unrewrittenQuery.Rewrite(reader);
            if (queries.Count == int.MaxValue)
            {
                queries.Clear();
            }
            queries[query] = q;
        }
    }
    catch (Exception ex)
    {
        // Best effort: log and keep whatever query is currently assigned to q.
        GXLogging.Error(log, "HTMLPreview error", ex);
    }

    QueryScorer scorer = new QueryScorer(q);
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(preTag, postTag);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    IFragmenter fragmenter = new SimpleFragmenter(fragmentSize);
    highlighter.TextFragmenter = fragmenter;

    TokenStream tokenStream = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_24).TokenStream("Content", new StringReader(text));
    String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragments, "...");
    return result;
}