public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // Break the title apart on the package-id separators, re-join the pieces
    // with spaces, and let the inner analyzer tokenize the result.
    string title = reader.ReadToEnd();
    string[] fragments = title.Split(PackageIndexEntity.IdSeparators, StringSplitOptions.RemoveEmptyEntries);
    string partiallyTokenized = String.Join(" ", fragments);
    return innerAnalyzer.TokenStream(fieldName, new StringReader(partiallyTokenized));
}
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    // NOTE(review): a fresh analyzer (and therefore a fresh stream) is built on
    // every call, so despite the name nothing is actually reused here — confirm
    // whether reuse via the previous-token-stream mechanism was intended.
    StandardAnalyzer sa = new StandardAnalyzer(MyLucene.GetLuceneVersion(), StopFilter.MakeStopSet(stopWords));
    // Wrap the standard stream with the custom filter before handing it back.
    return new MyFilter(sa.TokenStream(fieldName, reader));
}
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
    // Standard tokenization with the instance stop-word set, post-processed by MyFilter.
    StandardAnalyzer sa = new StandardAnalyzer(stopWords);
    return new MyFilter(sa.TokenStream(fieldName, reader));
}
private void button1_Click(object sender, EventArgs e)
{
    // Tokenize a Chinese sample string with the standard analyzer
    // (unigram segmentation for CJK text) and print each term.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_20);
    // Dispose the stream so its underlying reader is released (the original leaked it).
    using (TokenStream tokenStream = analyzer.TokenStream("", new StringReader("北京,欢迎你们所有人")))
    {
        // The attribute instance is stable across IncrementToken calls,
        // so fetch it once instead of on every iteration.
        var termAttribute = tokenStream.GetAttribute <ITermAttribute>();
        while (tokenStream.IncrementToken())
        {
            Console.WriteLine(termAttribute.Term);
        }
    }
}
/// <summary>
/// Unigram segmentation demo (一元分词) using the legacy Token-based API.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event data.</param>
private void button1_Click(object sender, EventArgs e)
{
    Analyzer analyzer = new StandardAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader("北京,Hi欢迎你们大家"));
    // Older Lucene.Net API: Next() returns null once the stream is exhausted.
    for (Lucene.Net.Analysis.Token token = tokenStream.Next(); token != null; token = tokenStream.Next())
    {
        Console.WriteLine(token.TermText());
    }
}
public static string Summarize(Query query, bool shouldDocumentsBeClustered, string discoveryPath, Encoding encoding, Cache cache)
{
    // Extract plain text from the file at discoveryPath and return the single
    // best ~150-character highlighted fragment for the query, with a "..." tail.
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    highlighter.SetTextFragmenter(new SimpleFragmenter(150));
    string rawContent = File.ReadAllText(discoveryPath, encoding);
    string text = UserDefinedFunctions.ExtractText(rawContent).Value;
    TokenStream tokenStream = standardAnalyzer.TokenStream("text", new StringReader(text));
    // Strip any leading spaces/commas the fragmenter may leave at the cut point.
    string fragment = highlighter.GetBestFragments(tokenStream, text, 1, "...") + " ...";
    return fragment.TrimStart(" ,".ToCharArray());
}
private void button1_Click(object sender, EventArgs e)
{
    // Unigram tokenization demo using the 3.0 attribute-based API.
    Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    using (TokenStream tokenStream = analyzer.TokenStream("", new StringReader("北京,Hi欢迎你们大家")))
    {
        while (tokenStream.IncrementToken())
        {
            // GetAttribute returns the same instance each time; read Term per token.
            ITermAttribute ita = tokenStream.GetAttribute <ITermAttribute>();
            Console.WriteLine(ita.Term);
        }
    }
}
public static void testStandard(String testString)
{
    // Print every term the StandardAnalyzer produces for testString.
    // Use an explicit version rather than LUCENE_CURRENT (here: 3.0).
    Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    StringReader r = new StringReader(testString);
    // Dispose the stream when done; the original leaked it.
    using (TokenStream ts = analyzer.TokenStream("", r))
    {
        Console.WriteLine("=====Standard analyzer=======");
        // The attribute instance is stable across IncrementToken calls.
        ITermAttribute termAtt = ts.GetAttribute <ITermAttribute>();
        while (ts.IncrementToken())
        {
            string iterm = termAtt.Term;
            Console.WriteLine("[" + iterm + "]");
        }
    }
}
protected void btnGetSegmentation_Click(object sender, EventArgs e)
{
    // Segment the submitted text and echo the tokens back, pipe-separated.
    string words = txtWords.Text;
    if (string.IsNullOrEmpty(words))
    {
        return;
    }
    // StandardAnalyzer performs unigram segmentation for CJK input.
    Analyzer analyzer = new StandardAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(words));
    // Next() yields tokens until the stream is exhausted (returns null).
    for (Token token = tokenStream.Next(); token != null; token = tokenStream.Next())
    {
        Response.Write(token.TermText() + " | ");
    }
}
public static string GetTag(string text)
{
    // Strip line breaks, commas and spaces, then render the tokenized text
    // through the term view. (Removed an unused StringBuilder local.)
    text = text.Replace("\n", string.Empty).Replace("\r", string.Empty).Replace(",", string.Empty).Replace(" ", string.Empty);
    StandardAnalyzer analyzer = new StandardAnalyzer();
    int termCounter = 0;
    AnalyzerView view = new TermAnalyzerView();
    StringReader stringReader = new StringReader(text);
    TokenStream tokenStream = analyzer.TokenStream("defaultFieldName", stringReader);
    // termCounter receives the number of terms the view processed; only the
    // rendered text is returned here.
    return view.GetView(tokenStream, out termCounter).Trim();
}
public static string GetKeyWordSplid(string keywords)
{
    // Tokenize the keyword string and return the terms space-separated
    // (the result keeps a trailing space, as before).
    StringBuilder sb = new StringBuilder();
    Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    // NOTE(review): the search text is also passed as the field name; the
    // StandardAnalyzer ignores the field name, so this appears harmless —
    // confirm before changing.
    TokenStream stream = analyzer.TokenStream(keywords, new StringReader(keywords));
    while (stream.IncrementToken())
    {
        ITermAttribute ita = stream.GetAttribute <ITermAttribute>();
        sb.Append(ita.Term + " ");
    }
    return sb.ToString();
}
public ActionResult Cut(string str)
{
    // Unigram (single-character) segmentation: return the tokens of str
    // joined with '|' as plain content.
    StringBuilder sb = new StringBuilder();
    StandardAnalyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(str));
    // The attribute instance is stable; read Term after each successful IncrementToken.
    ITermAttribute item = tokenStream.GetAttribute <ITermAttribute>();
    while (tokenStream.IncrementToken())
    {
        sb.Append(item.Term + "|");
    }
    // Removed a stray tokenStream.CloneAttributes() call whose result was discarded.
    analyzer.Close();
    return Content(sb.ToString());
}
/// <summary>
/// Does the search and stores the information about the results: clears old
/// result/paging/summary elements from the "content" container, runs the Lucene
/// query, and writes highlighted result elements plus summary and paging back
/// into the current page.
/// </summary>
public void HandleSearch(string searchQuery)
{
    DateTime start = DateTime.Now;
    _searchQuery = searchQuery;
    // create the searcher; index is placed in "index" subdirectory
    var searcher = new IndexSearcher(_indexDir);
    Analyzer analyzer = new StandardAnalyzer();
    // parse the query, "text" is the default field to search
    var query = QueryParser.Parse(_searchQuery, "text", analyzer);
    const string containerName = "content";
    Container container = _currentPage.Containers[containerName];
    const string resultElementName = "result";
    const string pagingElementName = "paging";
    const string summaryElementName = "summary";
    // Concatenated names used as a crude membership test via IndexOf below.
    const string allElementNames = resultElementName + pagingElementName + summaryElementName;
    int count = container.Elements.Count;
    // Remove previous search result.
    // NOTE(review): iterates from Count down to 1 — this appears to assume a
    // 1-based element collection; walking backwards keeps remaining indices
    // valid while removing. Confirm against the Container API.
    for (int i = count; i > 0; --i)
    {
        if (container.Elements[i] == null)
        {
            continue;
        }
        if (allElementNames.IndexOf(container.Elements[i].Type, StringComparison.Ordinal) > -1)
        {
            container.Elements.Remove(i);
        }
    }
    Element queryElement = container.Elements[0];
    // 'element' is reused: first the summary element, later each result element,
    // finally the paging element.
    Element element = container.Elements.Create(summaryElementName);
    queryElement["query"] = _searchQuery;
    // search
    Hits hits = searcher.Search(query);
    _total = hits.Length();
    // create highlighter
    var highlighter = new Highlighter(new QueryScorer(query));
    // initialize startAt
    _startFirstAt = InitStartAt();
    // how many items we should show - less than defined at the end of the results
    int resultsCount = SmallerOf(_total, MaxResults + _startFirstAt);
    for (int i = _startFirstAt; i < resultsCount; i++)
    {
        // get the document from index
        Document document = hits.Doc(i);
        string path = document.Get("url");
        if (path != null)
        {
            string plainText = document.Get("text");
            // Highlight up to two best fragments of the stored text for display.
            TokenStream tokenStream = analyzer.TokenStream("text", new StringReader(plainText));
            string text = highlighter.GetBestFragments(tokenStream, plainText, 2, "...");
            element = container.Elements.Create(resultElementName);
            element["title"] = document.Get("title");
            element["path"] = _searchPage + path.Replace("\\", "/") + "/";
            // Fall back to the full plain text if no highlighted fragment was produced.
            element["sample"] = string.IsNullOrEmpty(text) ? plainText : text;
        }
    }
    searcher.Close();
    _duration = DateTime.Now - start;
    _fromItem = _startFirstAt + 1;
    _toItem = SmallerOf(_startFirstAt + MaxResults, _total);
    // result information (written to whichever element was created last)
    element.Node.InnerText = Summary;
    // paging link
    element = container.Elements.Create(pagingElementName);
    element.Node.InnerText = SetPaging();
    _process.SearchContext = _currentPage;
    _currentPage.Save();
}
/// <summary>
/// Full-text search over the article ("Clanci") index. Returns the matches as a
/// DataTable, with the "Sazetak" (summary) column replaced by highlighted fragments.
/// </summary>
/// <param name="pretraga">The user's search phrase.</param>
public static DataTable searchClanci(string pretraga)
{
    DataTable ResultsClanci = new DataTable();
    // create the searcher; index is placed in the "Clanci" subdirectory
    // of the application base directory
    string indexDirectory = AppDomain.CurrentDomain.BaseDirectory + "Clanci";
    var analyzer = new StandardAnalyzer(Version.LUCENE_30);
    IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(indexDirectory));
    // parse the query across the title, summary, body and tag fields
    var parser = new MultiFieldQueryParser(Version.LUCENE_30, new[] { "Naslov", "Sazetak", "Sadrzaj", "Tagovi" }, analyzer);
    //var parser = new QueryParser(Version.LUCENE_30, "Sazetak" , analyzer);
    Query query = parser.Parse(pretraga);
    //// create the result DataTable schema
    ResultsClanci.Columns.Add("id", typeof(Int32));
    ResultsClanci.Columns.Add("Naslov", typeof(string));
    ResultsClanci.Columns.Add("Sadrzaj", typeof(string));
    ResultsClanci.Columns.Add("Tagovi", typeof(string));
    ResultsClanci.Columns.Add("DatumKreiranja", typeof(DateTime));
    ResultsClanci.Columns.Add("DatumZadnjeIzmjene", typeof(DateTime));
    ResultsClanci.Columns.Add("DatumZadnjeAktivnosti", typeof(DateTime));
    ResultsClanci.Columns.Add("DatumZatvaranjaPosta", typeof(DateTime));
    ResultsClanci.Columns.Add("PrihvaceniOdgovori", typeof(Int32));
    ResultsClanci.Columns.Add("BrojOdgovora", typeof(Int32));
    ResultsClanci.Columns.Add("BrojKomentara", typeof(Int32));
    ResultsClanci.Columns.Add("BrojOmiljenih", typeof(Int32));
    ResultsClanci.Columns.Add("BrojPregleda", typeof(Int32));
    ResultsClanci.Columns.Add("BrojPoena", typeof(Int32));
    ResultsClanci.Columns.Add("VlasnikID", typeof(Int32));
    ResultsClanci.Columns.Add("VlasnikNadimak", typeof(string));
    ResultsClanci.Columns.Add("PromijenioID", typeof(Int32));
    ResultsClanci.Columns.Add("RoditeljskiPostID", typeof(Int32));
    //Results.Columns.Add("PodKategorija", typeof(Int32));
    ResultsClanci.Columns.Add("PostVrsta", typeof(Int32));
    ResultsClanci.Columns.Add("SlikaURL", typeof(string));
    ResultsClanci.Columns.Add("temp", typeof(string));
    ResultsClanci.Columns.Add("Likes", typeof(Int32));
    ResultsClanci.Columns.Add("Unlikes", typeof(Int32));
    ResultsClanci.Columns.Add("Sazetak", typeof(string));
    ResultsClanci.Columns.Add("BrojRangiranja", typeof(Int32));
    ResultsClanci.Columns.Add("PrihvacenaIzmjena", typeof(Int32));
    ResultsClanci.Columns.Add("Podnaslov", typeof(string));
    ResultsClanci.Columns.Add("Broj.Razgovora", typeof(Int32));
    ResultsClanci.Columns.Add("sample", typeof(string));
    ResultsClanci.Columns.Add("sampleNaslov", typeof(string));
    // search — NOTE(review): only the top 6 hits are requested here
    TopDocs hits = searcher.Search(query, 6);
    //E this.total = hits.TotalHits;
    // create highlighter; the highlighter does its work here
    IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold; background-color: #e5ecf9; \">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(80);
    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = fragmenter;
    for (int i = 0; i < hits.ScoreDocs.Count(); i++)
    {
        // get the document from index
        Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
        // take the best text fragments from the summary field
        TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("Sazetak")));
        String sample = highlighter.GetBestFragments(stream, doc.Get("Sazetak"), 3, "...");
        //String path = doc.Get("path");
        // create a new row with the result data
        DataRow row = ResultsClanci.NewRow();
        row["id"] = doc.Get("id");
        row["Naslov"] = doc.Get("Naslov"); //doc.Get("Naslov");
        row["Sadrzaj"] = doc.Get("Sadrzaj");
        row["Tagovi"] = doc.Get("Tagovi");
        row["DatumKreiranja"] = doc.Get("DatumKreiranja");
        row["DatumZadnjeIzmjene"] = doc.Get("DatumZadnjeIzmjene");
        row["DatumZadnjeAktivnosti"] = doc.Get("DatumZadnjeAktivnosti");
        //row["DatumZatvaranjaPosta"] = doc.Get("DatumZatvaranjaPosta");
        row["PrihvaceniOdgovori"] = doc.Get("PrihvaceniOdgovori");
        row["BrojOdgovora"] = doc.Get("BrojOdgovora");
        row["BrojKomentara"] = doc.Get("BrojKomentara");
        row["BrojOmiljenih"] = doc.Get("BrojOmiljenih");
        row["BrojPregleda"] = doc.Get("BrojPregleda");
        row["BrojPoena"] = doc.Get("BrojPoena");
        //row["VlasnikID"] = doc.Get("VlasnikID");
        row["VlasnikNadimak"] = doc.Get("VlasnikNadimak");
        //row["PromijenioID"] = doc.Get("PromijenioID");
        //row["RoditeljskiPostID"] = doc.Get("RoditeljskiPostID");
        //row["PodKategorija"] = doc.Get("PodKategorija");
        row["PostVrsta"] = doc.Get("PostVrsta");
        row["SlikaURL"] = doc.Get("SlikaURL");
        //row["temp"] = doc.Get("temp");
        row["Likes"] = doc.Get("Likes");
        row["Unlikes"] = doc.Get("Unlikes");
        // highlighted fragments replace the stored summary
        row["Sazetak"] = sample; //doc.Get("Sazetak");
        row["BrojRangiranja"] = doc.Get("BrojRangiranja");
        row["PrihvacenaIzmjena"] = doc.Get("PrihvacenaIzmjena");
        row["Podnaslov"] = doc.Get("Podnaslov");
        //row["Broj.Razgovora"] = doc.Get("Broj.Razgovora");
        //row["sample"] = sample;
        //row["sampleNaslov"] = sampleNaslov;
        ResultsClanci.Rows.Add(row);
    }
    searcher.Dispose();
    // return the DataTable to be bound to the datasource
    return (ResultsClanci);
}
/// <summary>
/// Searches the lucene index with the search text.
/// </summary>
/// <param name="searchText">The text to search with.</param>
/// <remarks>Syntax reference: http://lucene.apache.org/java/2_3_2/queryparsersyntax.html#Wildcard</remarks>
/// <exception cref="SearchException">An error occured searching the lucene.net index.</exception>
public SearchResultsModel SearchIndex(string searchText)
{
    // This check is for the benefit of the CI builds
    if (!Directory.Exists(_indexPath))
    {
        CreateIndex();
    }
    var model = new SearchResultsModel();
    StandardAnalyzer analyzer = new StandardAnalyzer();
    try
    {
        // NOTE(review): searcher and analyzer are never disposed/closed here —
        // confirm whether that is intentional.
        IndexSearcher searcher = new IndexSearcher(_indexPath);
        // Build query over both the Text and Title fields
        var parser = new MultiFieldQueryParser(new string[] { "Text", "Title" }, analyzer);
        var searchQuery = parser.Parse(searchText);
        // Execute search
        var hits = searcher.Search(searchQuery);
        // Collect raw hits (document + score) before highlighting
        var results = new List <Result>();
        for (int i = 0; i < hits.Length(); i++)
        {
            results.Add(new Result() { doc = hits.Doc(i), Score = hits.Score(i) });
        }
        //Highlight the parts that are matched:
        var formatter = new SimpleHTMLFormatter("<span style='background:yellow;font-weight:bold;'>", "</span>");
        var fragmenter = new SimpleFragmenter(400);
        var scorer = new QueryScorer(searchQuery);
        var highlighter = new Highlighter(formatter, scorer);
        highlighter.SetTextFragmenter(fragmenter);
        var finalResults = new List <DisplayedResult>();
        var db = Legato.Current.DB;
        foreach (var result in results)
        {
            // Highlight the best fragment of the raw text.
            // NOTE(review): the Replace("'", "''") looks like SQL-style quote
            // escaping applied to display HTML — confirm it is still needed.
            var stream = analyzer.TokenStream("", new StringReader(result.doc.Get("RawText")));
            var highlighted = highlighter.GetBestFragments(stream, result.doc.Get("RawText"), 1, "...").Replace("'", "''");
            if (highlighted == "") // sometimes the highlighter fails to emit text...
            {
                highlighted = result.doc.Get("RawText").Replace("'", "''");
            }
            // Cap the displayed snippet at 1000 characters.
            if (highlighted.Length > 1000)
            {
                highlighted = highlighted.Substring(0, 1000);
            }
            int postID;
            if (!int.TryParse(result.doc.Get("GlobalPostID"), out postID)) // If GlobalPostID is null or not a number, this isn't a valid search entry, so we skip it.
            {
                continue;
            }
            var post = db.GlobalPostIDs.Where(p => p.GlobalPostID1 == postID).SingleOrDefault();
            if (post == null)
            {
                continue;
            }
            // TODO: privacy checks?
            post.FillProperties();
            finalResults.Add(new DisplayedResult() { ResultPost = post, Score = result.Score, HighlightedHTML = highlighted });
        }
        // Order by relevance, best first.
        model = new SearchResultsModel() { Results = finalResults.OrderByDescending(r => r.Score), Query = searchText.Trim() };
    }
    catch (Exception ex)
    {
        throw new SearchException(ex, "An error occured while searching the index");
    }
    return (model);

    /*MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "content", "title" }, analyzer);
     *
     * Query query = null;
     * try
     * {
     * query = parser.Parse(searchText);
     * }
     * catch (Lucene.Net.QueryParsers.ParseException)
     * {
     * // Catch syntax errors in the search and remove them.
     * searchText = QueryParser.Escape(searchText);
     * query = parser.Parse(searchText);
     * }
     *
     * if (query != null)
     * {
     * try
     * {
     * IndexSearcher searcher = new IndexSearcher(_indexPath);
     * Hits hits = searcher.Search(query);
     *
     * for (int i = 0; i < hits.Length(); i++)
     * {
     * Document document = hits.Doc(i);
     *
     * DateTime createdOn = DateTime.Now;
     * if (!DateTime.TryParse(document.GetField("createdon").StringValue(), out createdOn))
     * createdOn = DateTime.Now;
     *
     * SearchResult result = new SearchResult()
     * {
     * Id = int.Parse(document.GetField("id").StringValue()),
     * Title = document.GetField("title").StringValue(),
     * ContentSummary = document.GetField("contentsummary").StringValue(),
     * Tags = document.GetField("tags").StringValue(),
     * CreatedBy = document.GetField("createdby").StringValue(),
     * CreatedOn = createdOn,
     * ContentLength = int.Parse(document.GetField("contentlength").StringValue()),
     * Score = hits.Score(i)
     * };
     *
     * list.Add(result);
     * }
     * }
     * catch (Exception ex)
     * {
     * throw new SearchException(ex, "An error occured while searching the index");
     * }
     * }
     *
     * return list;
     */
}
/// <summary>
/// Runs the given Lucene query against the shared IndexSearcher, fills the
/// _Results table with one row per hit (with highlighted samples), and traces
/// timing information.
/// </summary>
public void Search(Query query)
{
    if (IndexSearcher == null)
    {
        throw new Exception("IndexSearcher not created");
    }
    trace("search {0}", query.ToString());
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    // do the query
    var start = DateTime.Now.TimeOfDay;
    TopDocs SearchResult = IndexSearcher.Search(query, _MAXRESULTS);
    // Clamp the reported total to the hard result cap.
    _totalItems = SearchResult.TotalHits;
    if (_totalItems > _MAXRESULTS)
    {
        _totalItems = _MAXRESULTS;
    }
    // create highlighter
    IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;\">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(80);
    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = fragmenter;
    // initialize startAt (paging offset)
    _startAt = InitStartAt();
    // how many items we should show - less than defined at the end of the results
    int resultsCount = Math.Min(_totalItems, _resultsPerPage + _startAt);
    if (resultsCount > _MAXRESULTS)
    {
        resultsCount = _MAXRESULTS;
    }
    for (int i = _startAt; i < resultsCount; i++)
    {
        // get the document from index
        Document doc = IndexSearcher.Doc(SearchResult.ScoreDocs[i].Doc);
        // highlight up to two fragments of the stored text
        TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
        String sample = highlighter.GetBestFragments(stream, doc.Get("text"), 2, "...");
        String path = doc.Get("path");
        // create a new row with the result data
        DataRow row = _Results.NewRow();
        row["title"] = doc.Get("title");
        row["path"] = path;
        row["url"] = _baseURL + path;
        row["sample"] = sample;
        // score scaled to an integer percentage
        row["score"] = Convert.ToInt16(SearchResult.ScoreDocs[i].Score * 100);
        row["id"] = doc.Get("id");
        row["type"] = doc.Get("type");
        _Results.Rows.Add(row);
    }
    IndexSearcher.Dispose();
    var end = DateTime.Now.TimeOfDay;
    trace("Search completed in {0}ms", end.TotalMilliseconds - start.TotalMilliseconds);
    // NOTE(review): IndexSearcher is traced after being disposed above —
    // confirm trace does not touch the disposed searcher.
    trace(SearchResult, IndexSearcher);
}
/// <summary>
/// Fuzzy full-text search: splits the query string on spaces, builds an OR'd
/// set of FuzzyQuery terms over the "text" field, and fills _Results with one
/// highlighted row per hit.
/// </summary>
public void Search(string Query)
{
    if (IndexSearcher == null)
    {
        throw new Exception("IndexSearcher not created");
    }
    _query = Query;
    DateTime start = DateTime.Now;
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var fieldName = "text";
    // fuzzy matching parameters: similarity threshold and exact-prefix length
    var minimumSimilarity = 0.5f;
    var prefixLength = 3;
    var query = new BooleanQuery();
    var segments = _query.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
    // Each whitespace-separated word becomes an optional (SHOULD) fuzzy clause.
    foreach (string segment in segments)
    {
        Term term = new Term(fieldName, segment);
        FuzzyQuery fuzzyQuery = new FuzzyQuery(term, minimumSimilarity, prefixLength);
        query.Add(fuzzyQuery, Occur.SHOULD);
    }
    // search (top 200 hits)
    TopDocs hits = IndexSearcher.Search(query, 200);
    // Clamp the reported total to the hard result cap.
    _totalItems = hits.TotalHits;
    if (_totalItems > _MAXRESULTS)
    {
        _totalItems = _MAXRESULTS;
    }
    // create highlighter
    IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;\">", "</span>");
    SimpleFragmenter fragmenter = new SimpleFragmenter(80);
    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.TextFragmenter = fragmenter;
    // initialize startAt (paging offset)
    _startAt = InitStartAt();
    // how many items we should show - less than defined at the end of the results
    int resultsCount = Math.Min(_totalItems, _resultsPerPage + _startAt);
    if (resultsCount > _MAXRESULTS)
    {
        resultsCount = _MAXRESULTS;
    }
    for (int i = _startAt; i < resultsCount; i++)
    {
        // get the document from index
        Document doc = IndexSearcher.Doc(hits.ScoreDocs[i].Doc);
        // highlight up to two fragments of the stored text
        TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("text")));
        String sample = highlighter.GetBestFragments(stream, doc.Get("text"), 2, "...");
        String path = doc.Get("path");
        // create a new row with the result data
        DataRow row = _Results.NewRow();
        row["title"] = doc.Get("title");
        row["path"] = path;
        row["url"] = _baseURL + path;
        row["sample"] = sample;
        // score scaled to an integer percentage
        row["score"] = Convert.ToInt16(hits.ScoreDocs[i].Score * 100);
        _Results.Rows.Add(row);
    }
    IndexSearcher.Dispose();
    // result information
    _duration = DateTime.Now - start;
    _fromItem = _startAt + 1;
    _toItem = Math.Min(_startAt + this.ResultsPerPage, _totalItems);
}
/// <summary>
/// MVC search action: parses the search term out of the raw query string,
/// queries the on-disk Lucene index, and hands a list of highlighted results
/// to the view via ViewBag. Redirects to the upload page when there is no index
/// or no search term.
/// </summary>
public ActionResult Search()
{
    var path = Server.MapPath("/Index-lucene");
    int numberOfFiles = System.IO.Directory.GetFiles(path).Length;
    // Take everything after the first '=' in the raw query string and turn
    // '+' back into spaces to recover the search phrase.
    var searchText = Request.QueryString.ToString();
    string output = searchText.Substring(searchText.IndexOf('=') + 1);
    string searchWord = output.Replace('+', ' ');
    ViewBag.YourSearch = searchWord;
    if (numberOfFiles != 0 && output.Length > 0)
    {
        Lucene.Net.Store.Directory dir = FSDirectory.Open(path);
        Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
        IndexReader indexReader = IndexReader.Open(dir, true);
        Searcher indexSearch = new IndexSearcher(indexReader);
        try
        {
            var startSearchTime = DateTime.Now.TimeOfDay;
            string totaltimeTakenToSearch = string.Empty;
            // Search across meta tags, preview content and file name.
            var queryParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, new string[] { "metaTag", "prevewContent", "fileNameWithoutExtension" }, analyzer);
            var query = queryParser.Parse(searchWord);
            //ViewBag.SearchQuery = "Searching for: \"" + searchWord + "\"";
            // First search only to obtain the total hit count for display.
            TopDocs resultDocs = indexSearch.Search(query, indexReader.NumDocs());
            ViewBag.SearchQuery = resultDocs.TotalHits + " result(s) found for \"" + searchWord + "\"";
            // Second search collects up to 20000 scored docs for rendering.
            TopScoreDocCollector collector = TopScoreDocCollector.Create(20000, true);
            indexSearch.Search(query, collector);
            ScoreDoc[] hits = collector.TopDocs().ScoreDocs;
            IFormatter formatter = new SimpleHTMLFormatter("<span style=\"color: black; font-weight: bold;\">", "</span>");
            SimpleFragmenter fragmenter = new SimpleFragmenter(160);
            QueryScorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.TextFragmenter = fragmenter;
            //highlighter.SetTextFragmenter(fragmenter);
            List <ListofResult> parts = new List <ListofResult>();
            for (int i = 0; i < hits.Length; i++)
            {
                int docId = hits[i].Doc;
                float score = hits[i].Score;
                Document doc = indexSearch.Doc(docId);
                string url = doc.Get("URL");
                string title = doc.Get("filename");
                // Highlight up to three fragments of the preview content.
                TokenStream stream = analyzer.TokenStream("", new StringReader(doc.Get("prevewContent")));
                string content = highlighter.GetBestFragments(stream, doc.Get("prevewContent"), 3, "...");
                // Fall back to the first 480 chars of the preview when the
                // highlighter produced nothing.
                if (content == null || content == "")
                {
                    string contents = doc.Get("prevewContent");
                    if (contents != "")
                    {
                        if (contents.Length < 480)
                        {
                            content = contents.Substring(0, contents.Length);
                        }
                        else
                        {
                            content = contents.Substring(0, 480);
                        }
                    }
                }
                parts.Add(new ListofResult() { FileName = title, Content = content, URL = url });
                // NOTE(review): the elapsed time is recomputed on every loop
                // iteration; only the last value survives.
                var endSearchTime = DateTime.Now.TimeOfDay;
                var timeTaken = endSearchTime.TotalMilliseconds - startSearchTime.TotalMilliseconds;
                totaltimeTakenToSearch = timeTaken.ToString();
            }
            //Search completed, dispose IndexSearcher
            indexSearch.Dispose();
            //assigning list into ViewBag
            ViewBag.SearchResult = parts;
        }
        catch (Exception ex)
        {
            // NOTE(review): all exceptions (including query parse errors) are
            // silently swallowed here and the view renders with no results —
            // consider at least logging 'ex'.
        }
    }
    else
    {
        return (RedirectToAction("UploadFile", "Home"));
    }
    return (View());
}
/// <summary>
/// Produces a highlighted ~150-char summary fragment for the query. Tries three
/// passes: (1) highlight the extracted plain text; (2) highlight the raw text
/// HTML-encoded; (3) if the highlighter still emits nothing, manually locate the
/// first query term in the text and bold it with a regex.
/// </summary>
public static string Summarize(Query query, Query wildcardSafeQuery, bool shouldDocumentsBeClustered, string text)
{
    int fragmentLength = 150;
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
    Highlighter highligher = new Highlighter(new QueryScorer(query));
    highligher.SetTextFragmenter(new SimpleFragmenter(fragmentLength));
    // Pass 1: highlight the extracted plain text.
    string text2 = UserDefinedFunctions.ExtractText(text).Value;
    TokenStream tokenStream = standardAnalyzer.TokenStream("text", new StringReader(text2));
    string bestFragments = (highligher.GetBestFragments(tokenStream, text2, 1, "...") + " ...").TrimStart(" ,".ToCharArray());
    // "..." alone means the highlighter found nothing to show.
    if (bestFragments == "...")
    {
        // Pass 2: retry against the HTML-encoded original text.
        text = HttpUtility.HtmlEncode(text);
        tokenStream = standardAnalyzer.TokenStream("text", new StringReader(text));
        bestFragments = (highligher.GetBestFragments(tokenStream, text, 1, "...") + " ...").TrimStart(" ,".ToCharArray());
        if (bestFragments == "...")
        {
            // Pass 3: extract the query's terms and bold the first "text"-field
            // term found in the document ourselves.
            Hashtable hashTable = new Hashtable();
            try
            {
                query.ExtractTerms(hashTable);
            }
            catch
            {
                // Some query types (e.g. unrewritten wildcards) cannot extract
                // terms; fall back to the wildcard-safe variant, best-effort.
                try
                {
                    wildcardSafeQuery.ExtractTerms(hashTable);
                }
                catch
                {
                }
            }
            if (hashTable.Count != 0)
            {
                string firstTerm = null;
                foreach (Term term in hashTable.Values)
                {
                    if (term.Field() == "text")
                    {
                        string termText = term.Text();
                        if (termText != null)
                        {
                            firstTerm = termText.Split(' ')[0];
                            break;
                        }
                    }
                }
                if (firstTerm != null)
                {
                    // Case-insensitive locate; clamp the window to the text length.
                    int index = text.ToLowerInvariant().IndexOf(firstTerm);
                    if (index != -1)
                    {
                        if (index + fragmentLength > text.Length)
                        {
                            fragmentLength = text.Length - index;
                        }
                        bestFragments = Regex.Replace(text.Substring(index, fragmentLength), firstTerm, "<b>" + firstTerm + "</b>", RegexOptions.IgnoreCase) + "...";
                    }
                }
            }
        }
    }
    return (bestFragments);
}
private static string getHighlight(Highlighter highlighter, StandardAnalyzer analyzer, string fieldContent)
{
    // Tokenize the field content and return the single best fragment,
    // using "." as the fragment separator.
    var stream = analyzer.TokenStream("", new StringReader(fieldContent));
    return highlighter.GetBestFragments(stream, fieldContent, 1, ".");
}
/// <summary>
/// Main searching method: builds an Examine criteria from the text/tag/node
/// parts of the LookQuery, then runs the compiled Lucene query directly with
/// optional distance filtering, sorting, and highlight support.
/// </summary>
/// <param name="lookQuery">Combined query description; null yields an empty result.</param>
/// <returns>an IEnumerableWithTotal of LookMatch (never null)</returns>
public static IEnumerableWithTotal <LookMatch> Query(LookQuery lookQuery)
{
    IEnumerableWithTotal <LookMatch> lookMatches = null; // prepare return value
    if (lookQuery == null)
    {
        LogHelper.Warn(typeof(LookService), "Supplied search query was null");
    }
    else
    {
        var searchProvider = LookService.Searcher;
        var searchCriteria = searchProvider.CreateSearchCriteria();
        // Seed the fluent query with an empty field clause so later And()/Not()
        // calls have something to chain onto.
        var query = searchCriteria.Field(string.Empty, string.Empty);
        // Text
        if (!string.IsNullOrWhiteSpace(lookQuery.TextQuery.SearchText))
        {
            if (lookQuery.TextQuery.Fuzzyness > 0)
            {
                query.And().Field(LookService.TextField, lookQuery.TextQuery.SearchText.Fuzzy(lookQuery.TextQuery.Fuzzyness));
            }
            else
            {
                query.And().Field(LookService.TextField, lookQuery.TextQuery.SearchText);
            }
        }
        // Tags
        if (lookQuery.TagQuery != null)
        {
            var allTags = new List <string>();
            var anyTags = new List <string>();
            if (lookQuery.TagQuery.AllTags != null)
            {
                allTags.AddRange(lookQuery.TagQuery.AllTags);
                allTags.RemoveAll(x => string.IsNullOrWhiteSpace(x));
            }
            if (lookQuery.TagQuery.AnyTags != null)
            {
                anyTags.AddRange(lookQuery.TagQuery.AnyTags);
                anyTags.RemoveAll(x => string.IsNullOrWhiteSpace(x));
            }
            if (allTags.Any())
            {
                query.And().GroupedAnd(allTags.Select(x => LookService.TagsField), allTags.ToArray());
            }
            if (anyTags.Any())
            {
                // NOTE(review): the field list is sized from allTags while the
                // values come from anyTags — looks like a copy/paste slip
                // (anyTags.Select(...) was probably intended); confirm.
                query.And().GroupedOr(allTags.Select(x => LookService.TagsField), anyTags.ToArray());
            }
        }
        // TODO: Date
        // TODO: Name
        // Nodes
        if (lookQuery.NodeQuery != null)
        {
            if (lookQuery.NodeQuery.TypeAliases != null)
            {
                var typeAliases = new List <string>();
                typeAliases.AddRange(lookQuery.NodeQuery.TypeAliases);
                typeAliases.RemoveAll(x => string.IsNullOrWhiteSpace(x));
                if (typeAliases.Any())
                {
                    query.And().GroupedOr(typeAliases.Select(x => UmbracoContentIndexer.NodeTypeAliasFieldName), typeAliases.ToArray());
                }
            }
            if (lookQuery.NodeQuery.ExcludeIds != null)
            {
                foreach (var excudeId in lookQuery.NodeQuery.ExcludeIds.Distinct())
                {
                    query.Not().Id(excudeId);
                }
            }
        }
        try
        {
            searchCriteria = query.Compile();
        }
        catch (Exception exception)
        {
            LogHelper.WarnWithException(typeof(LookService), "Could not compile the Examine query", exception);
        }
        if (searchCriteria != null && searchCriteria is LuceneSearchCriteria)
        {
            Sort sort = null;
            Filter filter = null;
            // Defaults: no distance available, no highlighting.
            Func <int, double?> getDistance = x => null;
            Func <string, IHtmlString> getHighlight = null;
            TopDocs topDocs = null;
            switch (lookQuery.SortOn)
            {
            case SortOn.Date:     // newest -> oldest
                sort = new Sort(new SortField(LuceneIndexer.SortedFieldNamePrefix + LookService.DateField, SortField.LONG, true));
                break;

            case SortOn.Name:     // a -> z
                sort = new Sort(new SortField(LuceneIndexer.SortedFieldNamePrefix + LookService.NameField, SortField.STRING));
                break;
            }
            if (lookQuery.LocationQuery != null && lookQuery.LocationQuery.Location != null)
            {
                // Cap the requested radius at the service-wide maximum.
                double maxDistance = LookService.MaxDistance;
                if (lookQuery.LocationQuery.MaxDistance != null)
                {
                    maxDistance = Math.Min(lookQuery.LocationQuery.MaxDistance.GetMiles(), maxDistance);
                }
                var distanceQueryBuilder = new DistanceQueryBuilder(
                    lookQuery.LocationQuery.Location.Latitude,
                    lookQuery.LocationQuery.Location.Longitude,
                    maxDistance,
                    LookService.LocationField + "_Latitude",
                    LookService.LocationField + "_Longitude",
                    CartesianTierPlotter.DefaltFieldPrefix,
                    true);
                // update filter
                filter = distanceQueryBuilder.Filter;
                if (lookQuery.SortOn == SortOn.Distance)
                {
                    // update sort
                    sort = new Sort(
                        new SortField(
                            LookService.DistanceField,
                            new DistanceFieldComparatorSource(distanceQueryBuilder.DistanceFilter)));
                }
                // raw data for the getDistance func
                var distances = distanceQueryBuilder.DistanceFilter.Distances;
                // update getDistance func
                getDistance = new Func <int, double?>(x =>
                {
                    if (distances.ContainsKey(x))
                    {
                        return(distances[x]);
                    }
                    return(null);
                });
            }
            var indexSearcher = new IndexSearcher(((LuceneIndexer)LookService.Indexer).GetLuceneDirectory(), false);
            var luceneSearchCriteria = (LuceneSearchCriteria)searchCriteria;
            // Do the Lucene search
            topDocs = indexSearcher.Search(
                luceneSearchCriteria.Query,                                  // the query build by Examine
                filter ?? new QueryWrapperFilter(luceneSearchCriteria.Query),
                LookService.MaxLuceneResults,
                sort ?? new Sort(SortField.FIELD_SCORE));
            if (topDocs.TotalHits > 0)
            {
                // setup the highlighing func if required
                if (lookQuery.TextQuery.HighlightFragments > 0 && !string.IsNullOrWhiteSpace(lookQuery.TextQuery.SearchText))
                {
                    var version = Lucene.Net.Util.Version.LUCENE_29;
                    Analyzer analyzer = new StandardAnalyzer(version);
                    var queryParser = new QueryParser(version, LookService.TextField, analyzer);
                    // Rewrite expands multi-term queries so the scorer can highlight them.
                    var queryScorer = new QueryScorer(queryParser
                                                      .Parse(lookQuery.TextQuery.SearchText)
                                                      .Rewrite(indexSearcher.GetIndexReader()));
                    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), queryScorer);
                    // update the func so it does real highlighting work
                    getHighlight = (x) =>
                    {
                        var tokenStream = analyzer.TokenStream(LookService.TextField, new StringReader(x));
                        var highlight = highlighter.GetBestFragments(
                            tokenStream,
                            x,
                            lookQuery.TextQuery.HighlightFragments,          // max number of fragments
                            lookQuery.TextQuery.HighlightSeparator);         // fragment separator
                        return(new HtmlString(highlight));
                    };
                }
                lookMatches = new EnumerableWithTotal <LookMatch>(
                    LookSearchService.GetLookMatches(
                        lookQuery,
                        indexSearcher,
                        topDocs,
                        getHighlight,
                        getDistance),
                    topDocs.TotalHits);
            }
        }
    }
    return(lookMatches ?? new EnumerableWithTotal <LookMatch>(Enumerable.Empty <LookMatch>(), 0));
}