Code Example #1
File: WebCommon.cs  Project: axel10/insdep-oa
        public static List <string> PanGuSplitWord(string msg)
        {
            Analyzer    analyzer    = new PanGuAnalyzer();
            TokenStream tokenStream = analyzer.TokenStream("", new StringReader(msg));

            // Grab the term attribute once; IncrementToken() updates it in place.
            ITermAttribute ita  = tokenStream.GetAttribute <ITermAttribute>();
            List <string>  list = new List <string>();

            // Advance to each token before reading it. The original read the term
            // before the first IncrementToken() call, which produced a spurious
            // empty leading entry.
            while (tokenStream.IncrementToken())
            {
                list.Add(ita.Term);
            }

            analyzer.Close();
            return(list);

            /* Legacy Lucene.Net 2.x equivalent:
             * Token token;
             * List<string> list = new List<string>();
             * while ((token = tokenStream.Next()) != null)
             * {
             *     list.Add(token.TermText());
             * }
             * return list; */
        }
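A minimal caller sketch (assuming the method is reachable as WebCommon.PanGuSplitWord, per the file header; the exact tokens depend on the PanGu dictionary):

        List <string> tokens = WebCommon.PanGuSplitWord("今天天气很好");
        Console.WriteLine(string.Join("|", tokens)); // e.g. 今天|天气|很好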
Code Example #2
        // main search method
        private static IEnumerable <SearchData> _search(int pageIndex, int pageSize, out int totalCount, string searchQuery, string searchField = "")
        {
            // validation
            if (string.IsNullOrEmpty(searchQuery.Replace("*", "").Replace("?", "")))
            {
                totalCount = 0;
                return(new List <SearchData>());
            }

            // set up lucene searcher
            using (var searcher = new IndexSearcher(_directory, false))
            {
                var hits_limit = 1000;
                var analyzer   = new PanGuAnalyzer(); //new StandardAnalyzer(Version.LUCENE_30);

                // search by single field
                if (!string.IsNullOrEmpty(searchField))
                {
                    var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, searchField, analyzer);
                    var query  = parseQuery(searchQuery, parser);
                    var hits   = searcher.Search(query, hits_limit).ScoreDocs;
                    totalCount = hits.Length; // note: capped at hits_limit

                    var results = _mapLuceneToDataList(pageIndex, pageSize, hits, searcher);
                    analyzer.Close(); // the enclosing using block disposes the searcher
                    return(results);
                }
                // search by multiple fields (kept in index order via Sort.INDEXORDER)
                else
                {
                    var parser = new MultiFieldQueryParser
                                     (Lucene.Net.Util.Version.LUCENE_30, new[] { "Id", "Name", "Url", "ImageUrl", "Description", "CreatedDate" }, analyzer);
                    var query = parseQuery(searchQuery, parser);
                    var hits  = searcher.Search(query, null, hits_limit, Sort.INDEXORDER).ScoreDocs;

                    totalCount = hits.Length;

                    var results = _mapLuceneToDataList(pageIndex, pageSize, hits, searcher);
                    analyzer.Close(); // the enclosing using block disposes the searcher
                    return(results);
                }
            }
        }
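Both branches call a parseQuery helper that is not shown in this snippet. A plausible implementation, following the common pattern of retrying with an escaped query when the raw input fails to parse (an assumption, not code confirmed from this project):

        private static Query parseQuery(string searchQuery, QueryParser parser)
        {
            Query query;
            try
            {
                // try the raw input first so wildcard/boolean syntax still works
                query = parser.Parse(searchQuery.Trim());
            }
            catch (ParseException)
            {
                // fall back to a literal search when the input is not valid query syntax
                query = parser.Parse(QueryParser.Escape(searchQuery.Trim()));
            }
            return(query);
        }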
Code Example #3
        public static void Optimize()
        {
            var analyzer = new PanGuAnalyzer();  //new StandardAnalyzer(Version.LUCENE_30);

            using (var writer = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                // merge segments while the writer (and its analyzer) are still open;
                // the original closed the analyzer before calling Optimize()
                writer.Optimize();
            }
            analyzer.Close();
        }
Code Example #4
        public void CreateSearchIndex()
        {
            // make sure the index directory exists
            if (!System.IO.Directory.Exists(SearchIndexPath))
            {
                System.IO.Directory.CreateDirectory(SearchIndexPath);
            }

            Directory   indexDirectory = FSDirectory.Open(new System.IO.DirectoryInfo(SearchIndexPath));
            var         analyzer       = new PanGuAnalyzer();
            IndexWriter writer         = null;

            try
            {
                // recreate the index from scratch
                bool isCreate = true; // always true here; originally !IndexReader.IndexExists(indexDirectory)
                writer = new IndexWriter(indexDirectory, analyzer, isCreate, IndexWriter.MaxFieldLength.UNLIMITED);

                // all published goods that have passed review
                var goodses = _goodsQueryService.Goodses().Where(
                    g => g.IsPublished && g.Status == GoodsStatus.Verifyed);

                // add each item to the index
                foreach (var goods in goodses)
                {
                    AddIndex(writer, goods);
                }
                writer.Optimize();
            }
            finally
            {
                if (analyzer != null)
                {
                    analyzer.Close();
                }
                if (writer != null)
                {
                    writer.Dispose();
                }
                if (indexDirectory != null)
                {
                    indexDirectory.Dispose();
                }
            }
        }
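AddIndex is not shown in this snippet. A plausible shape for it, assuming the Goods entity exposes Id, Name and Description properties (hypothetical field names):

        private void AddIndex(IndexWriter writer, Goods goods)
        {
            var doc = new Document();
            // store the key so hits can be mapped back to the goods record
            doc.Add(new Field("Id", goods.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("Name", goods.Name, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Description", goods.Description ?? "", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
        }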
Code Example #5
        public static void ClearLuceneIndexRecord(string record_id)
        {
            // init lucene
            var analyzer = new PanGuAnalyzer();  //new StandardAnalyzer(Version.LUCENE_30);

            using (var writer = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                // remove older index entry
                var searchQuery = new TermQuery(new Term("Id", record_id));
                writer.DeleteDocuments(searchQuery);

                // close handles (the using block disposes the writer)
                analyzer.Close();
            }
        }
Code Example #6
        public static void AddUpdateLuceneIndex(IEnumerable <SearchData> sampleDatas)
        {
            // init lucene
            var analyzer = new PanGuAnalyzer();  //new StandardAnalyzer(Version.LUCENE_30);

            using (var writer = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                // add data to lucene search index (replaces older entries if any)
                foreach (var sampleData in sampleDatas)
                {
                    _addToLuceneIndex(sampleData, writer);
                }

                // close handles (the using block disposes the writer)
                analyzer.Close();
            }
        }
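Neither _addToLuceneIndex nor SearchData is shown here. A sketch of what such a helper typically looks like, assuming SearchData carries Id, Name and Description (hypothetical properties):

        private static void _addToLuceneIndex(SearchData sampleData, IndexWriter writer)
        {
            // remove any existing entry with the same Id so re-indexing replaces it
            writer.DeleteDocuments(new TermQuery(new Term("Id", sampleData.Id.ToString())));

            var doc = new Document();
            doc.Add(new Field("Id", sampleData.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("Name", sampleData.Name, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Description", sampleData.Description, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
        }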
Code Example #7
        /// <summary>
        /// Build the Lucene index data
        /// </summary>
        /// <param name="indexDocList">list of documents to index</param>
        /// <param name="directoryPath">path of the index directory</param>
        /// <param name="callback">callback invoked for each indexed document</param>
        public static void MakeIndex(List <Document> indexDocList, string directoryPath, Action <Document> callback)
        {
            try
            {
                PanGuAnalyzer analyzer     = new PanGuAnalyzer(true);
                string        textIndexDir = directoryPath;
                if (!System.IO.Directory.Exists(textIndexDir))
                {
                    System.IO.Directory.CreateDirectory(textIndexDir);
                }

                Lucene.Net.Store.Directory indexDirectory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(textIndexDir), new NativeFSLockFactory());
                if (IndexReader.IndexExists(indexDirectory))
                {
                    if (IndexWriter.IsLocked(indexDirectory))
                    {
                        IndexWriter.Unlock(indexDirectory);
                    }
                }
                IndexWriter indexWriter = new IndexWriter(indexDirectory, analyzer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);
                if (indexDocList != null && indexDocList.Count > 0)
                {
                    foreach (var item in indexDocList)
                    {
                        indexWriter.AddDocument(item, analyzer);

                        if (callback != null)
                        {
                            callback(item);
                        }
                    }
                }

                indexWriter.Optimize();
                indexWriter.Close();

                analyzer.Close();
            }
            catch (Exception ex)
            {
                LogHelper.Info(typeof(LuceneManager), ex.ToString());
            }
        }
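A usage sketch for MakeIndex (the index path and Document fields are illustrative):

        var docs = new List <Document>();
        var doc  = new Document();
        doc.Add(new Field("Title", "示例标题", Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("Content", "这是一段用于建立索引的示例正文。", Field.Store.YES, Field.Index.ANALYZED));
        docs.Add(doc);

        LuceneManager.MakeIndex(docs, @"D:\LuceneIndex",
                                d => Console.WriteLine("indexed: " + d.Get("Title")));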
Code Example #8
        public ActionResult Cut_2(string str)
        {
            // PanGu word segmentation
            StringBuilder sb = new StringBuilder();

            Analyzer analyzer = new PanGuAnalyzer();

            TokenStream tokenStream = analyzer.TokenStream("", new StringReader(str));

            // get the attribute once; IncrementToken() updates it in place
            ITermAttribute item = tokenStream.GetAttribute <ITermAttribute>();

            while (tokenStream.IncrementToken())
            {
                sb.Append(item.Term + "|");
            }
            analyzer.Close();
            return(Content(sb.ToString()));
        }
Code Example #9
        public static bool ClearLuceneIndex()
        {
            try
            {
                var analyzer = new PanGuAnalyzer();  //new StandardAnalyzer(Version.LUCENE_30);
                using (var writer = new IndexWriter(_directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    // remove older index entries
                    writer.DeleteAll();

                    // close handles (the using block disposes the writer)
                    analyzer.Close();
                }
            }
            catch (Exception)
            {
                return(false);
            }
            return(true);
        }
Code Example #10
        //private static IndexSearcher searcher = new IndexSearcher(reader);

        #region 分词测试
        /// <summary>
        /// 分词测试
        /// </summary>
        /// <param name="keyword"></param>
        /// <returns></returns>
        public static string Token(string keyword)
        {
            string ret = "";

            System.IO.StringReader reader   = new System.IO.StringReader(keyword);
            PanGuAnalyzer          analyzer = new PanGuAnalyzer();

            Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(keyword, reader); // the field name argument is unused here

            // get the attribute once; IncrementToken() updates it in place
            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita = ts.GetAttribute <Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
            while (ts.IncrementToken())
            {
                ret += ita.Term + "|";
            }
            reader.Close();
            analyzer.Close();
            return(ret);
        }
Code Example #11
        /// <summary>
        /// Tokenize keyword and return the resulting terms
        /// </summary>
        public static IEnumerable <string> SplitWords(string keyword)
        {
            IList <string> list = new List <string>();

            System.IO.StringReader reader = new System.IO.StringReader(keyword);
            Analyzer    analyzer          = new PanGuAnalyzer();
            TokenStream tokenStream       = analyzer.TokenStream("", reader);

            // the attribute is updated in place by each IncrementToken() call
            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita = tokenStream.GetAttribute <Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
            while (tokenStream.IncrementToken())
            {
                list.Add(ita.Term); // the term for the current token
            }
            reader.Close();
            analyzer.Close();
            return(list);
        }
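Calling it is straightforward (a minimal sketch):

        foreach (string word in SplitWords("中华人民共和国"))
        {
            Console.WriteLine(word);
        }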
Code Example #12
        /// <summary>
        /// Rebuild the index for all schools
        /// </summary>
        /// <param name="itemCallback"></param>
        public void MakeAllSchoolIndex(MakeSchoolItemIndexCallback itemCallback)
        {
            #region Overseas universities
            try
            {
                OverseaUniversityDao     overseaUniversityDao = new OverseaUniversityDao();
                List <OverseaUniversity> universityList       = overseaUniversityDao.Backstage_GetAllUniversityList();

                PanGuAnalyzer analyzer = new PanGuAnalyzer(true);

                string textIndexDir = Utilities.GetLuceneTextIndexDirectoryPath(LuceneTextIndexType.AllSchool, null);
                if (!System.IO.Directory.Exists(textIndexDir))
                {
                    System.IO.Directory.CreateDirectory(textIndexDir);
                }

                Lucene.Net.Store.Directory indexDirectory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(textIndexDir), new NativeFSLockFactory());
                if (IndexReader.IndexExists(indexDirectory))
                {
                    if (IndexWriter.IsLocked(indexDirectory))
                    {
                        IndexWriter.Unlock(indexDirectory);
                    }
                }
                IndexWriter indexWriter = new IndexWriter(indexDirectory, analyzer, false, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);

                foreach (OverseaUniversity university in universityList)
                {
                    try
                    {
                        Document indexDoc = new Document();

                        #region Data fields
                        indexDoc.Add(new Field("Key", string.Concat("OverseaUniversity_", university.UniversityId), Field.Store.YES, Field.Index.ANALYZED));
                        indexDoc.Add(new NumericField("SchoolId", Field.Store.YES, true).SetIntValue(university.UniversityId));
                        indexDoc.Add(new NumericField("Type", Field.Store.YES, true).SetIntValue((int)SchoolType.OverseaUniversity));
                        indexDoc.Add(new Field("Name", university.Name, Field.Store.YES, Field.Index.ANALYZED));
                        indexDoc.Add(new Field("LC_Name", university.Name.ToLower(), Field.Store.NO, Field.Index.ANALYZED));
                        indexDoc.Add(new Field("CnName", university.CnName, Field.Store.YES, Field.Index.ANALYZED));
                        indexDoc.Add(new Field("LC_CnName", university.CnName.ToLower(), Field.Store.NO, Field.Index.ANALYZED));
                        indexDoc.Add(new Field("Pinyin", university.Pinyin, Field.Store.YES, Field.Index.ANALYZED));
                        indexDoc.Add(new Field("LC_Pinyin", university.Pinyin.ToLower(), Field.Store.NO, Field.Index.ANALYZED));
                        indexDoc.Add(new NumericField("RegionId", Field.Store.YES, true).SetIntValue(university.CountryId));
                        indexDoc.Add(new NumericField("StateId", Field.Store.YES, true).SetIntValue(university.StateId));
                        indexDoc.Add(new NumericField("CityId", Field.Store.YES, true).SetIntValue(university.CityId));
                        #endregion

                        indexWriter.AddDocument(indexDoc, analyzer);

                        if (itemCallback != null)
                        {
                            itemCallback(indexDoc);
                        }
                    }
                    catch (Exception ex)
                    {
                        LogHelper.WriteLog(ex);
                    }
                }

                indexWriter.Optimize();
                indexWriter.Close();

                analyzer.Close();
            }
            catch (Exception ex)
            {
                LogHelper.WriteLog(ex);
            }
            #endregion
        }
Code Example #13
        /// <summary>
        /// Commit content to the index
        /// </summary>
        /// <param name="id"></param>
        /// <param name="title"></param>
        /// <param name="content"></param>
        /// <param name="date"></param>
        /// <param name="param"></param>
        /// <param name="modcode"></param>
        /// <returns></returns>
        public static string CommitContent(string id, string title, string content, DateTime?date, string param, string modcode, string searchparam1, string searchparam2, string searchparam3)
        {
            #region Validate input parameters
            if (string.IsNullOrEmpty(id))
            {
                throw new Exception("Parameter id cannot be empty");
            }
            if (string.IsNullOrEmpty(title))
            {
                throw new Exception("Parameter title cannot be empty");
            }
            if (string.IsNullOrEmpty(content))
            {
                throw new Exception("Parameters content and filepathList cannot both be empty");
            }
            #endregion
            if (date == null)
            {
                date = DateTime.Now;              // use the current time when date is null
            }
            //Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            Analyzer    analyzer  = new PanGuAnalyzer();
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
            bool        isUpdate  = IndexReader.IndexExists(directory);
            if (isUpdate)
            {
                // if the index directory is locked (e.g. the process exited abnormally while indexing), unlock it first
                if (IndexWriter.IsLocked(directory))
                {
                    IndexWriter.Unlock(directory);
                }
            }
            IndexWriter writer = new IndexWriter(directory, analyzer, !isUpdate, IndexWriter.MaxFieldLength.UNLIMITED);
            // to avoid duplicate index entries, delete any record with this id before re-adding it
            writer.DeleteDocuments(new Term("id", id));

            string result = "";
            try
            {
                Document doc = new Document();
                doc.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));                                                //存储
                doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));     //分词建立索引
                doc.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); //分词建立索引
                doc.Add(new Field("date", date.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));                                 //不分词建立索引
                doc.Add(new Field("param", param, Field.Store.YES, Field.Index.NO));                                                    //存储
                doc.Add(new Field("modcode", modcode, Field.Store.YES, Field.Index.NOT_ANALYZED));                                      //存储

                doc.Add(new Field("searchparam1", searchparam1, Field.Store.YES, Field.Index.NOT_ANALYZED));                            //存储
                doc.Add(new Field("searchparam2", searchparam2, Field.Store.YES, Field.Index.NOT_ANALYZED));                            //存储
                doc.Add(new Field("searchparam3", searchparam3, Field.Store.YES, Field.Index.NOT_ANALYZED));                            //存储

                writer.AddDocument(doc);

                // write an API call log entry
                string EnableApiLog = Util.GetAppSetting("EnableApiLog");
                if (EnableApiLog == "1")
                {
                    string logMsg = GetParamList(id, title, content, date, param, modcode) + "\r\n" + Utility.Util.GetClientInfo();
                    Utility.Util.WriteApiLog(logMsg);
                }
            }
            catch (Exception ex)
            {
                result = ex.Message;
                string errMsg = ex.Message + "\r\n" + GetParamList(id, title, content, date, param, modcode) + "\r\n" + Utility.Util.GetClientInfo();
                Utility.Util.WriteLog(errMsg);
            }

            // optimize the index files
            //writer.Optimize();
            analyzer.Close();
            writer.Dispose();
            directory.Dispose();
            return(result);
        }
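A usage sketch (the id/modcode values are illustrative; an empty return string means success):

        string error = CommitContent(
            "doc-001",                               // id
            "Lucene.Net 索引示例",                   // title
            "这是要建立全文索引的正文内容。",        // content
            null,                                    // date: null falls back to DateTime.Now
            "{\"source\":\"demo\"}",                 // param: stored but not indexed
            "news",                                  // modcode
            "cat-a", "", "");                        // searchparam1..3
        if (error != "") { /* indexing failed; error holds the exception message */ }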
Code Example #14
        /// <summary>
        /// Search content
        /// </summary>
        /// <param name="word">search keyword</param>
        /// <param name="pagesize">records per page</param>
        /// <param name="pageindex">current page number</param>
        /// <returns></returns>
        public static SearchResult SearchContent(string modcode, string word, int pagesize, int pageindex, string searchparam1, string searchparam2, string searchparam3)
        {
            SearchResult searchResult = new SearchResult();

            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
            IndexSearcher searcher  = new IndexSearcher(directory, true);
            var           analyzer  = new PanGuAnalyzer();

            // initialize a MultiFieldQueryParser so several fields are queried at once
            Lucene.Net.QueryParsers.MultiFieldQueryParser parser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, new string[] { "title", "content" }, analyzer);
            parser.DefaultOperator = QueryParser.AND_OPERATOR; // must be set before Parse() to have any effect
            Lucene.Net.Search.Query query = parser.Parse(word); // build the Query

            Lucene.Net.Search.BooleanQuery boolQuery = new Lucene.Net.Search.BooleanQuery();
            boolQuery.Add(query, Occur.MUST);
            if (!string.IsNullOrEmpty(modcode))
            {
                PhraseQuery queryModCode = new PhraseQuery();
                queryModCode.Add(new Term("modcode", modcode));
                boolQuery.Add(queryModCode, Occur.MUST);
            }

            if (!string.IsNullOrEmpty(searchparam1))
            {
                WildcardQuery query1 = new WildcardQuery(new Term("searchparam1", "*" + searchparam1 + "*"));
                boolQuery.Add(query1, Occur.MUST);
            }
            if (!string.IsNullOrEmpty(searchparam2))
            {
                WildcardQuery query1 = new WildcardQuery(new Term("searchparam2", "*" + searchparam2 + "*"));
                boolQuery.Add(query1, Occur.MUST);
            }
            if (!string.IsNullOrEmpty(searchparam3))
            {
                WildcardQuery query1 = new WildcardQuery(new Term("searchparam3", "*" + searchparam3 + "*"));
                boolQuery.Add(query1, Occur.MUST);
            }

            Sort sort   = new Sort(new SortField("date", SortField.STRING, true));
            var  result = searcher.Search(boolQuery, null, 1000, sort);

            if (result.TotalHits == 0)
            {
                searchResult.count = 0;
            }
            else
            {
                searchResult.count = result.TotalHits;
                int startNum = 0, endNum = result.TotalHits;
                if (pagesize > 0)
                {
                    // paginate when pagesize > 0
                    startNum = (pageindex - 1) * pagesize;
                    endNum   = startNum + pagesize;
                }
                ScoreDoc[]     docs     = result.ScoreDocs;
                List <JObject> dataList = new List <JObject>();
                for (int i = 0; i < docs.Length; i++)
                {
                    if (i < startNum)
                    {
                        continue;
                    }
                    if (i >= endNum)
                    {
                        break;
                    }

                    Document doc     = searcher.Doc(docs[i].Doc);
                    string   id      = doc.Get("id");
                    string   title   = doc.Get("title");
                    string   content = doc.Get("content");
                    string   date    = doc.Get("date");
                    string   param   = doc.Get("param");
                    string   mcode   = doc.Get("modcode");
                    string   param1  = doc.Get("searchparam1");
                    string   param2  = doc.Get("searchparam2");
                    string   param3  = doc.Get("searchparam3");
                    JObject  obj     = new JObject();
                    obj["id"] = id;

                    // create the HTML formatter; the setting holds the prefix/suffix wrapped around highlighted words
                    string   highLightTag        = Util.GetAppSetting("HighLightTag", "<font color=\"red\">|</font>");
                    string[] tarArr              = highLightTag.Split('|');
                    var      simpleHTMLFormatter = new SimpleHTMLFormatter(tarArr[0], tarArr[1]);
                    // create the Highlighter from the HTML formatter and a PanGu Segment instance
                    var highlighter = new Highlighter(simpleHTMLFormatter, new PanGu.Segment());
                    // number of characters in each summary fragment
                    int highlightFragmentSize = Util.GetAppSetting("HighlightFragmentSize", "100").ToInt();
                    highlighter.FragmentSize = highlightFragmentSize;
                    // get the best-matching fragments
                    String bodyPreview = highlighter.GetBestFragment(word, content);
                    string newTitle    = highlighter.GetBestFragment(word, title);
                    if (!string.IsNullOrEmpty(newTitle))
                    {
                        title = newTitle;
                    }

                    obj["title"]        = title;
                    obj["content"]      = bodyPreview;
                    obj["date"]         = date;
                    obj["param"]        = param;
                    obj["modcode"]      = mcode;
                    obj["searchparam1"] = param1;
                    obj["searchparam2"] = param2;
                    obj["searchparam3"] = param3;
                    dataList.Add(obj);
                }
                searchResult.data = dataList;
            }
            analyzer.Close();
            searcher.Dispose();
            directory.Dispose();

            return(searchResult);
        }
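A usage sketch (modcode and keyword are illustrative; count and data are the SearchResult fields populated above):

        SearchResult result = SearchContent("news", "关键字", 10, 1, "", "", "");
        Console.WriteLine("total hits: " + result.count);
        foreach (JObject item in result.data)
        {
            Console.WriteLine(item["title"] + " | " + item["date"]);
        }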
Code Example #15
        /*
         * Tokenization function
         * @srcdata: the text to be tokenized
         * Return value: the tokenization result as a string, in the format defined
         * by an earlier student, i.e. {<token1>}{<token2>}...{<tokenN>}
         */

        //	This function is the core: input is the text to tokenize, output is the tokenization result.
        //	Each token is emitted in the {<word>} format.
        //	The format was defined by an earlier student; we kept it so as not to break the existing interface.
        //	The function works mainly by calling the Lucene.Net.Analysis and Lucene.China interfaces.
        //	Configuring them is simple: 1. add the dll references; 2. place a data folder next to the
        //	executable containing two files, sDict and sNoise, holding the dictionary and the noise words.
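The commented-out word_seg below relies on the legacy Lucene.China analyzer and the old Token-based stream API. A modern equivalent of the {<word>} output format using PanGuAnalyzer and the attribute API from the earlier examples might look like this (a sketch, not the original implementation):

        private string WordSegPanGu(string srcdata)
        {
            StringBuilder sb       = new StringBuilder();
            PanGuAnalyzer analyzer = new PanGuAnalyzer();
            TokenStream   stream   = analyzer.TokenStream("", new StringReader(srcdata));

            ITermAttribute term = stream.GetAttribute <ITermAttribute>();
            while (stream.IncrementToken())
            {
                sb.Append("{<").Append(term.Term).Append(">}"); // the {<word>} format described above
            }
            analyzer.Close();
            return(sb.ToString());
        }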

        /*private bool isChineseWord(string word)
         * {
         *  if (word == null)
         *  {
         *      return false;
         *  }
         *  for (int i = 0; i < word.Length; i++)
         *  {
         *      char chr = word[i];
         *      if (!(chr >= 0x4E00 && chr <= 0x9FFF))
         *      {
         *          return false;
         *      }
         *  }
         *
         *  return true;
         * }*/

        /*private string word_seg(string srcdata)
         * {
         *  //StringBuilder sb = new StringBuilder();
         *  //sb.Remove(0, sb.Length);
         *  string t1 = "";
         *  ChineseAnalyzer analyzer = new Lucene.China.ChineseAnalyzer();
         *  //string FilePath = @"C:\Users\梁亦清\Documents\Visual Studio 2013\Projects\中科院分词简例\1.htm";
         *
         *  StringReader sr = new StringReader(srcdata);
         *  //Console.WriteLine(sr.ToString());
         *  //Environment.Exit(0);
         *  TokenStream stream = analyzer.TokenStream("", sr);
         *
         *  //long begin = System.DateTime.Now.Ticks;
         *  Lucene.Net.Analysis.Token t = stream.Next();
         *  while (t != null)
         *  {
         *      t1 = t.ToString();   // printed as: (keyword,0,2), needs post-processing
         *      t1 = t1.Replace("(", "");
         *      char[] separator = { ',' };
         *      t1 = t1.Split(separator)[0];
         *      if (isChineseWord(t1))
         *      {
         *          sb.Append("{<" + t1 + ">}");
         *      }
         *      t = stream.Next();
         *  }
         *  //return sb.ToString()
         * }*/



        //	This function is the public interface of the earlier student's code; we kept the interface, but the segmentation method used is not naive Bayes.

        /*public string DoWordSegment(string strIn)
         * {
         *  return word_seg(strIn);
         *
         * }*/

        public List <string> cutwords(string words, string analyzer = "Lucene.China.ChineseAnalyzer")
        {
            List <string> results = new List <string>();

            switch (analyzer)
            {
            case "Lucene.Net.Analysis.SimpleAnalyzer":
                SimpleAnalyzer            analyzerInstance0 = new SimpleAnalyzer();
                TokenStream               ts0 = analyzerInstance0.ReusableTokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token0;
                while ((token0 = ts0.Next()) != null)
                {
                    results.Add(token0.TermText());
                }
                ts0.Close();
                analyzerInstance0.Close();
                break;

            case "Lucene.Net.Analysis.KeywordAnalyzer":
                KeywordAnalyzer           analyzerInstance1 = new KeywordAnalyzer();
                TokenStream               ts1 = analyzerInstance1.ReusableTokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token1;
                while ((token1 = ts1.Next()) != null)
                {
                    results.Add(token1.TermText());
                }
                ts1.Close();
                analyzerInstance1.Close();
                break;

            case "Lucene.Net.Analysis.StopAnalyzer":
                StopAnalyzer analyzerInstance2 = new StopAnalyzer();
                TokenStream  ts2 = analyzerInstance2.ReusableTokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token2;
                while ((token2 = ts2.Next()) != null)
                {
                    results.Add(token2.TermText());
                }
                ts2.Close();
                analyzerInstance2.Close();
                break;

            case "Lucene.Net.Analysis.WhitespaceAnalyzer":
                WhitespaceAnalyzer        analyzerInstance3 = new WhitespaceAnalyzer();
                TokenStream               ts3 = analyzerInstance3.ReusableTokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token3;
                while ((token3 = ts3.Next()) != null)
                {
                    results.Add(token3.TermText());
                }
                ts3.Close();
                analyzerInstance3.Close();
                break;

            case "Lucene.Net.Analysis.PanGu.PanGuAnalyzer":
                PanGu.Segment.Init(@"G:\CProjects\Pipeline\pipeline\Pipeline\bin\Release\PanGu.xml");
                PanGuAnalyzer             analyzerInstance4 = new PanGuAnalyzer();
                TokenStream               ts4 = analyzerInstance4.TokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token4;
                while ((token4 = ts4.Next()) != null)
                {
                    results.Add(token4.TermText());
                }
                ts4.Close();
                analyzerInstance4.Close();
                break;

            case "Lucene.Net.Analysis.Standard.StandardAnalyzer":
                StandardAnalyzer          analyzerInstance5 = new StandardAnalyzer();
                TokenStream               ts5 = analyzerInstance5.ReusableTokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token5;
                while ((token5 = ts5.Next()) != null)
                {
                    results.Add(token5.TermText());
                }
                ts5.Close();
                analyzerInstance5.Close();
                break;

            case "Lucene.China.ChineseAnalyzer":
            default:
                ChineseAnalyzer           analyzerInstance6 = new ChineseAnalyzer();
                TokenStream               ts6 = analyzerInstance6.ReusableTokenStream("", new StringReader(words));
                Lucene.Net.Analysis.Token token6;
                while ((token6 = ts6.Next()) != null)
                {
                    results.Add(token6.TermText());
                }
                ts6.Close();
                analyzerInstance6.Close();
                break;
            }
            return(results);
        }
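A usage sketch (the containing class is not shown, so the instance name here is illustrative):

        // tokenize with the PanGu branch of the switch above
        var segmenter = new WordSegmenter(); // hypothetical name for the containing class
        List <string> tokens = segmenter.cutwords("今天天气很好", "Lucene.Net.Analysis.PanGu.PanGuAnalyzer");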