Exemplo n.º 1
0
        const string INDEXDIR  = @"c:\temp\ldn-index";   // Location of the index

        /// <summary>
        /// Opens (creating if necessary) the Lucene index at INDEXDIR, hands an
        /// <c>IndexWriter</c> plus the TARGETDIR directory to <c>IndexDocs</c>, and
        /// prints the elapsed time of that run in ticks before waiting for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            DirectoryInfo sourceDirectory = new DirectoryInfo(TARGETDIR);

            // The index directory and the analyzer are both disposable; release them
            // deterministically instead of leaving them to process teardown.
            using (FSDirectory dir = FSDirectory.Open(INDEXDIR))
            using (JapaneseAnalyzer analyzer = new JapaneseAnalyzer(LuceneVersion.LUCENE_48))
            {
                // Define how text is analyzed and how the index is opened:
                // CREATE_OR_APPEND reuses an existing index or creates a new one.
                IndexWriterConfig config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
                config.OpenMode = OpenMode.CREATE_OR_APPEND;

                // Stopwatch is monotonic, unlike DateTime.Now which follows the wall clock.
                System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

                // Disposing the writer is part of the timed section, as it was before.
                using (IndexWriter writer = new IndexWriter(dir, config))
                {
                    IndexDocs(writer, sourceDirectory);
                }

                stopwatch.Stop();

                // Elapsed.Ticks is a TimeSpan tick count (100 ns units), the same unit
                // the previous DateTime subtraction reported.
                System.Console.WriteLine("{0}タイマ刻み数かかりました", stopwatch.Elapsed.Ticks);
            }

            Console.ReadKey();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Click handler that indexes every "*.htm*" file found (recursively) under the
        /// directory entered in <c>TargetDirText</c> into the index located at <c>IndexDir</c>.
        /// Each file is passed through <c>HTMLParse</c> and stored as one document with the
        /// fields "title", "place" and "content".
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event arguments (not used).</param>
        private void CreateIndexButton_Click(object sender, EventArgs e)
        {
            // Enumerate the input files before the writer is opened: if the target
            // directory is invalid this throws while nothing is open yet.
            string[] files = System.IO.Directory.GetFiles(this.TargetDirText.Text, "*.htm*", System.IO.SearchOption.AllDirectories);

            java.nio.file.Path idxPath = FileSystems.getDefault().getPath(IndexDir);
            FSDirectory        dir     = FSDirectory.Open(idxPath);

            JapaneseAnalyzer  analyzer = new JapaneseAnalyzer();
            IndexWriterConfig config   = new IndexWriterConfig(analyzer);
            IndexWriter       writer   = new IndexWriter(dir, config);

            try {
                foreach (string file in files)
                {
                    // HTMLParse receives all three values by reference and may rewrite them;
                    // the (possibly updated) path is what gets stored in "place".
                    string title   = "";
                    string content = "";
                    string f       = file;
                    HTMLParse(ref title, ref content, ref f);

                    Document doc = new Document();
                    doc.Add(new StringField("title", title, FieldStore.YES));
                    doc.Add(new StringField("place", f, FieldStore.YES));
                    doc.Add(new TextField("content", content, FieldStore.YES));
                    writer.AddDocument(doc);
                }
            } catch (System.IO.IOException ex) {
                System.Console.WriteLine(ex.ToString());
            } finally {
                // Always close the writer, including when an exception other than
                // System.IO.IOException escapes the loop; otherwise the writer stays open.
                writer.Close();
            }
        }
Exemplo n.º 3
0
        const string INDEXDIR = @"c:\temp\ldn-index";  // Location of the index

        /// <summary>
        /// Opens the Lucene index at INDEXDIR, runs a fuzzy query (maximum edit distance 2)
        /// for the term "0" in the "contents" field, prints up to 10 hits with their
        /// "path" and "modified" stored fields and score, then prints the elapsed time
        /// in ticks and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Dispose the directory as well as the reader once the search is finished.
            using (FSDirectory dir = FSDirectory.Open(INDEXDIR))
            using (IndexReader reader = DirectoryReader.Open(dir))
            {
                IndexSearcher searcher = new IndexSearcher(reader);

                // Stopwatch is monotonic, unlike DateTime.Now which follows the wall clock.
                System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

                // Term takes (field, text). The single-argument constructor interprets its
                // argument as a field name with empty text, so the field must be named here.
                // FuzzyQuery works on the term directly, so no analyzer or parser is involved.
                Term      term  = new Term("contents", "0");
                var       docs  = searcher.Search(new FuzzyQuery(term, 2), 10);
                Console.WriteLine(docs.TotalHits);
                ScoreDoc[] hits = docs.ScoreDocs;

                System.Console.WriteLine("Found " + hits.Length + " hits.");
                for (int i = 0; i < hits.Length; ++i)
                {
                    int      docId = hits[i].Doc;
                    Document d     = searcher.Doc(docId);
                    System.Console.WriteLine((i + 1) + ". " + d.Get("path") + "\t" + d.Get("modified") + "\t" + hits[i].Score);
                }

                stopwatch.Stop();

                // Elapsed.Ticks is a TimeSpan tick count (100 ns units), the same unit
                // the previous DateTime subtraction reported.
                System.Console.WriteLine("{0}タイマ刻み数かかりました", stopwatch.Elapsed.Ticks);
            }

            Console.ReadKey();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Initializes the application-wide Japanese analyzer.
        /// Sets <c>AppObject.ConnectString</c>, <c>AppObject.RootDirPath</c> and
        /// <c>AppObject.AppAnalyzer</c>; the analyzer is built in SEARCH mode with the
        /// user dictionary read from UserDictionary.txt and the default stop set / stop tags.
        /// </summary>
        private static void InitializeAnalyzer()
        {
            // Directory that contains the running executable; both paths below start from it.
            string exeDir = Directory.GetParent(Application.ExecutablePath).FullName;

            string dataSourcePath = exeDir + Properties.Settings.Default.SQLITE_DATA_SOURCE;
            AppObject.ConnectString = AppObject.GetConnectString(dataSourcePath);

            // Root directory used to locate the user dictionary.
            AppObject.RootDirPath = exeDir;
            AppObject.RootDirPath = AppObject.RootDirPath + LuceneIndexBuilder.StoreDirName;

            // Load the user dictionary; the reader is closed whether or not loading succeeds.
            string         dictionaryPath   = AppObject.RootDirPath + @".\..\UserDictionary.txt";
            java.io.Reader dictionaryReader = new java.io.FileReader(dictionaryPath);
            UserDictionary userDictionary;

            try {
                userDictionary = UserDictionary.Open(dictionaryReader);
            } finally {
                dictionaryReader.close();
            }

            // Tokenizer modes, for reference:
            //   EXTENDED - extended mode; outputs unigrams for unknown words.
            //   NORMAL   - ordinary segmentation; no decompounding.
            //   SEARCH   - segmentation geared toward search: long compound nouns are
            //              decompounded and the full compound token is kept as a synonym.
            var stopSet  = JapaneseAnalyzer.GetDefaultStopSet();
            var stopTags = JapaneseAnalyzer.GetDefaultStopTags();

            AppObject.AppAnalyzer = new JapaneseAnalyzer(userDictionary,
                                                         JapaneseTokenizerMode.SEARCH,
                                                         stopSet,
                                                         stopTags);
        }