コード例 #1
0
        /// <summary>
        /// Builds a dictionary of the documents registered in a Lucene index, keyed by document path.
        /// </summary>
        /// <param name="idxDir">Directory of the index to read.</param>
        /// <returns>
        /// One <c>DocInfo</c> per distinct path, each with <c>Exists</c> set to false.
        /// When the same path occurs more than once, the entry with the lowest document id is kept.
        /// </returns>
        public Dictionary <string, DocInfo> CreateDocumentDic(FSDirectory idxDir)
        {
            Dictionary <string, DocInfo> dic = new Dictionary <string, DocInfo>();
            IndexReader ir = DirectoryReader.Open(idxDir);

            try {
                // NOTE(review): every id below MaxDoc() is visited; presumably this can include
                // deleted documents that have not been merged away yet - confirm whether
                // live-docs filtering is needed here.
                int max = ir.MaxDoc();
                for (int i = 0; i < max; i++)
                {
                    var doc = ir.Document(i);

                    string docPath = doc.GetField(LuceneIndexBuilder.Path).StringValue();
                    string rawDate = doc.GetField(LuceneIndexBuilder.UpdateDate).StringValue();

                    // The indexer stores the timestamp as a yyyyMMddHHmmss number, so decode it
                    // with that exact format. Values that do not match the format are treated as
                    // DateTime.ToBinary() output, which is how this method decoded them before.
                    DateTime updateDate;
                    if (!DateTime.TryParseExact(
                            rawDate,
                            "yyyyMMddHHmmss",
                            System.Globalization.CultureInfo.InvariantCulture,
                            System.Globalization.DateTimeStyles.None,
                            out updateDate))
                    {
                        updateDate = DateTime.FromBinary(
                            long.Parse(rawDate, System.Globalization.CultureInfo.InvariantCulture));
                    }

                    var docInfo = new DocInfo();
                    docInfo.Id         = i;
                    docInfo.Path       = docPath;
                    docInfo.UpdateDate = updateDate;
                    docInfo.Exists     = false;

                    // First occurrence of a path wins; later duplicates are ignored.
                    if (!dic.ContainsKey(docInfo.Path))
                    {
                        dic.Add(docInfo.Path, docInfo);
                    }
                }
            } finally {
                ir.Close();
            }

            return dic;
        }
コード例 #2
0
        /// <summary>
        /// Converts a file to text and adds it to the index as one document.
        /// Markdown files are read with ReadToString, text files detected as Shift_JIS are read
        /// directly, and everything else goes through the configured text extractor:
        ///  - Apache Tika
        ///  - IFilter
        /// </summary>
        /// <param name="path">Path of the file to index.</param>
        /// <param name="indexWriter">Writer that receives the new document (and the delete of a stale one).</param>
        /// <param name="threadName">Label prefixed to every log message.</param>
        /// <param name="docDic">
        /// Already-indexed documents keyed by path; may be null. A matching entry is flagged
        /// with <c>Exists = true</c>.
        /// </param>
        /// <returns>true when a document was added; false when the file was skipped.</returns>
        private bool AddDocument(string path, IndexWriter indexWriter, string threadName, Dictionary <string, DocInfo> docDic)
        {
            string   filename  = System.IO.Path.GetFileName(path);
            string   extension = System.IO.Path.GetExtension(path);
            FileInfo fi        = new FileInfo(path);
            // Lower-cased once; kept culture-sensitive so lookups and stored values stay as before.
            string lowerExtension = extension.ToLower();

            if (extension == "" ||
                !_targetExtensionDic.ContainsKey(lowerExtension))
            {
                // No extension, or not one of the target extensions.
                AppObject.Logger.Info(threadName + ":" + "Out of target extension. Skipped: " + path);
                Interlocked.Increment(ref _skippedCount);

                return false;
            }
            if (lowerExtension != ".mp4" && fi.Length > this.FileSizeLimit)
            {
                // Over the size limit (mp4 files are exempt from the limit).
                AppObject.Logger.Info(threadName + ":" + "File size over. Skipped: " + path);
                Interlocked.Increment(ref _skippedCount);

                return false;
            }

            // Is this document already in the index?
            Term staleTerm = null;
            DocInfo di;
            if (docDic != null && docDic.TryGetValue(path, out di))
            {
                // NOTE(review): docDic is written here; threadName suggests several threads may
                // call this method - confirm the caller synchronizes access to the dictionary.
                di.Exists    = true;
                docDic[path] = di;
                // Compare modification times at one-second resolution.
                if (di.UpdateDate < DateTimeUtil.Truncate(fi.LastWriteTime, TimeSpan.FromSeconds(1)))
                {
                    // Modified since it was indexed: replace it (delete + insert). The delete is
                    // deferred until the new document is fully built, so a failure while
                    // extracting text does not remove the existing entry.
                    staleTerm = new Term(LuceneIndexBuilder.Path, di.Path);
                }
                else
                {
                    // Not modified.
                    AppObject.Logger.Info(threadName + ":" + "No updated. Skipped: " + path);
                    Interlocked.Increment(ref _skippedCount);

                    return false;
                }
            }

            // Build the document.
            Document doc = new Document();

            if (lowerExtension == ".md")
            {
                // Markdown
                string content = ReadToString(path);
                doc.Add(new Field(Content, content, _hilightFieldType));
            }
            else if (lowerExtension == ".txt")
            {
                // Plain text: Shift_JIS files are read directly, others go through the extractor.
                var sjis = Encoding.GetEncoding("Shift_JIS");
                // Value comparison, so an equivalent Encoding instance also matches.
                if (sjis.Equals(FileUtil.GetTextEncoding(path)))
                {
                    string content;
                    using (var reader = new StreamReader(path, sjis)) {
                        content = reader.ReadToEnd();
                    }
                    doc.Add(new Field(Content, content, _hilightFieldType));
                }
                else
                {
                    AddExtractedContent(doc, path);
                }
            }
            else
            {
                AddExtractedContent(doc, path);
            }

            doc.Add(new StringField(Path, path, FieldStore.YES));
            doc.Add(new StringField(Title, filename.ToLower(), FieldStore.YES));
            doc.Add(new StringField(Extension, lowerExtension, FieldStore.YES));

            // NOTE: there is no date-typed field, so the timestamp is kept as a long in
            // yyyyMMddHHmmss form. Formatted with the invariant culture so the digits do not
            // depend on the current culture's calendar.
            long l = long.Parse(
                fi.LastWriteTime.ToString("yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture),
                System.Globalization.CultureInfo.InvariantCulture);

            doc.Add(new LongPoint(UpdateDate, l));
            doc.Add(new StoredField(UpdateDate, l));

            if (staleTerm != null)
            {
                indexWriter.DeleteDocuments(staleTerm);
            }
            indexWriter.AddDocument(doc);

            return true;
        }

        /// <summary>
        /// Adds the Content field to <paramref name="doc"/> using the configured extractor
        /// (Tika when the extract mode is Tika, otherwise IFilter).
        /// </summary>
        /// <param name="doc">Document that receives the Content field.</param>
        /// <param name="path">Path of the file whose text is extracted.</param>
        private void AddExtractedContent(Document doc, string path)
        {
            if (_txtExtractMode == TextExtractModes.Tika)
            {
                var extracted = _txtExtractor.Extract(path);
                doc.Add(new Field(Content, extracted.Text, _hilightFieldType));
            }
            else
            {
                doc.Add(new Field(Content, IFilterParser.Parse(path), _hilightFieldType));
            }
        }