/// <summary>
/// Builds a dictionary of the documents registered in a Lucene index, keyed by the stored path.
/// </summary>
/// <param name="idxDir">Directory of the Lucene index to read.</param>
/// <returns>
/// A map from stored path to a <see cref="DocInfo"/> holding the document number, path and
/// stored update date, with <c>Exists</c> initialised to <c>false</c>. When several documents
/// share the same path, the one with the lowest document number is kept.
/// </returns>
public Dictionary<string, DocInfo> CreateDocumentDic(FSDirectory idxDir)
{
    var dic = new Dictionary<string, DocInfo>();
    IndexReader ir = DirectoryReader.Open(idxDir);
    try
    {
        // NOTE(review): this walks every document number below MaxDoc(); whether deleted but
        // not-yet-merged documents are also returned here depends on the Lucene version - confirm.
        int max = ir.MaxDoc();
        for (int i = 0; i < max; i++)
        {
            var doc = ir.Document(i);
            string path = doc.GetField(LuceneIndexBuilder.Path).StringValue();
            if (dic.ContainsKey(path))
            {
                // First occurrence wins; no need to decode the remaining fields.
                continue;
            }

            var docInfo = new DocInfo();
            docInfo.Id = i;
            docInfo.Path = path;
            docInfo.UpdateDate = ParseStoredUpdateDate(doc.GetField(LuceneIndexBuilder.UpdateDate).StringValue());
            docInfo.Exists = false;
            dic.Add(path, docInfo);
        }
    }
    finally
    {
        ir.Close();
    }
    return dic;
}

/// <summary>
/// Decodes the stored update-date value of an indexed document.
/// </summary>
/// <param name="raw">String form of the stored numeric field.</param>
/// <returns>The decoded timestamp.</returns>
/// <remarks>
/// AddDocument stores the last-write time as the decimal number <c>yyyyMMddHHmmss</c>, so that
/// format is tried first. Passing such a number to <see cref="DateTime.FromBinary"/> would
/// treat it as a tick count and yield a date in year 0001, which makes every indexed file
/// look out of date. Values that do not match the format fall back to the previous
/// <see cref="DateTime.FromBinary"/> decoding.
/// </remarks>
private static DateTime ParseStoredUpdateDate(string raw)
{
    DateTime parsed;
    if (DateTime.TryParseExact(
            raw,
            "yyyyMMddHHmmss",
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.None,
            out parsed))
    {
        return parsed;
    }

    return DateTime.FromBinary(long.Parse(raw, System.Globalization.CultureInfo.InvariantCulture));
}
/// <summary>
/// Extracts the text of a file with the configured text extractor and adds it to the index.
/// The available extractors are:
///   - Apache Tika
///   - IFilter
/// </summary>
/// <param name="path">Path of the file to index.</param>
/// <param name="indexWriter">Writer that receives the new document (and the delete of a stale one).</param>
/// <param name="threadName">Label prefixed to log messages.</param>
/// <param name="docDic">
/// Documents already present in the index, keyed by path; may be <c>null</c>. The entry for
/// <paramref name="path"/>, if any, is marked as existing.
/// </param>
/// <returns>
/// <c>true</c> when a document was added; <c>false</c> when the file was skipped (extension
/// not targeted, file too large, or not modified since it was indexed).
/// </returns>
private bool AddDocument(string path, IndexWriter indexWriter, string threadName, Dictionary<string, DocInfo> docDic)
{
    string filename = System.IO.Path.GetFileName(path);
    string extension = System.IO.Path.GetExtension(path);
    FileInfo fi = new FileInfo(path);
    string lowerExtension = extension.ToLower();

    if (extension == "" || !_targetExtensionDic.ContainsKey(lowerExtension))
    {
        // No extension, or an extension that is not targeted.
        AppObject.Logger.Info(threadName + ":" + "Out of target extension. Skipped: " + path);
        Interlocked.Increment(ref _skippedCount);
        return false;
    }

    if (lowerExtension != ".mp4" && fi.Length > this.FileSizeLimit)
    {
        // Over the size limit (the limit is not applied to mp4 files).
        AppObject.Logger.Info(threadName + ":" + "File size over. Skipped: " + path);
        Interlocked.Increment(ref _skippedCount);
        return false;
    }

    // Is this document already in the index?
    // NOTE(review): threadName and the Interlocked counter suggest this may run on several
    // threads, while docDic is a plain Dictionary that is written below - confirm that the
    // caller serialises access.
    DocInfo di;
    if (docDic != null && docDic.TryGetValue(path, out di))
    {
        di.Exists = true;
        // Written back so the flag is kept even if DocInfo is a value type.
        docDic[path] = di;

        // Compare update times at one-second resolution.
        if (di.UpdateDate < DateTimeUtil.Truncate(fi.LastWriteTime, TimeSpan.FromSeconds(1)))
        {
            // The file changed: delete the stale document, then insert the new one below.
            Term t = new Term(LuceneIndexBuilder.Path, di.Path);
            indexWriter.DeleteDocuments(t);
        }
        else
        {
            // Not modified since it was indexed.
            AppObject.Logger.Info(threadName + ":" + "No updated. Skipped: " + path);
            Interlocked.Increment(ref _skippedCount);
            return false;
        }
    }

    // Build and add the document.
    Document doc = new Document();
    doc.Add(new Field(Content, ExtractContentText(path, lowerExtension), _hilightFieldType));
    doc.Add(new StringField(Path, path, FieldStore.YES));
    doc.Add(new StringField(Title, filename.ToLower(), FieldStore.YES));
    doc.Add(new StringField(Extension, lowerExtension, FieldStore.YES));

    // NOTE: there is no date-typed field, so the timestamp is kept as a long (yyyyMMddHHmmss).
    // The invariant culture keeps the digits Gregorian regardless of the current culture's calendar.
    long l = long.Parse(
        fi.LastWriteTime.ToString("yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture),
        System.Globalization.CultureInfo.InvariantCulture);
    doc.Add(new LongPoint(UpdateDate, l));
    doc.Add(new StoredField(UpdateDate, l));

    indexWriter.AddDocument(doc);
    return true;
}

/// <summary>
/// Returns the text content of a file, choosing the reader by extension.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <param name="lowerExtension">Lower-cased extension of <paramref name="path"/>, including the dot.</param>
/// <returns>The text to store in the content field.</returns>
private string ExtractContentText(string path, string lowerExtension)
{
    if (lowerExtension == ".md")
    {
        // Markdown is read as-is.
        return ReadToString(path);
    }

    if (lowerExtension == ".txt")
    {
        // Text files detected as Shift_JIS are read directly with that encoding.
        // Equals is used instead of == because == on Encoding compares references only.
        var sjis = Encoding.GetEncoding("Shift_JIS");
        if (sjis.Equals(FileUtil.GetTextEncoding(path)))
        {
            using (var reader = new StreamReader(path, sjis))
            {
                return reader.ReadToEnd();
            }
        }
    }

    // Everything else goes through the configured extractor.
    if (_txtExtractMode == TextExtractModes.Tika)
    {
        var content = _txtExtractor.Extract(path);
        return content.Text;
    }

    return IFilterParser.Parse(path);
}