/// <summary>
/// Queues a background task that indexes one PDF: a uniquely named .txt file is
/// prepared under the working folder, handed to
/// <c>Utility.WritePdfContentToTxtFile</c>, turned into a <see cref="DataForIndex"/>
/// by <c>Utility.CreatePdfIndexedDocument</c>, and passed to
/// <c>LuceneBussines.CreateIndex</c>. Afterwards the document counter is bumped and
/// pushed to <c>DocumentCountLable</c>.
/// </summary>
/// <param name="fileName">Path of the PDF file to index.</param>
private void AddPdfToIndexe(string fileName)
{
    // should change to application startup path
    const string workingFolder = "C:\\SearchMe";

    Action indexPdf = delegate
    {
        // A GUID-based name keeps concurrently running tasks from sharing a scratch file.
        string scratchFile = workingFolder + "\\" + Guid.NewGuid().ToString() + ".txt";

        if (!Directory.Exists(workingFolder))
        {
            Directory.CreateDirectory(workingFolder);
        }

        if (!File.Exists(scratchFile))
        {
            // Create the empty file and release its handle straight away.
            File.Create(scratchFile).Dispose();
        }

        Utility.WritePdfContentToTxtFile(workingFolder, fileName, scratchFile);
        DataForIndex pdfDocument = Utility.CreatePdfIndexedDocument(fileName, scratchFile);

        new LuceneBussines().CreateIndex(pdfDocument);

        // NOTE(review): this increment runs on a worker task; if several files are
        // indexed at once the counter update is presumably racy - confirm.
        IndexDocCount++;
        LableTextChange(DocumentCountLable, IndexDocCount.ToString());
    };

    new Task(indexPdf).Start();
}
/// <summary>
/// Builds the index document for an audio file from its path and the tag data
/// read through TagLib.
/// </summary>
/// <param name="fn">Path of the audio file.</param>
/// <returns>
/// A <see cref="DataForIndex"/> labelled "Music". Reading is best effort: if an
/// exception occurs, the fields assigned before the failure are kept and the rest
/// stay at their defaults.
/// </returns>
public static DataForIndex CreateSoundIndexDoucment(string fn)
{
    DataForIndex dfi = new DataForIndex();
    try
    {
        // Path-derived fields are filled in first so that a file whose tags cannot
        // be read is still returned with its id, label, extension and name.
        dfi.ID = new Random().Next(int.MaxValue);
        dfi.Label = "Music";
        dfi.FileExtension = Path.GetExtension(fn);
        dfi.FileName = fn;

        // The TagLib file is disposed as soon as the tag values have been copied.
        using (var file = TagLib.File.Create(fn))
        {
            dfi.AudioDuration = file.Properties.Duration.ToString();
            dfi.AudioBitrate = file.Properties.AudioBitrate.ToString();

            // Same format as before: every genre is followed by " , ";
            // AudioGenre stays null when the file has no genres.
            foreach (var item in file.Tag.Genres)
            {
                dfi.AudioGenre += item + " , ";
            }

            dfi.AudioAlbum = file.Tag.Album;
        }

        dfi.Body = fn + " , " + dfi.AudioGenre + " , " + dfi.AudioDuration + " , " + dfi.AudioAlbum;
    }
    catch (Exception)
    {
        // Deliberate best effort: an unreadable file must not abort the indexing
        // run. The handler intentionally does no work that could throw again.
    }

    return dfi;
}
/// <summary>
/// Fills the <c>dfi2</c> document with a random id, the given URL and the result of
/// <c>Utility.RemoveHtmlTags</c> applied to the page text, then starts a new thread
/// that runs <c>GoForIndexSite</c>.
/// </summary>
/// <param name="LuceneForSite">Not read by this method.</param>
/// <param name="url">Stored as the document's <c>FileName</c>.</param>
/// <param name="sitetxt">Page text; stored as <c>Body</c> after <c>Utility.RemoveHtmlTags</c>.</param>
public void indexsite(DataForIndex LuceneForSite, string url, string sitetxt)
{
    Random idSource = new Random();
    dfi2.ID = idSource.Next(int.MaxValue);
    dfi2.FileName = url;

    string strippedText = Utility.RemoveHtmlTags(sitetxt);
    dfi2.Body = strippedText;

    // NOTE(review): dfi2 is not local to this method; presumably GoForIndexSite reads
    // it, so overlapping calls could overwrite each other's data - confirm.
    Thread worker = new Thread(GoForIndexSite);
    worker.Start();
}
/// <summary>
/// Creates the index document for a file whose text is supplied by a
/// <c>FilterReader</c>.
/// </summary>
/// <param name="reader">Reader that is read to its end to obtain the body text; it is not disposed here.</param>
/// <param name="fileName">Path of the file; stored as <c>FileName</c> and used for the extension.</param>
/// <returns>A <see cref="DataForIndex"/> labelled "Docs" with a random id.</returns>
public static DataForIndex CreateDocumentIndex(FilterReader reader, string fileName)
{
    // Initializer members are evaluated top to bottom, so the reader is consumed last.
    return new DataForIndex
    {
        ID = new Random().Next(int.MaxValue),
        Label = "Docs",
        FileExtension = Path.GetExtension(fileName),
        FileName = fileName,
        Body = reader.ReadToEnd()
    };
}
/// <summary>
/// Queues a background task that builds the index document for an audio file via
/// <c>Utility.CreateSoundIndexDoucment</c>, adds it through
/// <c>LuceneBussines.CreateIndex</c>, increments <c>IndexMp3Count</c> and writes the
/// new count to <c>CountFileIndexedLabel</c>.
/// </summary>
/// <param name="fn">Path of the audio file to index.</param>
private void AddSoundToIndexed(string fn)
{
    Action indexSound = delegate
    {
        DataForIndex soundDocument = Utility.CreateSoundIndexDoucment(fn);
        new LuceneBussines().CreateIndex(soundDocument);

        // NOTE(review): incremented from a worker task; presumably racy when several
        // files are indexed concurrently - confirm.
        IndexMp3Count++;
        LableTextChange(CountFileIndexedLabel, IndexMp3Count.ToString());
    };

    Task worker = new Task(indexSound);
    worker.Start();
}
/// <summary>
/// Queues a background task that builds the index document for a video file via
/// <c>Utility.CreateVideoIndexDoucment</c>, adds it through
/// <c>LuceneBussines.CreateIndex</c>, increments <c>IndexMovieCount</c> and writes the
/// new count to <c>Videolblcount</c>.
/// </summary>
/// <param name="fn">Path of the video file to index.</param>
private void AddVideoToIndexed(string fn)
{
    Action indexVideo = delegate
    {
        DataForIndex videoDocument = Utility.CreateVideoIndexDoucment(fn);
        new LuceneBussines().CreateIndex(videoDocument);

        // NOTE(review): incremented from a worker task; presumably racy when several
        // files are indexed concurrently - confirm.
        IndexMovieCount++;
        LableTextChange(Videolblcount, IndexMovieCount.ToString());
    };

    Task worker = new Task(indexVideo);
    worker.Start();
}
/// <summary>
/// Queues a background task that extracts the text of a document with a
/// <c>FilterReader</c> and adds it to the index. If the first attempt throws, the
/// document is retried once with different reader settings; if the retry also
/// fails, the document is skipped.
/// </summary>
/// <param name="fileName">Path of the document to index.</param>
/// <param name="timeoutOption">Timeout behaviour passed to the reader on the first attempt.</param>
private void AddDocumentToIndexed(string fileName, FilterReaderTimeout timeoutOption)
{
    Task t = new Task(() =>
    {
        try
        {
            // First attempt: not read into memory, caller-chosen timeout behaviour, timeout -1.
            IndexDocumentWithReader(fileName, false, timeoutOption, -1);
        }
        catch (Exception)
        {
            try
            {
                // Retry: read into memory, throw on timeout, timeout 5000.
                IndexDocumentWithReader(fileName, true, FilterReaderTimeout.TimeoutWithException, 5000);
            }
            catch (Exception retryError)
            {
                // Best effort: a document that cannot be read is skipped, but the
                // failure is traced instead of vanishing without a trace.
                System.Diagnostics.Debug.WriteLine(
                    "Could not index document '" + fileName + "': " + retryError.Message);
            }
        }
    });
    t.Start();
}

/// <summary>
/// Reads one document through a <c>FilterReader</c> created with the given settings,
/// adds it to the index and updates the document counter label.
/// </summary>
private void IndexDocumentWithReader(string fileName, bool readIntoMemory, FilterReaderTimeout filterReaderTimeout, int timeout)
{
    DataForIndex dfi;

    // The reader is disposed as soon as its text has been copied into the document,
    // including when extraction throws.
    using (var reader = new FilterReader(fileName, string.Empty, disableEmbeddedContent: false, includeProperties: false, readIntoMemory: readIntoMemory, filterReaderTimeout: filterReaderTimeout, timeout: timeout))
    {
        dfi = Utility.CreateDocumentIndex(reader, fileName);
    }

    LuceneBussines lucene = new LuceneBussines();
    lucene.CreateIndex(dfi);

    IndexDocCount++;
    LableTextChange(DocumentCountLable, IndexDocCount.ToString());
}
/// <summary>
/// Creates the index document for a PDF from the text file that holds its content,
/// then deletes that text file.
/// </summary>
/// <param name="fileName">Path of the PDF; stored as <c>FileName</c> and used for the extension.</param>
/// <param name="pathPdf">Path of the text file whose content becomes the body. It is deleted before returning, also when reading fails.</param>
/// <returns>A <see cref="DataForIndex"/> labelled "Docs" with a random id.</returns>
public static DataForIndex CreatePdfIndexedDocument(string fileName, string pathPdf)
{
    var dfi = new DataForIndex();
    dfi.ID = new Random().Next(int.MaxValue);
    dfi.Label = "Docs";
    dfi.FileName = fileName;
    dfi.FileExtension = Path.GetExtension(fileName);

    try
    {
        // using guarantees the handle is released before the delete below,
        // even if ReadToEnd throws.
        using (StreamReader sr = new StreamReader(pathPdf))
        {
            dfi.Body = sr.ReadToEnd();
        }
    }
    finally
    {
        // The text file is a throw-away intermediate; remove it on every path so
        // failed reads do not leave files behind.
        File.Delete(pathPdf);
    }

    return dfi;
}
/// <summary>
/// Builds the index document for a video file using only its path: id, label,
/// extension, file name and a body assembled from those values.
/// </summary>
/// <param name="fn">Path of the video file.</param>
/// <returns>
/// A <see cref="DataForIndex"/>; if any step throws, the exception is swallowed and
/// the document is returned with whatever fields were assigned up to that point.
/// </returns>
public static DataForIndex CreateVideoIndexDoucment(string fn)
{
    DataForIndex videoDocument = new DataForIndex();

    try
    {
        // The FileInfo instance is never read; constructing it only matters in that
        // it throws for paths it rejects, which ends up in the catch below.
        new FileInfo(fn);

        videoDocument.ID = new Random().Next(int.MaxValue);
        // NOTE(review): videos get the label "Music" - looks copied from the audio
        // variant; confirm whether search relies on this value before changing it.
        videoDocument.Label = "Music";
        videoDocument.FileExtension = Path.GetExtension(fn);
        videoDocument.FileName = fn;
        videoDocument.Body = string.Concat(fn, " , ", videoDocument.FileExtension, " , ", videoDocument.Label);
    }
    catch
    {
        // Intentionally empty: failures are ignored (best effort).
    }

    return videoDocument;
}
/// <summary>
/// Adds one document to the existing Lucene index in the "LuceneIndexSite" folder
/// under the current directory and commits it. Access is serialised through
/// <c>sem</c>.
/// </summary>
/// <param name="dfi">Values to store; null string fields are written as "-".</param>
public void CreateIndexSite(DataForIndex dfi)
{
    sem.WaitOne();
    try
    {
        string indexPath = Path.Combine(Environment.CurrentDirectory, "LuceneIndexSite");

        // create: false - the index is expected to exist already.
        using (var directory = FSDirectory.Open(new DirectoryInfo(indexPath)))
        using (var writer = new IndexWriter(directory, Analyzer, create: false, mfl: IndexWriter.MaxFieldLength.UNLIMITED))
        {
            Document doc = new Document();
            doc.Add(new Field("ID", dfi.ID.ToString(), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
            doc.Add(new Field("FileName", dfi.FileName ?? "-", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
            doc.Add(new Field("Body", dfi.Body ?? "-", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.Add(new Field("FileExtension", dfi.FileExtension ?? "-", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("AudioGenre", dfi.AudioGenre ?? "-", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("Label", dfi.Label ?? "-", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("AudioAlbum", dfi.AudioAlbum ?? "-", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("AudioBitrate", dfi.AudioBitrate ?? "-", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("AudioDuration", dfi.AudioDuration ?? "-", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));

            writer.AddDocument(doc);
            CountIndex++;
            writer.Commit();
            // No explicit Dispose: the using statements dispose writer and directory once.
        }
    }
    finally
    {
        // Always hand the semaphore back; otherwise one failed write (for example a
        // missing index) would block every later call in WaitOne.
        sem.Release();
    }
}
/// <summary>
/// Adds every line of the file named by <c>dfi.FileName</c> as its own document
/// (fields "Id" and "Body") to the existing index in the "StatisticsIndex" folder
/// under the current directory, then optimizes and commits the index.
/// </summary>
/// <param name="dfi">Only <c>FileName</c> is read: the path of the text file to index line by line.</param>
public static void CreateIndexForStatistics(DataForIndex dfi)
{
    FileInfo file = new FileInfo("stopwords.txt");
    string indexPath = Path.Combine(Environment.CurrentDirectory, "StatisticsIndex");
    var analyzer = new StandardAnalyzer(_version, file);

    // One generator for the whole run: a new Random per line can be seeded
    // identically on time-seeded runtimes and then hands out the same Id repeatedly.
    var idGenerator = new Random();

    // create: false - the index is expected to exist already.
    using (var directory = FSDirectory.Open(new DirectoryInfo(indexPath)))
    using (var writer = new IndexWriter(directory, analyzer, create: false, mfl: IndexWriter.MaxFieldLength.UNLIMITED))
    {
        // ReadLines streams the file instead of loading every line up front.
        foreach (var line in File.ReadLines(dfi.FileName))
        {
            Document postDocument = new Document();
            postDocument.Add(new Field("Id", idGenerator.Next().ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            postDocument.Add(new Field("Body", line, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            writer.AddDocument(postDocument);
        }

        writer.Optimize();
        writer.Commit();
        // No explicit Dispose: the using statements dispose writer and directory once.
    }
}
/// <summary>
/// Appends <paramref name="item"/> to <c>RichtxtResultSite</c> with the matched
/// search term highlighted (yellow background, bold Tahoma 12), followed by a
/// separator line.
/// </summary>
/// <param name="finddfi">Supplies the Farsi/Arabic flag and the letter pair used for substitution.</param>
/// <param name="item">Text to display.</param>
/// <param name="searchvalue">Term to locate in <paramref name="item"/>.</param>
/// <returns><c>true</c> if the text was appended; <c>false</c> if the term was not found.</returns>
private bool ShowResultinRichtxtsites(DataForIndex finddfi, string item, string searchvalue)
{
    int index = item.IndexOf(searchvalue, StringComparison.Ordinal);
    if (index == -1)
    {
        return false;
    }

    string term = searchvalue;

    // NOTE(review): the substituted term is searched only after the original term
    // was already found, and its position replaces the original one (so the method
    // can return false for an item that contains searchvalue). That flow is kept
    // as-is - confirm whether a fallback search was intended.
    if (finddfi.IsFarsiArabic == true && !finddfi.HarfArabic.Contains("#"))
    {
        term = term.Replace(finddfi.HarfArabic, finddfi.HarfFarsi);
        // Ordinal, like the first search, so that the match length equals term.Length.
        index = item.IndexOf(term, StringComparison.Ordinal);
    }

    if (index < 0)
    {
        return false;
    }

    // Highlight exactly what was matched: the term may differ in length from searchvalue.
    int matchEnd = index + term.Length;

    RichtxtResultSite.AppendText(item.Substring(0, index));

    RichtxtResultSite.SelectionBackColor = Color.Yellow;
    // NOTE(review): the font to restore is read from RichtxtResult, not
    // RichtxtResultSite - possibly a copy/paste slip; confirm.
    Font fonttext = RichtxtResult.SelectionFont;
    RichtxtResultSite.SelectionFont = new Font("Tahoma", 12, FontStyle.Bold);
    RichtxtResultSite.AppendText(item.Substring(index, term.Length));

    RichtxtResultSite.SelectionBackColor = Color.White;
    RichtxtResultSite.SelectionFont = new Font(fonttext, FontStyle.Regular);
    RichtxtResultSite.AppendText(
        item.Substring(matchEnd) + Environment.NewLine + "//////////////////" + Environment.NewLine);

    return true;
}
/// <summary>
/// Converts search hits into <see cref="DataForIndex"/> results and merges them
/// into <paramref name="dfi"/>. When <paramref name="hits"/> is empty, the term is
/// rewritten with <c>searchByPartialWords</c> and the search is repeated (top 100).
/// A hit whose file name is already in the list is merged into the existing entry,
/// which is then moved to the end of the list.
/// </summary>
/// <param name="dfi">Result list to extend; the same instance is returned.</param>
/// <param name="hits">Hits of the search already performed by the caller.</param>
/// <param name="term">Search term; its first space-separated word is used to locate snippets.</param>
/// <param name="searcher">Searcher used to load documents and term vectors.</param>
/// <param name="query">Only reassigned locally when the fallback search runs.</param>
/// <param name="parser">Parser used to build the fallback query.</param>
/// <returns><paramref name="dfi"/> with the hits added or merged.</returns>
private static List<DataForIndex> getResult(List<DataForIndex> dfi, ScoreDoc[] hits, string term, IndexSearcher searcher, Query query, MultiFieldQueryParser parser)
{
    if (hits.Length == 0)
    {
        term = searchByPartialWords(term);
        query = parseQuery(term, parser);
        hits = searcher.Search(query, 100).ScoreDocs;
    }

    foreach (var scoreDoc in hits)
    {
        var doc = searcher.Doc(scoreDoc.Doc);

        DataForIndex listdata = new DataForIndex();
        listdata.ID = int.Parse(doc.Get("ID"));
        listdata.FileName = doc.Get("FileName");
        listdata.SearchWord = term;
        listdata.FileExtension = doc.Get("FileExtension");
        listdata.AudioGenre = doc.Get("AudioGenre");
        listdata.AudioAlbum = doc.Get("AudioAlbum");
        listdata.AudioBitrate = doc.Get("AudioBitrate");
        listdata.AudioDuration = doc.Get("AudioDuration");
        listdata.Label = doc.Get("Label");
        if (isFarsiArabic)
        {
            listdata.IsFarsiArabic = true;
            listdata.HarfArabic = HarfArabic;
            listdata.HarfFarsi = HarfFarsi;
        }
        listdata.Score = scoreDoc.Score;
        listdata.ResultText = buildResultText(searcher, scoreDoc.Doc, doc.Get("Body"), term);

        // Single lookup instead of searching the list twice for the same file name.
        DataForIndex existing = dfi.FirstOrDefault(x => x.FileName == listdata.FileName);
        if (existing == null)
        {
            dfi.Add(listdata);
        }
        else
        {
            // Remove + Add keeps the established ordering: a merged entry moves to the end.
            dfi.Remove(existing);
            existing.SearchWord = existing.SearchWord + " + " + listdata.SearchWord;
            existing.ResultText = existing.ResultText + " " + listdata.ResultText;
            dfi.Add(existing);
        }
    }

    return dfi;
}

/// <summary>
/// Builds the snippet text for one hit: for every occurrence of the first word of
/// <paramref name="term"/> in the "Body" term vector, up to 100 characters on each
/// side of the occurrence start are taken from <paramref name="body"/> and joined
/// with " # ". Returns an empty string when no snippet can be produced.
/// </summary>
private static string buildResultText(IndexSearcher searcher, int docId, string body, string term)
{
    // A hit can come from another field, so the Body vector, the term and the
    // stored body may all be missing; none of that should fail the whole search.
    var vector = searcher.IndexReader.GetTermFreqVector(docId, "Body") as TermPositionVector;
    if (vector == null || string.IsNullOrEmpty(body))
    {
        return string.Empty;
    }

    // NOTE(review): the lookup uses the raw first word of the term; if the analyzer
    // normalises tokens (e.g. lower-casing) the word may not be found - confirm.
    int termIndex = vector.IndexOf(term.Split(' ').FirstOrDefault());
    if (termIndex < 0)
    {
        return string.Empty;
    }

    TermVectorOffsetInfo[] offsets = vector.GetOffsets(termIndex);
    if (offsets == null)
    {
        return string.Empty;
    }

    const int context = 100;
    StringBuilder text = new StringBuilder();
    foreach (TermVectorOffsetInfo offset in offsets)
    {
        // Clamp so an offset beyond the stored body cannot push Substring out of range.
        int hit = Math.Min(offset.StartOffset, body.Length);
        int start = hit > context ? hit - context : 0;
        int end = hit + context <= body.Length ? hit + context : body.Length;
        text.Append(body, start, end - start).Append(" # ");
    }

    return text.ToString();
}