/// <summary>
/// Adds one Lucene document to <paramref name="writer"/> for every catalog
/// recommendation row and every procurement language row exposed by
/// <paramref name="entity"/>.
/// </summary>
/// <param name="entity">Data context whose CATALOG_RECOMMENDATIONS_DATA and PROCUREMENT_LANGUAGE_DATA sets are enumerated.</param>
/// <param name="writer">Index writer that receives each generated document.</param>
public static void IndexTopics(CSETWebEntities entity, IndexWriter writer)
{
    foreach (CATALOG_RECOMMENDATIONS_DATA recommendation in entity.CATALOG_RECOMMENDATIONS_DATA)
    {
        // Searchable body: the descriptive columns joined by single spaces (with a leading space).
        string body = " " + recommendation.Heading
            + " " + recommendation.Requirement
            + " " + recommendation.Supplemental_Guidance
            + " " + recommendation.Enhancement;

        writer.AddDocument(CreateTopicDocument(
            recommendation.Topic_Name,
            body,
            ResourceTypeEnum.Catalog_Recommendation.ToString(),
            recommendation.Data_Id.ToString()));
    }

    foreach (PROCUREMENT_LANGUAGE_DATA procurement in entity.PROCUREMENT_LANGUAGE_DATA)
    {
        string body = " " + procurement.Basis
            + " " + procurement.Language_Guidance
            + " " + procurement.Procurement_Language
            + " " + procurement.Fatmeasures
            + " " + procurement.Satmeasures
            + " " + procurement.Maintenance_Guidance;

        writer.AddDocument(CreateTopicDocument(
            procurement.Topic_Name,
            body,
            ResourceTypeEnum.Procurement_Language.ToString(),
            procurement.Procurement_Id.ToString()));
    }
}

/// <summary>
/// Builds the four-field Lucene document shared by both topic kinds:
/// analyzed short name and text, a non-analyzed resource type, and a
/// stored-only (not indexed) document id.
/// </summary>
private static Lucene.Net.Documents.Document CreateTopicDocument(string shortName, string text, string resourceType, string docId)
{
    Lucene.Net.Documents.Document topicDoc = new Lucene.Net.Documents.Document();
    topicDoc.Add(new Field(FieldNames.SHORT_NAME, shortName, Field.Store.YES, Field.Index.ANALYZED));
    topicDoc.Add(new Field(FieldNames.TEXT, text, Field.Store.YES, Field.Index.ANALYZED));
    topicDoc.Add(new Field(FieldNames.RESOURCE_TYPE, resourceType, Field.Store.YES, Field.Index.NOT_ANALYZED));
    topicDoc.Add(new Field(FieldNames.DOC_ID, docId, Field.Store.YES, Field.Index.NO));
    return topicDoc;
}
/// <summary>
/// Disposes the held context, if any, and clears the reference so that
/// subsequent calls have nothing left to release.
/// </summary>
public void Dispose()
{
    if (_context == null)
    {
        return;
    }

    _context.Dispose();
    _context = null;
}
/// <summary>
/// Rebuilds the Lucene search index from the reference-library documents
/// and the topic tables.
/// </summary>
/// <remarks>
/// The index is first written to a staging directory ("LuceneIndex2") and,
/// once complete, moved over the live directory ("LuceneIndex"). Paths are
/// resolved relative to the directory four levels above the application's
/// base directory. Only files with a .pdf or .docx extension are parsed;
/// missing files are reported through <see cref="Debug"/> output and skipped.
/// </remarks>
public void IndexDocs()
{
    string solutionPath = System.IO.Directory.GetParent(AppDomain.CurrentDomain.BaseDirectory).Parent.Parent.Parent.Parent.FullName;
    string documentDirectory = solutionPath + @"\CSETWeb_Api\CSETWeb_Api\Documents";
    string fullDocumentDirectory = Path.GetFullPath(documentDirectory);
    string luceneIndexDir = solutionPath + @"\CSETWeb_Api\CSETWeb_Api\LuceneIndex2";
    string luceneIndexDestDir = solutionPath + @"\CSETWeb_Api\CSETWeb_Api\LuceneIndex";

    // The data context is disposable; release it once indexing is finished.
    using (CSETWebEntities entity = new CSETWebEntities())
    {
        // Collect each referenced file once, keyed by id (first occurrence wins),
        // because the same file can hang off several library paths.
        Dictionary<int, GEN_FILE> dictionaryGenFiles = new Dictionary<int, GEN_FILE>();
        List<REF_LIBRARY_PATH> listLibDocs = entity.REF_LIBRARY_PATH.ToList();
        Debug.WriteLine("Number of Resource Nodes: " + listLibDocs.Count);
        foreach (REF_LIBRARY_PATH resnodes in listLibDocs)
        {
            foreach (GEN_FILE file in resnodes.GEN_FILE)
            {
                if (!dictionaryGenFiles.ContainsKey(file.Gen_File_Id))
                {
                    dictionaryGenFiles.Add(file.Gen_File_Id, file);
                }
            }
        }

        // Always start from an empty staging directory.
        if (System.IO.Directory.Exists(luceneIndexDir))
        {
            System.IO.Directory.Delete(luceneIndexDir, true);
        }

        DirectoryInfo stagingDir = System.IO.Directory.CreateDirectory(luceneIndexDir);

        using (FSDirectory fsdir = FSDirectory.Open(stagingDir))
        using (IndexWriter writer = new IndexWriter(fsdir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            int count = 0;
            Debug.WriteLine("Documents Count: " + dictionaryGenFiles.Values.Count);

            foreach (GEN_FILE doc in dictionaryGenFiles.Values)
            {
                string filepath = fullDocumentDirectory + @"\" + doc.File_Name;
                if (!File.Exists(filepath))
                {
                    Debug.WriteLine("File doesn't exist:" + filepath);
                    continue;
                }

                // Ordinal, case-insensitive comparison avoids culture-dependent lowercasing.
                string extension = Path.GetExtension(filepath);
                Document lucDoc;
                if (string.Equals(extension, ".pdf", StringComparison.OrdinalIgnoreCase))
                {
                    lucDoc = PDFParser.ParseDocument(filepath, doc);
                }
                else if (string.Equals(extension, ".docx", StringComparison.OrdinalIgnoreCase))
                {
                    lucDoc = WordParser.ParseDocument(filepath, doc);
                }
                else
                {
                    Debug.Assert(false, "Can't read file because bad extension. Extension:" + filepath);
                    continue;
                }

                writer.AddDocument(lucDoc);
                Debug.WriteLine("Count: " + count + " Processed file: " + filepath);
                count++;
            }

            TopicIndexer.IndexTopics(entity, writer);
            writer.Optimize();
        }
    }

    // Swap the freshly built index into place. The live directory may not
    // exist yet (e.g. on a first run), so only delete it when present.
    if (System.IO.Directory.Exists(luceneIndexDestDir))
    {
        System.IO.Directory.Delete(luceneIndexDestDir, true);
    }

    System.IO.Directory.Move(luceneIndexDir, luceneIndexDestDir);
}
/// <summary>
/// Creates the repository together with a new <c>CSETWebEntities</c>
/// context that it keeps in <c>_context</c>.
/// </summary>
public FileRepository()
{
    this._context = new CSETWebEntities();
}