예제 #1
0
        /// <summary>
        /// Adds one Lucene document to <paramref name="writer"/> for every catalog
        /// recommendation row and every procurement language row exposed by
        /// <paramref name="entity"/>.
        /// </summary>
        /// <param name="entity">Entity context supplying the CATALOG_RECOMMENDATIONS_DATA and PROCUREMENT_LANGUAGE_DATA sets.</param>
        /// <param name="writer">Index writer that receives the documents. It is neither optimized nor disposed here.</param>
        public static void IndexTopics(CSETWebEntities entity, IndexWriter writer)
        {
            foreach (CATALOG_RECOMMENDATIONS_DATA data in entity.CATALOG_RECOMMENDATIONS_DATA)
            {
                // Null parts concatenate as empty strings, so the searchable text is always non-null.
                string text = " " + data.Heading + " " + data.Requirement + " " + data.Supplemental_Guidance + " " + data.Enhancement;

                AddTopicDocument(writer, data.Topic_Name, text, ResourceTypeEnum.Catalog_Recommendation.ToString(), data.Data_Id.ToString());
            }

            foreach (PROCUREMENT_LANGUAGE_DATA data in entity.PROCUREMENT_LANGUAGE_DATA)
            {
                string text = " " + data.Basis + " " + data.Language_Guidance + " " + data.Procurement_Language + " " + data.Fatmeasures + " " + data.Satmeasures + " " + data.Maintenance_Guidance;

                AddTopicDocument(writer, data.Topic_Name, text, ResourceTypeEnum.Procurement_Language.ToString(), data.Procurement_Id.ToString());
            }
        }

        /// <summary>
        /// Builds a single topic document (short name, text, resource type, id) and adds it to the writer.
        /// </summary>
        /// <param name="writer">Index writer that receives the document.</param>
        /// <param name="topicName">Topic name stored and analyzed under SHORT_NAME; null is indexed as an empty string.</param>
        /// <param name="text">Combined body text stored and analyzed under TEXT.</param>
        /// <param name="resourceType">Resource type label stored un-analyzed under RESOURCE_TYPE.</param>
        /// <param name="docId">Record identifier stored (but not indexed) under DOC_ID.</param>
        private static void AddTopicDocument(IndexWriter writer, string topicName, string text, string resourceType, string docId)
        {
            Lucene.Net.Documents.Document lucDoc = new Lucene.Net.Documents.Document();

            // Lucene's Field constructor rejects null values; a row without a topic name
            // must not abort indexing of every remaining row.
            lucDoc.Add(new Field(FieldNames.SHORT_NAME, topicName ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
            lucDoc.Add(new Field(FieldNames.TEXT, text, Field.Store.YES, Field.Index.ANALYZED));
            lucDoc.Add(new Field(FieldNames.RESOURCE_TYPE, resourceType, Field.Store.YES, Field.Index.NOT_ANALYZED));
            lucDoc.Add(new Field(FieldNames.DOC_ID, docId, Field.Store.YES, Field.Index.NO));

            writer.AddDocument(lucDoc);
        }
예제 #2
0
 /// <summary>
 /// Disposes the held context, if there is one, and clears the reference so
 /// that a repeated call does nothing.
 /// </summary>
 public void Dispose()
 {
     // Nothing to release when the context is already gone.
     if (_context == null)
     {
         return;
     }

     _context.Dispose();
     _context = null;
 }
예제 #3
0
        /// <summary>
        /// Rebuilds the Lucene index: every distinct GEN_FILE reachable from the
        /// REF_LIBRARY_PATH rows that exists on disk as a .pdf or .docx is parsed and
        /// indexed, followed by the topic records. The index is written to the
        /// "LuceneIndex2" staging directory, which then replaces "LuceneIndex".
        /// </summary>
        public void IndexDocs()
        {
            // Walk up from the application's base directory to the folder the paths below are relative to.
            string solutionPath = System.IO.Directory.GetParent(AppDomain.CurrentDomain.BaseDirectory).Parent.Parent.Parent.Parent.FullName;

            string documentDirectory     = solutionPath + @"\CSETWeb_Api\CSETWeb_Api\Documents";
            string fullDocumentDirectory = Path.GetFullPath(documentDirectory);

            string luceneIndexDir     = solutionPath + @"\CSETWeb_Api\CSETWeb_Api\LuceneIndex2";
            string luceneIndexDestDir = solutionPath + @"\CSETWeb_Api\CSETWeb_Api\LuceneIndex";

            // The context is only needed while indexing; dispose it when done instead of leaking it.
            using (CSETWebEntities entity = new CSETWebEntities())
            {
                Dictionary <int, GEN_FILE> dictionaryGenFiles = new Dictionary <int, GEN_FILE>();
                List <REF_LIBRARY_PATH>    listLibDocs        = entity.REF_LIBRARY_PATH.ToList();
                int count = 0;

                Debug.WriteLine("Number of Resource Nodes: " + listLibDocs.Count);

                // A file can hang off several library paths; keep the first occurrence per id.
                foreach (REF_LIBRARY_PATH resnodes in listLibDocs)
                {
                    foreach (GEN_FILE file in resnodes.GEN_FILE)
                    {
                        if (!dictionaryGenFiles.ContainsKey(file.Gen_File_Id))
                        {
                            dictionaryGenFiles.Add(file.Gen_File_Id, file);
                        }
                    }
                }

                // Always start from an empty staging directory.
                if (System.IO.Directory.Exists(luceneIndexDir))
                {
                    System.IO.Directory.Delete(luceneIndexDir, true);
                }

                DirectoryInfo dir = System.IO.Directory.CreateDirectory(luceneIndexDir);

                // Both the directory handle and the writer are released before the staging folder is moved.
                using (FSDirectory fsdir = FSDirectory.Open(dir))
                using (IndexWriter writer = new IndexWriter(fsdir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    Debug.WriteLine("Documents Count: " + dictionaryGenFiles.Values.Count);
                    foreach (GEN_FILE doc in dictionaryGenFiles.Values)
                    {
                        string filepath = fullDocumentDirectory + @"\" + doc.File_Name;
                        if (!File.Exists(filepath))
                        {
                            Debug.WriteLine("File doesn't exist:" + filepath);
                            continue;
                        }

                        // Compare the extension once, case-insensitively and independent of the current culture.
                        string extension = Path.GetExtension(filepath);
                        Document lucDoc;
                        if (string.Equals(extension, ".pdf", StringComparison.OrdinalIgnoreCase))
                        {
                            lucDoc = PDFParser.ParseDocument(filepath, doc);
                        }
                        else if (string.Equals(extension, ".docx", StringComparison.OrdinalIgnoreCase))
                        {
                            lucDoc = WordParser.ParseDocument(filepath, doc);
                        }
                        else
                        {
                            Debug.Assert(false, "Can't read file because bad extension. Extension:" + filepath);
                            continue;
                        }

                        writer.AddDocument(lucDoc);
                        Debug.WriteLine("Count: " + count + " Processed file: " + filepath);
                        count++;
                    }

                    TopicIndexer.IndexTopics(entity, writer);

                    writer.Optimize();
                }
            }

            // On a first run there is no previous index to remove; deleting a missing directory would throw.
            if (System.IO.Directory.Exists(luceneIndexDestDir))
            {
                System.IO.Directory.Delete(luceneIndexDestDir, true);
            }

            System.IO.Directory.Move(luceneIndexDir, luceneIndexDestDir);
        }
예제 #4
0
 /// <summary>
 /// Creates the repository together with a new <c>CSETWebEntities</c> context
 /// that it keeps in <c>_context</c>.
 /// </summary>
 public FileRepository()
 {
     this._context = new CSETWebEntities();
 }