/// <summary>
/// Console entry point: builds the complete index (token positions included)
/// from the extracted article texts and prints start/finish timestamps.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // These paths need to be adjusted to the local setup.
    string articlesDir = @"..\..\..\..\TestData\eksTRAKTOR_output\pagesTexts";
    string indexDir = @"..\..\..\..\TestData\index";

    // Create the whole index, including token positions.
    Console.WriteLine("Started {0}", DateTime.Now);

    IxScatteredStorageCrawler storageCrawler = new IxScatteredStorageCrawler(articlesDir);
    IxBufferedCrawler bufferedCrawler = new IxBufferedCrawler(storageCrawler);
    IxStdRIIndexer indexer = new IxStdRIIndexer(indexDir, 12);
    IxCreator indexCreator = new IxCreator(bufferedCrawler, indexer, indexDir);
    indexCreator.DoIndexing();

    Console.WriteLine("Finished {0}", DateTime.Now);

    /*
    // Create the files with positions (FOR DEBUGGING PURPOSES ONLY).
    IxStdDiskFwdIndex fwdIndex = new IxStdDiskFwdIndex(indexDir + @"\fwdIndex", IxIndexMode.READ);
    IxTokenPositionsIndex index = new IxTokenPositionsIndex(indexDir);
    index.Create(fwdIndex);
    */
}
/// <summary>
/// Starts extraction of the page contents found in an XML document into text files.
/// </summary>
/// <remarks>
/// Validates the output directory, resets the per-run state, creates the output
/// components enabled in <c>eTRSettings</c> (file distributor, collective file,
/// index), and finally starts <c>advisorThread</c> via <c>RunWorkerAsync</c>.
/// When index creation is enabled, the indexing thread is started from this method.
/// </remarks>
/// <param name="inputXMLPath">Path to the XML dump of wiki articles (pages-articles.xml).</param>
/// <param name="outputDirPath">Path to an existing, empty directory into which the article texts are extracted.</param>
/// <exception cref="eTRAlreadyWorkingException">Thrown when <c>advisorThread</c> is already busy.</exception>
/// <exception cref="eTROutputDirDontExistsException">Thrown when the output directory does not exist.</exception>
/// <exception cref="eTROutputDirNotEmptyException">Thrown when the output directory contains any file or subdirectory.</exception>
public void run(String inputXMLPath, String outputDirPath)
{
    if (advisorThread.IsBusy)
    {
        throw new eTRAlreadyWorkingException();
    }

    DirectoryInfo di = new DirectoryInfo(outputDirPath);
    if (!di.Exists)
    {
        throw new eTROutputDirDontExistsException();
    }
    if (di.GetFiles().Length != 0 || di.GetDirectories().Length != 0)
    {
        throw new eTROutputDirNotEmptyException();
    }

    this.inputXMLPath = inputXMLPath;

    // Strings are immutable: TrimEnd returns a new string, so the result must be
    // assigned back. Otherwise a trailing backslash survives and the paths built
    // below end up with a doubled separator.
    outputDirPath = outputDirPath.TrimEnd('\\');

    extractedPages = 0;
    cleaner = new eTRCleaner(messageCollector);

    if (eTRSettings.saveAsFiles || eTRSettings.saveAsFilesOriginalPageText)
    {
        distributor = new eTRDistributor(outputDirPath + "\\pagesTexts", messageCollector);
    }

    if (eTRSettings.createCollectiveFile)
    {
        collector = new eTRCollector(outputDirPath + "\\collectedArticles.dat");
    }

    if (eTRSettings.createIndyx)
    {
        string indexDirPath = outputDirPath + "\\index";

        indyxDocs = new IxManualServedCrawler();
        indyxIndexer = new IxStdRIIndexer(indexDirPath, eTRSettings.indyxThreadCount);
        indyxCreator = new IxCreator(indyxDocs, indyxIndexer, indexDirPath);

        // Indexing runs on its own thread, started before the advisor thread.
        indyxThread = new Thread(new ThreadStart(indyxCreator.DoIndexing));
        indyxThread.Start();
    }

    // Read the worker count once so all per-worker arrays below share one size.
    int workerCount = eTRSettings.createdWorkerThreadsCount;

    ThreadPool.SetMaxThreads(workerCount, eTRSettings.completionPortThreadsCount);

    pageNodesBuffer = new HashSet<Match>();
    inputXMLBuffer = new StringBuilder(eTRSettings.bufferStartCapacity);

    works = new Match[workerCount];
    workFinishedEvents = new ManualResetEvent[workerCount];
    worksLocks = new Object[workerCount];
    for (int i = 0; i < workerCount; i++)
    {
        worksLocks[i] = new Object();
    }

    advisorThread.RunWorkerAsync();
}