/// <summary>
/// Runs value extraction over the supplied document records in parallel and collects
/// every extraction result into a single collection.
/// </summary>
/// <param name="enumerableDocs">Document records to process.</param>
/// <param name="docsCount">Number of documents, used only to scale the reported progress.</param>
/// <param name="columnIndexID">Ordinal of the document ID column (read with <c>GetDouble</c>).</param>
/// <param name="columnIndexText">Ordinal of the document text column, forwarded to <c>ExtractDocumentValues</c>.</param>
/// <param name="param">Extraction parameters; <c>DocumentsList</c> (when non-empty) restricts which IDs are processed.</param>
/// <param name="scripts">Script managers keyed by an integer, or <c>null</c>; forwarded to <c>ExtractDocumentValues</c>.</param>
/// <returns>The collection holding all results added during processing.</returns>
/// <remarks>
/// Records with a NULL ID, or with an ID not contained in a non-empty <c>param.DocumentsList</c>,
/// are skipped without advancing progress. An exception raised while handling a record is passed
/// to <c>Logger.HandleException</c> and does not abort the loop. The loop is asked to stop once
/// <c>Logger.ReportProgress</c> returns <c>false</c>.
/// NOTE(review): results are added from multiple worker threads - assumes
/// RegExpProcessingResultsCollection.Add is thread-safe; confirm.
/// NOTE(review): records are read from worker threads - assumes each IDataRecord stays valid
/// after the source enumerator advances; confirm against DatabaseHelper.GetDataRecords.
/// </remarks>
protected RegExpProcessingResultsCollection<ColRegExpExtractProcessingResult> Parallel_ExtractValues(IEnumerable<IDataRecord> enumerableDocs, long docsCount, int columnIndexID, int columnIndexText, ColRegExpExtractProcessingParams param, Dictionary<int, CSScriptManager> scripts)
{
    var results = new RegExpProcessingResultsCollection<ColRegExpExtractProcessingResult>();

    ///////////////////////////////////////////////////////////////////////////////

    // Each processed document advances progress by one unit per configured regular expression.
    long progressStep = _listRegExps.Count;
    long progressMax = docsCount * progressStep;
    long progressValue = 0;

    ///////////////////////////////////////////////////////////////////////////////

    Parallel.ForEach(enumerableDocs, (record, state) =>
    {
        try
        {
            if (record.IsDBNull(columnIndexID))
            {
                return;
            }

            ///////////////////////////////////////////////////////////////////////////////

            var documentID = record.GetDouble(columnIndexID);

            // An empty DocumentsList means "no restriction"; otherwise only listed IDs are processed.
            if (param.DocumentsList.Any() && !param.DocumentsList.Contains(documentID))
            {
                return;
            }

            ///////////////////////////////////////////////////////////////////////////////

            ExtractDocumentValues(documentID, columnIndexText, record, scripts, param.ColumnID, results, param);

            ///////////////////////////////////////////////////////////////////////////////

            var threadProgressValue = Interlocked.Add(ref progressValue, progressStep);

            // progressMax is 0 when there are no regular expressions or no counted documents.
            // Dividing by it would give NaN/Infinity, and casting that to int is unspecified,
            // so report 0% when no meaningful total exists.
            var progressPercentage = progressMax > 0
                ? (int)(threadProgressValue / (double)progressMax * 100D)
                : 0;

            if (!this.Logger.ReportProgress(progressPercentage, threadProgressValue))
            {
                state.Stop();
            }
        }
        catch (Exception ex)
        {
            // Log and continue so a failure on one record does not abort the whole run.
            Logger.HandleException(ex);
        }
    });

    ///////////////////////////////////////////////////////////////////////////////

    return results;
}
/// <summary>
/// Extracts values for a single document and appends every non-null result to
/// <paramref name="results"/>.
/// </summary>
/// <param name="documentID">ID of the document being processed.</param>
/// <param name="columnIndexText">Ordinal of the text column in <paramref name="record"/>.</param>
/// <param name="record">Record holding the document; nothing is done when its text column is NULL.</param>
/// <param name="scripts">
/// When non-null, each entry's script manager is run against the document text. When null,
/// the regular expressions flagged for extraction are applied to note text loaded from the
/// documents database.
/// </param>
/// <param name="columnID">Not referenced by this method.</param>
/// <param name="results">Collection receiving the extraction results.</param>
/// <param name="param">Supplies the documents database path and password for the regular-expression path.</param>
protected void ExtractDocumentValues(double documentID, int columnIndexText, IDataRecord record, Dictionary<int, CSScriptManager> scripts, int columnID, RegExpProcessingResultsCollection<ColRegExpExtractProcessingResult> results, ColRegExpExtractProcessingParams param)
{
    if (record.IsDBNull(columnIndexText))
    {
        return;
    }

    var documentText = record.GetString(columnIndexText);

    // Script-driven extraction: run every script against the text read from the record.
    if (scripts != null)
    {
        foreach (KeyValuePair<int, CSScriptManager> entry in scripts)
        {
            var scriptResult = ScriptExtractRegExpValues(documentID, documentText, entry.Value, entry.Key);
            if (scriptResult == null)
            {
                continue;
            }

            results.Add(scriptResult);
        }

        return;
    }

    // Regular-expression extraction: the note text is fetched per expression from the database,
    // from the column named by that expression's extract options.
    using (var connection = DatabaseHelper.CreateConnection(param.DocumentsDatabaseFilePath, param.Password))
    {
        connection.Open();

        foreach (var expression in _listRegExps)
        {
            var options = expression.ExtractOptions;
            if (options == null || !options.Extract)
            {
                continue;
            }

            var noteText = DatabaseHelper.GetNoteText(connection, documentID, expression.ExtractOptions.NoteTextColumn);
            var regExpResult = ExtractRegExpValues(expression, documentID, noteText);
            if (regExpResult != null)
            {
                results.Add(regExpResult);
            }
        }
    }
}
/// <summary>
/// Extracts values from the documents database described by <paramref name="param"/> and
/// serializes the results to the configured output file.
/// </summary>
/// <param name="param">
/// Extraction parameters: database path and password, the positive-score filter, the script
/// settings (<c>ScriptExtract</c>, <c>ScriptCode</c>, <c>ColumnID</c>) and the output file name.
/// </param>
/// <remarks>
/// The query selects <c>ED_ENC_NUM</c> and a note text column from <c>Documents</c>. When script
/// extraction is enabled and the script code yields a non-zero note column index, that index is
/// appended to the <c>NOTE_TEXT</c> column name. Processing and serialization both happen while
/// the connection is still open.
/// </remarks>
public void ExtractValues(ColRegExpExtractProcessingParams param)
{
    // Ordinals of the two columns selected by the query below.
    const int idColumnIndex = 0;
    const int textColumnIndex = 1;

    using (var connection = DatabaseHelper.CreateConnection(param.DocumentsDatabaseFilePath, param.Password))
    {
        connection.Open();

        // Count the same set of rows the query will return so progress can be scaled.
        string countFilter = null;
        if (param.OnlyPositiveScore)
        {
            countFilter = "Score > 0";
        }

        var totalDocuments = DatabaseHelper.GetRowsCount(connection, "Documents", countFilter);

        string sql = "SELECT ED_ENC_NUM, NOTE_TEXT FROM Documents";

        if (param.ScriptExtract && param.ScriptCode != null)
        {
            int noteColumnIndex = GetNoteColumnIndexFromCode(param.ScriptCode);
            if (noteColumnIndex != 0)
            {
                sql = "SELECT ED_ENC_NUM, NOTE_TEXT" + noteColumnIndex.ToString() + " FROM Documents";
            }
        }

        if (param.OnlyPositiveScore)
        {
            sql += " WHERE Score > 0";
        }

        var records = DatabaseHelper.GetDataRecords(connection, sql);

        // Script managers stay null unless script extraction is requested.
        Dictionary<int, CSScriptManager> scriptManagers = null;
        if (param.ScriptExtract)
        {
            if (param.ColumnID == -1)
            {
                // No specific column requested: load the scripts from the regexp database.
                scriptManagers = GetScripts(param.RegExpDatabaseFilePath, param.Password);
            }
            else
            {
                scriptManagers = new Dictionary<int, CSScriptManager>();
                scriptManagers.Add(param.ColumnID, CreateScriptManager(param.ScriptCode));
            }
        }

        var extracted = Parallel_ExtractValues(records, totalDocuments, idColumnIndex, textColumnIndex, param, scriptManagers);

        var outputPath = param.GetFullPath(param.ExtractOutputFileName);
        extracted.Serialize(outputPath);
    }
}