/// <summary>
/// Runs the score calculation over the rows of the <c>Documents</c> table and
/// serializes the per-regular-expression totals to the matches output file.
/// </summary>
/// <param name="param">
/// Processing parameters: database path and password, the positive-score filter flag,
/// and the output file name.
/// </param>
public void CalcScores(ColRegExpStatisticsProcessingParams param) {
    // Ordinals of NOTE_TEXT and Score in the SELECT list below (ED_ENC_NUM is ordinal 0).
    const int textColumnOrdinal = 1;
    const int scoreColumnOrdinal = 2;

    using (var connection = DatabaseHelper.CreateConnection(param.DocumentsDatabaseFilePath, param.Password)) {
        connection.Open();

        // The row count (used for progress reporting) applies the same filter as the query.
        string countFilter = param.OnlyPositiveScore ? "Score > 0" : null;
        var totalDocuments = DatabaseHelper.GetRowsCount(connection, "Documents", countFilter);

        string whereClause = param.OnlyPositiveScore ? " WHERE Score > 0" : string.Empty;
        string selectSql = "SELECT ED_ENC_NUM, NOTE_TEXT, Score FROM Documents" + whereClause;
        var records = DatabaseHelper.GetDataRecords(connection, selectSql);

        Parallel_CalcScores(records, totalDocuments, textColumnOrdinal, scoreColumnOrdinal, param);

        // Snapshot the totals accumulated on each regular expression into result objects.
        var perExpressionTotals = _listRegExps.Select(regExp => new ColRegExpStatisticsProcessingResult {
            ID = regExp.ID,
            TotalMatches = regExp.TotalMatches,
            TotalDocuments = regExp.TotalDocuments
        });
        var results = new RegExpProcessingResultsCollection<ColRegExpStatisticsProcessingResult>(perExpressionTotals);

        string outputPath = param.GetFullPath(param.MatchesOutputFileName);
        results.Serialize(outputPath);
    }
}
/// <summary>
/// Scores a single document record against every regular expression in
/// <c>_listRegExps</c>. Records whose text column is NULL are skipped.
/// </summary>
/// <param name="columnIndexText">Ordinal of the text column; only used for the NULL check.</param>
/// <param name="columnIndexScore">Ordinal of the column holding the document score.</param>
/// <param name="record">The document record to process.</param>
/// <param name="param">Processing parameters supplying the database path and password.</param>
protected void CalcDocumentScores(int columnIndexText, int columnIndexScore, IDataRecord record, ColRegExpStatisticsProcessingParams param) {
    if (record.IsDBNull(columnIndexText)) {
        return;
    }

    // The document ID is always read from ordinal 0 of the record.
    double documentId = record.GetDouble(0);
    var score = DatabaseHelper.GetInt32ValueInvariant(record, columnIndexScore);

    // The text column is deliberately not read here: the text to score is fetched per
    // regular expression below, because each expression names its own note-text column.
    using (var docsConnection = DatabaseHelper.CreateConnection(param.DocumentsDatabaseFilePath, param.Password)) {
        docsConnection.Open();

        foreach (var regExp in _listRegExps) {
            var noteText = DatabaseHelper.GetNoteText(docsConnection, documentId, regExp.ExtractOptions.NoteTextColumn);
            CalcRegExpScore(regExp, noteText, score);
        }
    }
}
/// <summary>
/// Processes the given document records in parallel via <c>CalcDocumentScores</c>
/// and reports progress through <c>Logger</c> after each record.
/// </summary>
/// <param name="enumerableDocs">Document records to process.</param>
/// <param name="docsCount">Expected number of records; used only to scale the progress percentage.</param>
/// <param name="columnIndexText">Ordinal of the text column, forwarded to <c>CalcDocumentScores</c>.</param>
/// <param name="columnIndexScore">Ordinal of the score column, forwarded to <c>CalcDocumentScores</c>.</param>
/// <param name="param">Processing parameters, forwarded to <c>CalcDocumentScores</c>.</param>
protected void Parallel_CalcScores(IEnumerable<IDataRecord> enumerableDocs, long docsCount, int columnIndexText, int columnIndexScore, ColRegExpStatisticsProcessingParams param) {
    // Progress is counted in (document x regular expression) units.
    long progressStep = _listRegExps.Count;
    long progressMax = docsCount * progressStep;
    long progressValue = 0;

    Parallel.ForEach(enumerableDocs, (record, state) => {
        try {
            CalcDocumentScores(columnIndexText, columnIndexScore, record, param);

            var threadProgressValue = Interlocked.Add(ref progressValue, progressStep);

            // Guard against a zero maximum (no regular expressions, or a zero document
            // count): dividing by it yields NaN/Infinity, and casting that to int is
            // unspecified. Report 0% in that case instead.
            var progressPercentage = progressMax > 0
                ? (int)(threadProgressValue / (double)progressMax * 100D)
                : 0;

            // A false return from ReportProgress requests that the loop be stopped.
            if (!this.Logger.ReportProgress(progressPercentage, threadProgressValue)) {
                state.Stop();
            }
        }
        catch (Exception ex) {
            // Failures are handed to the logger per record.
            Logger.HandleException(ex);
        }
    });
}
// not used function
/// <summary>
/// Runs the score calculation over rows of an in-memory table and returns the
/// per-regular-expression totals instead of serializing them.
/// </summary>
/// <param name="table">Table that defines the column layout of <paramref name="documentRows"/>.</param>
/// <param name="documentRows">Rows to process.</param>
/// <param name="param">Processing parameters, forwarded to the parallel scoring routine.</param>
/// <returns>One result per regular expression with its ID and accumulated totals.</returns>
/// <exception cref="Exception">A required source column is missing from <paramref name="table"/>.</exception>
public RegExpProcessingResultsCollection<ColRegExpStatisticsProcessingResult> CalcScores(DataTable table, IEnumerable<DataRow> documentRows, ColRegExpStatisticsProcessingParams param) {
    var columnIndexID = table.Columns.IndexOf("ED_ENC_NUM");
    var columnIndexText = table.Columns.IndexOf("NOTE_TEXT");
    // Parallel_CalcScores needs the score column ordinal. The name "Score" is assumed to
    // match the Documents table used by the database-backed overload - TODO confirm.
    var columnIndexScore = table.Columns.IndexOf("Score");
    if (columnIndexID == -1 || columnIndexText == -1 || columnIndexScore == -1) {
        throw new Exception("Cannot find source columns");
    }

    // NOTE(review): CalcDocumentScores reads the document ID from ordinal 0, so this
    // overload presumably requires ED_ENC_NUM to be the first column - verify.
    // NOTE(review): the count is taken from the whole table, which skews progress
    // reporting if documentRows is only a subset - verify against callers.
    var docsCount = table.Rows.Count;

    // Parallel_CalcScores expects (text ordinal, score ordinal). The previous call passed
    // (ID ordinal, text ordinal), shifting every column by one role.
    Parallel_CalcScores(DatabaseHelper.AsDataRecordEnumerable(documentRows), docsCount, columnIndexText, columnIndexScore, param);

    var matchResults = new RegExpProcessingResultsCollection<ColRegExpStatisticsProcessingResult>(_listRegExps.Select(x => new ColRegExpStatisticsProcessingResult {
        ID = x.ID,
        TotalMatches = x.TotalMatches,
        TotalDocuments = x.TotalDocuments
    }));

    return matchResults;
}