/// <summary>
/// Opens the documents database, computes per-document regular-expression statistics
/// for the rows selected from the "Documents" table, hands the intermediate results to
/// <c>UpdateModifiedRows</c>, and serializes the aggregated statistics to the output file.
/// </summary>
/// <param name="param">
/// Processing parameters; this method reads the database file path, the password and
/// the output file name from it and forwards it to the per-document processing.
/// </param>
public void CalcStatistics(RegExpStatisticsProcessingParams param)
{
    // Positions of the selected columns inside the query below: ID first, note text second.
    const int idColumnIndex = 0;
    const int textColumnIndex = 1;
    const string documentsQuery = "SELECT ED_ENC_NUM, NOTE_TEXT FROM Documents";

    using (var connection = DatabaseHelper.CreateConnection(param.DocumentsDatabaseFilePath, param.Password))
    {
        connection.Open();

        // The row count is only used to scale progress reporting.
        var totalDocuments = DatabaseHelper.GetRowsCount(connection, "Documents");
        var records = DatabaseHelper.GetDataRecords(connection, documentsQuery);

        var perDocumentResults = Parallel_CalcStatistics(records, totalDocuments, idColumnIndex, textColumnIndex, param);

        // Runs while the connection is still open, since it receives the connection.
        UpdateModifiedRows(perDocumentResults, connection);

        var aggregated = AggregateStatistics(perDocumentResults);
        var outputPath = param.GetFullPath(param.OutputFileName);
        aggregated.Serialize(outputPath);
    }
}
/// <summary>
/// Computes match statistics for a single document record and enqueues one
/// intermediate result per distinct matched value.
/// </summary>
/// <param name="columnIndexID">Index of the document ID column in <paramref name="record"/>.</param>
/// <param name="columnIndexText">Index of the document text column in <paramref name="record"/>.</param>
/// <param name="record">The data record to process; skipped when the ID or the text is DBNull.</param>
/// <param name="param">
/// Processing parameters; <c>Replace</c> decides whether matches are replaced and
/// <c>ReplacementeString</c> is the replacement passed to <c>ReplaceMatches</c>.
/// </param>
/// <param name="results">Queue that receives the results produced for this document.</param>
/// <remarks>
/// When replacement is requested, the first result of the document carries the updated
/// text and the document ID and is flagged as modified. If the replacement throws, the
/// exception is logged and the result is left untouched, so it is never flagged as
/// modified without its updated text and document ID.
/// </remarks>
private void CalcDocumentStatistics(int columnIndexID, int columnIndexText, IDataRecord record, RegExpStatisticsProcessingParams param, ConcurrentQueue<RegExpStatisticsIntermediateResult> results)
{
    // A row without an ID or without text contributes nothing.
    if (record.IsDBNull(columnIndexID) || record.IsDBNull(columnIndexText))
    {
        return;
    }

    var documentID = record.GetDouble(columnIndexID);
    var docText = record.GetString(columnIndexText);

    // Group and materialize in one pass, then test the list: this enumerates the
    // match sequence once instead of once for Any() and again for GroupBy().
    var aggregatedList = this.Single.GetFilteredMatches(docText)
        .GroupBy(x => x.Value)
        .Select(g => new RegExpStatisticsIntermediateResult { Word = g.Key, Count = g.Count() })
        .ToList();

    if (aggregatedList.Count == 0)
    {
        return;
    }

    try
    {
        if (param.Replace)
        {
            // Run the replacement first: if it throws, no field of the result has
            // been changed yet and the result stays consistently "not modified".
            var updatedText = this.Single.ReplaceMatches(docText, param.ReplacementeString);

            var singleResult = aggregatedList[0];
            singleResult.DocumentID = documentID;
            singleResult.UpdatedText = updatedText;
            singleResult.Modified = true;
        }
    }
    catch (Exception ex)
    {
        // Best effort: statistics are still reported even when the replacement fails.
        this.Logger.HandleException(ex);
    }

    foreach (var result in aggregatedList)
    {
        results.Enqueue(result);
    }
}
/// <summary>
/// Processes the document records in parallel, collecting the intermediate results of
/// every record into a single queue and reporting progress after each record.
/// </summary>
/// <param name="enumerableDocs">The document records to process.</param>
/// <param name="docsCount">Expected number of documents; used only to scale the progress percentage.</param>
/// <param name="columnIndexID">Index of the document ID column in each record.</param>
/// <param name="columnIndexText">Index of the document text column in each record.</param>
/// <param name="param">Processing parameters forwarded to the per-document processing.</param>
/// <returns>The queue holding the intermediate results of all processed records.</returns>
/// <remarks>
/// The loop is asked to stop as soon as <c>Logger.ReportProgress</c> returns <c>false</c>.
/// Exceptions thrown while handling a record are logged and do not abort the loop.
/// NOTE(review): records are read on several threads at once; confirm that the
/// <see cref="IDataRecord"/> instances supplied by the caller are safe for concurrent use.
/// </remarks>
private ConcurrentQueue<RegExpStatisticsIntermediateResult> Parallel_CalcStatistics(IEnumerable<IDataRecord> enumerableDocs, long docsCount, int columnIndexID, int columnIndexText, RegExpStatisticsProcessingParams param)
{
    var results = new ConcurrentQueue<RegExpStatisticsIntermediateResult>();

    // Progress advances by the number of regular expressions per processed document.
    long progressStep = _listRegExps.Count;
    long progressMax = docsCount * progressStep;
    long progressValue = 0;

    Parallel.ForEach(enumerableDocs, (record, state) =>
    {
        try
        {
            CalcDocumentStatistics(columnIndexID, columnIndexText, record, param, results);

            // Interlocked.Add returns the updated total, so each thread reports its own snapshot.
            var threadProgressValue = Interlocked.Add(ref progressValue, progressStep);

            // With a zero maximum the division yields NaN or Infinity, and casting
            // those to int gives an unspecified value; report 0 instead.
            var progressPercentage = progressMax > 0
                ? (int)(threadProgressValue / (double)progressMax * 100D)
                : 0;

            if (!this.Logger.ReportProgress(progressPercentage, threadProgressValue))
            {
                state.Stop();
            }
        }
        catch (Exception ex)
        {
            Logger.HandleException(ex);
        }
    });

    return results;
}