Ejemplo n.º 1
0
        /// <summary>
        /// Runs the regular-expression statistics pass over every row of the "Documents" table
        /// and serializes the aggregated result to the output file named by <paramref name="param"/>.
        /// </summary>
        /// <param name="param">
        /// Processing settings: supplies the documents database path and password, the output
        /// file name, and the options forwarded to the per-document calculation.
        /// </param>
        public void CalcStatistics(RegExpStatisticsProcessingParams param)
        {
            // Positions of the selected columns inside the query's result set.
            const int idColumn   = 0;
            const int textColumn = 1;

            const string query = "SELECT ED_ENC_NUM, NOTE_TEXT FROM Documents";

            using (var connection = DatabaseHelper.CreateConnection(param.DocumentsDatabaseFilePath, param.Password))
            {
                connection.Open();

                // The row count is fetched before the records and is passed along
                // to the parallel pass together with them.
                var totalDocuments = DatabaseHelper.GetRowsCount(connection, "Documents");
                var records        = DatabaseHelper.GetDataRecords(connection, query);

                var perDocumentResults = Parallel_CalcStatistics(records, totalDocuments, idColumn, textColumn, param);

                // Hand the per-document results to UpdateModifiedRows while the connection is still open.
                UpdateModifiedRows(perDocumentResults, connection);

                var aggregated = AggregateStatistics(perDocumentResults);
                var outputPath = param.GetFullPath(param.OutputFileName);

                aggregated.Serialize(outputPath);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Counts the filtered regular-expression matches found in a single document record and
        /// enqueues one intermediate result per distinct matched value.
        /// </summary>
        /// <remarks>
        /// Records whose ID or text column is NULL are skipped, as are documents without matches.
        /// When <c>param.Replace</c> is set, the first grouped result additionally carries the
        /// document ID and the text with matches replaced, and is flagged as modified. If the
        /// replacement fails, the exception is logged and the result is enqueued unmodified.
        /// </remarks>
        /// <param name="columnIndexID">Index of the document ID column in <paramref name="record"/>.</param>
        /// <param name="columnIndexText">Index of the document text column in <paramref name="record"/>.</param>
        /// <param name="record">The data record holding the document to analyze.</param>
        /// <param name="param">Processing settings; <c>Replace</c> and <c>ReplacementeString</c> are read here.</param>
        /// <param name="results">Queue that receives the intermediate results for this document.</param>
        private void CalcDocumentStatistics(int columnIndexID, int columnIndexText, IDataRecord record, RegExpStatisticsProcessingParams param, ConcurrentQueue <RegExpStatisticsIntermediateResult> results)
        {
            if (record.IsDBNull(columnIndexID) || record.IsDBNull(columnIndexText))
            {
                return;
            }

            var documentID = record.GetDouble(columnIndexID);
            var docText    = record.GetString(columnIndexText);

            // Group and materialize in one pass, then test the list for emptiness. This avoids
            // enumerating the match sequence twice in case it is lazily evaluated.
            var aggregatedList = this.Single.GetFilteredMatches(docText)
                                     .GroupBy(x => x.Value)
                                     .Select(g => new RegExpStatisticsIntermediateResult
                                     {
                                         Word  = g.Key,
                                         Count = g.Count()
                                     })
                                     .ToList();

            if (aggregatedList.Count == 0)
            {
                return;
            }

            try
            {
                if (param.Replace)
                {
                    // Compute the replacement BEFORE touching the result object: if it throws,
                    // the result must not be left flagged as modified with no updated text
                    // and no document ID attached.
                    var updatedText = this.Single.ReplaceMatches(docText, param.ReplacementeString);

                    var singleResult = aggregatedList[0];
                    singleResult.UpdatedText = updatedText;
                    singleResult.DocumentID  = documentID;
                    singleResult.Modified    = true;
                }
            }
            catch (Exception ex)
            {
                // Best effort: statistics are still reported even when the replacement fails.
                this.Logger.HandleException(ex);
            }

            aggregatedList.ForEach(x => results.Enqueue(x));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Processes all document records in parallel, collecting the per-document intermediate
        /// results and reporting progress through the logger after each record.
        /// </summary>
        /// <remarks>
        /// An exception raised while handling one record is passed to <c>Logger.HandleException</c>
        /// and does not abort the loop. The loop is stopped when <c>Logger.ReportProgress</c>
        /// returns <c>false</c>.
        /// NOTE(review): records are read on multiple threads; confirm that the
        /// <see cref="IDataRecord"/> instances supplied by the caller are safe to consume concurrently.
        /// </remarks>
        /// <param name="enumerableDocs">The document records to process.</param>
        /// <param name="docsCount">Expected number of documents; used only to scale the progress percentage.</param>
        /// <param name="columnIndexID">Index of the document ID column in each record.</param>
        /// <param name="columnIndexText">Index of the document text column in each record.</param>
        /// <param name="param">Processing settings forwarded to the per-document calculation.</param>
        /// <returns>The queue of intermediate results produced for all processed documents.</returns>
        private ConcurrentQueue <RegExpStatisticsIntermediateResult> Parallel_CalcStatistics(IEnumerable <IDataRecord> enumerableDocs, long docsCount, int columnIndexID, int columnIndexText, RegExpStatisticsProcessingParams param)
        {
            var results = new ConcurrentQueue <RegExpStatisticsIntermediateResult>();

            // Each processed document advances the progress counter by the number of
            // entries in _listRegExps.
            long progressStep  = _listRegExps.Count;
            long progressMax   = docsCount * progressStep;
            long progressValue = 0;

            Parallel.ForEach(enumerableDocs, (record, state) =>
            {
                try
                {
                    CalcDocumentStatistics(columnIndexID, columnIndexText, record, param, results);

                    var threadProgressValue = Interlocked.Add(ref progressValue, progressStep);

                    // progressMax is zero when either factor is zero; dividing by it would give
                    // NaN/Infinity, whose conversion to int is unspecified. Report 0 % instead.
                    var progressPercentage = progressMax > 0
                        ? (int)(threadProgressValue / (double)progressMax * 100D)
                        : 0;

                    if (!this.Logger.ReportProgress(progressPercentage, threadProgressValue))
                    {
                        state.Stop();
                    }
                }
                catch (Exception ex)
                {
                    Logger.HandleException(ex);
                }
            });

            return results;
        }