コード例 #1
0
        /// <summary>
        /// Scores the rows of the "Documents" table against the configured regular
        /// expressions and serializes the per-expression totals to the matches output file.
        /// </summary>
        /// <param name="param">
        /// Supplies the documents database path and password, the
        /// <c>OnlyPositiveScore</c> filter flag and the output file name.
        /// </param>
        public void CalcScores(ColRegExpStatisticsProcessingParams param)
        {
            // Ordinals of NOTE_TEXT and Score within the SELECT list below (ED_ENC_NUM is 0).
            const int textColumnOrdinal  = 1;
            const int scoreColumnOrdinal = 2;

            const string selectAllDocuments = "SELECT ED_ENC_NUM, NOTE_TEXT, Score FROM Documents";

            using (var connection = DatabaseHelper.CreateConnection(param.DocumentsDatabaseFilePath, param.Password))
            {
                connection.Open();

                // The row count uses the same optional filter as the query so that the
                // progress maximum matches the number of records that will be processed.
                var totalDocuments = DatabaseHelper.GetRowsCount(connection, "Documents", param.OnlyPositiveScore ? "Score > 0" : null);

                string selectSql = param.OnlyPositiveScore
                    ? selectAllDocuments + " WHERE Score > 0"
                    : selectAllDocuments;

                var records = DatabaseHelper.GetDataRecords(connection, selectSql);

                Parallel_CalcScores(records, totalDocuments, textColumnOrdinal, scoreColumnOrdinal, param);

                // Snapshot the totals accumulated on each regular expression into result objects.
                var perRegExpTotals = _listRegExps.Select(regExp => new ColRegExpStatisticsProcessingResult
                {
                    ID             = regExp.ID,
                    TotalMatches   = regExp.TotalMatches,
                    TotalDocuments = regExp.TotalDocuments
                });

                var results = new RegExpProcessingResultsCollection<ColRegExpStatisticsProcessingResult>(perRegExpTotals);

                results.Serialize(param.GetFullPath(param.MatchesOutputFileName));
            }
        }
コード例 #2
0
        /// <summary>
        /// Applies every configured regular expression to a single document record.
        /// </summary>
        /// <param name="columnIndexText">Ordinal of the text column; a DB-null value here skips the record.</param>
        /// <param name="columnIndexScore">Ordinal of the score column.</param>
        /// <param name="record">The document record; its document ID is read from ordinal 0.</param>
        /// <param name="param">Supplies the documents database path and password.</param>
        protected void CalcDocumentScores(int columnIndexText, int columnIndexScore, IDataRecord record, ColRegExpStatisticsProcessingParams param)
        {
            // Nothing to score when the record has no text.
            if (!record.IsDBNull(columnIndexText))
            {
                double docId = record.GetDouble(0);

                // The value is not used below; the read is retained so the record is
                // accessed exactly as before (the text that gets scored is re-fetched per regexp).
                var recordText = record.GetString(columnIndexText);

                var docScore = DatabaseHelper.GetInt32ValueInvariant(record, columnIndexScore);

                // A dedicated connection is opened for this document and disposed when done.
                using (var connection = DatabaseHelper.CreateConnection(param.DocumentsDatabaseFilePath, param.Password))
                {
                    connection.Open();

                    foreach (var regExp in _listRegExps)
                    {
                        // Each regexp names the column whose text it should be matched against.
                        CalcRegExpScore(
                            regExp,
                            DatabaseHelper.GetNoteText(connection, docId, regExp.ExtractOptions.NoteTextColumn),
                            docScore);
                    }
                }
            }
        }
コード例 #3
0
        /// <summary>
        /// Processes the given document records in parallel, calling
        /// <c>CalcDocumentScores</c> for each one and reporting progress to the logger.
        /// </summary>
        /// <param name="enumerableDocs">Records to process.</param>
        /// <param name="docsCount">Expected number of records; used only to compute the progress maximum.</param>
        /// <param name="columnIndexText">Ordinal of the text column, forwarded to <c>CalcDocumentScores</c>.</param>
        /// <param name="columnIndexScore">Ordinal of the score column, forwarded to <c>CalcDocumentScores</c>.</param>
        /// <param name="param">Processing parameters, forwarded to <c>CalcDocumentScores</c>.</param>
        /// <remarks>
        /// An exception thrown while processing a record is handed to
        /// <c>Logger.HandleException</c> and not rethrown; that record does not advance the progress value.
        /// NOTE(review): records are consumed from several threads at once - confirm that the
        /// <see cref="IDataRecord"/> instances produced by the caller are safe to read concurrently.
        /// </remarks>
        protected void Parallel_CalcScores(IEnumerable <IDataRecord> enumerableDocs, long docsCount, int columnIndexText, int columnIndexScore, ColRegExpStatisticsProcessingParams param)
        {
            // Each document contributes one progress unit per regular expression.
            long progressStep  = _listRegExps.Count;
            long progressMax   = docsCount * progressStep;
            long progressValue = 0;

            Parallel.ForEach(enumerableDocs, (record, state) =>
            {
                try
                {
                    CalcDocumentScores(columnIndexText, columnIndexScore, record, param);

                    ///////////////////////////////////////////////////////////////////////////////

                    // Atomic add: the counter is shared by all worker threads.
                    var threadProgressValue = Interlocked.Add(ref progressValue, progressStep);

                    // With no regular expressions (or a zero document count) progressMax is 0 and
                    // the division would yield NaN/Infinity, whose cast to int is unspecified.
                    // Report 0% in that case instead.
                    var progressPercentage = progressMax > 0
                        ? (int)(threadProgressValue / (double)progressMax * 100D)
                        : 0;

                    // A false return from the logger is treated as a request to stop the loop.
                    if (!this.Logger.ReportProgress(progressPercentage, threadProgressValue))
                    {
                        state.Stop();
                    }
                }
                catch (Exception ex)
                {
                    Logger.HandleException(ex);
                }
            });
        }
コード例 #4
0
        // not used function
        /// <summary>
        /// Scores the given rows of an in-memory table against the configured regular
        /// expressions and returns the per-expression totals.
        /// </summary>
        /// <param name="table">Table that must contain the "ED_ENC_NUM", "NOTE_TEXT" and "Score" columns.</param>
        /// <param name="documentRows">Rows to process.</param>
        /// <param name="param">Processing parameters, forwarded to <c>Parallel_CalcScores</c>.</param>
        /// <returns>One result per regular expression, carrying its ID and accumulated totals.</returns>
        /// <exception cref="Exception">A required column is missing from <paramref name="table"/>.</exception>
        public RegExpProcessingResultsCollection <ColRegExpStatisticsProcessingResult> CalcScores(DataTable table, IEnumerable <DataRow> documentRows, ColRegExpStatisticsProcessingParams param)
        {
            var columnIndexID    = table.Columns.IndexOf("ED_ENC_NUM");
            var columnIndexText  = table.Columns.IndexOf("NOTE_TEXT");
            var columnIndexScore = table.Columns.IndexOf("Score");

            if (columnIndexID == -1 || columnIndexText == -1 || columnIndexScore == -1)
            {
                throw new Exception("Cannot find source columns");
            }

            ///////////////////////////////////////////////////////////////////////////////

            // Progress maximum is based on the whole table.
            // NOTE(review): documentRows may be a subset of table.Rows, in which case the
            // reported percentage will not reach 100 - confirm against callers.
            var docsCount = table.Rows.Count;

            // Parallel_CalcScores expects (text ordinal, score ordinal). The ID ordinal is
            // only validated above: CalcDocumentScores reads the document ID from ordinal 0.
            Parallel_CalcScores(DatabaseHelper.AsDataRecordEnumerable(documentRows), docsCount, columnIndexText, columnIndexScore, param);

            var matchResults = new RegExpProcessingResultsCollection <ColRegExpStatisticsProcessingResult>(_listRegExps.Select(x => new ColRegExpStatisticsProcessingResult
            {
                ID             = x.ID,
                TotalMatches   = x.TotalMatches,
                TotalDocuments = x.TotalDocuments
            }));

            return(matchResults);
        }