예제 #1
0
        /// <summary>
        /// Runs value extraction for a single document and appends every non-null
        /// extraction result to <paramref name="results"/>.
        /// </summary>
        /// <param name="documentID">Identifier of the document being processed.</param>
        /// <param name="columnIndexText">Index of the text column inside <paramref name="record"/>.</param>
        /// <param name="record">Data record holding the document row.</param>
        /// <param name="scripts">
        /// Script managers keyed by an integer; when <c>null</c>, the regular expressions in
        /// <c>_listRegExps</c> are used instead of scripts.
        /// </param>
        /// <param name="columnID">Not referenced by this method.</param>
        /// <param name="results">Collection that receives the extraction results.</param>
        /// <param name="param">Supplies the documents database path and password for the non-script path.</param>
        protected void ExtractDocumentValues(double documentID, int columnIndexText, IDataRecord record, Dictionary <int, CSScriptManager> scripts, int columnID, RegExpProcessingResultsCollection <ColRegExpExtractProcessingResult> results, ColRegExpExtractProcessingParams param)
        {
            // A document without text yields no results.
            if (record.IsDBNull(columnIndexText))
            {
                return;
            }

            var text = record.GetString(columnIndexText);

            // Script path: every script is applied to the text read from the record.
            if (scripts != null)
            {
                foreach (var entry in scripts)
                {
                    var scripted = ScriptExtractRegExpValues(documentID, text, entry.Value, entry.Key);
                    if (scripted != null)
                    {
                        results.Add(scripted);
                    }
                }

                return;
            }

            // Regular-expression path: the note text is fetched from the documents database
            // per expression, using the column named in that expression's extract options.
            using (var connection = DatabaseHelper.CreateConnection(param.DocumentsDatabaseFilePath, param.Password))
            {
                connection.Open();

                var extractingRegExps = _listRegExps.Where(x => x.ExtractOptions != null && x.ExtractOptions.Extract);

                foreach (var regExp in extractingRegExps)
                {
                    var note = DatabaseHelper.GetNoteText(connection, documentID, regExp.ExtractOptions.NoteTextColumn);

                    var extracted = ExtractRegExpValues(regExp, documentID, note);
                    if (extracted != null)
                    {
                        results.Add(extracted);
                    }
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Scores every record of <paramref name="enumerableDocs"/> in parallel through
        /// <c>CalcDocumentScore</c> and collects the non-null results.
        /// </summary>
        /// <param name="enumerableDocs">Records to score; enumerated by <c>Parallel.ForEach</c>.</param>
        /// <param name="docsCount">Expected number of records; used only to scale the reported progress.</param>
        /// <param name="columnIndexID">Passed through to <c>CalcDocumentScore</c>.</param>
        /// <param name="columnIndexTextList">Passed through to <c>CalcDocumentScore</c>.</param>
        /// <param name="synergies">Passed through to <c>CalcDocumentScore</c>.</param>
        /// <returns>The collection of score results gathered from all processed records.</returns>
        /// <remarks>
        /// <c>results.Add</c> is invoked from the parallel loop body, so this presumes
        /// <c>RegExpProcessingResultsCollection</c> tolerates concurrent adds — TODO confirm.
        /// An exception raised while processing one record is handed to <c>Logger.HandleException</c>
        /// inside the loop body instead of propagating out of <c>Parallel.ForEach</c>.
        /// </remarks>
        protected RegExpProcessingResultsCollection <RegExpScoreProcessingResult> Parallel_CalcScores(IEnumerable <IDataRecord> enumerableDocs, long docsCount, int columnIndexID, List <int> columnIndexTextList, List <RegExpSynergy> synergies)
        {
            var results = new RegExpProcessingResultsCollection <RegExpScoreProcessingResult>();

            ///////////////////////////////////////////////////////////////////////////////

            // Each processed document advances the progress by one unit per regular expression.
            long progressStep  = _listRegExps.Count;
            long progressMax   = docsCount * progressStep;
            long progressValue = 0;

            //////////////////////////////////////////////////////////////////////////

            Parallel.ForEach(enumerableDocs, (record, state) =>
            {
                try
                {
                    var scoreResult = CalcDocumentScore(columnIndexID, columnIndexTextList, record, synergies);

                    if (scoreResult != null)
                    {
                        results.Add(scoreResult);
                    }

                    ///////////////////////////////////////////////////////////////////////////////

                    // Interlocked.Add returns the updated total, so each iteration reports its own snapshot.
                    var threadProgressValue = Interlocked.Add(ref progressValue, progressStep);

                    // progressMax is 0 when there are no regular expressions or docsCount is 0;
                    // dividing would yield NaN/Infinity, whose cast to int is unspecified.
                    var progressPercentage = progressMax > 0
                        ? (int)(threadProgressValue / (double)progressMax * 100D)
                        : 0;

                    // A false return from ReportProgress requests that the loop stop.
                    if (!Logger.ReportProgress(progressPercentage, threadProgressValue))
                    {
                        state.Stop();
                    }
                }
                catch (Exception ex)
                {
                    Logger.HandleException(ex);
                }
            });

            ///////////////////////////////////////////////////////////////////////////////

            return results;
        }
예제 #3
0
        /// <summary>
        /// Collects every filtered match of <c>_regExp</c> in the listed text columns of one
        /// record and appends one result per match to <paramref name="results"/>.
        /// </summary>
        /// <param name="columnIndexID">Index of the document ID column; a NULL ID skips the record.</param>
        /// <param name="columnIndexList">Indices of the text columns to scan.</param>
        /// <param name="record">Data record holding the document row.</param>
        /// <param name="results">Collection that receives the match results.</param>
        private void CalcDocumentStatisticsSingle(int columnIndexID, List <int> columnIndexList, IDataRecord record, RegExpProcessingResultsCollection <RegExpStatisticsSingleProcessingResult> results)
        {
            if (record.IsDBNull(columnIndexID))
            {
                return;
            }

            ///////////////////////////////////////////////////////////////////////////////

            // The ID is the same for every column of the record, so read it once.
            var documentID = record.GetDouble(columnIndexID);

            foreach (var columnIndex in columnIndexList)
            {
                // A NULL text column is skipped on its own; the remaining columns are still
                // scanned (previously a NULL column aborted all columns that followed it).
                if (record.IsDBNull(columnIndex))
                {
                    continue;
                }

                ///////////////////////////////////////////////////////////////////////////////

                var docText = record.GetString(columnIndex);

                ///////////////////////////////////////////////////////////////////////////////

                foreach (var match in _regExp.GetFilteredMatches(docText))
                {
                    results.Add(new RegExpStatisticsSingleProcessingResult
                    {
                        Word       = match.Value,
                        DocumentID = documentID,
                        // Stored column ID is the record column index minus one
                        // (presumably because the ID column precedes the text columns — confirm).
                        ColumnID   = columnIndex - 1,
                        Start      = match.Index,
                        Length     = match.Length
                    });
                }
            }
        }