/// <summary>
/// Runs value extraction for one document and appends every non-null extraction
/// result to <paramref name="results"/>.
/// </summary>
/// <remarks>
/// When <paramref name="scripts"/> is supplied, each script is run against the text read
/// from <paramref name="record"/>. When it is null, a connection to the documents database
/// is opened and every entry of <c>_listRegExps</c> whose extract options are enabled is
/// applied to the note text fetched for that entry's configured column.
/// </remarks>
/// <param name="documentID">Identifier forwarded to the note lookup and to the extraction helpers.</param>
/// <param name="columnIndexText">Index of the text column in <paramref name="record"/>; nothing is done when it is DB-null.</param>
/// <param name="record">Current data record.</param>
/// <param name="scripts">Script managers keyed by an integer id, or null to use the regular expressions.</param>
/// <param name="columnID">Not used by this method body.</param>
/// <param name="results">Collection that receives the extraction results.</param>
/// <param name="param">Supplies the documents database file path and password.</param>
protected void ExtractDocumentValues(double documentID, int columnIndexText, IDataRecord record, Dictionary<int, CSScriptManager> scripts, int columnID, RegExpProcessingResultsCollection<ColRegExpExtractProcessingResult> results, ColRegExpExtractProcessingParams param)
{
    if (record.IsDBNull(columnIndexText))
    {
        return;
    }

    // Read up front on both paths, even though only the script path consumes it.
    var documentText = record.GetString(columnIndexText);

    if (scripts != null)
    {
        foreach (var scriptEntry in scripts)
        {
            var scriptResult = ScriptExtractRegExpValues(documentID, documentText, scriptEntry.Value, scriptEntry.Key);
            if (scriptResult != null)
            {
                results.Add(scriptResult);
            }
        }

        return;
    }

    using (var connection = DatabaseHelper.CreateConnection(param.DocumentsDatabaseFilePath, param.Password))
    {
        connection.Open();

        foreach (var expression in _listRegExps)
        {
            // Only expressions that are explicitly configured for extraction take part.
            if (expression.ExtractOptions == null || !expression.ExtractOptions.Extract)
            {
                continue;
            }

            var noteText = DatabaseHelper.GetNoteText(connection, documentID, expression.ExtractOptions.NoteTextColumn);
            var regExpResult = ExtractRegExpValues(expression, documentID, noteText);
            if (regExpResult != null)
            {
                results.Add(regExpResult);
            }
        }
    }
}
/// <summary>
/// Calculates a score for every record of <paramref name="enumerableDocs"/> using
/// <c>Parallel.ForEach</c> and returns the collected non-null results.
/// </summary>
/// <remarks>
/// Progress is advanced by <c>_listRegExps.Count</c> per processed record and reported through
/// <c>Logger.ReportProgress</c>; when that call returns false the parallel loop is asked to stop.
/// Exceptions thrown while processing a record are passed to <c>Logger.HandleException</c> and do
/// not abort the remaining records.
/// NOTE(review): <c>results.Add</c> is called from multiple worker threads — this assumes
/// <c>RegExpProcessingResultsCollection</c> is safe for concurrent adds; confirm.
/// </remarks>
/// <param name="enumerableDocs">Records to score.</param>
/// <param name="docsCount">Expected number of records; used only to compute the progress percentage.</param>
/// <param name="columnIndexID">Forwarded to <c>CalcDocumentScore</c>.</param>
/// <param name="columnIndexTextList">Forwarded to <c>CalcDocumentScore</c>.</param>
/// <param name="synergies">Forwarded to <c>CalcDocumentScore</c>.</param>
/// <returns>The collection of score results produced for the processed records.</returns>
protected RegExpProcessingResultsCollection<RegExpScoreProcessingResult> Parallel_CalcScores(IEnumerable<IDataRecord> enumerableDocs, long docsCount, int columnIndexID, List<int> columnIndexTextList, List<RegExpSynergy> synergies)
{
    var results = new RegExpProcessingResultsCollection<RegExpScoreProcessingResult>();

    long progressStep = _listRegExps.Count;
    long progressMax = docsCount * progressStep;
    long progressValue = 0;

    Parallel.ForEach(enumerableDocs, (record, state) =>
    {
        try
        {
            var scoreResult = CalcDocumentScore(columnIndexID, columnIndexTextList, record, synergies);
            if (scoreResult != null)
            {
                results.Add(scoreResult);
            }

            var threadProgressValue = Interlocked.Add(ref progressValue, progressStep);

            // progressMax is zero (or negative) when there are no regular expressions or the
            // supplied count is not positive. Dividing by it would produce NaN/Infinity or a
            // negative percentage, and casting NaN/Infinity to int is unspecified, so report 0.
            var progressPercentage = progressMax > 0
                ? (int)(threadProgressValue / (double)progressMax * 100D)
                : 0;

            if (!Logger.ReportProgress(progressPercentage, threadProgressValue))
            {
                state.Stop();
            }
        }
        catch (Exception ex)
        {
            // Best effort: log the failure for this record and let the other records continue.
            Logger.HandleException(ex);
        }
    });

    return results;
}
/// <summary>
/// Collects the filtered matches of <c>_regExp</c> in each text column of one record and adds
/// one <c>RegExpStatisticsSingleProcessingResult</c> per match to <paramref name="results"/>.
/// </summary>
/// <remarks>
/// The record is ignored entirely when its ID column is DB-null. A DB-null text column is
/// skipped on its own; the remaining text columns of the same record are still processed.
/// </remarks>
/// <param name="columnIndexID">Index of the document ID column, read with <c>GetDouble</c>.</param>
/// <param name="columnIndexList">Indices of the text columns to scan.</param>
/// <param name="record">Current data record.</param>
/// <param name="results">Collection that receives one entry per match.</param>
private void CalcDocumentStatisticsSingle(int columnIndexID, List<int> columnIndexList, IDataRecord record, RegExpProcessingResultsCollection<RegExpStatisticsSingleProcessingResult> results)
{
    if (record.IsDBNull(columnIndexID))
    {
        return;
    }

    // The ID is the same for every text column, so read it once.
    var documentID = record.GetDouble(columnIndexID);

    foreach (var columnIndex in columnIndexList)
    {
        // An empty column must not prevent the following columns from being scanned.
        if (record.IsDBNull(columnIndex))
        {
            continue;
        }

        var docText = record.GetString(columnIndex);

        foreach (var match in _regExp.GetFilteredMatches(docText))
        {
            results.Add(new RegExpStatisticsSingleProcessingResult
            {
                Word = match.Value,
                DocumentID = documentID,
                // The stored column id is the record column index minus one.
                ColumnID = columnIndex - 1,
                Start = match.Index,
                Length = match.Length
            });
        }
    }
}