Exemplo n.º 1
0
        /// <summary>
        /// Processes the supplied document records with <c>Parallel.ForEach</c>, delegating each
        /// document to <c>ExtractDocumentValues</c>, and returns the collected results.
        /// </summary>
        /// <param name="enumerableDocs">Document records to process.</param>
        /// <param name="docsCount">Expected number of documents; used only to scale the reported progress.</param>
        /// <param name="columnIndexID">Ordinal of the column holding the document ID (read with <c>GetDouble</c>).</param>
        /// <param name="columnIndexText">Ordinal of the text column, forwarded to <c>ExtractDocumentValues</c>.</param>
        /// <param name="param">Extraction parameters. When <c>DocumentsList</c> is non-empty, only the IDs it contains are processed.</param>
        /// <param name="scripts">Script managers forwarded to <c>ExtractDocumentValues</c>; may be null.</param>
        /// <returns>The collection that the per-document extraction calls added their results to.</returns>
        protected RegExpProcessingResultsCollection<ColRegExpExtractProcessingResult> Parallel_ExtractValues(IEnumerable<IDataRecord> enumerableDocs, long docsCount, int columnIndexID, int columnIndexText, ColRegExpExtractProcessingParams param, Dictionary<int, CSScriptManager> scripts)
        {
            // NOTE(review): this collection is added to from several worker threads at once;
            // presumably it is thread-safe - confirm against its implementation.
            var results = new RegExpProcessingResultsCollection<ColRegExpExtractProcessingResult>();

            // Each processed document advances the progress counter by the number of regular expressions.
            long progressStep  = _listRegExps.Count;
            long progressMax   = docsCount * progressStep;
            long progressValue = 0;

            Parallel.ForEach(enumerableDocs, (record, state) =>
            {
                try
                {
                    // Records without an ID are skipped (and do not advance the progress counter).
                    if (record.IsDBNull(columnIndexID))
                    {
                        return;
                    }

                    // An empty DocumentsList means "process everything"; otherwise it acts as a whitelist.
                    var documentID = record.GetDouble(columnIndexID);
                    if (param.DocumentsList.Any() && !param.DocumentsList.Contains(documentID))
                    {
                        return;
                    }

                    ExtractDocumentValues(documentID, columnIndexText, record, scripts, param.ColumnID, results, param);

                    // Interlocked.Add returns the updated total, so every thread reports its own snapshot.
                    var threadProgressValue = Interlocked.Add(ref progressValue, progressStep);

                    // progressMax is zero when there are no regular expressions (or no documents);
                    // dividing would yield NaN, and casting NaN to int gives an unspecified value.
                    var progressPercentage = progressMax > 0
                        ? (int)(threadProgressValue / (double)progressMax * 100D)
                        : 0;

                    // A false return from ReportProgress stops the loop.
                    if (!this.Logger.ReportProgress(progressPercentage, threadProgressValue))
                    {
                        state.Stop();
                    }
                }
                catch (Exception ex)
                {
                    // A failure in one document is handed to the logger so the remaining documents are still processed.
                    Logger.HandleException(ex);
                }
            });

            return results;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Extracts values for a single document and adds every non-null result to <paramref name="results"/>.
        /// </summary>
        /// <remarks>
        /// Nothing is extracted when the text column of <paramref name="record"/> is DBNull.
        /// When <paramref name="scripts"/> is null, every entry of <c>_listRegExps</c> with extraction enabled is
        /// applied to the note text loaded through <c>DatabaseHelper.GetNoteText</c>; otherwise each script is run
        /// against the text read from <paramref name="record"/>.
        /// </remarks>
        /// <param name="documentID">ID of the document being processed.</param>
        /// <param name="columnIndexText">Ordinal of the text column in <paramref name="record"/>.</param>
        /// <param name="record">Record of the current document.</param>
        /// <param name="scripts">Script managers keyed by an integer passed on to <c>ScriptExtractRegExpValues</c>, or null to use regular expressions.</param>
        /// <param name="columnID">Not used by this method; kept for signature compatibility.</param>
        /// <param name="results">Collection receiving the extraction results.</param>
        /// <param name="param">Supplies the documents database path and password for the regular-expression branch.</param>
        protected void ExtractDocumentValues(double documentID, int columnIndexText, IDataRecord record, Dictionary<int, CSScriptManager> scripts, int columnID, RegExpProcessingResultsCollection<ColRegExpExtractProcessingResult> results, ColRegExpExtractProcessingParams param)
        {
            if (record.IsDBNull(columnIndexText))
            {
                return;
            }

            if (scripts != null)
            {
                // Only the script branch works on the text carried by the record itself.
                var docText = record.GetString(columnIndexText);

                foreach (var pair in scripts)
                {
                    var extractResult = ScriptExtractRegExpValues(documentID, docText, pair.Value, pair.Key);
                    if (extractResult != null)
                    {
                        results.Add(extractResult);
                    }
                }

                return;
            }

            // Select the applicable regular expressions first, so that no database connection
            // is opened for this document when there is nothing to extract.
            var extractableRegExps = _listRegExps
                .Where(x => x.ExtractOptions != null && x.ExtractOptions.Extract)
                .ToList();

            if (extractableRegExps.Count == 0)
            {
                return;
            }

            using (var docsConnection = DatabaseHelper.CreateConnection(param.DocumentsDatabaseFilePath, param.Password))
            {
                docsConnection.Open();

                foreach (var regExp in extractableRegExps)
                {
                    // Each regular expression names its own note-text column, so the text is fetched per expression.
                    var noteText = DatabaseHelper.GetNoteText(docsConnection, documentID, regExp.ExtractOptions.NoteTextColumn);

                    var extractResult = ExtractRegExpValues(regExp, documentID, noteText);
                    if (extractResult != null)
                    {
                        results.Add(extractResult);
                    }
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reads the documents from the documents database, runs the extraction over them in parallel
        /// and serializes the results to the configured output file.
        /// </summary>
        /// <param name="param">
        /// Extraction parameters: database path and password, the optional "Score &gt; 0" restriction,
        /// script settings and the output file name.
        /// </param>
        public void ExtractValues(ColRegExpExtractProcessingParams param)
        {
            // Ordinals of the two columns selected by the query below.
            const int idColumnOrdinal = 0;
            const int textColumnOrdinal = 1;

            using (var connection = DatabaseHelper.CreateConnection(param.DocumentsDatabaseFilePath, param.Password))
            {
                connection.Open();

                // Row count uses the same score restriction as the query, so progress is scaled to the selected rows.
                string scoreFilter = param.OnlyPositiveScore ? "Score > 0" : null;
                var totalDocuments = DatabaseHelper.GetRowsCount(connection, "Documents", scoreFilter);

                // Default text column is NOTE_TEXT; a script may point at a numbered NOTE_TEXT<n> column instead.
                string sql = "SELECT ED_ENC_NUM, NOTE_TEXT FROM Documents";

                if (param.ScriptExtract && param.ScriptCode != null)
                {
                    int noteColumnIndex = GetNoteColumnIndexFromCode(param.ScriptCode);

                    if (noteColumnIndex != 0)
                    {
                        sql = "SELECT ED_ENC_NUM, NOTE_TEXT" + noteColumnIndex.ToString() + " FROM Documents";
                    }
                }

                if (param.OnlyPositiveScore)
                {
                    sql += " WHERE Score > 0";
                }

                var records = DatabaseHelper.GetDataRecords(connection, sql);

                // Scripts stay null unless script extraction is requested.
                Dictionary<int, CSScriptManager> scriptManagers = null;

                if (param.ScriptExtract)
                {
                    if (param.ColumnID == -1)
                    {
                        // No specific column: load the scripts from the regular-expression database.
                        scriptManagers = GetScripts(param.RegExpDatabaseFilePath, param.Password);
                    }
                    else
                    {
                        // A specific column: use only the script supplied in the parameters.
                        scriptManagers = new Dictionary<int, CSScriptManager>();
                        scriptManagers.Add(param.ColumnID, CreateScriptManager(param.ScriptCode));
                    }
                }

                var extracted = Parallel_ExtractValues(records, totalDocuments, idColumnOrdinal, textColumnOrdinal, param, scriptManagers);

                var outputPath = param.GetFullPath(param.ExtractOutputFileName);
                extracted.Serialize(outputPath);
            }
        }