예제 #1
0
        /// <summary>
        /// Returns the distinct key phrases, with occurrence counts, read from the
        /// "stlprecordassociationkeyphrases" table for the supplied filter.
        /// </summary>
        /// <param name="filter">
        /// Optional JSON-serialized <c>KeyPhraseFilter</c>. Each non-empty <c>records</c> entry becomes a
        /// PartitionKey range condition and each non-empty <c>keyphrases</c> entry a RowKey range
        /// condition; entries of the same kind are OR-ed and the two kinds are AND-ed. A null, empty or
        /// JSON <c>null</c> filter applies no conditions.
        /// </param>
        /// <returns>
        /// An <c>ObjectResult</c> carrying the indented JSON array of key phrase counts sorted by key
        /// phrase, or a 500 status result when the storage connection string is not set or any
        /// exception is thrown while processing the request.
        /// </returns>
        public IActionResult GetByRecordFilter([FromQuery] string filter)
        {
            // Soft cap on fetched entities. It is only checked between segments, so the last segment
            // retrieved may push the total above this value.
            const int maxEntities = 20000;

            string entityAsJson = "";
            List<KeyPhraseCount> keyPhrases = new List<KeyPhraseCount>();

            try
            {
                _logger.LogInformation("CPAPI: Get By Record Filter");

                // Deserialize the filter; fall back to an empty filter when none is supplied or when
                // the payload deserializes to null (e.g. the JSON literal "null").
                KeyPhraseFilter oFilter = new KeyPhraseFilter();
                if (!string.IsNullOrEmpty(filter))
                {
                    _logger.LogDebug("Deserializing filter of length: " + filter.Length);
                    KeyPhraseFilter parsedFilter = JsonConvert.DeserializeObject<KeyPhraseFilter>(filter);
                    if (parsedFilter != null)
                    {
                        oFilter = parsedFilter;
                    }
                }

                // Validate that the storage account connection string is configured. The value is only
                // checked here; it is not passed to Utils.GetCloudTable below.
                string storageAccountConnectionString = Utils.GetSecretOrEnvVar(ConfigurationProperties.AzureStorageAccountConnectionString, Configuration, _logger).Trim();
                if (storageAccountConnectionString == "")
                {
                    _logger.LogWarning("Azure storage account connection string not set");
                    return StatusCode((int)System.Net.HttpStatusCode.InternalServerError);
                }

                _logger.LogDebug("Azure storage account connection string loaded");

                CloudTable tRecordAssociationKeyphrases = Utils.GetCloudTable("stlprecordassociationkeyphrases", _logger);

                // Build the PartitionKey conditions: one range per record value, OR-ed together.
                string recordFilter = "";
                if (oFilter.records != null)
                {
                    foreach (string rif in oFilter.records)
                    {
                        if (string.IsNullOrEmpty(rif))
                        {
                            continue;
                        }

                        // Record keys are matched with a trailing "|" appended when missing.
                        string cleanFilterPKey = Utils.CleanTableKey(rif);
                        if (!cleanFilterPKey.EndsWith("|"))
                        {
                            cleanFilterPKey = cleanFilterPKey + "|";
                        }

                        string pkRange = TableQuery.CombineFilters(
                            TableQuery.GenerateFilterCondition("PartitionKey", QueryComparisons.GreaterThanOrEqual, cleanFilterPKey),
                            TableOperators.And,
                            TableQuery.GenerateFilterCondition("PartitionKey", QueryComparisons.LessThan, Utils.GetLessThanFilter(cleanFilterPKey)));

                        recordFilter = recordFilter != ""
                            ? TableQuery.CombineFilters(recordFilter, TableOperators.Or, pkRange)
                            : pkRange;
                    }
                }

                // Build the RowKey conditions: one range per key phrase value, OR-ed together.
                string keyphraseFilter = "";
                if (oFilter.keyphrases != null)
                {
                    foreach (string rif in oFilter.keyphrases)
                    {
                        if (string.IsNullOrEmpty(rif))
                        {
                            continue;
                        }

                        string cleanFilterRKey = Utils.CleanTableKey(rif);

                        string rkRange = TableQuery.CombineFilters(
                            TableQuery.GenerateFilterCondition("RowKey", QueryComparisons.GreaterThanOrEqual, cleanFilterRKey),
                            TableOperators.And,
                            TableQuery.GenerateFilterCondition("RowKey", QueryComparisons.LessThan, Utils.GetLessThanFilter(cleanFilterRKey)));

                        keyphraseFilter = keyphraseFilter != ""
                            ? TableQuery.CombineFilters(keyphraseFilter, TableOperators.Or, rkRange)
                            : rkRange;
                    }
                }

                // AND the two groups when both are present; otherwise use whichever one is set
                // (possibly neither, which leaves the filter string empty).
                string combinedFilter;
                if (recordFilter.Length > 0 && keyphraseFilter.Length > 0)
                {
                    combinedFilter = TableQuery.CombineFilters(recordFilter, TableOperators.And, keyphraseFilter);
                }
                else if (recordFilter.Length > 0)
                {
                    combinedFilter = recordFilter;
                }
                else
                {
                    combinedFilter = keyphraseFilter;
                }

                // No TakeCount or SelectColumns are ever set on this query, so it is executed as-is
                // for every segment.
                TableQuery<KeyPhraseEntity> query = new TableQuery<KeyPhraseEntity>().Where(combinedFilter);

                List<KeyPhraseEntity> keyphraseEntities = new List<KeyPhraseEntity>();
                TableContinuationToken token = null;

                do
                {
                    // This action is synchronous, so the segmented query is awaited by blocking.
                    // Failures surface as an AggregateException, which the catch block below logs.
                    TableQuerySegment<KeyPhraseEntity> segment =
                        tRecordAssociationKeyphrases.ExecuteQuerySegmentedAsync<KeyPhraseEntity>(query, token).Result;
                    token = segment.ContinuationToken;
                    keyphraseEntities.AddRange(segment);
                } while (token != null && keyphraseEntities.Count < maxEntities);

                // Collapse the entities into one counter per distinct RowKey. The dictionary replaces
                // a linear List.Find per entity; the list keeps first-seen order until it is sorted.
                // Assumes KeyPhraseCount exposes the RowKey unchanged as KeyPhrase -- TODO confirm.
                Dictionary<string, KeyPhraseCount> countsByPhrase = new Dictionary<string, KeyPhraseCount>(StringComparer.Ordinal);
                foreach (KeyPhraseEntity kp in keyphraseEntities)
                {
                    KeyPhraseCount existing;
                    if (countsByPhrase.TryGetValue(kp.RowKey, out existing))
                    {
                        // Increment the number of keyphrases found
                        existing.Count++;
                    }
                    else
                    {
                        // NOTE(review): GetRecordKeyPhrasesForRecordAssociations passes an explicit
                        // initial count of 1 here; confirm the single-argument constructor starts
                        // at the same value.
                        KeyPhraseCount created = new KeyPhraseCount(kp.RowKey);
                        countsByPhrase.Add(kp.RowKey, created);
                        keyPhrases.Add(created);
                    }
                }

                keyPhrases.Sort((x, y) => String.Compare(x.KeyPhrase, y.KeyPhrase));

                // Serialize
                entityAsJson = JsonConvert.SerializeObject(keyPhrases, Formatting.Indented);
            }
            catch (Exception ex)
            {
                string exceptionMsg = "KeyPhrase GET exception: " + ex.Message;
                if (ex.InnerException != null)
                {
                    exceptionMsg = exceptionMsg + "[" + ex.InnerException.Message + "]";
                }

                // Pass the exception itself so the stack trace is recorded alongside the message.
                _logger.LogError(ex, exceptionMsg);
                return StatusCode((int)System.Net.HttpStatusCode.InternalServerError);
            }

            return new ObjectResult(entityAsJson);
        }
        /// <summary>
        /// Queries the "stlprecordassociationkeyphrases" table with the given filter and returns one
        /// counter per distinct RowKey, ordered by count from highest to lowest.
        /// </summary>
        /// <param name="oFilter">
        /// Filter to apply; must not be null. Each non-empty <c>records</c> entry becomes a PartitionKey
        /// range condition and each non-empty <c>keyphrases</c> entry a RowKey range condition; entries
        /// of the same kind are OR-ed and the two kinds are AND-ed.
        /// </param>
        /// <returns>
        /// The key phrase counters sorted by descending count. The relative order of entries with
        /// equal counts is not defined, because <c>List.Sort</c> is not a stable sort.
        /// </returns>
        private List<KeyPhraseCount> GetRecordKeyPhrasesForRecordAssociations(KeyPhraseFilter oFilter)
        {
            // Soft cap on fetched entities. It is only checked between segments, so the last segment
            // retrieved may push the total above this value.
            const int maxEntities = 20000;

            List<KeyPhraseCount> keyPhrases = new List<KeyPhraseCount>();

            CloudTable tRecordAssociationKeyphrases = Utils.GetCloudTable("stlprecordassociationkeyphrases", _logger);

            // Build the PartitionKey conditions: one range per record value, OR-ed together.
            string recordFilter = "";
            if (oFilter.records != null)
            {
                foreach (string rif in oFilter.records)
                {
                    if (string.IsNullOrEmpty(rif))
                    {
                        continue;
                    }

                    // Record keys are matched with a trailing "|" appended when missing.
                    string cleanFilterPKey = Utils.CleanTableKey(rif);
                    if (!cleanFilterPKey.EndsWith("|"))
                    {
                        cleanFilterPKey = cleanFilterPKey + "|";
                    }

                    string pkRange = TableQuery.CombineFilters(
                        TableQuery.GenerateFilterCondition("PartitionKey", QueryComparisons.GreaterThanOrEqual, cleanFilterPKey),
                        TableOperators.And,
                        TableQuery.GenerateFilterCondition("PartitionKey", QueryComparisons.LessThan, Utils.GetLessThanFilter(cleanFilterPKey)));

                    recordFilter = recordFilter != ""
                        ? TableQuery.CombineFilters(recordFilter, TableOperators.Or, pkRange)
                        : pkRange;
                }
            }

            // Build the RowKey conditions: one range per key phrase value, OR-ed together.
            string keyphraseFilter = "";
            if (oFilter.keyphrases != null)
            {
                foreach (string rif in oFilter.keyphrases)
                {
                    // Skip null/empty entries, matching how the record values are handled above;
                    // otherwise a blank entry would be handed to Utils.CleanTableKey.
                    if (string.IsNullOrEmpty(rif))
                    {
                        continue;
                    }

                    string cleanFilterRKey = Utils.CleanTableKey(rif);

                    string rkRange = TableQuery.CombineFilters(
                        TableQuery.GenerateFilterCondition("RowKey", QueryComparisons.GreaterThanOrEqual, cleanFilterRKey),
                        TableOperators.And,
                        TableQuery.GenerateFilterCondition("RowKey", QueryComparisons.LessThan, Utils.GetLessThanFilter(cleanFilterRKey)));

                    keyphraseFilter = keyphraseFilter != ""
                        ? TableQuery.CombineFilters(keyphraseFilter, TableOperators.Or, rkRange)
                        : rkRange;
                }
            }

            // AND the two groups when both are present; otherwise use whichever one is set
            // (possibly neither, which leaves the filter string empty).
            string combinedFilter;
            if (recordFilter.Length > 0 && keyphraseFilter.Length > 0)
            {
                combinedFilter = TableQuery.CombineFilters(recordFilter, TableOperators.And, keyphraseFilter);
            }
            else if (recordFilter.Length > 0)
            {
                combinedFilter = recordFilter;
            }
            else
            {
                combinedFilter = keyphraseFilter;
            }

            // No TakeCount or SelectColumns are ever set on this query, so it is executed as-is for
            // every segment.
            TableQuery<RecordKeyPhraseEntity> query = new TableQuery<RecordKeyPhraseEntity>().Where(combinedFilter);

            List<RecordKeyPhraseEntity> keyphraseEntities = new List<RecordKeyPhraseEntity>();
            TableContinuationToken token = null;

            do
            {
                // This method is synchronous, so the segmented query is awaited by blocking; a
                // failed query surfaces to the caller as an AggregateException.
                TableQuerySegment<RecordKeyPhraseEntity> segment =
                    tRecordAssociationKeyphrases.ExecuteQuerySegmentedAsync<RecordKeyPhraseEntity>(query, token).Result;
                token = segment.ContinuationToken;
                keyphraseEntities.AddRange(segment);
            } while (token != null && keyphraseEntities.Count < maxEntities);

            // Collapse the entities into one counter per distinct RowKey. The dictionary replaces a
            // linear List.Find per entity; the list keeps first-seen order until it is sorted.
            // Assumes KeyPhraseCount exposes the RowKey unchanged as KeyPhrase -- TODO confirm.
            Dictionary<string, KeyPhraseCount> countsByPhrase = new Dictionary<string, KeyPhraseCount>(StringComparer.Ordinal);
            foreach (RecordKeyPhraseEntity kp in keyphraseEntities)
            {
                KeyPhraseCount existing;
                if (countsByPhrase.TryGetValue(kp.RowKey, out existing))
                {
                    // Increment the number of keyphrases found
                    existing.Count++;
                }
                else
                {
                    KeyPhraseCount created = new KeyPhraseCount(kp.RowKey, 1);
                    countsByPhrase.Add(kp.RowKey, created);
                    keyPhrases.Add(created);
                }
            }

            // Sort by most common keyphrase in descending order
            keyPhrases.Sort((x, y) => y.Count.CompareTo(x.Count));

            return keyPhrases;
        }