/// <summary>
/// Searches a single dataset and wraps the raw search response in a <see cref="DataSearchResult"/>.
/// </summary>
/// <param name="ds">Dataset to search.</param>
/// <param name="queryString">Query as entered by the caller. It is passed through FixInvalidQuery before it is executed; the returned result still reports this original text in Q.</param>
/// <param name="page">Page number, forwarded unchanged to the underlying search.</param>
/// <param name="pageSize">Page size, forwarded unchanged to the underlying search.</param>
/// <param name="sort">Optional sort expression, forwarded unchanged.</param>
/// <param name="excludeBigProperties">Forwarded unchanged to the underlying search.</param>
/// <param name="withHighlighting">Forwarded unchanged to the underlying search.</param>
/// <param name="exactNumOfResults">Forwarded unchanged to the underlying search.</param>
/// <returns>
/// A result with IsValid = true. When the response reports no hits, Total is 0 and Result is an empty array.
/// </returns>
/// <remarks>
/// When the response is not valid, the exception built by DataSetException.GetExc
/// (status InvalidSearchQuery) is thrown.
/// </remarks>
public static DataSearchResult SearchData(DataSet ds, string queryString, int page, int pageSize,
    string sort = null, bool excludeBigProperties = true, bool withHighlighting = false, bool exactNumOfResults = false)
{
    Devmasters.Core.StopWatchEx sw = new Devmasters.Core.StopWatchEx();
    sw.Start();
    var query = Lib.Searching.Tools.FixInvalidQuery(queryString, queryShorcuts, queryOperators);
    var res = _searchData(ds, query, page, pageSize, sort, excludeBigProperties, withHighlighting, exactNumOfResults);
    sw.Stop();

    if (!res.IsValid)
    {
        throw DataSetException.GetExc(
            ds.DatasetId,
            ApiResponseStatus.InvalidSearchQuery.error.number,
            ApiResponseStatus.InvalidSearchQuery.error.description,
            queryString
        );
    }

    if (res.Total > 0)
    {
        return new DataSearchResult()
        {
            ElapsedTime = sw.Elapsed,
            Q = queryString,
            IsValid = true,
            Total = res.Total,
            // Each hit is round-tripped through JSON so callers get a dynamic JObject.
            // The projection is lazy: it runs whenever Result is enumerated.
            Result = res.Hits
                .Select(m => Newtonsoft.Json.JsonConvert.SerializeObject(m.Source))
                .Select(s => (dynamic)Newtonsoft.Json.Linq.JObject.Parse(s)),
            Page = page,
            PageSize = pageSize,
            DataSet = ds,
            ElasticResultsRaw = res,
        };
    }
    else
    {
        return new DataSearchResult()
        {
            ElapsedTime = sw.Elapsed,
            Q = queryString,
            IsValid = true,
            Total = 0,
            Result = new dynamic[] { },
            Page = page,
            PageSize = pageSize,
            DataSet = ds,
            ElasticResultsRaw = res,
        };
    }
}
/// <summary>
/// Runs the same query against several datasets in parallel and collects the per-dataset results.
/// </summary>
/// <param name="query">Query text. An empty or null query yields an empty result without searching.</param>
/// <param name="datasets">Datasets to search; when null, the production datasets from DataSetDB are used.</param>
/// <param name="page">Page number passed to every dataset search.</param>
/// <param name="pageSize">Page size passed to every dataset search.</param>
/// <param name="sort">Optional sort expression passed to every dataset search.</param>
/// <returns>
/// The combined result. Valid dataset results are added to Results; any exception thrown by a
/// dataset search is added to Exceptions instead of being propagated.
/// </returns>
public static DatasetMultiResult GeneralSearch(string query, IEnumerable<External.DataSets.DataSet> datasets = null, int page = 1, int pageSize = 20, string sort = null)
{
    var combined = new DatasetMultiResult()
    {
        Query = query,
        DataSource = "DatasetMultiResult.GeneralSearch"
    };

    if (string.IsNullOrEmpty(query))
    {
        return combined;
    }

    if (!Lib.Searching.Tools.ValidateQuery(query))
    {
        combined.Exceptions.Add(new System.Exception($"Invalid Query: {query}"));
        return combined;
    }

    IEnumerable<External.DataSets.DataSet> targets = datasets;
    if (targets == null)
    {
        targets = Lib.Data.External.DataSets.DataSetDB.ProductionDataSets.Get();
    }

    // Run sequentially under a debugger; otherwise keep the default degree of parallelism.
    var options = new ParallelOptions();
    if (System.Diagnostics.Debugger.IsAttached)
    {
        options.MaxDegreeOfParallelism = 1;
    }

    var timer = new Devmasters.Core.StopWatchEx();
    timer.Start();

    Parallel.ForEach(targets, options, dataset =>
    {
        try
        {
            var found = dataset.SearchData(query, page, pageSize, sort);
            if (found.IsValid)
            {
                combined.Results.Add(found);
            }
        }
        catch (System.Exception e)
        {
            // Dataset-specific and generic failures are recorded the same way.
            combined.Exceptions.Add(e);
        }
    });

    timer.Stop();
    combined.ElapsedTime = timer.Elapsed;
    return combined;
}
/// <summary>
/// Finds which known stem sequences from PoliticiStems occur in the given text.
/// </summary>
/// <param name="text">Text to scan; it is converted to stems with HlidacStatu.Lang.CS.Politici.Stems.</param>
/// <returns>
/// The distinct Item1 values (abbreviations) of every PoliticiStems entry whose stem sequence (Item2)
/// appears as a contiguous run in the stemmed text, in the order they were first matched.
/// Returns an empty array when stemming yields null.
/// </returns>
public static string[] FindCitations(string text)
{
    string[] sText = HlidacStatu.Lang.CS.Politici.Stems(text);
    if (sText == null)
    {
        return new string[] { };
    }

    // The list keeps first-match order; the set makes the duplicate check O(1).
    List<string> found = new List<string>();
    HashSet<string> seen = new HashSet<string>();

    foreach (var kv in PoliticiStems)
    {
        string zkratka = kv.Item1;
        string[] politik = kv.Item2;

        // Already reported through another entry: scanning again cannot change the result.
        if (seen.Contains(zkratka))
        {
            continue;
        }

        if (ContainsStemSequence(sText, politik))
        {
            seen.Add(zkratka);
            found.Add(zkratka);
        }
    }

    return found.ToArray();
}

/// <summary>Returns true when <paramref name="needle"/> occurs as a contiguous run inside <paramref name="haystack"/>.</summary>
private static bool ContainsStemSequence(string[] haystack, string[] needle)
{
    for (int start = 0; start < haystack.Length - (needle.Length - 1); start++)
    {
        bool same = true;
        for (int offset = 0; offset < needle.Length; offset++)
        {
            if (haystack[start + offset] != needle[offset])
            {
                same = false;
                break;
            }
        }

        if (same)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Searches contracts (Smlouva) with a simple query string and returns the wrapped response.
/// </summary>
/// <param name="query">Query text. A null or empty query returns an invalid, empty result without searching.</param>
/// <param name="page">Page number; stored in the result and forwarded to the core search.</param>
/// <param name="pageSize">Page size; stored in the result and forwarded to the core search.</param>
/// <param name="order">Result ordering; its integer value is stored in the result's Order as text.</param>
/// <param name="anyAggregation">Optional aggregation forwarded to the core search.</param>
/// <param name="platnyZaznam">When set, a "platnyZaznam:&lt;value&gt;" condition is added to the query via ModifyQuery.</param>
/// <param name="includeNeplatne">Forwarded unchanged to the core search.</param>
/// <param name="logError">When true, a changed query and an invalid response are logged.</param>
/// <param name="fixQuery">When true, the query is passed through FixInvalidQuery before it is executed.</param>
/// <returns>
/// The search result. Total is 0 and IsValid is false when the core search returns no response.
/// </returns>
public static SmlouvaSearchResult SimpleSearch(string query, int page, int pageSize, OrderResult order,
    AggregationContainerDescriptor<Lib.Data.Smlouva> anyAggregation = null,
    int? platnyZaznam = null, bool includeNeplatne = false,
    bool logError = true, bool fixQuery = true)
{
    var result = new SmlouvaSearchResult()
    {
        Page = page,
        PageSize = pageSize,
        OrigQuery = query,
        Q = query,
        Order = ((int)order).ToString()
    };

    if (string.IsNullOrEmpty(query))
    {
        result.Result = null;
        result.IsValid = false;
        result.Total = 0;
        return result;
    }

    Devmasters.Core.StopWatchEx sw = new Devmasters.Core.StopWatchEx();
    sw.Start();

    if (fixQuery)
    {
        query = FixInvalidQuery(query);
        result.Q = query;
    }

    if (logError && result.Q != result.OrigQuery)
    {
        HlidacStatu.Util.Consts.Logger.Debug(new Devmasters.Core.Logging.LogMessage()
            .SetMessage("Fixed query")
            .SetCustomKeyValue("runningQuery", result.Q)
            .SetCustomKeyValue("origQuery", result.OrigQuery)
            );
    }

    if (platnyZaznam.HasValue)
    {
        query = ModifyQuery(query, "platnyZaznam:" + platnyZaznam.Value);
    }

    ISearchResponse<Lib.Data.Smlouva> res =
        _coreSearch(GetSimpleQuery(query), page, pageSize, order, anyAggregation, platnyZaznam, includeNeplatne, logError);

    // The assignments below already treat a null response as possible, so the
    // error-logging check must not dereference it either.
    if (res != null && res.IsValid == false && logError)
    {
        Lib.ES.Manager.LogQueryError<Lib.Data.Smlouva>(res, query);
    }

    sw.Stop();

    result.ElapsedTime = sw.Elapsed;
    result.Total = res?.Total ?? 0;
    result.IsValid = res?.IsValid ?? false;
    result.ElasticResults = res;
    return result;
}
/// <summary>
/// Executes a public-tender (VerejnaZakazka) search described by <paramref name="search"/> and
/// writes the outcome (Q, Total, IsValid, ElasticResults, ElapsedTime) back into that same object.
/// </summary>
/// <param name="search">Search parameters; this instance is mutated and returned.</param>
/// <param name="anyAggregation">Optional aggregation; when null, no aggregation descriptor is supplied.</param>
/// <param name="logError">When true, a changed query and an invalid response are logged.</param>
/// <param name="fixQuery">When true, the original query is stored in OrigQuery and the query is passed through FixInvalidQuery.</param>
/// <param name="client">Client to use; when null, the one from Manager.GetESClient_VZ() is used.</param>
/// <param name="withHighlighting">
/// When true, highlighting is requested. If that response reports failed shards, the search is
/// repeated once without highlighting.
/// </param>
/// <returns>The same <paramref name="search"/> instance, updated with the response.</returns>
/// <remarks>Exceptions thrown by the search call are audited, logged and rethrown.</remarks>
public static VerejnaZakazkaSearchData SimpleSearch(
    VerejnaZakazkaSearchData search,
    AggregationContainerDescriptor<VerejnaZakazka> anyAggregation = null,
    bool logError = true, bool fixQuery = true, ElasticClient client = null,
    bool withHighlighting = false)
{
    if (client == null)
    {
        client = HlidacStatu.Lib.ES.Manager.GetESClient_VZ();
    }

    string query = search.Q ?? "";

    // Page 1 maps to offset 0; anything below that is clamped to the first page.
    int page = search.Page - 1;
    if (page < 0)
    {
        page = 0;
    }

    Func<AggregationContainerDescriptor<VerejnaZakazka>, AggregationContainerDescriptor<VerejnaZakazka>> aggrFunc
        = (aggr) => { return anyAggregation; };

    Devmasters.Core.StopWatchEx sw = new Devmasters.Core.StopWatchEx();
    sw.Start();

    if (fixQuery)
    {
        search.OrigQuery = query;
        query = Lib.Searching.Tools.FixInvalidQuery(query, queryShorcuts, queryOperators);
    }

    // Publish the running query BEFORE comparing it with the original. The comparison
    // used to run against the not-yet-updated search.Q, so a fixed query was never
    // reported and "runningQuery" carried the original text.
    search.Q = query;
    if (logError && fixQuery && search.Q != search.OrigQuery)
    {
        HlidacStatu.Util.Consts.Logger.Debug(new Devmasters.Core.Logging.LogMessage()
            .SetMessage("Fixed query")
            .SetCustomKeyValue("runningQuery", search.Q)
            .SetCustomKeyValue("origQuery", search.OrigQuery)
            );
    }

    // One request definition, used for both the first attempt and the retry.
    Func<bool, ISearchResponse<VerejnaZakazka>> runSearch = highlight => client
        .Search<VerejnaZakazka>(s => s
            .Size(search.PageSize)
            .Source(so => so.Excludes(ex => ex.Field("dokumenty.plainText")))
            .From(page * search.PageSize)
            .Query(q => GetSimpleQuery(search))
            .Sort(ss => GetSort(Convert.ToInt32(search.Order)))
            .Aggregations(aggrFunc)
            .Highlight(h => Lib.Searching.Tools.GetHighlight<VerejnaZakazka>(highlight))
            .TrackTotalHits(search.ExactNumOfResults ? true : (bool?)null)
        );

    ISearchResponse<VerejnaZakazka> res = null;
    try
    {
        res = runSearch(withHighlighting);

        // If some shards failed with highlighting on, do it again without highlighting.
        if (withHighlighting && res.Shards != null && res.Shards.Failed > 0)
        {
            res = runSearch(false);
        }
    }
    catch (Exception e)
    {
        Audit.Add(Audit.Operations.Search, "", "", "VerejnaZakazka", "error", search.Q, null);
        if (res != null && res.ServerError != null)
        {
            // search.Result may be unset here; a null dereference would hide the real exception.
            Lib.ES.Manager.LogQueryError<VerejnaZakazka>(res,
                "Exception, Orig query:" + search.OrigQuery + " query:" + search.Q
                + "\n\n res:" + search.Result?.ToString()
                , ex: e);
        }
        else
        {
            HlidacStatu.Util.Consts.Logger.Error("", e);
        }
        throw;
    }
    sw.Stop();

    Audit.Add(Audit.Operations.Search, "", "", "VerejnaZakazka", res.IsValid ? "valid" : "invalid", search.Q, null);

    if (res.IsValid == false && logError)
    {
        Lib.ES.Manager.LogQueryError<VerejnaZakazka>(res,
            "Exception, Orig query:" + search.OrigQuery + " query:" + search.Q
            + "\n\n res:" + search.Result?.ToString()
            );
    }

    search.Total = res?.Total ?? 0;
    search.IsValid = res?.IsValid ?? false;
    search.ElasticResults = res;
    search.ElapsedTime = sw.Elapsed;
    return search;
}
/// <summary>
/// Looks up company identifiers (ICO) by name in the in-memory name tables, using progressively
/// looser matching until <paramref name="limit"/> candidates have been collected.
/// </summary>
/// <param name="query">Company name to look for. A null or empty query returns an empty result.</param>
/// <param name="limit">Maximum number of identifiers returned.</param>
/// <returns>
/// Up to <paramref name="limit"/> identifiers in the order the steps produced them:
/// (1) exact name match, (2) exact match on the diacritics-free lower-cased name,
/// (3) prefix match, (4) substring match ordered by Levenshtein distance (only for queries of
/// at least 5 characters). Each later step runs only while fewer than <paramref name="limit"/>
/// items have been collected.
/// </returns>
/// <remarks>
/// NOTE(review): the result is not de-duplicated across steps, so an identifier matched by more
/// than one step can appear more than once — confirm whether callers rely on that.
/// </remarks>
public static IEnumerable<string> FindAllIcoInMemory(string query, int limit)
{
    if (string.IsNullOrEmpty(query))
    {
        return new string[] { };
    }

    string findAllIcoTimes = $"FindAllIco {query}\n";
    var items = new List<Tuple<string, decimal>>();

    Devmasters.Core.StopWatchEx sw = new Devmasters.Core.StopWatchEx();
    sw.Start();

    // Step 1: exact name match. AddRange enumerates the query once; the former
    // Count() > 0 pre-check walked the whole name table a second time.
    items.AddRange(StaticData.FirmyNazvy.Get()
        .Where(m => m.Value.Jmeno == query)
        .Select(m => new Tuple<string, decimal>(m.Key, 1m)));

    sw.Stop();
    findAllIcoTimes += $"step1:{sw.ElapsedMilliseconds}\n";

    string aQuery = Devmasters.Core.TextUtil.RemoveDiacritics(query).ToLower();

    if (items.Count < limit)
    {
        sw.Restart();
        // Step 2: exact match on the normalized name.
        if (StaticData.FirmyNazvyOnlyAscii.ContainsKey(aQuery))
        {
            var res = StaticData.FirmyNazvyOnlyAscii[aQuery]
                .Where(ico => !string.IsNullOrEmpty(ico))
                .Select(ico => new Tuple<string, decimal>(ico, 0.9m))
                .GroupBy(g => g.Item1, v => v.Item2, (g, v) => new Tuple<string, decimal>(g, v.Max()));
            items.AddRange(res);
        }
        sw.Stop();
        findAllIcoTimes += $"step2:{sw.ElapsedMilliseconds}\n";
    }

    if (items.Count < limit)
    {
        sw.Restart();
        // Step 3: normalized name starts with the query.
        var res = StaticData.FirmyNazvyOnlyAscii
            .Where(m => m.Key.StartsWith(aQuery, StringComparison.Ordinal))
            .Take(limit - items.Count)
            .SelectMany(m => m.Value
                .Where(ico => !string.IsNullOrEmpty(ico))
                .Select(ico => new Tuple<string, decimal>(ico, 0.5m)))
            .GroupBy(g => g.Item1, v => v.Item2, (g, v) => new Tuple<string, decimal>(g, v.Max()));
        items.AddRange(res);
        sw.Stop();
        findAllIcoTimes += $"step3:{sw.ElapsedMilliseconds}\n";
    }

    if (items.Count < limit && aQuery.Length >= 5)
    {
        sw.Restart();
        // Step 4: normalized name contains the query; closest names first.
        // The distance is computed once per candidate and reused for ordering and filtering.
        var res = StaticData.FirmyNazvyOnlyAscii
            .Where(m => m.Key.Contains(aQuery))
            .Select(m => new { Icos = m.Value, Distance = Validators.LevenshteinDistanceCompute(m.Key, aQuery) })
            .OrderBy(c => c.Distance)
            .Take(limit - items.Count)
            .Where(c => c.Distance < 10)
            .SelectMany(c => c.Icos
                .Where(ico => !string.IsNullOrEmpty(ico))
                .Select(ico => new Tuple<string, decimal>(ico, 0.5m)))
            .GroupBy(g => g.Item1, v => v.Item2, (g, v) => new Tuple<string, decimal>(g, v.Max()));
        items.AddRange(res);
        sw.Stop();
        findAllIcoTimes += $"step4:{sw.ElapsedMilliseconds}\n";
    }

    if (Devmasters.Core.Util.Config.GetConfigValue("LogSearchTimes") == "true")
    {
        HlidacStatu.Util.Consts.Logger.Info(findAllIcoTimes);
    }

    return items
        .Take(limit)
        .Select(m => m.Item1);
}
/// <summary>
/// Runs the query against all searchable areas (datasets, contracts, companies, public tenders,
/// people, bank transactions) in parallel and gathers the partial results into one object.
/// </summary>
/// <param name="query">Query text. A null or empty query returns an empty result without searching.</param>
/// <param name="page">Not read by this method; each partial search uses its own fixed paging.</param>
/// <param name="pageSize">Not read by this method; each partial search uses its own fixed paging.</param>
/// <returns>
/// The combined result. A failure in one partial search is logged and leaves that part unset;
/// it does not affect the other parts.
/// </returns>
public static MultiResult GeneralSearch(string query, int page = 1, int pageSize = 10)
{
    var combined = new MultiResult() { Query = query };
    if (string.IsNullOrEmpty(query))
    {
        return combined;
    }

    // Run sequentially under a debugger; otherwise keep the default degree of parallelism.
    var options = new ParallelOptions();
    if (System.Diagnostics.Debugger.IsAttached)
    {
        options.MaxDegreeOfParallelism = 1;
    }

    Action searchDatasets = () =>
    {
        try
        {
            combined.Datasets = Lib.Data.Search.DatasetMultiResult.GeneralSearch(query, null, 1, 5);
            if (combined.Datasets.Exceptions.Count > 0)
            {
                HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for DatasetMulti query " + query, combined.Datasets.GetExceptions());
            }
        }
        catch (System.Exception e)
        {
            HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for DatasetMulti query " + query, e);
        }
    };

    Action searchSmlouvy = () =>
    {
        try
        {
            var sumAggregation = new Nest.AggregationContainerDescriptor<HlidacStatu.Lib.Data.Smlouva>()
                .Sum("sumKc", m => m.Field(f => f.CalculatedPriceWithVATinCZK));
            combined.Smlouvy = HlidacStatu.Lib.ES.SearchTools.SimpleSearch(query, 1, 20,
                ES.SearchTools.OrderResult.Relevance,
                anyAggregation: sumAggregation);
        }
        catch (System.Exception e)
        {
            HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for Smlouvy query" + query, e);
        }
    };

    Action searchFirmy = () =>
    {
        try
        {
            var timer = new Devmasters.Core.StopWatchEx();
            timer.Start();
            combined.Firmy = new GeneralResult<string>(Firma.Search.FindAllIco(query, 1000));
            timer.Stop();
            combined.Firmy.ElapsedTime = timer.Elapsed;
        }
        catch (System.Exception e)
        {
            HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for Firmy query" + query, e);
        }
    };

    Action searchVZ = () =>
    {
        try
        {
            combined.VZ = VZ.VerejnaZakazka.Searching.SimpleSearch(query, null, 1, 20,
                (int)ES.VerejnaZakazkaSearchData.VZOrderResult.Relevance);
        }
        catch (System.Exception e)
        {
            HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for Verejne zakazky query" + query, e);
        }
    };

    Action searchOsoby = () =>
    {
        try
        {
            var timer = new Devmasters.Core.StopWatchEx();
            timer.Start();
            if (string.IsNullOrEmpty(query) || query.Length <= 2)
            {
                // Queries of two characters or fewer are not searched.
                combined.Osoby = new GeneralResult<Osoba>(new Osoba[] { });
            }
            else
            {
                var people = HlidacStatu.Lib.Data.Osoba.GetPolitikByNameFtx(query, 100)
                    .OrderBy(m => m.Prijmeni)
                    .ThenBy(m => m.Jmeno);
                combined.Osoby = new GeneralResult<Osoba>(people);
            }
            timer.Stop();
            combined.Osoby.ElapsedTime = timer.Elapsed;
        }
        catch (System.Exception e)
        {
            HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for Osoba query" + query, e);
        }
    };

    Action searchTransakce = () =>
    {
        try
        {
            var timer = new Devmasters.Core.StopWatchEx();
            timer.Start();
            var raw = HlidacStatu.Lib.Data.TransparentniUcty.BankovniUcty.SearchPolozkyRaw(query, null, 20);
            if (raw != null && raw.IsValid)
            {
                combined.Transakce = new GeneralResult<TransparentniUcty.BankovniPolozka>(
                    raw.Total,
                    raw.Hits.Select(m => m.Source),
                    raw.IsValid);
            }
            else
            {
                combined.Transakce = new GeneralResult<TransparentniUcty.BankovniPolozka>(new TransparentniUcty.BankovniPolozka[] { });
            }
            timer.Stop();
            combined.Transakce.ElapsedTime = timer.Elapsed;
        }
        catch (System.Exception e)
        {
            HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for bankovni ucty query" + query, e);
        }
    };

    Parallel.Invoke(options,
        searchDatasets,
        searchSmlouvy,
        searchFirmy,
        searchVZ,
        searchOsoby,
        searchTransakce);

    // Top up the people list from the matched companies only when companies were
    // found and fewer than five people came back from the name search.
    if (!combined.HasFirmy)
    {
        return combined;
    }
    if (combined.Osoby != null && combined.Osoby.Total >= 5)
    {
        return combined;
    }

    if (combined.Osoby == null)
    {
        combined.Osoby = new GeneralResult<Osoba>(new Osoba[] { });
    }

    var alreadyFound = combined.Osoby.Result;
    var fromFirmy = Osoba.GetPolitikByQueryFromFirmy(query,
        (int)(10 - (combined.Osoby?.Total ?? 0)),
        combined.Firmy.Result);
    combined.Osoby = new GeneralResult<Osoba>(alreadyFound.Concat(fromFirmy));
    combined.OsobaFtx = true;

    return combined;
}
/// <summary>
/// Searches contracts (Smlouva) with a simple query string, audits the search and returns the
/// wrapped response.
/// </summary>
/// <param name="query">Query text. A null or empty query returns an invalid, empty result without searching.</param>
/// <param name="page">Page number; stored in the result and forwarded to the core search.</param>
/// <param name="pageSize">Page size; stored in the result and forwarded to the core search.</param>
/// <param name="order">Result ordering; its integer value is stored in the result's Order as text.</param>
/// <param name="anyAggregation">Optional aggregation forwarded to the core search.</param>
/// <param name="platnyZaznam">When set, a "platnyZaznam:true" / "platnyZaznam:false" condition is AND-ed to the query.</param>
/// <param name="includeNeplatne">Forwarded unchanged to the core search.</param>
/// <param name="logError">When true, a changed query and an invalid response are logged.</param>
/// <param name="fixQuery">When true, the query is passed through FixInvalidQuery before it is executed.</param>
/// <param name="withHighlighting">Forwarded unchanged to the core search.</param>
/// <param name="exactNumOfResults">Forwarded unchanged to the core search.</param>
/// <returns>
/// The search result. Total is 0 and IsValid is false when the core search returns no response.
/// </returns>
public static SmlouvaSearchResult SimpleSearch(string query, int page, int pageSize, OrderResult order,
    AggregationContainerDescriptor<Lib.Data.Smlouva> anyAggregation = null,
    bool? platnyZaznam = null, bool includeNeplatne = false,
    bool logError = true, bool fixQuery = true,
    bool withHighlighting = false, bool exactNumOfResults = false)
{
    var result = new SmlouvaSearchResult()
    {
        Page = page,
        PageSize = pageSize,
        OrigQuery = query,
        Q = query,
        Order = ((int)order).ToString()
    };

    if (string.IsNullOrEmpty(query))
    {
        result.Result = null;
        result.IsValid = false;
        result.Total = 0;
        return result;
    }

    Devmasters.Core.StopWatchEx sw = new Devmasters.Core.StopWatchEx();
    sw.Start();

    if (fixQuery)
    {
        query = Searching.Tools.FixInvalidQuery(query, irules, Searching.Tools.DefaultQueryOperators);
        result.Q = query;
    }

    if (logError && result.Q != result.OrigQuery)
    {
        HlidacStatu.Util.Consts.Logger.Debug(new Devmasters.Core.Logging.LogMessage()
            .SetMessage("Fixed query")
            .SetCustomKeyValue("runningQuery", result.Q)
            .SetCustomKeyValue("origQuery", result.OrigQuery)
            );
    }

    if (platnyZaznam.HasValue)
    {
        query = Lib.Searching.Tools.ModifyQueryAND(query, "platnyZaznam:" + platnyZaznam.Value.ToString().ToLower());
    }

    ISearchResponse<Lib.Data.Smlouva> res =
        _coreSearch(GetSimpleQuery(query), page, pageSize, order, anyAggregation, platnyZaznam, includeNeplatne, logError, withHighlighting, exactNumOfResults);

    // The result assignments below already treat a null response as possible, so the
    // audit and error-logging steps must not dereference it either.
    bool isValid = res?.IsValid ?? false;

    Data.Audit.Add(Data.Audit.Operations.Search, "", "", "Smlouva", isValid ? "valid" : "invalid", query, null);

    if (res != null && !isValid && logError)
    {
        Lib.ES.Manager.LogQueryError<Lib.Data.Smlouva>(res, query);
    }

    sw.Stop();

    result.ElapsedTime = sw.Elapsed;
    try
    {
        result.Total = res?.Total ?? 0;
    }
    catch (Exception)
    {
        // Deliberate best effort kept from the original: a failure while reading Total
        // is reported as zero hits instead of failing the whole search.
        result.Total = 0;
    }
    result.IsValid = isValid;
    result.ElasticResults = res;
    return result;
}
/// <summary>
/// Runs the query against all searchable areas (contracts, companies, public tenders, people,
/// insolvencies, subsidies, datasets) in parallel and gathers the partial results into one object.
/// The contract and dataset searches are recorded as spans on the given APM transaction.
/// </summary>
/// <param name="apmtran">APM transaction used to capture the "Smlouvy" and "Dataset GeneralSearch" spans.</param>
/// <param name="query">Query text. A null or empty query returns an empty result without searching.</param>
/// <param name="page">Not read by this method; each partial search uses its own fixed paging.</param>
/// <param name="pageSize">Not read by this method; each partial search uses its own fixed paging.</param>
/// <param name="showBeta">When true, the subsidies (Dotace) search is also executed.</param>
/// <returns>
/// The combined result. For a query rejected by ValidateQuery only Smlouvy is set (IsValid = false).
/// A failure in one partial search is logged and does not affect the other parts.
/// </returns>
private static MultiResult GeneralSearch(Elastic.Apm.Api.ITransaction apmtran, string query, int page = 1, int pageSize = 10, bool showBeta = false)
{
    MultiResult res = new MultiResult() { Query = query };

    if (string.IsNullOrEmpty(query))
    {
        return res;
    }

    if (!Lib.Searching.Tools.ValidateQuery(query))
    {
        res.Smlouvy = new Searching.SmlouvaSearchResult();
        res.Smlouvy.Q = query;
        res.Smlouvy.IsValid = false;
        return res;
    }

    var totalsw = new Devmasters.Core.StopWatchEx();
    totalsw.Start();

    // Run sequentially under a debugger; otherwise keep the default degree of parallelism.
    ParallelOptions po = new ParallelOptions();
    po.MaxDegreeOfParallelism = System.Diagnostics.Debugger.IsAttached ? 1 : po.MaxDegreeOfParallelism;

    Parallel.Invoke(po,
        () =>
        {
            // Contracts.
            try
            {
                apmtran.CaptureSpan("Smlouvy", "search", () =>
                {
                    res.Smlouvy = HlidacStatu.Lib.Data.Smlouva.Search.SimpleSearch(query, 1, 20,
                        Smlouva.Search.OrderResult.Relevance,
                        anyAggregation: new Nest.AggregationContainerDescriptor<HlidacStatu.Lib.Data.Smlouva>().Sum("sumKc", m => m.Field(f => f.CalculatedPriceWithVATinCZK))
                        );
                });
            }
            catch (System.Exception e)
            {
                HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for Smlouvy query" + query, e);
            }
        },
        () =>
        {
            // Companies.
            try
            {
                Devmasters.Core.StopWatchEx sw = new Devmasters.Core.StopWatchEx();
                sw.Start();
                res.Firmy = new GeneralResult<string>(Firma.Search.FindAllIco(query, 50));
                sw.Stop();
                res.Firmy.ElapsedTime = sw.Elapsed;
            }
            catch (System.Exception e)
            {
                HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for Firmy query" + query, e);
            }
        },
        () =>
        {
            // Public tenders.
            try
            {
                res.VZ = VZ.VerejnaZakazka.Searching.SimpleSearch(query, null, 1, 5, (int)Lib.Searching.VerejnaZakazkaSearchData.VZOrderResult.Relevance);
            }
            catch (System.Exception e)
            {
                HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for Verejne zakazky query" + query, e);
            }
        },
        () =>
        {
            // People; queries of two characters or fewer are not searched.
            try
            {
                Devmasters.Core.StopWatchEx sw = new Devmasters.Core.StopWatchEx();
                sw.Start();
                if (!string.IsNullOrEmpty(query) && query.Length > 2)
                {
                    res.Osoby = new GeneralResult<Osoba>(
                        HlidacStatu.Lib.Data.Osoba.GetPolitikByNameFtx(query, 100)
                            .OrderBy(m => m.Prijmeni)
                            .ThenBy(m => m.Jmeno)
                        );
                }
                else
                {
                    res.Osoby = new GeneralResult<Osoba>(new Osoba[] { });
                }
                sw.Stop();
                res.Osoby.ElapsedTime = sw.Elapsed;
            }
            catch (System.Exception e)
            {
                HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for Osoba query" + query, e);
            }
        },
        () =>
        {
            // Insolvencies. The placeholder is assigned first so that res.Insolvence still
            // carries the query if the real search throws.
            try
            {
                var iqu = new Searching.InsolvenceSearchResult { Q = query, PageSize = 5 };
                res.Insolvence = iqu;
                // (a former "if (showBeta)" guard is commented out here, so this always runs)
                res.Insolvence = Insolvence.Insolvence.SimpleSearch(new Searching.InsolvenceSearchResult { Q = query, PageSize = 5 });
            }
            catch (System.Exception e)
            {
                Util.Consts.Logger.Error("MultiResult GeneralSearch for insolvence query" + query, e);
            }
        },
        () =>
        {
            // Subsidies; only searched when showBeta is set.
            try
            {
                if (showBeta)
                {
                    var dotaceService = new Dotace.DotaceService();
                    var iqu = new Searching.DotaceSearchResult { Q = query, PageSize = 5 };
                    res.Dotace = iqu;
                    res.Dotace = dotaceService.SimpleSearch(new Searching.DotaceSearchResult { Q = query, PageSize = 5 });
                }
            }
            catch (System.Exception e)
            {
                // This branch used to log "insolvence" (copy-paste), which sent
                // subsidy-search failures to the wrong place in the logs.
                Util.Consts.Logger.Error("MultiResult GeneralSearch for dotace query" + query, e);
            }
        },
        () =>
        {
            // Datasets.
            try
            {
                apmtran.CaptureSpan("Dataset GeneralSearch", "search", () =>
                {
                    res.Datasets = Lib.Data.Search.DatasetMultiResult.GeneralSearch(query, null, 1, 5);
                    if (res.Datasets.Exceptions.Count > 0)
                    {
                        HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for DatasetMulti query " + query, res.Datasets.GetExceptions());
                    }
                });
            }
            catch (System.Exception e)
            {
                HlidacStatu.Util.Consts.Logger.Error("MultiResult GeneralSearch for DatasetMulti query " + query, e);
            }
        }
    );

    //TODO too slow, temporarily disabled
    if (false && res.HasFirmy && (res.Osoby == null || res.Osoby.Total < 5))
    {
        var sw = new Devmasters.Core.StopWatchEx();
        sw.Start();
        if (res.Osoby == null)
        {
            res.Osoby = new GeneralResult<Osoba>(new Osoba[] { });
        }
        res.Osoby = new GeneralResult<Osoba>(res.Osoby.Result
            .Concat(Osoba.GetPolitikByQueryFromFirmy(query, (int)(10 - (res.Osoby?.Total ?? 0)), res.Firmy.Result))
            );
        res.OsobaFtx = true;
        sw.Stop();
        res.AddOsobyTime = sw.Elapsed;
    }

    totalsw.Stop();
    res.TotalSearchTime = totalsw.Elapsed;
    return res;
}