/// <summary>
/// Builds one <see cref="FormulaItem"/> for every CAS number that
/// <c>helpers.GetCASNumbers</c> returns for each of the given lines.
/// </summary>
/// <param name="lines">Text lines to scan for CAS numbers.</param>
/// <param name="searchData">Provides <c>KnownChemicals</c>, used to look up a chemical name by CAS number.</param>
/// <returns>
/// The items in the order they were found. A CAS number present in <c>KnownChemicals</c>
/// gets that entry's name and <c>Scores.HIGH_SCORE</c>; any other CAS number gets the
/// name "NAME NOT FOUND" and <c>Scores.MEDIUM_SCORE</c>.
/// </returns>
private List<FormulaItem> BuildFormula(List<string> lines, SearchableContent searchData)
{
    List<FormulaItem> formulaItems = new List<FormulaItem>();

    foreach (string line in lines)
    {
        foreach (string cas in helpers.GetCASNumbers(line))
        {
            // The whole source line is kept alongside the CAS number it came from.
            FormulaItem formulaItem = new FormulaItem { CASNumber = cas, OtherInfo = line };

            // One pass over the known chemicals: FindIndex yields the first match,
            // which is the same entry the previous Exists + Where/FirstOrDefault pair selected.
            int knownIndex = searchData.KnownChemicals.FindIndex(c => c.CAS == cas);
            if (knownIndex >= 0)
            {
                formulaItem.ChemName = searchData.KnownChemicals[knownIndex].ChemName;
                formulaItem.Score = (int)Scores.HIGH_SCORE;
            }
            else
            {
                formulaItem.ChemName = "NAME NOT FOUND";
                formulaItem.Score = (int)Scores.MEDIUM_SCORE;
            }

            formulaItems.Add(formulaItem);
        }
    }

    return formulaItems;
}
/// <summary>
/// Stores the cache and settings, creates the helper/builder collaborators, and
/// populates <c>searchData</c> from the cache (loading through <c>helpers</c> on a miss).
/// </summary>
/// <param name="_cache">Memory cache used for the reference lists and passed to the collaborators.</param>
/// <param name="_settings">Service settings passed to the collaborators.</param>
public TextProcessorEngine(IMemoryCache _cache, ServiceSettings _settings)
{
    cache = _cache;
    settings = _settings;

    helpers = new Helpers(cache, settings);
    docItemBuilder = new DocItemBuilder(cache, settings);
    formulaBuilder = new FormulaBuilder(cache, settings);

    // Each reference list is read from the cache under its own key; the factory
    // delegate asks the helpers for the list when the key is not cached yet.
    // The lookups run in the order: known components, section headers, other identifiers.
    searchData = new SearchableContent
    {
        KnownChemicals = cache.GetOrCreate<List<Component>>(
            "knownComponents",
            cacheEntry => helpers.GetKnownComponents()),
        SectionHeaders = cache.GetOrCreate<List<SectionHeader>>(
            "sectionHeaders",
            cacheEntry => helpers.GetSectionHeaders()),
        OtherIdentifiers = cache.GetOrCreate<List<OtherIdentifier>>(
            "otherIdentifiers",
            cacheEntry => helpers.GetOtherIdentifiers()),

        // Starts empty; not loaded from the cache here.
        SectionList = new List<Section>()
    };
}
/// <summary>
/// Looks for each of the item's search terms in the given text lines and, on a line that
/// contains the term, hands the text following the term to the check routine selected by
/// the item's hint ("number", "text", "yesno"; anything else is treated as text).
/// </summary>
/// <param name="docItem">Item whose <c>Terms</c> are searched for and whose <c>Hint</c> selects the check routine.</param>
/// <param name="textlines">Document lines to scan. All lines are scanned; no section filtering is applied here.</param>
/// <param name="searchData">Not used by this method.</param>
/// <returns>
/// The item as returned by the check routines. The search stops at the first line after
/// which <c>Result</c> is non-empty; otherwise the item is returned once all terms and
/// lines have been tried.
/// </returns>
public DocItem SearchForItem(DocItem docItem, List<string> textlines, SearchableContent searchData)
{
    foreach (string searchText in docItem.Terms)
    {
        int currentLine = 0;

        // A missing hint is handled like an unknown hint: it falls through to the text check.
        string termType = docItem.Hint?.ToLower();

        // Lines are compared in lower case, so the term must be lower-cased for the
        // position lookup as well as for the containment test.
        string loweredTerm = searchText.ToLower();

        foreach (string line in textlines)
        {
            string evalLine = line.ToLower();
            int termIndex = evalLine.IndexOf(loweredTerm, System.StringComparison.Ordinal);

            if (termIndex >= 0)
            {
                // Skip the term plus the one character after it (the separator), but never
                // past the end of the line: a term at the very end leaves an empty remainder.
                int restStart = System.Math.Min(termIndex + loweredTerm.Length + 1, evalLine.Length);
                string restofLine = evalLine.Substring(restStart).Trim();

                switch (termType)
                {
                    case "number":
                        List<string> evalWords = evalLine.Split(" ").ToList();
                        evalWords.RemoveAll(x => x.Trim() == string.Empty);
                        docItem = CheckValue(docItem, searchText, restofLine, evalWords, evalLine, textlines, currentLine);
                        break;

                    case "yesno":
                        docItem = CheckBool(docItem, restofLine);
                        break;

                    default:
                        // "text" and any unrecognised hint.
                        docItem = CheckText(docItem, restofLine);
                        break;
                }
            }

            if (!string.IsNullOrEmpty(docItem.Result))
            {
                Log.Debug($"[Found DocItem Term]: {searchText} [Score]: {docItem.Score} [Result]: {docItem.Result} ");
                return docItem;
            }
            else
            {
                Log.Debug($"[Missed DocItem Term]: {searchText} ");
            }

            currentLine++;
        }
    }

    return docItem;
}
/// <summary>
/// Saves the given content through <c>_elasticSearchRepository</c>, after first asking
/// the repository to ensure its mapping exists.
/// </summary>
/// <param name="content">The content passed to the repository's <c>Save</c>.</param>
public void Index(SearchableContent content)
{
    // The mapping check always runs before the save.
    _elasticSearchRepository.EnsureMappingExist();

    _elasticSearchRepository.Save(content);
}
/// <summary>
/// Returns the lines of the document section identified by <paramref name="startSection"/>,
/// reusing a previously extracted section from <c>searchData.SectionList</c> when present.
/// </summary>
/// <param name="textlines">All lines of the document.</param>
/// <param name="startSection">Number of the section to extract; also the key under which the result is remembered.</param>
/// <param name="stopSection">Number of the section whose header titles mark the end of the fragment.</param>
/// <param name="searchData">Provides the section headers and holds the list of already extracted sections.</param>
/// <returns>The remembered content for the section, or the newly extracted fragment.</returns>
/// <remarks>
/// The remembered entry is looked up by <paramref name="startSection"/> only, so a later
/// call with the same start but a different <paramref name="stopSection"/> gets the
/// content extracted by the earlier call.
/// </remarks>
public List<string> GetSection(List<string> textlines, string startSection, string stopSection, SearchableContent searchData)
{
    // If we already parsed the section, use it again (single pass, first match wins).
    foreach (var parsed in searchData.SectionList)
    {
        if (parsed.Number == startSection)
        {
            return parsed.Content;
        }
    }

    // NOTE(review): the original comment states the section header strings are already
    // lower case; the ToLower() calls are kept so behavior does not depend on that.
    List<string> startTextList = searchData.SectionHeaders
        .Where(x => x.Number == startSection)
        .Select(y => y.Title.ToLower())
        .ToList();
    List<string> stopTextList = searchData.SectionHeaders
        .Where(x => x.Number == stopSection)
        .Select(y => y.Title.ToLower())
        .ToList();

    List<string> frag = GetDocumentFragment(startTextList, stopTextList, textlines);

    // Guard against adding a second entry for the same section number.
    if (!searchData.SectionList.Any(s => s.Number == startSection))
    {
        searchData.SectionList.Add(new Section() { Number = startSection, Content = frag });
    }

    return frag;
}
/// <summary>
/// Extracts the formulation items from a document: first from the section running from
/// header "3" to header "4", then, if that yields no items, from the section running from
/// header "2" to header "3" (the pre-GHS structure).
/// </summary>
/// <param name="textlines">All lines of the document.</param>
/// <param name="requestGuid">Request identifier, used only in the log message.</param>
/// <param name="searchData">Passed through to section extraction and formula building.</param>
/// <returns>The formula items found; an empty list when neither section yields any.</returns>
public List<FormulaItem> GetFormulation(List<string> textlines, string requestGuid, SearchableContent searchData)
{
    List<FormulaItem> items = new List<FormulaItem>();

    List<string> formulaLines = helpers.GetSection(textlines, "3", "4", searchData);
    if (formulaLines.Count > 0)
    {
        items = BuildFormula(formulaLines, searchData);
    }

    // The fallback depends on whether any items were produced, not merely on whether the
    // first section had lines: previously a non-empty section 3 without any formula
    // items skipped both the fallback and the log entry.
    if (items.Count == 0)
    {
        //try pre GHS structure
        formulaLines = helpers.GetSection(textlines, "2", "3", searchData);
        if (formulaLines.Count > 0)
        {
            items = BuildFormula(formulaLines, searchData);
        }
    }

    if (items.Count == 0)
    {
        Log.Information($"No formula section found for request {requestGuid}");
    }

    return items;
}