/// <summary>
/// Builds one <see cref="FormulaItem"/> for every CAS number found in the supplied lines.
/// </summary>
/// <param name="lines">Text lines to scan; each line is handed to <c>helpers.GetCASNumbers</c>.</param>
/// <param name="searchData">Supplies <c>KnownChemicals</c>, used to resolve a CAS number to a chemical name.</param>
/// <returns>
/// The items in discovery order. A CAS number found in <c>KnownChemicals</c> gets that entry's name and
/// <c>Scores.HIGH_SCORE</c>; any other gets the placeholder name and <c>Scores.MEDIUM_SCORE</c>.
/// </returns>
private List<FormulaItem> BuildFormula(List<string> lines, SearchableContent searchData)
        {
            List<FormulaItem> formulaItems = new List<FormulaItem>();

            foreach (string line in lines)
            {
                foreach (string cas in helpers.GetCASNumbers(line))
                {
                    // Single pass over the known chemicals: the first match wins,
                    // exactly as the previous Exists + Where(...).FirstOrDefault() pair did.
                    int knownIndex = searchData.KnownChemicals.FindIndex(c => c.CAS == cas);
                    bool isKnown   = knownIndex >= 0;

                    formulaItems.Add(new FormulaItem
                    {
                        CASNumber = cas,
                        // The whole source line is kept alongside the CAS number.
                        OtherInfo = line,
                        ChemName  = isKnown ? searchData.KnownChemicals[knownIndex].ChemName : "NAME NOT FOUND",
                        Score     = isKnown ? (int)Scores.HIGH_SCORE : (int)Scores.MEDIUM_SCORE
                    });
                }
            }

            return formulaItems;
        }
        /// <summary>
        /// Creates the engine, wires up its helper/builder collaborators and loads the
        /// reference lists into <c>searchData</c> through the memory cache.
        /// </summary>
        /// <param name="_cache">Memory cache shared with the helper and builder objects.</param>
        /// <param name="_settings">Service settings passed on to the helper and builder objects.</param>
        public TextProcessorEngine(IMemoryCache _cache, ServiceSettings _settings)
        {
            cache    = _cache;
            settings = _settings;

            // Collaborators all receive the same cache and settings instances.
            helpers        = new Helpers(cache, settings);
            docItemBuilder = new DocItemBuilder(cache, settings);
            formulaBuilder = new FormulaBuilder(cache, settings);

            // Start with empty lists; three of them are replaced by cached data just below,
            // SectionList stays empty at this point.
            searchData = new SearchableContent()
            {
                KnownChemicals   = new List<Component>(),
                OtherIdentifiers = new List<OtherIdentifier>(),
                SectionHeaders   = new List<SectionHeader>(),
                SectionList      = new List<Section>()
            };

            // Each list is taken from the cache under its key; the factory lambda
            // asks the helpers for it when the cache has no entry.
            searchData.KnownChemicals = cache.GetOrCreate<List<Component>>(
                "knownComponents",
                entry => helpers.GetKnownComponents());

            searchData.SectionHeaders = cache.GetOrCreate<List<SectionHeader>>(
                "sectionHeaders",
                entry => helpers.GetSectionHeaders());

            searchData.OtherIdentifiers = cache.GetOrCreate<List<OtherIdentifier>>(
                "otherIdentifiers",
                entry => helpers.GetOtherIdentifiers());
        }
Exemple #3
0
        /// <summary>
        /// Scans the text lines for each of the item's search terms and, on a line that contains a term,
        /// hands the text following the term to the checker selected by the item's hint.
        /// </summary>
        /// <param name="docItem">Item whose <c>Terms</c> are searched for and whose <c>Hint</c> selects the checker.</param>
        /// <param name="textlines">Lines of the document to search, in order.</param>
        /// <param name="searchData">Not used by this method; kept for interface compatibility.</param>
        /// <returns>
        /// The item as returned by the checker as soon as its <c>Result</c> is non-empty,
        /// otherwise the item after every term and line has been tried.
        /// </returns>
        public DocItem SearchForItem(DocItem docItem, List<string> textlines, SearchableContent searchData)
        {
            foreach (string searchText in docItem.Terms)
            {
                int currentLine = 0;

                // Read per term on purpose: docItem may have been replaced by a checker on an earlier term.
                string termType = docItem.Hint.ToLower();

                // Lines are compared in lower case, so the term must be lowered too; otherwise a term
                // containing upper-case letters is detected but cannot be located on the line.
                string loweredTerm = searchText.ToLower();

                // NOTE: restricting the search to docItem.Section was sketched here previously
                // but was disabled; every line in textlines is searched.

                foreach (string line in textlines)
                {
                    string evalLine  = line.ToLower();
                    int    termIndex = evalLine.IndexOf(loweredTerm, System.StringComparison.Ordinal);

                    if (termIndex >= 0)
                    {
                        // Skip the term plus one following character (the separator after the term).
                        // Clamp so a term at the very end of the line yields an empty remainder
                        // instead of an ArgumentOutOfRangeException from Substring.
                        int    valueStart = termIndex + loweredTerm.Length + 1;
                        string restofLine = valueStart < evalLine.Length
                            ? evalLine.Substring(valueStart).Trim()
                            : string.Empty;

                        switch (termType)
                        {
                            case "number":
                                List<string> evalWords = evalLine.Split(" ").ToList();
                                evalWords.RemoveAll(x => x.Trim() == string.Empty);

                                docItem = CheckValue(docItem, searchText, restofLine,
                                                     evalWords, evalLine, textlines, currentLine);
                                break;

                            case "yesno":
                                docItem = CheckBool(docItem, restofLine);
                                break;

                            default:
                                // "text" and any unrecognised hint are both handled as plain text.
                                docItem = CheckText(docItem, restofLine);
                                break;
                        }
                    }

                    if (!string.IsNullOrEmpty(docItem.Result))
                    {
                        Log.Debug($"[Found DocItem Term]: {searchText} [Score]: {docItem.Score} [Result]: {docItem.Result} ");
                        return docItem;
                    }
                    else
                    {
                        Log.Debug($"[Missed DocItem Term]: {searchText} ");
                    }

                    currentLine++;
                }
            }

            return docItem;
        }
Exemple #4
0
 /// <summary>
 /// Stores the given content through the Elasticsearch repository.
 /// </summary>
 /// <param name="content">The content handed to the repository's <c>Save</c> call.</param>
 public void Index(SearchableContent content)
 {
     // The mapping check is always issued before the save.
     _elasticSearchRepository.EnsureMappingExist();

     _elasticSearchRepository.Save(content);
 }
Exemple #5
0
        /// <summary>
        /// Returns the lines belonging to a document section, reusing a previously extracted
        /// section from <c>searchData.SectionList</c> when one exists.
        /// </summary>
        /// <param name="textlines">All lines of the document.</param>
        /// <param name="startSection">Number of the section to extract; also the key under which the result is remembered.</param>
        /// <param name="stopSection">Number of the section whose header titles mark the end of the fragment.</param>
        /// <param name="searchData">Supplies <c>SectionHeaders</c> and holds the <c>SectionList</c> of already extracted sections.</param>
        /// <returns>The remembered content for <paramref name="startSection"/>, or the freshly extracted fragment.</returns>
        public List<string> GetSection(List<string> textlines, string startSection, string stopSection, SearchableContent searchData)
        {
            // If we already parsed the section, use it again.
            // NOTE: the lookup is keyed on startSection only, so a later call with the same
            // start but a different stopSection gets the first call's content.
            foreach (Section parsed in searchData.SectionList)
            {
                if (parsed.Number == startSection)
                {
                    return parsed.Content;
                }
            }

            // Header titles are lowered here before matching (the original note says the stored
            // titles are already lower case, in which case ToLower is a no-op).
            List<string> startTextList = searchData.SectionHeaders
                                         .Where(x => x.Number == startSection)
                                         .Select(y => y.Title.ToLower())
                                         .ToList();

            List<string> stopTextList = searchData.SectionHeaders
                                        .Where(x => x.Number == stopSection)
                                        .Select(y => y.Title.ToLower())
                                        .ToList();

            List<string> frag = GetDocumentFragment(startTextList, stopTextList, textlines);

            // Remember the result (even an empty one) so the document is not re-scanned for this section.
            searchData.SectionList.Add(new Section()
            {
                Number  = startSection,
                Content = frag
            });

            return frag;
        }
        /// <summary>
        /// Extracts the formulation items from a document, looking first between sections 3 and 4
        /// and, when that yields no lines, between sections 2 and 3.
        /// </summary>
        /// <param name="textlines">All lines of the document.</param>
        /// <param name="requestGuid">Request identifier, used only in the log message.</param>
        /// <param name="searchData">Search data passed through to the section lookup and the formula builder.</param>
        /// <returns>The formula items built from the located section, or an empty list when nothing was found.</returns>
        public List<FormulaItem> GetFormulation(List<string> textlines, string requestGuid, SearchableContent searchData)
        {
            List<string> sectionLines = helpers.GetSection(textlines, "3", "4", searchData);

            if (sectionLines.Count > 0)
            {
                // NOTE(review): when section 3 has lines but no CAS numbers the result is empty and the
                // section 2 fallback below is not attempted - confirm this is intended.
                return BuildFormula(sectionLines, searchData);
            }

            // try pre GHS structure
            sectionLines = helpers.GetSection(textlines, "2", "3", searchData);

            List<FormulaItem> result = sectionLines.Count > 0
                ? BuildFormula(sectionLines, searchData)
                : new List<FormulaItem>();

            if (result.Count == 0)
            {
                Log.Information($"No formula section found for request {requestGuid}");
            }

            return result;
        }