Beispiel #1
0
        /// <summary>
        /// Applies <paramref name="query"/> to the substance store via <c>BuildQuery</c>
        /// and maps at most 100 matching substances to previews.
        /// </summary>
        /// <param name="query">Search criteria handed to <c>BuildQuery</c>.</param>
        /// <returns>
        /// One <see cref="SubstancePreview"/> per matched substance. The preview name is the
        /// first substance name containing the search string reported by <c>BuildQuery</c>
        /// (case-insensitively); when no name matches, the first available name is used,
        /// or <c>null</c> if the substance has no named entries.
        /// </returns>
        public List <SubstancePreview> GetByQuery(QueryModel query)
        {
            // NOTE(review): the materialized element list was never read in this method.
            // The call is kept in case loading it has side effects (e.g. ORM caching) —
            // confirm and remove if it does not.
            _elements.GetAll().ToList();

            var qToDb = _substances.GetAll();

            var name = "";

            qToDb = BuildQuery(qToDb, query, out name);

            // Names are compared lower-cased below, so the needle must be lower-cased too;
            // a null search string is treated as "match anything".
            var needle = (name ?? "").ToLower();

            var list = qToDb.Take(100).ToList();

            var result = list.Select(x =>
            {
                var nameValues = x.Names.Select(n => n.Value).Where(v => v != null).ToList();

                // Previously a substance without a matching name caused a
                // NullReferenceException; fall back to the first name instead.
                var displayName = nameValues.FirstOrDefault(v => v.ToLower().Contains(needle))
                                  ?? nameValues.FirstOrDefault();

                return new SubstancePreview
                {
                    Name       = displayName,
                    Formula    = x.Formula,
                    Categories = GetCategoryList(x.Categories).OrderBy(c => c.Id).Select(c => c.Name).Distinct().ToArray(),
                    Synonyms   = x.Names.Select(n => n.Value).Take(12).ToArray(),
                    Scheme     = x.Scheme.Select(s => s.Value.HtmlDecode()).FirstOrDefault(),
                    Id         = x.Id
                };
            }).ToList();

            return result;
        }
Beispiel #2
0
        /// <summary>
        /// For every stored substance that has a CAS number, crawls the matching
        /// chemindex.com page and copies the scraped properties (boiling point, density,
        /// flash point, hazard symbols, melting point, refractive index, vapour pressure,
        /// water solubility) onto the substance, then writes all updated substances back
        /// through <c>_substances.UpdateAll</c>.
        /// </summary>
        /// <returns>
        /// Always <c>null</c>. Progress, crawl errors and per-substance failures are
        /// written to the console.
        /// </returns>
        public override string Crawl()
        {
            Console.WriteLine("Start");

            var ward = new CriticalExCounter();
            var t    = Stopwatch.StartNew();

            var addr = "http://www.chemindex.com/";

            var initialRange = _substances.GetAll().ToDictionary(x => x.Id);
            var resultRange  = new Dictionary <int, Substance>();

            Console.WriteLine(String.Format("\nTime elapsed to dic: {0}\n", t.Elapsed));

            int count = 0;
            int all   = initialRange.Count;

            foreach (var entry in initialRange)
            {
                ++count;

                if (count % 25 == 0)
                {
                    Console.WriteLine(String.Format("\nElements: {0} of {1}. Time elapsed: {2}\n", count, all, t.Elapsed));
                }

                var substance = entry.Value;

                if (!substance.CAS.HasValue())
                {
                    continue;
                }

                // Throttle only when a request is actually about to be sent; substances
                // skipped above no longer cost a delay.
                System.Threading.Thread.Sleep(GetRnd());

                var crawler = GetCrawler();
                var uri     = new Uri(addr + substance.CAS + "-cas.html");

                // Set to a description of what went wrong; null means the substance was updated.
                string failure = null;

                try
                {
                    using (var cToken = new CancellationTokenSource())
                    {
                        crawler.CrawlBag.elements = new ConcurrentBag <SpiderSubstanceCI>();
                        var result = crawler.Crawl(uri, cToken);

                        // Report the crawl error before looking at the parsed data, so it is
                        // not lost when nothing was parsed.
                        if (result.ErrorOccurred)
                        {
                            Console.WriteLine(String.Format("Crawl of {0} completed with error: {1}",
                                                            result.RootUri.AbsoluteUri,
                                                            result.ErrorException.Message));
                        }

                        var parsed = crawler.CrawlBag.elements as ConcurrentBag <SpiderSubstanceCI>;

                        if (parsed == null || parsed.IsEmpty)
                        {
                            failure = "no data was parsed from the page";
                        }
                        else
                        {
                            var element = parsed.First();

                            substance.BoilingPoint    = element.BoilingPoint;
                            substance.Density         = element.Density;
                            substance.FlashPoint      = element.FlashPoint;
                            substance.HazardSymbols   = element.HazardSymbols;
                            substance.MeltingPoint    = element.MeltingPoint;
                            substance.RefractiveIndex = element.RefractiveIndex;
                            substance.VapourPressur   = element.VapourPressur;
                            substance.WaterSolubility = element.WaterSolubility;

                            resultRange.Add(entry.Key, substance);

                            ward.Tick();
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: one failing page must not abort the whole run,
                    // but the reason is no longer swallowed silently.
                    failure = ex.Message;
                }

                if (failure != null)
                {
                    Console.WriteLine(String.Format("Crawl of {0} failed: {1}", uri.AbsoluteUri, failure));

                    ward.Bad();
                    if (ward.IsCritical())
                    {
                        // Flush what has been collected so far once the failure counter
                        // reports a critical state.
                        // NOTE(review): the loop keeps running and resultRange is not cleared,
                        // so the same entries are passed to UpdateAll again later — confirm
                        // this is intended.
                        Console.WriteLine(String.Format("Time elapsed scan: {0}", t.Elapsed));
                        _substances.UpdateAll(resultRange);
                        Console.WriteLine(String.Format("Time elapsed update: {0}", t.Elapsed));
                    }
                }
            }

            Console.WriteLine(String.Format("Time elapsed scan: {0}", t.Elapsed));
            _substances.UpdateAll(resultRange);
            Console.WriteLine(String.Format("Time elapsed update: {0}", t.Elapsed));

            // The return value has always been null; errors are reported on the console.
            return null;
        }
Beispiel #3
0
        /// <summary>
        /// Crawls the easychem.org substance reference for <c>cat0</c> values 1 through 128,
        /// converts the scraped entries to <see cref="Substance"/> objects and adds those
        /// not already present in the store through <c>_substances.AddMany</c>.
        /// </summary>
        /// <returns>
        /// The concatenated error messages of all crawls that reported an error;
        /// an empty string when none did.
        /// </returns>
        public override string Crawl()
        {
            // Range of the "cat0" query parameter that is crawled (inclusive).
            const int firstCat = 1;
            const int lastCat  = 128;

            var timer = Stopwatch.StartNew();

            string ans = "";

            var substSet = new SortedSet <SpiderSubstance>();

            var addr = "http://easychem.org/ru/subst-ref/?cat0=";

            for (int i = firstCat; i <= lastCat; ++i)
            {
                var crawler = GetCrawler();
                var uri     = new Uri(addr + i + "&pg=1");

                crawler.CrawlBag.elements = new ConcurrentBag <SpiderSubstance>();

                using (var cToken = new CancellationTokenSource())
                {
                    var result = crawler.Crawl(uri, cToken);

                    var elements = crawler.CrawlBag.elements as ConcurrentBag <SpiderSubstance>;

                    // A missing or replaced bag is treated as "nothing parsed" instead of
                    // failing with a NullReferenceException.
                    if (elements != null)
                    {
                        foreach (var item in elements)
                        {
                            substSet.Add(item);
                        }
                    }

                    if (result.ErrorOccurred)
                    {
                        ans += String.Format("Crawl of {0} completed with error: {1}\n",
                                             result.RootUri.AbsoluteUri,
                                             result.ErrorException.Message);
                    }
                }
            }

            Console.WriteLine(String.Format("Time elapsed : {0}, PARSED", timer.Elapsed.TotalMinutes));

            var contextSet = new SortedSet <Substance>(_substances.GetAll());

            Console.WriteLine(String.Format("Time elapsed : {0}, SET_CREATED", timer.Elapsed.TotalMinutes));

            // Load the categories once and index them by name, instead of querying
            // _categories.GetAll() again for every category of every substance.
            // NOTE(review): names are now compared in memory with exact (ordinal) equality;
            // if GetAll() is a database query with a case-insensitive collation the old
            // lookup may have been more lenient — confirm.
            var categoriesByName = _categories.GetAll().ToLookup(c => c.Name);

            // Deferred: the substances are built while the loop below enumerates them.
            var trueSubstList = substSet.Select(x => new Substance
            {
                CAS        = x.CAS,
                Formula    = x.BruttoFormula,
                Names      = x.Names.Select(n => new SubstanceName(n)).ToList(),
                Scheme     = x.Formulas.Select(f => new SubstanceScheme(f)).ToList(),
                Categories = x.Categories
                              .Select(c => categoriesByName[c].FirstOrDefault())
                              .Where(v => v != null)
                              .ToList()
            });

            Console.WriteLine(String.Format("Time elapsed : {0}, TRUE_LIST_Q", timer.Elapsed.TotalMinutes));
            var listToAdd = new List <Substance>();

            foreach (var item in trueSubstList)
            {
                // SortedSet.Add returns false for substances that compare equal to one
                // already in the store, so only new ones are queued.
                if (contextSet.Add(item))
                {
                    listToAdd.Add(item);
                }
            }
            Console.WriteLine(String.Format("Time elapsed : {0}, ALL_ADD", timer.Elapsed.TotalMinutes));

            _substances.AddMany(listToAdd);
            Console.WriteLine(String.Format("Time elapsed : {0}, elements found: {1}", timer.Elapsed.TotalMinutes, listToAdd.Count));
            return ans;
        }