Ejemplo n.º 1
0
        /// <summary>
        /// Loads a listing page, extracts the href of every anchor whose text contains
        /// "File", downloads each target into an "img" folder under the current
        /// directory, and finally prints the extracted URLs.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var configJson = @"
            {
                '':'//a[contains(text(), \'File\')]/@href'
            }
            ";

            var config = StructuredDataConfig.ParseJsonString(configJson);

            var pageUrl = @"http://rule34.paheal.net/post/list";
            var htmlDoc = new HtmlWeb().Load(pageUrl);
            var body    = htmlDoc.Text;

            // CreateDirectory is a no-op when the folder already exists, so no
            // separate existence check is needed.
            var path = Path.Combine(Directory.GetCurrentDirectory(), "img");
            Directory.CreateDirectory(path);

            var openScraping    = new StructuredDataExtractor(config);
            var scrapingResults = openScraping.Extract(body);

            // Characters stripped from both ends of each entry: backslash, double quote, '{' and space.
            char[] charstotrim = { '\x5C', '\x22', '\x7B', '\x20' };
            var    output      = JsonConvert.SerializeObject(scrapingResults, Formatting.Indented).Split(',');

            // NOTE(review): the URLs are recovered by cutting fixed-width prefixes/suffixes
            // off the indented JSON text. This depends on the exact serializer layout and
            // throws ArgumentOutOfRangeException for shorter entries; kept unchanged.
            output[0] = output[0].Remove(0, 10);
            output[output.Length - 1] = output[output.Length - 1].Remove(output[output.Length - 1].Length - 8, 8);

            // Dispose the client once all downloads are done (it was previously leaked).
            using (var wc = new WebClient())
            {
                for (int i = 0; i < output.Length; i++)
                {
                    output[i] = output[i].Remove(0, 7);
                    output[i] = output[i].Trim(charstotrim);

                    // Files are named by their index; the extension is "webm" when the URL
                    // mentions it, otherwise the last three characters of the URL.
                    string extension = output[i].Contains("webm")
                        ? "webm"
                        : output[i].Remove(0, output[i].Length - 3);
                    string fileName = Path.Combine(path, i + "." + extension);
                    Console.WriteLine(fileName);

                    wc.DownloadFile(output[i], fileName);
                }
            }

            Console.WriteLine("----------------------------");
            foreach (var link in output)
            {
                Console.WriteLine(link);
            }
            Console.ReadKey();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Opens the pool's page in Chrome, extracts worker counts and hash rates using
        /// the f2pool scraping configuration, and passes them to <c>UpdateSummary</c>.
        /// Extraction failures are reported through <c>UpdateErrorLog</c> while
        /// <c>Commonflag.flag1</c> is set.
        /// </summary>
        /// <param name="poolid">Pool identifier forwarded to <c>GetUrl</c> and <c>UpdateSummary</c>.</param>
        public static void WorkerSummary1(int poolid)
        {
            // Do the cheap, fallible setup BEFORE launching Chrome so a missing config
            // file or a failing URL lookup cannot leave a browser process behind.
            // NOTE(review): the verbatim literal contains a doubled backslash; kept as-is.
            var    jsonConfig = File.ReadAllText(@"Json\\f2pool.json");
            var    config     = StructuredDataConfig.ParseJsonString(jsonConfig);
            string url        = GetUrl(poolid);

            var driver = new ChromeDriver();
            try
            {
                driver.Navigate().GoToUrl(url);

                // Fixed 8 s pause before reading the page — presumably to let it render; TODO confirm.
                Thread.Sleep(8000);
                var source = driver.PageSource;

                try
                {
                    var openScraping    = new StructuredDataExtractor(config);
                    var scrapingResults = openScraping.Extract(source);

                    JObject jObject = JObject.Parse(scrapingResults.ToString());
                    JToken  json    = jObject["data"];

                    // Index 0 is not used; 1 = active, 2 = inactive, 3 = dead (only when present).
                    var active   = (int)json[1];
                    var inactive = (int)json[2];
                    var dead     = 0;
                    if (json.Count() == 4)
                    {
                        dead = (int)json[3];
                    }

                    var temp1 = (string)jObject["currenthash"];
                    var temp2 = (string)jObject["dailyhash"];

                    var currentcalculation = GetFloat(temp1);
                    var dailycalculation   = GetFloat(temp2);
                    var unit = GetString(temp1);

                    UpdateSummary(currentcalculation, dailycalculation, unit, active, inactive, dead, poolid);
                }
                catch (Exception ex)
                {
                    // Log only while the flag is set, then clear it so repeated failures
                    // are not logged again until something sets the flag back.
                    if (Commonflag.flag1 == true)
                    {
                        string error = "Poolid=" + poolid + "  " + ex.ToString();
                        Commonflag.flag1 = false;
                        UpdateErrorLog("f2pool", error);
                    }
                }
            }
            finally
            {
                // Quit closes every window and ends the session; a preceding Close()
                // could throw and prevent Quit from running.
                driver.Quit();
            }
        }
        /// <summary>
        /// Checks that chaining ExtractTextTransformation, HtmlDecodeTransformation and
        /// RemoveExtraWhitespaceTransformation on the content div yields the text with
        /// single spaces and decoded quotes.
        /// </summary>
        public void RemoveExtraWhitespaceTransformationTest()
        {
            // Arrange
            var configJson = @"
            {
                'text': {
                    '_xpath': '//div[@id=\'content\']',
                    '_transformations': [
                        'ExtractTextTransformation',
                        'HtmlDecodeTransformation',
                        'RemoveExtraWhitespaceTransformation'
                    ]
                }
            }
            ";
            string markup = "<html><body><div id='content'><a href=''>A link</a>with     adjacent text. &quot;the final frontier&quot;</div></body></html>";
            var sut = new StructuredDataExtractor(StructuredDataConfig.ParseJsonString(configJson));

            // Act: round-trip through JSON text so the value is read back dynamically.
            var serialized = JsonConvert.SerializeObject(sut.Extract(markup), Formatting.Indented);
            dynamic roundTripped = JsonConvert.DeserializeObject(serialized);

            // Assert
            Assert.AreEqual("A link with adjacent text. \"the final frontier\"", roundTripped["text"].Value);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Downloads the list page, extracts the "celebrities" entries, fills in each
        /// entry's "birth" value via <c>ScrapeCeleb</c>, downloads each entry's image,
        /// and writes the serialized result to <c>DBfile</c>.
        /// </summary>
        public void Scrape()
        {
            var listConfig = StructuredDataConfig.ParseJsonString(Top100Config);
            var listHtml   = DownloadPage(saveTo: IMDBhtml);
            var extractor  = new StructuredDataExtractor(listConfig);
            var scrapingResults = extractor.Extract(listHtml);

            using (var downloader = new WebClient())
            {
                foreach (var entry in scrapingResults["celebrities"])
                {
                    entry["birth"] = ScrapeCeleb(entry);

                    // Save the image under the file name taken from the URL's path.
                    string imageUrl  = entry["image"].ToString();
                    string imageName = Path.GetFileName(new Uri(imageUrl).LocalPath);
                    downloader.DownloadFile(imageUrl, imgPath + imageName);
                }
            }

            var settings = new JsonSerializerSettings();
            settings.StringEscapeHandling = StringEscapeHandling.Default;

            File.WriteAllText(DBfile, JsonConvert.SerializeObject(scrapingResults, settings));
        }
        /// <summary>
        /// Looks up the JSON configuration registered under <paramref name="name"/>,
        /// parses it, and returns the data extractor built from it.
        /// </summary>
        /// <param name="name">Name passed to <c>GetByName</c> to obtain the JSON configuration.</param>
        /// <returns>The extractor produced by <c>getDataExtractor</c> for the parsed configuration.</returns>
        public IDataExtractor <TRawData> CreateFromName(string name)
        {
            ConfigSection parsed = StructuredDataConfig.ParseJsonString(GetByName(name));

            return getDataExtractor(parsed);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Extracts structured data from <paramref name="content"/> using the stored
        /// JSON configuration and sends the result's string form through the sender.
        /// </summary>
        /// <param name="content">Markup to run the extraction on.</param>
        public async Task Transform(string content)
        {
            var extractor = new StructuredDataExtractor(StructuredDataConfig.ParseJsonString(_jsonConfig));
            var payload   = extractor.Extract(content).ToString();

            await _sender.Send(payload);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Handles a scraping request. When <c>json.isID</c> is set, the URL is fetched
        /// and a regex from <c>json.data</c> is matched against the final response URL
        /// to produce an id. Otherwise the page is downloaded, every anchor href is
        /// rewritten to an absolute URL, and the page is scraped with the OpenScraping
        /// configuration in <c>json.data</c>.
        /// </summary>
        /// <param name="json">Request body: target URL, mode flags and the regex or scraping configuration.</param>
        /// <returns>A JSON result wrapping the indented, serialized outcome.</returns>
        public IActionResult JsonResult([FromBody] ObjectJson json)
        {
            // NOTE(review): the URL comes straight from the request body and is fetched
            // server-side; confirm callers are trusted or restrict allowed hosts.
            var url = WebUtility.UrlDecode(json.url);

            // Dispose the client when the request is done (it was previously leaked).
            using (MyWebClient client = new MyWebClient())
            {
                client.Encoding = Encoding.UTF8;
                client.Headers[HttpRequestHeader.UserAgent] = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36";

                if (json.isID)
                {
                    // The body is discarded; only the final response URL is used.
                    client.DownloadData(url);
                    var    mainUrl = client.ResponseUri.ToString();
                    var    regex   = JsonConvert.DeserializeObject <JsonIDInput>(json.data);
                    string item    = "";
                    try
                    {
                        var regexMatch = Regex.Match(mainUrl, regex._xpath);
                        item = regexMatch.Groups[regex.group_number].Value;
                    }
                    catch (Exception)
                    {
                        // Best effort: an invalid pattern or missing input leaves the id empty.
                    }
                    return(Json(JsonConvert.SerializeObject(new JsonIDresult {
                        url = mainUrl, id = item
                    }, Formatting.Indented)));
                }

                var          baseUri  = new Uri(url);
                var          isScript = json.javascript;
                var          config   = StructuredDataConfig.ParseJsonString(json.data);
                var          html     = client.DownloadString(!isScript ? url : this.configuration.GetAppSetting("UrlSeleniumGetHtmlExcuteJavascript") + "?url=" + WebUtility.UrlEncode(url));
                HtmlDocument docc     = new HtmlDocument();
                docc.LoadHtml(html);

                // Make every non-empty href absolute relative to the requested URL.
                HtmlNodeCollection nodes = docc.DocumentNode.SelectNodes("//a");
                if (nodes != null)
                {
                    foreach (HtmlNode node in nodes)
                    {
                        var href = node.Attributes["href"];
                        if ((href == null) || string.IsNullOrEmpty(href.Value))
                        {
                            continue;
                        }

                        try
                        {
                            href.Value = new Uri(baseUri, href.Value.Trim()).AbsoluteUri;
                        }
                        catch (Exception)
                        {
                            // Best effort: an href that cannot be resolved is left untouched.
                        }
                    }
                }

                html = docc.DocumentNode.InnerHtml;
                var openScraping    = new StructuredDataExtractor(config);
                var scrapingResults = openScraping.Extract(html);
                var result          = JsonConvert.SerializeObject(scrapingResults, Formatting.Indented);
                return(Json(result));
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Opens the pool's page in Chrome, extracts hash rates and worker counts using
        /// the spiderpool scraping configuration, and passes them to <c>UpdateSummary</c>.
        /// Extraction failures are reported through <c>UpdateErrorLog</c> while
        /// <c>Commonflag.flag7</c> is set.
        /// </summary>
        /// <param name="poolid">Pool identifier forwarded to <c>GetUrl</c> and <c>UpdateSummary</c>.</param>
        public static void WorkerSummary7(int poolid)
        {
            // Do the cheap, fallible setup BEFORE launching Chrome so a missing config
            // file or a failing URL lookup cannot leave a browser process behind.
            // NOTE(review): the verbatim literal contains a doubled backslash; kept as-is.
            var    jsonConfig = File.ReadAllText(@"Json\\spiderpool.json");
            var    config     = StructuredDataConfig.ParseJsonString(jsonConfig);
            string url        = GetUrl(poolid);

            var driver = new ChromeDriver();
            try
            {
                driver.Navigate().GoToUrl(url);

                // Fixed 1 s pause before reading the page — presumably to let it render; TODO confirm.
                Thread.Sleep(1000);
                var source = driver.PageSource;

                try
                {
                    var openScraping    = new StructuredDataExtractor(config);
                    var scrapingResults = openScraping.Extract(source);

                    JObject jObject = JObject.Parse(scrapingResults.ToString());
                    JToken  json    = jObject["data"];

                    // Entries 8 and 10 carry the current and daily hash-rate strings.
                    var temp1 = (string)json[8];
                    var temp2 = (string)json[10];

                    var currentcalculation = GetFloat(temp1);
                    var dailycalculation   = GetFloat(temp2);
                    var unit = GetString(temp1);

                    // Entry 6 is parsed as "<active>/<total>".
                    var temp     = (string)json[6];
                    var active   = Int32.Parse(temp.Substring(0, temp.IndexOf('/')));
                    var total    = Int32.Parse(temp.Substring(temp.LastIndexOf('/') + 1));
                    int inactive = total - active;
                    int dead     = 0;

                    UpdateSummary(currentcalculation, dailycalculation, unit, active, inactive, dead, poolid);
                }
                catch (Exception ex)
                {
                    // Log only while the flag is set, then clear it so repeated failures
                    // are not logged again until something sets the flag back.
                    if (Commonflag.flag7 == true)
                    {
                        string error = "Poolid=" + poolid + "  " + ex.ToString();
                        Commonflag.flag7 = false;
                        // Tag the entry with this scraper's pool; it was previously
                        // logged as "viabtc", apparently copied from the viabtc scraper.
                        UpdateErrorLog("spiderpool", error);
                    }
                }
            }
            finally
            {
                // Quit closes every window and ends the session; a preceding Close()
                // could throw and prevent Quit from running.
                driver.Quit();
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Opens the pool's page in Chrome, extracts hash rates and worker counts using
        /// the viabtc scraping configuration, and passes them to <c>UpdateSummary</c>.
        /// Extraction failures are reported through <c>UpdateErrorLog</c> while
        /// <c>Commonflag.flag6</c> is set.
        /// </summary>
        /// <param name="poolid">Pool identifier forwarded to <c>GetUrl</c> and <c>UpdateSummary</c>.</param>
        public static void WorkerSummary6(int poolid)
        {
            // Do the cheap, fallible setup BEFORE launching Chrome so a missing config
            // file or a failing URL lookup cannot leave a browser process behind.
            // NOTE(review): the verbatim literal contains a doubled backslash; kept as-is.
            var    jsonConfig = File.ReadAllText(@"Json\\viabtc.json");
            var    config     = StructuredDataConfig.ParseJsonString(jsonConfig);
            string url        = GetUrl(poolid);

            var driver = new ChromeDriver();
            try
            {
                driver.Navigate().GoToUrl(url);

                // The page source is read immediately after navigation, with no fixed wait.
                var source = driver.PageSource;

                try
                {
                    var openScraping    = new StructuredDataExtractor(config);
                    var scrapingResults = openScraping.Extract(source);

                    JObject jObject = JObject.Parse(scrapingResults.ToString());
                    JToken  json    = jObject["data"];

                    // Entries 0 and 2 carry the current and daily hash-rate strings.
                    var temp1 = (string)json[0];
                    var temp2 = (string)json[2];

                    var currentcalculation = GetFloat(temp1);
                    var dailycalculation   = GetFloat(temp2);
                    var unit = GetString(temp1);

                    // Entries 3 and 4 are the active and inactive worker counts.
                    var active   = Int32.Parse((string)json[3]);
                    var inactive = Int32.Parse((string)json[4]);
                    int dead     = 0;

                    UpdateSummary(currentcalculation, dailycalculation, unit, active, inactive, dead, poolid);
                }
                catch (Exception ex)
                {
                    // Log only while the flag is set, then clear it so repeated failures
                    // are not logged again until something sets the flag back.
                    if (Commonflag.flag6 == true)
                    {
                        string error = "Poolid=" + poolid + "  " + ex.ToString();
                        Commonflag.flag6 = false;
                        UpdateErrorLog("viabtc", error);
                    }
                }
            }
            finally
            {
                // Quit closes every window and ends the session; a preceding Close()
                // could throw and prevent Quit from running.
                driver.Quit();
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Extracts structured data from <paramref name="content"/> using the
        /// configuration produced by <c>GenerateJson</c> and sends the result's string
        /// form through the sender.
        /// </summary>
        /// <param name="content">Markup to run the extraction on.</param>
        public async Task Transform(string content)
        {
            // The original author notes that the resulting JSON shape should have a shared
            // object template, because the Loader needs the same type to deserialize it.
            var parsedConfig = StructuredDataConfig.ParseJsonString(GenerateJson());
            var extractor    = new StructuredDataExtractor(parsedConfig);
            var payload      = extractor.Extract(content).ToString();

            await _sender.Send(payload);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Creates an extractor from a JSON configuration string and loads the
        /// transformations.
        /// </summary>
        /// <param name="configString">JSON configuration text; must be neither null nor empty.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="configString"/> is null or empty.
        /// </exception>
        public StructuredDataExtractor(string configString)
        {
            bool hasConfigText = !string.IsNullOrEmpty(configString);
            if (!hasConfigText)
            {
                throw new ArgumentNullException(nameof(configString));
            }

            config = StructuredDataConfig.ParseJsonString(configString);
            LoadTransformations();
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Opens the pool's page in Chrome, extracts hash rates and worker counts using
        /// the antpool scraping configuration, and passes them to <c>UpdateSummary</c>.
        /// Extraction failures are reported through <c>UpdateErrorLog</c> while
        /// <c>Commonflag.flag5</c> is set.
        /// </summary>
        /// <param name="poolid">Pool identifier forwarded to <c>GetUrl</c> and <c>UpdateSummary</c>.</param>
        public static void WorkerSummary5(int poolid)
        {
            // Do the cheap, fallible setup BEFORE launching Chrome so a missing config
            // file or a failing URL lookup cannot leave a browser process behind.
            // NOTE(review): the verbatim literal contains a doubled backslash; kept as-is.
            var    jsonConfig = File.ReadAllText(@"Json\\antpool.json");
            var    config     = StructuredDataConfig.ParseJsonString(jsonConfig);
            string url        = GetUrl(poolid);

            var driver = new ChromeDriver();
            try
            {
                driver.Navigate().GoToUrl(url);

                // Fixed 1 s pause before reading the page — presumably to let it render; TODO confirm.
                Thread.Sleep(1000);
                var source = driver.PageSource;

                try
                {
                    var openScraping    = new StructuredDataExtractor(config);
                    var scrapingResults = openScraping.Extract(source);

                    JObject jObject = JObject.Parse(scrapingResults.ToString());
                    JToken  json    = jObject["data"];

                    // Entry 0 holds the worker counts; entries 1 and 3 the current and daily hash-rate strings.
                    var temp  = (string)json[0];
                    var temp1 = (string)json[1];
                    var temp2 = (string)json[3];

                    var currentcalculation = GetFloat(temp1);
                    var dailycalculation   = GetFloat(temp2);
                    var unit = GetString(temp1);

                    // Split on runs of non-digits: the first number is read as active, the second as total.
                    var numbers  = Regex.Split(temp.Trim(), @"\D+");
                    var active   = Int32.Parse(numbers[0]);
                    var total    = Int32.Parse(numbers[1]);
                    var inactive = total - active;
                    int dead     = 0;

                    UpdateSummary(currentcalculation, dailycalculation, unit, active, inactive, dead, poolid);
                }
                catch (Exception ex)
                {
                    // Log only while the flag is set, then clear it so repeated failures
                    // are not logged again until something sets the flag back.
                    if (Commonflag.flag5 == true)
                    {
                        string error = "Poolid=" + poolid + "  " + ex.ToString();
                        Commonflag.flag5 = false;
                        UpdateErrorLog("antpool", error);
                    }
                }
            }
            finally
            {
                // Quit closes every window and ends the session; a preceding Close()
                // could throw and prevent Quit from running.
                driver.Quit();
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Downloads the page referenced by the entry and returns the scraped birth
        /// value. The entry's "page" value is prefixed with the IMDb host in place.
        /// </summary>
        /// <param name="jToken">Entry whose "page" value is a site-relative link; it is modified.</param>
        /// <returns>The <c>["celebrities"]["birth"]</c> token of the scraping result.</returns>
        private JToken ScrapeCeleb(JToken jToken)
        {
            var celebConfig = StructuredDataConfig.ParseJsonString(CelebConfig);

            // Store the absolute link back on the entry, then fetch that same link.
            string absolutePage = "https://www.imdb.com" + jToken["page"];
            jToken["page"] = absolutePage;

            var pageHtml  = DownloadPage(absolutePage);
            var extractor = new StructuredDataExtractor(celebConfig);

            return extractor.Extract(pageHtml)["celebrities"]["birth"];
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Creates an extractor from a JSON configuration file and loads the
        /// transformations.
        /// </summary>
        /// <param name="configFile">File whose full text is parsed as the JSON configuration.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="configFile"/> is null.
        /// </exception>
        public StructuredDataExtractor(FileInfo configFile)
        {
            if (configFile == null)
            {
                throw new ArgumentNullException(nameof(configFile));
            }

            config = StructuredDataConfig.ParseJsonString(File.ReadAllText(configFile.FullName));
            LoadTransformations();
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Loads an embedded resource from the assembly containing <c>WikiSearcher</c>
        /// and parses its text as a scraping configuration.
        /// </summary>
        /// <param name="resourceName">
        /// Exact manifest resource name, or a fragment of one; when no exact match
        /// exists, the first resource whose name contains the fragment is used.
        /// </param>
        /// <returns>The parsed configuration section.</returns>
        private static ConfigSection CreateConfig(string resourceName)
        {
            var assembly = typeof(WikiSearcher).Assembly;

            string[] names = assembly.GetManifestResourceNames();
            if (!names.Any(n => n.Equals(resourceName)))
            {
                // Fall back to a substring match; this yields null when nothing matches,
                // in which case the lookup below fails.
                resourceName = names.FirstOrDefault(n => n.Contains(resourceName));
            }

            // Dispose the stream and reader once the text is read (they were previously leaked).
            using (var stream = assembly.GetManifestResourceStream(resourceName))
            using (var reader = new System.IO.StreamReader(stream))
            {
                return(StructuredDataConfig.ParseJsonString(reader.ReadToEnd()));
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Prepares the fields used later: the 3-class NER classifier, the LOCATION tag
        /// regex, the artist-page extractor, and the three feature tables (created from
        /// their portal items and then loaded).
        /// </summary>
        private async Task Init()
        {
            // Classifier model location inside the Stanford NER distribution folder.
            const string nerRoot   = @"C:\stanford-ner-2018-10-16";
            const string modelPath = nerRoot + @"\classifiers" + @"\english.all.3class.distsim.crf.ser.gz";

            _classifier = CRFClassifier.getClassifierNoExceptions(modelPath);

            // Matches <LOCATION ...>...</LOCATION> elements, case-insensitively.
            var locationOptions = RegexOptions.Compiled | RegexOptions.IgnoreCase;
            _locationRx = new Regex(@"<LOCATION\b[^>]*>(.*?)</LOCATION>", locationOptions);

            // XPath rules for the artist and listener details.
            var configArtistInfoJson = @"
            {
                'artist': '//h1[contains(@class, \'view-header\')]',
                'about': '//div[contains(@class, \'bio-primary\')]',
                'more': '//div[contains(@class, \'bio-secondary\')]',
                'listeners-city': '//span[contains(@class, \'horizontal-list__item__title\')]',
                'listeners': '//span[contains(@class, \'horizontal-list__item__subtitle\')]'
            }";

            _artistScraping = new StructuredDataExtractor(StructuredDataConfig.ParseJsonString(configArtistInfoJson));

            // Resolve the portal items backing each table, one after another.
            var portal          = await ArcGISPortal.CreateAsync();
            var hometownItem    = await PortalItem.CreateAsync(portal, _hometownLayerId);
            var otherPointsItem = await PortalItem.CreateAsync(portal, _otherPointsLayerId);
            var listenerItem    = await PortalItem.CreateAsync(portal, _listenerLayerId);

            // Each table is created with layer id 0 of its portal item.
            _hometownTable    = new ServiceFeatureTable(hometownItem, 0);
            _otherPointsTable = new ServiceFeatureTable(otherPointsItem, 0);
            _listenerTable    = new ServiceFeatureTable(listenerItem, 0);

            await _hometownTable.LoadAsync();
            await _otherPointsTable.LoadAsync();
            await _listenerTable.LoadAsync();
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Demo: builds a hard-coded schema.org Recipe JSON-LD document, deserializes it
        /// into a <c>Recipe</c>, prints it, and waits for a key press. The document is
        /// embedded here rather than downloaded.
        /// </summary>
        static void ExtractJsonLdRecipe()
        {
            var configJson = @"
            {
                'title': '//h1',
                'body': '//script[contains(@type, \'application\/ld+json\')]'
            }";

            // The configuration is parsed but not used further in this method.
            var config = StructuredDataConfig.ParseJsonString(configJson);

            // Assemble the JSON-LD text piece by piece.
            var jsonBuilder = new StringBuilder("{");
            jsonBuilder.Append("\"@context\":\"http://schema.org/\",");
            jsonBuilder.Append("\"@type\":\"Recipe\",");
            jsonBuilder.Append("\"name\":\"Keto Coffee & Chocolate Tart\",\"author\":{ \"@type\":\"Person\",\"name\":\"Sahil Makhija\"},\"datePublished\":\"2009-11-05T00:00:00+00:00\",");
            jsonBuilder.Append("\"description\":\"A delicious layered low carb dessert with the flavours of chocolate and coffee\",\"recipeYield\":\"3 servings\",\"aggregateRating\":{ \"@type\":\"AggregateRating\",\"ratingValue\":\"5\",\"ratingCount\":\"6\"},");
            jsonBuilder.Append("\"prepTime\":\"PT10M\",\"cookTime\":\"PT20M\",");
            jsonBuilder.Append("\"recipeIngredient\":[\"45 grams Almond Flour ( I use this one )\",\"30 grams Salted Butter\",\"1 Tbsp Unsweetened Coco Powder ( I recommend this one )\",\"150 grams Mascarpone cheese\",\"1 Tsp Vanilla Extract\",\"2 Tbsp Water\",\"1 Tsp Instant espresso powder\",\"100 ml Heavy Cream\",\"30 grams Dark Chocolate (85% or Higher) (I use Lindt 85%)\",\"Stevia to taste\"],");
            jsonBuilder.Append("\"recipeInstructions\":[");
            jsonBuilder.Append("\"Microwave the butter for 30 seconds till melted\",\"Add in your stevia/sweetner to taste, vanilla essence and the coco powder and mix well together\",\"Add in the almond flour and combine till well incorporated\",");
            jsonBuilder.Append("\"Divide the mixture in 3 tart tins or ramekins and shape the base\",\"Bake at 175 C/ 350 F for 10 minutes and then allow them to cool\",\"Heat 2 tablespoons of water and mix 1 tsp of instant espresso powder into that\",");
            jsonBuilder.Append("\"Whip the mascarpone cheese, stevia, vanilla extract and coffee mixture together till nice and fluffy\",\"Pour the mascarpone mixture over the base and chill in the fridge for 15 minutes\",");
            jsonBuilder.Append("\"Meanwhile warm up the cream for 30 seconds in the microwave and add the chocolate and sweetner to that and mix till fully melted and you have a creamy ganache\",\"Pour the ganache over the mascarpone mousse in the tart molds and chill in the fridge for an hour\",\"Finish with some sea salt on top of each tart.\"],");
            jsonBuilder.Append("\"recipeCategory\":\"Dessert\",\"recipeCuisine\":\"General\",\"suitableForDiet\": \"http://schema.org/LowFatDiet\"}");

            string recipeJson = jsonBuilder.ToString();

            // Dates are read as DateTimeOffset; the converters handle ISO dates and ISO 8601 durations.
            var settings = new JsonSerializerSettings();
            settings.DateParseHandling = DateParseHandling.DateTimeOffset;
            settings.Converters.Add(new IsoDateTimeConverter());
            settings.Converters.Add(new TimeSpanToISO8601DurationValuesConverter());

            Recipe recipe = JsonConvert.DeserializeObject <Recipe>(recipeJson, settings);

            Console.WriteLine("Extracting LD+JSON Recipe.....");
            Console.Write(recipe);
            Console.ReadKey();
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Downloads the page at the base URL plus the relative URL, scrapes it with
        /// <c>ConfigurationJson</c>, and converts the result to <typeparamref name="T"/>
        /// by round-tripping through JSON text. Calls <c>WaitRandom</c> before returning.
        /// </summary>
        /// <returns>The scraping result deserialized as <typeparamref name="T"/>.</returns>
        public T Run()
        {
            using (var client = new WebClient())
            {
                string pageHtml = client.DownloadString(UrlConstants.BaseUrl + _relativeUrl);

                var extractor = new StructuredDataExtractor(StructuredDataConfig.ParseJsonString(ConfigurationJson));

                // Serialize the scraped result and read it back as the target type.
                string roundTrip = JsonConvert.SerializeObject(extractor.Extract(pageHtml), Formatting.Indented);
                T      typed     = JsonConvert.DeserializeObject <T>(roundTrip);

                WaitRandom();

                return typed;
            }
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Reads the scraping configuration from match-result.config.json, downloads a
        /// match page as UTF-8, and prints the scraped result as indented JSON.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var config = StructuredDataConfig.ParseJsonString(File.ReadAllText(@"match-result.config.json"));

            string html;
            using (var client = new WebClient())
            {
                client.Encoding = Encoding.UTF8;
                html = client.DownloadString("http://virtualsoccer.ru/viewmatch.php?day=12968&match_id=213340");
            }

            var extractor = new StructuredDataExtractor(config);
            var scraped   = extractor.Extract(html);
            var report    = JsonConvert.SerializeObject(scraped, Formatting.Indented);

            Console.WriteLine(report);
            Console.ReadKey();
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, reads its
        /// <c>application/ld+json</c> script content, and builds a <c>MyRecipe</c> from
        /// it when that content declares a schema.org Recipe.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>
        /// The built recipe, or <c>null</c> when the page has no such script, the script
        /// is not a Recipe, or deserializing/building fails.
        /// </returns>
        public static MyRecipe ExtractRecipe(string url)
        {
            string urlResponse;

            // 1. Get the response from the URL.
            using (WebClient w = new WebClient())
            {
                urlResponse = w.DownloadString(url);
            }

            // 2. Scrape any structured JSON present (application/ld+json).
            var configJson      = @"{                
                'data': '//script[contains(@type, \'application\/ld+json\')]'
            }";
            var config          = StructuredDataConfig.ParseJsonString(configJson);
            var openScraping    = new StructuredDataExtractor(config);
            var scrapingResults = openScraping.Extract(urlResponse);

            if (scrapingResults == null || scrapingResults["data"] == null)
            {
                return(null);
            }

            // NOTE(review): this is an exact substring test, so JSON-LD written with
            // whitespace after the colon ("@type": "Recipe") is not recognized.
            var content = scrapingResults["data"].ToString();
            if (!content.Contains("\"@type\":\"Recipe\""))
            {
                return(null);
            }

            try
            {
                var serializerSettings = new JsonSerializerSettings()
                {
                    DateParseHandling = DateParseHandling.DateTimeOffset
                };
                Recipe        rec     = JsonConvert.DeserializeObject <Recipe>(content, serializerSettings);
                RecipeBuilder builder = new RecipeBuilder();
                return(builder.Build(rec));
            }
            catch (Exception ex)
            {
                // Still best effort (the caller gets null), but the failure is now
                // traced instead of being swallowed silently.
                System.Diagnostics.Trace.TraceWarning("ExtractRecipe failed for {0}: {1}", url, ex);
                return(null);
            }
        }
        /// <summary>
        /// Checks that ExtractTextTransformation on the content div yields the link text
        /// and the adjacent text separated by a single space.
        /// </summary>
        public void ExtractTextTest()
        {
            // Arrange
            var configJson = @"
            {
                'text': {
                    '_xpath': '//div[@id=\'content\']',
                    '_transformation': 'ExtractTextTransformation'
                }
            }
            ";
            string markup = "<html><body><div id='content'><a href=''>A link</a>with adjacent text.</div></body></html>";
            var sut = new StructuredDataExtractor(StructuredDataConfig.ParseJsonString(configJson));

            // Act: round-trip through JSON text so the value is read back dynamically.
            var serialized = JsonConvert.SerializeObject(sut.Extract(markup), Formatting.Indented);
            dynamic roundTripped = JsonConvert.DeserializeObject(serialized);

            // Assert
            Assert.AreEqual("A link with adjacent text.", roundTripped["text"].Value);
        }
        /// <summary>
        /// Checks that UrlEncodeTransformation turns the href value "hello world" into
        /// "hello+world".
        /// </summary>
        public void UrlEncodeTest()
        {
            // Arrange
            var configJson = @"
            {
                'text': {
                    '_xpath': '//div[@id=\'content\']/a/@href',
                    '_transformation': 'UrlEncodeTransformation'
                }
            }
            ";
            string markup = "<html><body><div id='content'><a href='hello world'></a></div></body></html>";
            var sut = new StructuredDataExtractor(StructuredDataConfig.ParseJsonString(configJson));

            // Act: round-trip through JSON text so the value is read back dynamically.
            var serialized = JsonConvert.SerializeObject(sut.Extract(markup), Formatting.Indented);
            dynamic roundTripped = JsonConvert.DeserializeObject(serialized);

            // Assert
            Assert.AreEqual("hello+world", roundTripped["text"].Value);
        }
        /// <summary>
        /// Checks that CastToIntegerTransformation turns the meta tag's content
        /// attribute "1200" into the number 1200.
        /// </summary>
        public void CastToIntegerTest()
        {
            // Arrange
            var configJson = @"
            {
                'width': {
                    '_xpath': '/meta[@property=\'width\']/@content',
                    '_transformation': 'CastToIntegerTransformation'
                }
            }
            ";
            string markup = "<meta property=\"width\" content=\"1200\">";
            var sut = new StructuredDataExtractor(StructuredDataConfig.ParseJsonString(configJson));

            // Act: round-trip through JSON text so the value is read back dynamically.
            var serialized = JsonConvert.SerializeObject(sut.Extract(markup), Formatting.Indented);
            dynamic roundTripped = JsonConvert.DeserializeObject(serialized);

            // Assert
            Assert.AreEqual(1200, roundTripped["width"].Value);
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Extracts the price and its timestamp from a quote page's HTML (a Yahoo page,
        /// going by the method name) and returns them as a <c>StockPrice</c>.
        /// </summary>
        /// <param name="httpResposeMessage">HTML of the quote page.</param>
        /// <returns>
        /// A <c>StockPrice</c> whose <c>Price</c> is the parsed price and whose
        /// <c>ValueOn</c> is the page's date/time in India Standard Time, moved back
        /// one day when it lies in the future and to the previous work day when it
        /// falls on a weekend.
        /// </returns>
        /// <remarks>
        /// Parsing and extraction errors propagate to the caller with their original
        /// stack trace; the earlier <c>catch { throw ex; }</c> wrapper reset it.
        /// </remarks>
        internal static StockPrice FilerTheStockpriceFromYahoo(string httpResposeMessage)
        {
            StockPrice sp = new StockPrice();

            TimeZoneInfo indianZone = TimeZoneInfo.FindSystemTimeZoneById("India Standard Time");
            DateTime     nowInIndia = TimeZoneInfo.ConvertTimeFromUtc(DateTime.UtcNow, indianZone);

            // Defaults for when the page carries no date and/or time. Both come from the
            // Indian clock; the date used to come from the host's local DateTime.Today,
            // which can be a different calendar day on hosts outside IST.
            DateTime tm = nowInIndia;
            DateTime dt = nowInIndia.Date;

            var configJson = @"
                        {
                            'price':'//div[1]/div/div/div[1]/div/div[2]/div/div/div[4]/div/div/div/div[3]/div/div/span[1]',
                            'Closing':'//div[1]/div/div/div[1]/div/div[2]/div/div/div[4]/div/div/div/div[3]/div/div/div/span',
                            'DK':'//span[7]'
                        }";

            var config = StructuredDataConfig.ParseJsonString(configJson);

            var openScraping    = new StructuredDataExtractor(config);
            var scrapingResults = openScraping.Extract(httpResposeMessage);

            System.Diagnostics.Debug.WriteLine(JsonConvert.SerializeObject(scrapingResults, Formatting.Indented));

            // Examples of the "Closing" text:
            //   At close: May 24 3:29PM IST
            //   As of  9:23AM IST. Market open.
            //   As of May 27 9:30AM IST. Market open.
            var thePrice = scrapingResults["price"];
            var splits   = scrapingResults["Closing"].ToString().Trim().Split(' ', StringSplitOptions.RemoveEmptyEntries);

            // Find the "IST" token, ignoring trailing punctuation; the comparison is
            // ordinal so it does not depend on the current culture's casing rules.
            var index = Array.FindIndex(
                splits,
                x => string.Equals(x.TrimEnd('.', ','), "IST", StringComparison.OrdinalIgnoreCase));

            string sDate = string.Empty, sTime = string.Empty;
            if (index >= 1)
            {
                // The token right before "IST" is the time, e.g. "3:29PM".
                sTime = splits[index - 1];
            }
            if (index >= 3)
            {
                // The two tokens before the time, swapped to "<day> <month>". When the
                // page has no date these are unrelated words and IsDateType rejects them.
                sDate = $"{splits[index - 2]} {splits[index - 3]}";
            }

            // Only override the defaults when the extracted text parses as a date/time.
            if (sDate.IsDateType())
            {
                dt = (DateTime)Convert.ChangeType(sDate, typeof(DateTime));
            }
            if (sTime.IsDateType())
            {
                tm = (DateTime)Convert.ChangeType(sTime, typeof(DateTime));
            }

            // A timestamp in the future means the page's time belongs to the previous day.
            var currectedDate = dt.Date.Add(tm.TimeOfDay);
            if (currectedDate > nowInIndia)
            {
                currectedDate = currectedDate.AddDays(-1);
            }

            // Map weekend timestamps to the previous work day.
            if (currectedDate.IsWeekend())
            {
                currectedDate = currectedDate.PreviousWorkDay();
            }

            // Parse with the invariant culture ('.' decimal, ',' thousands) so the
            // result does not depend on the host's culture.
            sp.Price   = double.Parse(thePrice.ToString().Trim(), System.Globalization.CultureInfo.InvariantCulture);
            sp.ValueOn = currectedDate;

            return(sp);
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Extracts the price and last-traded timestamp from the HTML of a quote page
        /// (Rediff, per the method name) and returns them as a <c>StockPrice</c>.
        /// </summary>
        /// <param name="httpResposeMessage">Raw HTML to run the XPath extraction on.</param>
        /// <returns>A <c>StockPrice</c> with <c>Price</c> and <c>ValueOn</c> populated.</returns>
        /// <remarks>
        /// The date is parsed as <c>dd MMM</c> (no year). When the date element is empty, the time element
        /// is treated as either "date,time" or a time on its own. Missing parts fall back to the current
        /// date/time in India Standard Time. Exceptions propagate to the caller unchanged.
        /// </remarks>
        internal static StockPrice FilerTheStockpriceFromRediff(string httpResposeMessage)
        {
            TimeZoneInfo INDIAN_ZONE = TimeZoneInfo.FindSystemTimeZoneById("India Standard Time");
            DateTime     nowIst      = TimeZoneInfo.ConvertTimeFromUtc(DateTime.UtcNow, INDIAN_ZONE);

            // Defaults for a missing date/time, both taken from the IST clock so they agree
            // with the IST comparison at the end of the method.
            DateTime tm = nowIst;
            DateTime dt = nowIst.Date;

            StockPrice sp         = new StockPrice();
            var        configJson = @"
                        {
                            'price':'//span[2]',
                            'LastTradedDate':'//span[6]',
                            'LastTradedTime':'//span[7]'
                        }";

            // No try/catch here on purpose: the original only re-threw, and "throw ex;" discarded the stack trace.
            var config = StructuredDataConfig.ParseJsonString(configJson);

            var openScraping    = new StructuredDataExtractor(config);
            var scrapingResults = openScraping.Extract(httpResposeMessage);

            System.Diagnostics.Debug.WriteLine(JsonConvert.SerializeObject(scrapingResults, Formatting.Indented));

            var thePrice = scrapingResults["price"];
            var theDate  = scrapingResults["LastTradedDate"];
            var theTime  = scrapingResults["LastTradedTime"];

            // The page text is machine-formatted, so parse culture-independently.
            sp.Price = double.Parse(thePrice.ToString().Trim(), CultureInfo.InvariantCulture);

            //** precaution in case of missing date & time **//
            string dateText = theDate.ToString();
            if (!string.IsNullOrEmpty(dateText))
            {
                if (dateText.IsDateType())
                {
                    dt = DateTime.ParseExact(dateText.Trim(), "dd MMM", CultureInfo.InvariantCulture);
                }

                if (theTime.ToString().IsDateType())
                {
                    tm = (DateTime)Convert.ChangeType(theTime, typeof(DateTime));
                }
            }
            else
            {
                var DataNTime = theTime.ToString().Split(',', StringSplitOptions.RemoveEmptyEntries);
                if (DataNTime.Length == 2) // has date and time
                {
                    if (DataNTime[0].IsDateType())
                    {
                        // Trim so that surrounding whitespace does not break the exact-format parse.
                        dt = DateTime.ParseExact(DataNTime[0].Trim(), "dd MMM", CultureInfo.InvariantCulture);
                    }
                    if (DataNTime[1].IsDateType())
                    {
                        tm = (DateTime)Convert.ChangeType(DataNTime[1], typeof(DateTime));
                    }
                }
                else if (DataNTime.Length > 0) // has time only
                {
                    if (DataNTime[0].IsDateType())
                    {
                        tm = (DateTime)Convert.ChangeType(DataNTime[0], typeof(DateTime));
                    }
                }
                // Length == 0: neither date nor time was present; keep the IST defaults
                // instead of indexing into an empty array.
            }

            //** the parsed date carries no year; if it lands in the future it belongs to last year **//
            var currectedDate = dt.Date.Add(tm.TimeOfDay);
            if (currectedDate > nowIst)
            {
                currectedDate = currectedDate.AddYears(-1);
            }
            sp.ValueOn = currectedDate;

            return sp;
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Downloads the pool page for <paramref name="poolid"/>, extracts the worker rows using the
        /// rules in <c>Json\\f2pool.json</c>, and stores one <c>Worker</c> record per row via <c>Add</c>.
        /// </summary>
        /// <param name="poolid">Pool identifier; passed to <c>GetUrl</c> and written to each record.</param>
        /// <remarks>
        /// Rows without a <c>currenthash</c> value are skipped. A row whose <c>currenthash</c> is
        /// "0" or "0.00" is stored as inactive; every other row is stored as active.
        /// The original code carried an "already exists -> Update" branch, but the existence query was
        /// commented out and the count hard-wired to 0, so that branch could never run; it has been removed.
        /// </remarks>
        public static void Worker1(int poolid)
        {
            var    jsonConfig = File.ReadAllText(@"Json\\f2pool.json");
            var    config     = StructuredDataConfig.ParseJsonString(jsonConfig);
            string url        = GetUrl(poolid);
            string html;

            using (WebClient client = new WebClient())
            {
                client.Encoding = Encoding.UTF8;
                client.Headers.Add(HttpRequestHeader.UserAgent, "");
                html = client.DownloadString(url);
            }

            var openScraping    = new StructuredDataExtractor(config);
            var scrapingResults = openScraping.Extract(html);
            var jsonres         = JsonConvert.DeserializeObject(scrapingResults.ToString());

            foreach (var item in (dynamic)jsonres)
            {
                // Rows that carry no hash-rate value are not workers; skip them.
                if (item.currenthash == null)
                {
                    continue;
                }

                // A zero current hash rate marks the worker as inactive.
                bool isActive = true;
                if (item.currenthash == "0" || item.currenthash == "0.00")
                {
                    isActive = false;
                }

                Worker worker = new Worker();
                worker.poolid          = poolid;
                worker.workername      = item.workername;
                worker.currenthashrate = item.currenthash;
                worker.dailyhashrate   = item.dailyhash;
                worker.rejected        = item.rejected;
                worker.updateat        = DateTime.Now;
                worker.isactive        = isActive;
                Add(worker);
            }
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Opens the pool page for <paramref name="poolid"/> in Chrome, extracts the summary values using
        /// the rules in <c>Json\\poolin.json</c>, and passes them to <c>UpdateSummary</c>.
        /// </summary>
        /// <param name="poolid">Pool identifier; passed to <c>GetUrl</c> and <c>UpdateSummary</c>.</param>
        /// <remarks>
        /// The extracted <c>data</c> array is read positionally: [0] current value, [1] daily value,
        /// [2] active count, [3] inactive count; a count of "-" is treated as 0. The page exposes no
        /// "dead" count, so 0 is reported.
        /// Extraction/parsing failures are swallowed; they are written to the error log only while
        /// <c>Commonflag.flag2</c> is true, and the flag is then cleared (presumably so that repeated
        /// failures are logged once — confirm against where the flag is set).
        /// Failures while reading the config or navigating still propagate to the caller, but the
        /// browser is now always shut down.
        /// </remarks>
        public static void WorkerSummary2(int poolid)
        {
            // Read the configuration before launching the browser so a bad/missing config file
            // cannot leave an orphaned Chrome process behind.
            var    jsonConfig = File.ReadAllText(@"Json\\poolin.json");
            var    config     = StructuredDataConfig.ParseJsonString(jsonConfig);
            string url        = GetUrl(poolid);

            var driver = new ChromeDriver();
            try
            {
                driver.Navigate().GoToUrl(url);

                // Fixed wait for the page's scripts to render the figures.
                Thread.Sleep(15000);
                var source = driver.PageSource;

                try
                {
                    var openScraping    = new StructuredDataExtractor(config);
                    var scrapingResults = openScraping.Extract(source);

                    JObject jObject = JObject.Parse(scrapingResults.ToString());
                    JToken  json    = jObject["data"];
                    var     temp1   = (string)json[0];
                    var     temp2   = (string)json[1];

                    var currentcalculation = GetFloat(temp1);
                    var dailycalculation   = GetFloat(temp2);
                    var unit               = GetString(temp1);

                    // "-" is the page's placeholder for "no value".
                    int active   = (string)json[2] == "-" ? 0 : (int)json[2];
                    int inactive = (string)json[3] == "-" ? 0 : (int)json[3];
                    int dead     = 0; // no data for dead

                    UpdateSummary(currentcalculation, dailycalculation, unit, active, inactive, dead, poolid);
                }
                catch (Exception ex)
                {
                    if (Commonflag.flag2 == true)
                    {
                        string error = "Poolid=" + poolid + "  " + ex.ToString();
                        Commonflag.flag2 = false;
                        UpdateErrorLog("poolin", error);
                    }
                }
            }
            finally
            {
                // Quit() closes every window and ends the driver session; a separate Close() is
                // redundant and, if it threw, would prevent Quit() from running.
                driver.Quit();
            }
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Opens the pool page for <paramref name="poolid"/> in Chrome, extracts the summary values using
        /// the rules in <c>Json\\huobi.json</c>, and passes them to <c>UpdateSummary</c>.
        /// </summary>
        /// <param name="poolid">Pool identifier; passed to <c>GetUrl</c> and <c>UpdateSummary</c>.</param>
        /// <remarks>
        /// <c>calculation[0]</c> and <c>calculation[2]</c> supply the current and daily values.
        /// The <c>status</c> text is split on runs of non-digits and the pieces at index 1, 2 and 3 are
        /// taken as the active, inactive and dead counts.
        /// Extraction/parsing failures are swallowed; they are written to the error log only while
        /// <c>Commonflag.flag4</c> is true, and the flag is then cleared.
        /// Failures while reading the config or navigating still propagate to the caller, but the
        /// browser is now always shut down.
        /// </remarks>
        public static void WorkerSummary4(int poolid)
        {
            // Read the configuration before launching the browser so a bad/missing config file
            // cannot leave an orphaned Chrome process behind.
            var    jsonConfig = File.ReadAllText(@"Json\\huobi.json");
            var    config     = StructuredDataConfig.ParseJsonString(jsonConfig);
            string url        = GetUrl(poolid);

            var driver = new ChromeDriver();
            try
            {
                driver.Navigate().GoToUrl(url);

                // Fixed wait for the page's scripts to render the figures.
                Thread.Sleep(5000);
                var source = driver.PageSource;

                try
                {
                    var openScraping    = new StructuredDataExtractor(config);
                    var scrapingResults = openScraping.Extract(source);

                    JObject jObject = JObject.Parse(scrapingResults.ToString());
                    JToken  json    = jObject["calculation"];
                    var     temp1   = (string)json[0];
                    var     temp2   = (string)json[2];

                    var currentcalculation = GetFloat(temp1);
                    var dailycalculation   = GetFloat(temp2);
                    var unit               = GetString(temp1);

                    var statusText = (string)jObject["status"];

                    // Splitting on non-digit runs yields an empty first element when the text starts
                    // with a non-digit, which is why the counts are read from index 1 onwards.
                    // NOTE(review): assumes the status text begins with a label, not a number — confirm.
                    var numbers = Regex.Split(statusText.Trim(), @"\D+");

                    int active   = Int32.Parse(numbers[1]);
                    int inactive = Int32.Parse(numbers[2]);
                    int dead     = Int32.Parse(numbers[3]);

                    UpdateSummary(currentcalculation, dailycalculation, unit, active, inactive, dead, poolid);
                }
                catch (Exception ex)
                {
                    if (Commonflag.flag4 == true)
                    {
                        string error = "Poolid=" + poolid + "  " + ex.ToString();
                        Commonflag.flag4 = false;
                        UpdateErrorLog("huobi", error);
                    }
                }
            }
            finally
            {
                // Quit() closes every window and ends the driver session; a separate Close() is
                // redundant and, if it threw, would prevent Quit() from running.
                driver.Quit();
            }
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Opens the pool page for <paramref name="poolid"/> in Chrome, extracts the worker rows using the
        /// rules in <c>Json\\poolin.json</c>, and stores one <c>Worker</c> record per row via <c>Add</c>.
        /// </summary>
        /// <param name="poolid">Pool identifier; passed to <c>GetUrl</c> and written to each record.</param>
        /// <remarks>
        /// Rows without a <c>currenthash</c> value are skipped. A row whose <c>currenthash</c> is
        /// "0" or "0.00" is stored as inactive; every other row is stored as active.
        /// The original code carried an "already exists -> Update" branch, but the existence query was
        /// commented out and the count hard-wired to 0, so that branch could never run; it has been removed.
        /// </remarks>
        public static void Worker2(int poolid)
        {
            var    jsonConfig = File.ReadAllText(@"Json\\poolin.json");
            var    config     = StructuredDataConfig.ParseJsonString(jsonConfig);
            string url        = GetUrl(poolid);

            string source;
            var    driver = new ChromeDriver();
            try
            {
                driver.Navigate().GoToUrl(url);
                source = driver.PageSource;
            }
            finally
            {
                // Always end the browser session, even when navigation fails, so no Chrome
                // process is left running. Quit() also closes every open window.
                driver.Quit();
            }

            var openScraping    = new StructuredDataExtractor(config);
            var scrapingResults = openScraping.Extract(source);
            var jsonres         = JsonConvert.DeserializeObject(scrapingResults.ToString());

            foreach (var item in (dynamic)jsonres)
            {
                // Rows that carry no hash-rate value are not workers; skip them.
                if (item.currenthash == null)
                {
                    continue;
                }

                // A zero current hash rate marks the worker as inactive.
                bool isActive = true;
                if (item.currenthash == "0" || item.currenthash == "0.00")
                {
                    isActive = false;
                }

                Worker worker = new Worker();
                worker.poolid          = poolid;
                worker.workername      = item.workername;
                worker.currenthashrate = item.currenthash;
                worker.dailyhashrate   = item.dailyhash;
                worker.rejected        = item.rejected;
                worker.updateat        = DateTime.Now;
                worker.isactive        = isActive;
                Add(worker);
            }
        }
Ejemplo n.º 30
0
        /// <summary>
        /// For each selected <c>start_url</c> row, downloads the page, extracts the links named
        /// <c>link</c> using the scraping configuration stored for the page's domain, and queues the
        /// links that belong to that domain through the <c>usp_url_crawl_list_addList</c> procedure.
        /// </summary>
        /// <param name="result1">
        /// Form payload: <c>result</c> holds the IDs of the <c>start_url</c> rows to process;
        /// <c>interval</c> and <c>module</c> are copied onto every queued entry.
        /// </param>
        /// <returns>
        /// A <c>ResultReturn</c> with code 1 and the number of start URLs that produced at least one
        /// queued link, or code -1 with a message when nothing was selected, nothing could be queued,
        /// or an unexpected error occurred.
        /// </returns>
        /// <remarks>
        /// Processing is best-effort per URL: a failure on one URL is recorded to the debug output and
        /// the loop moves on to the next one.
        /// </remarks>
        public ResultReturn pushDataToCategory([FromForm] ResultList result1)
        {
            try
            {
                // Fetch the start_url rows whose IDs were submitted.
                var urls = entities.start_url.Where(m => result1.result.Contains(m.ID)).ToList();

                // ToList() never returns null, so "nothing found" has to be detected by an empty list.
                if (urls.Count == 0)
                {
                    return new ResultReturn("Không tìm thấy bản ghi nào!", -1);
                }

                var countUrl = 0;

                // NOTE(review): MyWebClient looks like a WebClient subclass; if it is disposable it
                // should be wrapped in a using block — confirm against its declaration.
                MyWebClient client = new MyWebClient()
                {
                    Encoding = Encoding.UTF8
                };
                client.Headers[HttpRequestHeader.UserAgent] = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36";

                foreach (var url1 in urls)
                {
                    try
                    {
                        // Only absolute http/https URLs are processed.
                        var  urltmp = url1.url;
                        bool isUrl  = Uri.TryCreate(urltmp, UriKind.Absolute, out Uri baseUri) &&
                                      (baseUri.Scheme == Uri.UriSchemeHttp || baseUri.Scheme == Uri.UriSchemeHttps);
                        if (!isUrl)
                        {
                            continue;
                        }

                        // Strip a leading "www." from the host.
                        var myDomain = baseUri.Authority;
                        myDomain = myDomain.StartsWith("www.") ? myDomain.Substring(4) : myDomain;

                        // Load the configurations (Type == 2) for this domain or any parent domain.
                        var domains = entities.Domain.Where(d => (myDomain == d.Domain1 || myDomain.EndsWith("." + d.Domain1)) &&
                                                            d.Type == 2).ToList();
                        if (domains.Count == 0)
                        {
                            continue;
                        }

                        // Prefer the exact domain match; otherwise fall back to the first parent-domain entry.
                        Domain matchedDomain = domains.FirstOrDefault(d => d.Domain1 == myDomain) ?? domains.FirstOrDefault();

                        var data        = client.DownloadData(urltmp);
                        var contentType = client.ResponseHeaders["Content-Type"];

                        // Only textual responses (e.g. text/html) are scraped; a missing header is skipped too.
                        if (contentType == null || !contentType.StartsWith(@"text/"))
                        {
                            continue;
                        }

                        var          config = StructuredDataConfig.ParseJsonString(matchedDomain.Content);
                        HtmlDocument docc   = new HtmlDocument();
                        docc.LoadHtml(Encoding.UTF8.GetString(data));

                        // Rewrite every relative link in the page as an absolute one.
                        HtmlNodeCollection nodes = docc.DocumentNode.SelectNodes("//a");
                        if (nodes != null)
                        {
                            foreach (HtmlNode node in nodes)
                            {
                                var href = node.Attributes["href"];
                                if (href == null || href.Value == "")
                                {
                                    continue;
                                }

                                try
                                {
                                    href.Value = new Uri(baseUri, href.Value.Trim()).AbsoluteUri;
                                }
                                catch (Exception)
                                {
                                    // A malformed href is left as-is; it will be filtered out below.
                                }
                            }
                        }

                        // Extract the category links from the rewritten HTML using the domain's configuration.
                        var openScraping    = new StructuredDataExtractor(config);
                        var scrapingResults = openScraping.Extract(docc.DocumentNode.InnerHtml);
                        if (scrapingResults.Count == 0)
                        {
                            continue;
                        }

                        var    result = JsonConvert.SerializeObject(scrapingResults, Formatting.Indented);
                        JToken token  = JObject.Parse(result)["link"];
                        if (token == null)
                        {
                            continue;
                        }

                        // "link" is either an array of URLs or a single value.
                        var lsturl = token is JArray
                            ? ((JArray)token).Select(m => m?.ToString()?.Trim()).Where(m => !string.IsNullOrWhiteSpace(m)).Distinct().ToList()
                            : new List<string>() { token.ToString() };

                        // Keep only http(s) links that mention the current domain.
                        var listCrawl = new List<url_crawl_list>();
                        foreach (var valuetmp in lsturl)
                        {
                            if (valuetmp.Contains(myDomain) && valuetmp.StartsWith("http"))
                            {
                                listCrawl.Add(new url_crawl_list
                                {
                                    url           = valuetmp,
                                    status        = 1,
                                    domain        = myDomain,
                                    interval      = result1.interval,
                                    module        = result1.module,
                                    schedule_time = DateTime.Now
                                });
                            }
                        }

                        if (listCrawl.Count > 0)
                        {
                            // Insert the category URLs into url_crawl_list.
                            entities.Database.ExecuteSqlCommand(new RawSqlString("usp_url_crawl_list_addList")
                                                                , UrlCrawlListParameters("@urlCrawlList", listCrawl));

                            // Count the URL only after the insert succeeded, so a failed insert
                            // is not reported as a success.
                            countUrl++;
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best-effort: one failing URL must not abort the remaining ones.
                        System.Diagnostics.Debug.WriteLine(ex);
                    }
                }

                if (countUrl > 0)
                {
                    return new ResultReturn("Thêm thành công " + countUrl + " url !", 1);
                }
                return new ResultReturn("Không thêm được url nào, vui lòng xem lại cấu hình!", -1);
            }
            catch (Exception ex)
            {
                System.Diagnostics.Debug.WriteLine(ex);
                return new ResultReturn("Thêm thất bại!", -1);
            }
        }