コード例 #1
0
        /// <summary>
        /// Downloads the Guitar-World product page at <paramref name="addr"/>, parses it with
        /// HtmlAgilityPack and maps the scraped fields onto a <see cref="CsvLine"/> for the
        /// electric-guitar category.
        /// </summary>
        /// <param name="addr">URL of the product page to scrape.</param>
        /// <returns>
        /// A populated <see cref="CsvLine"/>. Optional specification rows that are missing from the
        /// page fall back to a default ("6" strings, "-" top wood, empty string otherwise).
        /// </returns>
        /// <exception cref="NullReferenceException">
        /// The page lacks the description block, the brand or article label block, or the title
        /// header. These fields are treated as mandatory and are not guarded.
        /// </exception>
        /// <remarks>Exceptions raised by the web request itself are not caught here.</remarks>
        public CsvLine Grub(string addr)
        {
            // Fetches the raw page text. Returns "" when the status is not 200 or there is no body.
            string LoadPage(string url)
            {
                var request = (HttpWebRequest)WebRequest.Create(url);

                // using-blocks release the connection and the reader even if reading throws.
                using (var response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        return "";
                    }

                    using (var receiveStream = response.GetResponseStream())
                    {
                        if (receiveStream == null)
                        {
                            return "";
                        }

                        // NOTE(review): response.CharacterSet is not consulted; the reader uses its
                        // default encoding. Confirm the site always serves UTF-8.
                        using (var reader = new StreamReader(receiveStream))
                        {
                            return reader.ReadToEnd();
                        }
                    }
                }
            }

            var document = new HtmlDocument();
            document.LoadHtml(LoadPage(addr));
            var root = document.DocumentNode;

            // Text of the <p> inside the first <div> that has a <label> equal to the given text.
            // Brand and article number share this markup. Throws when the block is absent.
            string LabeledParagraph(string label)
            {
                var container = root.SelectSingleNode("//div[label='" + label + "']");
                string text   = container.SelectSingleNode("p").InnerText;

                Console.WriteLine(text);
                return text;
            }

            // Text of the <td> in the first <tr> whose <th> equals the label,
            // or null when the row or the cell does not exist.
            string SpecValue(string label)
            {
                var row = root.SelectSingleNode("//tr[th='" + label + "']");

                return row?.SelectSingleNode("td")?.InnerText;
            }

            // Strips every whitespace character and the "руб." currency suffix.
            string CleanPrice(string raw)
            {
                return Regex.Replace(raw, @"\s", "").Replace("руб.", "");
            }

            // Reads the price span. Returns "" when the span is missing.
            string ScrapePrice()
            {
                var priceNode = root.SelectSingleNode("//span[@class=\"product-price\"]");
                var children  = priceNode?.ChildNodes;

                if (children == null)
                {
                    Console.WriteLine("Unable to grub price!");
                    return "";
                }

                Console.WriteLine(children.Count);
                switch (children.Count)
                {
                    case 1:     // single child: plain price, no discount
                        string regular = priceNode.InnerText;
                        Console.WriteLine(regular);
                        return CleanPrice(regular);

                    case 3:     // three children (discount layout): the second child is used as the price
                        string discounted = priceNode.FirstChild.NextSibling.InnerText;
                        Console.WriteLine(discounted);
                        return CleanPrice(discounted);

                    default:
                        // NOTE(review): this sentinel ends up in the CSV price column; kept for
                        // backward compatibility with existing consumers.
                        return "WTF";
                }
            }

            // Absolute URL of the gallery image, or "" when the gallery link is missing.
            string ScrapeImageLink()
            {
                var anchor        = root.SelectSingleNode("//a[@data-fancybox-group=\"gallery\"]");
                var hrefAttribute = anchor?.Attributes["href"];

                if (hrefAttribute == null)
                {
                    Console.WriteLine("Unable to grub img");
                    return "";
                }

                string link = "https://www.guitar-world.ru" + hrefAttribute.Value;
                Console.WriteLine(link);
                return link;
            }

            // Mandatory: description block.
            string description = root.SelectSingleNode("//div[@id='descr']").InnerText;

            string brand = LabeledParagraph("Производитель:");

            // Optional specification rows; each falls back to a default when the row is missing.
            string numberOfStrings = SpecValue("Количество струн") ?? "6";
            string body            = SpecValue("Корпус") ?? "";

            string topWood = SpecValue("Верх корпуса");
            if (topWood == null)
            {
                Console.WriteLine("Spec row \"Верх корпуса\" not found");
                topWood = "-";
            }

            string construction = SpecValue("Конструкция") ?? "";
            string scale        = SpecValue("Мензура") ?? "";
            string neckWood     = SpecValue("Гриф") ?? "";

            // The fretboard row appears under two different labels depending on the page.
            string fretboard = SpecValue("Накладка грифа");
            if (fretboard == null)
            {
                Console.WriteLine("Failed a bit with fretboard node type: spec row \"Накладка грифа\" not found");
                fretboard = SpecValue("Накладка на гриф") ?? "";
            }

            // Mandatory: product title, with leading/trailing whitespace removed.
            string headerText = root.SelectSingleNode("//div[@class=\"header-for-light\"]")
                                    .SelectSingleNode("h1").InnerText;
            Console.WriteLine(headerText);
            string name = Regex.Replace(headerText, @"^\s+|\s+$", "");

            string price = ScrapePrice();
            string model = LabeledParagraph("Артикул:");

            string numberOfFrets = SpecValue("Лады") ?? "";

            // The colour row also has an alternative label.
            string color = SpecValue("Цвет");
            if (color == null)
            {
                Console.WriteLine("Color variant 2");
                color = SpecValue("Цвет корпуса") ?? "";
            }

            string bridge   = SpecValue("Бридж") ?? "";
            string pickups1 = SpecValue("Датчик у грифа") ?? "";
            string pickups2 = SpecValue("Датчик у бриджа") ?? "";
            string controls = SpecValue("Ручки управления") ?? "";

            // Image file name is the article number without spaces and slashes.
            string image = model.Replace(" ", "").Replace("/", "") + ".png";

            Transliterate trans = new Transliterate();

            // The image link is only logged; the image itself is not downloaded here.
            ScrapeImageLink();

            // One attribute per line; attributes_group below carries one group entry per attribute.
            string attributes = string.Join("\n", new[]
            {
                "Количество струн : " + numberOfStrings,
                "Материал корпуса : " + trans.MyDecoding(body),
                "Бренд : " + brand,
                "Материал топа : " + trans.MyDecoding(topWood),
                "Крепление грифа : " + trans.MyDecoding(construction),
                "Мензура : " + trans.MyDecoding(scale),
                "Материал грифа : " + trans.MyDecoding(neckWood),
                "Материал накладки : " + trans.MyDecoding(fretboard),
                "Количество ладов : " + trans.MyDecoding(numberOfFrets),
                "Цвет : " + trans.MyDecoding(color),
                "Бридж : " + trans.MyDecoding(bridge),
                "Звукосниматели : " + trans.MyDecoding(pickups1) + trans.MyDecoding(pickups2),
                "Органы управления : " + HttpUtility.HtmlDecode(trans.MyDecoding(controls)),
                "Прочее : ",
            });

            CsvLine line = new CsvLine();

            line.name             = name;
            line.model            = model;
            line.price            = price;
            line.categories       = "Гитары > Электрогитары"; // electric guitars
            line.quantity         = 2;
            line.manufacturer     = brand;
            line.description      = description;
            line.attributes       = attributes;
            line.attributes_group = @"гитары
гитары
гитары
гитары
гитары
гитары
гитары
гитары
гитары
гитары
гитары
гитары
гитары
гитары";
            line.options          = "";
            line.option_type      = "";
            line.images           = "/catalog/eguitars/" + image;

            return line;
        }
コード例 #2
0
        public CsvLine Grub(string addr)
        {
            string LoadPage(string url)     //HtmlAgilityPack initial page load module
            {
                var result   = "";
                var request  = (HttpWebRequest)WebRequest.Create(url);
                var response = (HttpWebResponse)request.GetResponse();

                Console.WriteLine("Current content link is " + url);
                if (response.StatusCode == HttpStatusCode.OK)
                {
                    var receiveStream = response.GetResponseStream();
                    if (receiveStream != null)
                    {
                        StreamReader readStream;
                        if (response.CharacterSet == null)
                        {
                            readStream = new StreamReader(receiveStream);
                        }
                        else
                        {
                            readStream = new StreamReader(receiveStream);
                        }
                        result = readStream.ReadToEnd();
                        readStream.Close();
                    }
                    response.Close();
                }
                return(result);
            }

            /*static string address(string addr)    //not needed anymore, but I'll leave it here for a while
             * {
             *  string addressTemp = "@" + addr;
             *  return addressTemp.ToString();
             * }
             */
            var pageContent = LoadPage(addr);
            var document    = new HtmlDocument(); //Creating new page to parse

            document.LoadHtml(pageContent);       //Creating new page to parse
            HtmlNode grubId(string id)            //Guitar-World description node
            {
                var grubberId = document.DocumentNode.SelectSingleNode("//div[@id=\"" + id + "\"]");

                return(grubberId);
            }

            string grubContent(string content)  //Guitar-World node selector by content
            {
                var    nodeContent = document.DocumentNode.SelectSingleNode("//tr[td=\"" + content + "\"]");
                var    nodeMed     = nodeContent.LastChild;
                string nodeValue   = nodeMed.InnerText;

                return(nodeValue);
            }

            ////// Full specs table below
            List <string> grubSpecs = new List <string>();    //making list for all specs availible (which are different at every page

            {
                try
                {
                    var TableNode = document.DocumentNode.SelectSingleNode("//table[@class=\"shop_attributes\"]").SelectSingleNode("tbody");
                    var SpecName  = TableNode.SelectNodes("tr/td[@itemprop=\"name\"]");
                    var SpecValue = TableNode.SelectNodes("tr/td[@itemprop=\"value\"]");
                    int i         = 0;
                    foreach (HtmlNode count in SpecName)
                    {
                        grubSpecs.Add(Regex.Replace(SpecName[i].InnerText, @"^\s+|\s+$|\n", "") + " : " + Regex.Replace(SpecValue[i].InnerText, @"^\s+|\s+$|\n", ""));
                        i++;
                    }
                }
                catch
                {
                    Console.WriteLine("No specs!");
                }
            }
            string Specs()  //turning specs list into string
            {
                string SpecsTemp = "";
                int    SpecCount = 0;

                foreach (string spec in grubSpecs)
                {
                    SpecsTemp += spec + "\n";
                    SpecCount++;
                }
                return(SpecsTemp);
            }

            string attr_group()
            {
                int    i    = 0;
                string temp = "";

                foreach (string spec in grubSpecs)
                {
                    temp += "\n Акустические системы";
                }
                return(temp);
            }

            string grubBrand()       //labelBrand = "Производитель:" for Guitar-World
            {
                var nodeBrand = document.DocumentNode.SelectSingleNode("//b[@id=\"prodbrand\"]");

                //var nodePBrand = nodeBrand.SelectSingleNode("p");
                //string textBrand = nodePBrand.InnerText;
                Console.WriteLine(nodeBrand.InnerText);
                return(nodeBrand.InnerText);
            }

            var desc = grubId("tab-fullDescriptionProd");         //var with "description" part
            var feat = grubId("tab-featuresProd");                ///////////

            ///
            string grubPrice()       //itemprop = "price" for Amplifier.ru
            {
                try
                {
                    var nodePrice = document.DocumentNode.SelectSingleNode("//span[@itemprop=\"price\"]");
                    return(nodePrice.InnerText.Replace("\"", "").Replace(" ", ""));
                    //Console.WriteLine(nodeCheck.NextSibling.Name);
                    //Console.WriteLine(nodeCheck.NextSibling.InnerText);

                    /*switch (nodeCheck.Count)
                     * {
                     *  case 1:
                     *      Console.WriteLine(nodePrice);                 //single span (no discount)
                     *      string textPrice = nodePrice.InnerText;
                     *      Console.WriteLine(textPrice);
                     *      string temp = Regex.Replace(                  //removing spaces
                     *          textPrice, @"\s", "");
                     *      return temp.Replace("руб.", "");              //removing letters
                     *
                     *  case 3:                                           //multiple spans (discount)
                     *      var nodeSCheck = nodePrice.FirstChild.NextSibling;
                     *      string textDPrice = nodeSCheck.InnerText;
                     *      Console.WriteLine(textDPrice);
                     *      string tempD = Regex.Replace(                  //removing spaces
                     *          textDPrice, @"\s", "");
                     *      return tempD.Replace("руб.", "");              //removing letters
                     *  default:
                     *      return "WTF";
                     * }*/
                }
                catch
                {
                    Console.WriteLine("Unable to grub price!");
                    return("");
                }
            }

            string grubModel()       //itemprop = "model" for Amplifier
            {
                var nodeModel = document.DocumentNode.SelectSingleNode("//span[@itemprop=\"model\"]");

                //var nodePModel = nodeModel.SelectSingleNode("p");
                //string textModel = nodePModel.InnerText;
                Console.WriteLine(nodeModel.InnerText);
                return(nodeModel.InnerText);
            }

            string grubName()            //h1 id = "prodtitle" for Amplifier
            {
                var nodeName = document.DocumentNode.SelectSingleNode("//h1[@id=\"prodtitle\"]");

                //var nodeH1Name = nodeName.SelectSingleNode("h1");
                //string nodeHName = nodeH1Name.InnerText;
                Console.WriteLine(nodeName.InnerText);
                return(Regex.Replace(              //removing whitespaces
                           nodeName.InnerText, @"^\s+|\s+$", ""));
            }

            string grubImgTemp()
            {
                var    nodeImgAddr = document.DocumentNode.SelectSingleNode("//img[@class=\"image_0\"]");
                string imgLink     = "https://www.amplifier.ru" + nodeImgAddr.Attributes["src"].Value;

                //Console.WriteLine(imgLink);
                return(imgLink);
            }

            ///////////
            string brand = grubBrand();         //Getting value for brand

            string ampMethod()
            {
                try
                {
                    string temp = grubContent("&Ucy;&scy;&icy;&lcy;&icy;&tcy;&iecy;&lcy;&softcy;"); //Getting value for amp
                    return(temp);
                }
                catch
                {
                    return("");
                }
            }

            string amp = ampMethod(); //Assigning value for # of strings variable

            string sensMethod()
            {
                try
                {
                    string temp = grubContent("&CHcy;&ucy;&vcy;&scy;&tcy;&vcy;&icy;&tcy;&iecy;&lcy;&softcy;&ncy;&ocy;&scy;&tcy;&softcy;");    //getting value for body wood
                    return(temp);
                }
                catch
                {
                    return("");
                }
            }

            string sens = sensMethod();    //Assigning value for body wood

            string outputMethod()
            {
                try

                {
                    string temp = grubContent("&Vcy;&khcy;&ocy;&dcy;&ncy;&ycy;&iecy; &rcy;&acy;&zcy;&hardcy;&iecy;&mcy;&ycy;");       //Getting value for outputs
                    return(temp);
                }
                catch
                {
                    return("-");
                }
            }

            string output = outputMethod();   //Assigning value for top wood

            string ohmMethod()
            {
                try
                {
                    string temp = grubContent("&Scy;&ocy;&pcy;&rcy;&ocy;&tcy;&icy;&vcy;&lcy;&iecy;&ncy;&icy;&iecy;");  //Getting value for ohmage
                    return(temp);
                }
                catch
                {
                    try
                    {
                        string temp = grubContent("&Ncy;&ocy;&mcy;&icy;&ncy;&acy;&lcy;&softcy;&ncy;&ocy;&iecy; &scy;&ocy;&pcy;&rcy;&ocy;&tcy;&icy;&vcy;&lcy;&iecy;&ncy;&icy;&iecy;");
                        return(temp);
                    }
                    catch
                    {
                        return("");
                    }
                }
            }

            string ohm = ohmMethod();  //Assigning value for ohmage

            string inputMethod()
            {
                try
                {
                    string temp = grubContent("&Pcy;&ocy;&dcy;&kcy;&lcy;&yucy;&chcy;&iecy;&ncy;&icy;&iecy;");         //Getting value for inputs
                    return(temp);
                }
                catch
                {
                    try
                    {
                        string temp = grubContent("&Vcy;&khcy;&ocy;&dcy;&ycy;");         //Getting value for inputs
                        return(temp);
                    }
                    catch
                    {
                        try
                        {
                            string temp = grubContent("&Vcy;&khcy;&ocy;&dcy;&ncy;&ycy;&iecy; &rcy;&acy;&zcy;&hardcy;&iecy;&mcy;&ycy;");
                            return(temp);
                        }
                        catch
                        {
                            return("");
                        }
                    }
                }
            }

            string input = inputMethod();         //Assigning value for input

            string powerMethod()
            {
                try
                {
                    string temp = grubContent("&Mcy;&ocy;&shchcy;&ncy;&ocy;&scy;&tcy;&softcy;");      //Getting value power
                    return(temp);
                }
                catch
                {
                    try
                    {
                        string temp = grubContent("&Ncy;&ocy;&mcy;&icy;&ncy;&acy;&lcy;&softcy;&ncy;&acy;&yacy; &mcy;&ocy;&shchcy;&ncy;&ocy;&scy;&tcy;&softcy; RMS");
                        return(temp);
                    }
                    catch
                    {
                        return("");
                    }
                }
            }

            string power = powerMethod();      //Assigning value for power

            string responseMethod()
            {
                try
                {
                    string temp = grubContent(">&CHcy;&acy;&scy;&tcy;&ocy;&tcy;&ncy;&ycy;&jcy; &dcy;&icy;&acy;&pcy;&acy;&zcy;&ocy;&ncy;");     //Getting value for response
                    return(temp);
                }
                catch
                {
                    {
                        try
                        {
                            string temp = grubContent("&Ncy;&acy;&kcy;&lcy;&acy;&dcy;&kcy;&acy; &ncy;&acy; &gcy;&rcy;&icy;&fcy;");
                            return(temp);
                        }
                        catch
                        {
                            try
                            {
                                string temp = grubContent("&CHcy;&acy;&scy;&tcy;&ocy;&tcy;&ncy;&ycy;&jcy; &dcy;&icy;&acy;&pcy;&acy;&zcy;&ocy;&ncy; &lpar;-10 &dcy;&Bcy;&rpar;");
                                return(temp);
                            }
                            catch
                            {
                                return("");
                            }
                        }
                    }
                }
            }

            string response = responseMethod(); //Assigning value for response
            ///////
            string name  = grubName();          //Name of system
            string price = grubPrice();         //Price
            string model = grubModel();         //Model

            string splMethod()
            {
                try
                {
                    string temp = grubContent("&Zcy;&vcy;&ucy;&kcy;&ocy;&vcy;&ocy;&iecy; &dcy;&acy;&vcy;&lcy;&iecy;&ncy;&icy;&iecy; SPL"); //Getting value for spl
                    return(temp);
                }
                catch
                {
                    try
                    {
                        string temp = grubContent("&Zcy;&vcy;&ucy;&kcy;&ocy;&vcy;&ocy;&iecy; &dcy;&acy;&vcy;&lcy;&iecy;&ncy;&icy;&iecy; SPL"); //Getting value for spl
                        return(temp);
                    }
                    catch
                    {
                        return("");
                    }
                }
            }

            string spl = splMethod(); //Assigning to numberOfFrets

            // Looks up the dimensions row. Three alternative row labels (written as HTML
            // entity references) are tried in order and the first lookup that does not
            // throw wins. Only the failure of the last label is printed to the console.
            string dimMethod()
            {
                string[] dimLabels =
                {
                    "&Rcy;&acy;&zcy;&mcy;&iecy;&rcy;&ycy;",
                    "&Gcy;&acy;&bcy;&acy;&rcy;&icy;&tcy;&ycy;",
                    "&Gcy;&acy;&bcy;&acy;&rcy;&icy;&tcy;&ycy; &lpar;&Vcy;x&SHcy;x&Gcy;&rpar;"
                };

                for (int attempt = 0; attempt < dimLabels.Length; attempt++)
                {
                    try
                    {
                        return(grubContent(dimLabels[attempt]));
                    }
                    catch (Exception e)
                    {
                        // Earlier labels fail silently; the final one reports why.
                        bool lastAttempt = attempt == dimLabels.Length - 1;
                        if (lastAttempt)
                        {
                            Console.WriteLine(e.Message);
                        }
                    }
                }
                return("");
            }

            string dim = dimMethod();    // dimensions text, "" when no label matched

            // Looks up the weight row (label written as HTML entity references).
            // Returns "" whenever grubContent throws.
            string weightMethod()
            {
                string weightText;

                try
                {
                    weightText = grubContent("&Vcy;&iecy;&scy;");
                }
                catch
                {
                    weightText = "";
                }
                return(weightText);
            }

            string weight = weightMethod();        // weight text, "" when the lookup failed

            // Looks up the first speaker row. The labels (HTML entity references) are
            // tried in order: low-frequency driver, then two generic speaker labels.
            // Returns the first lookup that does not throw, or "" if all of them throw.
            string speaker1Method()
            {
                string[] speakerLabels =
                {
                    "&Ncy;&CHcy; &dcy;&icy;&ncy;&acy;&mcy;&icy;&kcy;",
                    "&Dcy;&icy;&ncy;&acy;&mcy;&icy;&kcy;&icy;",
                    "&Icy;&zcy;&lcy;&ucy;&chcy;&acy;&tcy;&iecy;&lcy;&icy;"
                };

                foreach (string speakerLabel in speakerLabels)
                {
                    try
                    {
                        return(grubContent(speakerLabel));
                    }
                    catch
                    {
                        // This label is not usable on the page; move on to the next one.
                    }
                }
                return("");
            }

            string speaker1 = speaker1Method();       // first speaker text, "" when nothing matched

            // Looks up the second speaker row. The primary label decodes to a
            // high-frequency driver label; the fallback decodes to a low-frequency one.
            // NOTE(review): the low-frequency fallback looks inconsistent with the primary
            // label — confirm it is intended.
            // Returns the first lookup that does not throw, or "" if both throw.
            string speaker2Method()
            {
                string[] speakerLabels =
                {
                    "&Vcy;&CHcy; &dcy;&rcy;&acy;&jcy;&vcy;&iecy;&rcy;",
                    "&Ncy;&CHcy; &icy;&zcy;&lcy;&ucy;&chcy;&acy;&tcy;&iecy;&lcy;&softcy;"
                };

                foreach (string speakerLabel in speakerLabels)
                {
                    try
                    {
                        return(grubContent(speakerLabel));
                    }
                    catch
                    {
                        // This label is not usable on the page; move on to the next one.
                    }
                }
                return("");
            }

            string speaker2 = speaker2Method();     // second speaker text, "" when nothing matched

            // Looks up the control-knobs row (label written as HTML entity references).
            // Returns "" whenever grubContent throws.
            string controlsMethod()
            {
                string controlsText;

                try
                {
                    controlsText = grubContent("&Rcy;&ucy;&chcy;&kcy;&icy; &ucy;&pcy;&rcy;&acy;&vcy;&lcy;&iecy;&ncy;&icy;&yacy;");
                }
                catch
                {
                    controlsText = "";
                }
                return(controlsText);
            }

            string controls = controlsMethod();      // controls text, "" when the lookup failed
                                                     //string misc = grubContent("");          //Getting value for misc (to be changed?)
            string imageTemp = model.Replace(" ", "").Replace("/", "");
            //Regex.Replace(                  //creating temporary image variable and removing spaces
            //              model, @"\s+\/", "");l
            string        image = imageTemp + ".png";
            Transliterate trans = new Transliterate();   //Initializing Transliterate class for further usage

            //Console.WriteLine(trans.MyDecoding(body));   //
            // Best-effort wrapper around grubImgTemp(): returns its result, or prints
            // "Unable to grub img" and returns "" when it throws.
            string imageAddr()
            {
                string imgUrl;

                try
                {
                    imgUrl = grubImgTemp();
                }
                catch
                {
                    Console.WriteLine("Unable to grub img");
                    imgUrl = "";
                }
                return(imgUrl);
            }

            string imageAddress = imageAddr();
            //Console.WriteLine(imageAddress);
            CsvLine ToCsv = new CsvLine();

            ToCsv.name         = name;
            ToCsv.model        = model;
            ToCsv.price        = price.Replace(" ", "").Replace("р.", "");
            ToCsv.categories   = "Звуковое оборудование > Пассивные акустические системы"; //для акустических систем
            ToCsv.quantity     = 2;
            ToCsv.manufacturer = brand;
            if (feat != null)
            {
                ToCsv.description = desc.InnerHtml + "\n<h3>Особенности</h3>\n" + feat.InnerHtml;
            }
            else
            {
                ToCsv.description = desc.InnerHtml;
            }
            if (Specs() != null)
            {
                ToCsv.attributes = trans.MyDecoding(Specs());
            }
            else
            {
                ToCsv.attributes = "";
            }

            /*@"Усилитель : " + trans.MyDecoding(amp) + "\n" +
             *                  "Мощность : " + trans.MyDecoding(power) + "\n" +
             *                  "Динамики : " + trans.MyDecoding(speaker1) + " " + trans.MyDecoding(speaker2) +  "\n" +
             *                  "Бренд : " + brand + "\n" +
             *                  "Частотный диапазон : " + trans.MyDecoding(response) + "\n" +
             *                  "Входы : " + trans.MyDecoding(input) + "\n" +
             *                  "Выходы : " + trans.MyDecoding(output) + "\n" +
             *                  "Сопротивление : " + trans.MyDecoding(ohm) + "\n" +
             *                  "Максимальный УЗД/SPL : " + trans.MyDecoding(spl) + "\n" +
             *                  "Чувствительность : " + trans.MyDecoding(sens) + "\n" +
             *                  "Габариты : " + trans.MyDecoding(dim) + "\n" +
             *                  "Вес : " + trans.MyDecoding(weight) + "\n" +
             *              //    "Бридж : " + trans.MyDecoding(bridge) + "\n" +
             *              //    "Звукосниматели : " + trans.MyDecoding(pickups1) + trans.MyDecoding(pickups2) + "\n" +
             *              //    "Органы управления : " + HttpUtility.HtmlDecode(trans.MyDecoding(controls)) + "\n" +
             *                  "Прочее : ";*/
            ToCsv.attributes_group = attr_group();

            /*@"Акустические системы
            *  Акустические системы
            *  Акустические системы
            *  Акустические системы
            *  Акустические системы
            *  Акустические системы
            *  Акустические системы
            *  Акустические системы
            *  Акустические системы
            *  Акустические системы
            *  Акустические системы
            *  Акустические системы
            *  Акустические системы
            *  Акустические системы";*/
            ToCsv.options     = "";
            ToCsv.option_type = "";
            ToCsv.images      = "/catalog/pls/" + image;
            // Calls Grub(addr), ignores whatever that call returns, and then returns
            // the ToCsv record captured from the enclosing method.
            // NOTE(review): no call to lister is visible in the reviewed part of this
            // method — confirm whether it is still needed.
            CsvLine lister(string addr)
            {
                Grub(addr);
                return(ToCsv);
            }

            return(ToCsv);
        }
コード例 #3
0
        public virtual CsvLine Grub(string addr)
        {
            // Downloads <url> with a blocking request and returns the response body as text.
            // Returns "" when the status is not 200 OK or the response exposes no body stream.
            // Exceptions thrown by the request itself are not caught here.
            string LoadPage(string url)
            {
                var request = (HttpWebRequest)WebRequest.Create(url);

                // The using blocks release the response and the reader on every path,
                // including non-OK statuses and failures while reading.
                using (var response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        return("");
                    }

                    var receiveStream = response.GetResponseStream();
                    if (receiveStream == null)
                    {
                        return("");
                    }

                    // response.CharacterSet never influenced decoding (both former branches
                    // built the reader the same way), so the reader keeps its default encoding.
                    using (var readStream = new StreamReader(receiveStream))
                    {
                        return(readStream.ReadToEnd());
                    }
                }
            }

            string       pageContent = LoadPage(addr);      // raw HTML of the requested page
            HtmlDocument document    = new HtmlDocument();  // parsed form used by every lookup below

            document.LoadHtml(pageContent);

            // Returns the first <div> whose id attribute equals <id>
            // (null when SelectSingleNode finds no match).
            HtmlNode grubId(string id)
            {
                string divQuery = "//div[@id=\"" + id + "\"]";

                return(document.DocumentNode.SelectSingleNode(divQuery));
            }

            ////// Full specs table below
            // "name : value" lines, one per row of the page's specification table
            // (the set of specs differs from page to page).
            List <string> grubSpecs = new List <string>();

            {
                try
                {
                    // SelectSingleNode/SelectNodes return null when nothing matches, so
                    // missing markup is detected with null checks instead of exceptions.
                    var specTable  = document.DocumentNode.SelectSingleNode("//table[@class=\"shop_attributes\"]");
                    var specBody   = specTable?.SelectSingleNode("tbody");
                    var specNames  = specBody?.SelectNodes("tr/td[@itemprop=\"name\"]");
                    var specValues = specBody?.SelectNodes("tr/td[@itemprop=\"value\"]");

                    if (specNames == null || specValues == null)
                    {
                        Console.WriteLine("No specs!");
                    }
                    else
                    {
                        // Pair only the cells both columns provide; indexing past the shorter
                        // column would abort the loop after a partial fill.
                        int pairCount = Math.Min(specNames.Count, specValues.Count);
                        if (specNames.Count != specValues.Count)
                        {
                            Console.WriteLine("Spec name/value count mismatch");
                        }
                        for (int row = 0; row < pairCount; row++)
                        {
                            // Strip leading/trailing whitespace and embedded newlines.
                            string specName  = Regex.Replace(specNames[row].InnerText, @"^\s+|\s+$|\n", "");
                            string specValue = Regex.Replace(specValues[row].InnerText, @"^\s+|\s+$|\n", "");
                            grubSpecs.Add(specName + " : " + specValue);
                        }
                    }
                }
                catch (Exception e)
                {
                    // Stay best-effort: specs are optional, but say why they are missing.
                    Console.WriteLine("No specs!");
                    Console.WriteLine(e.Message);
                }
            }
            // Joins the collected spec lines into one string in which every line,
            // including the last, is followed by "\n".
            // Returns "" (never null) when no specs were collected.
            string Specs()
            {
                if (grubSpecs.Count == 0)
                {
                    return("");
                }

                // string.Join builds the text in one pass instead of re-allocating per line.
                return(string.Join("\n", grubSpecs) + "\n");
            }

            // Builds the attribute-group column: the fixed group name (Russian for
            // "Microphones") prefixed by "\n ", repeated once per collected spec line.
            // Returns "" when there are no specs.
            string attr_group()
            {
                string groups = "";

                // Only the number of specs matters here, not their content.
                for (int n = 0; n < grubSpecs.Count; n++)
                {
                    groups += "\n Микрофоны";
                }
                return(groups);
            }

            // Finds the first table row that has a <td> whose text equals <content> and
            // returns the inner text of that row's last child node.
            // A missing row is not handled here: the null result is dereferenced and throws.
            string grubContent(string content)
            {
                string rowQuery = "//tr[td=\"" + content + "\"]";

                return(document.DocumentNode.SelectSingleNode(rowQuery).LastChild.InnerText);
            }

            // Reads the brand from the first <b id="prodbrand">, echoes it to the console
            // and returns it. A missing element is not handled here and throws.
            string grubBrand()
            {
                string brandText = document.DocumentNode.SelectSingleNode("//b[@id=\"prodbrand\"]").InnerText;

                Console.WriteLine(brandText);
                return(brandText);
            }

            HtmlNode desc = grubId("tab-fullDescriptionProd");    // <div> holding the full description

            // Returns the <div> with id "tab-featuresProd"; any exception raised by the
            // lookup is turned into null.
            HtmlNode featMethod()
            {
                HtmlNode featuresTab;

                try
                {
                    featuresTab = grubId("tab-featuresProd");
                }
                catch
                {
                    featuresTab = null;
                }
                return(featuresTab);
            }

            HtmlNode feat = featMethod();

            // Reads the text of the first <span itemprop="price"> and strips double quotes
            // and spaces from it. On any failure prints "Unable to grub price!" and returns "".
            string grubPrice()
            {
                string priceText;

                try
                {
                    priceText = document.DocumentNode.SelectSingleNode("//span[@itemprop=\"price\"]").InnerText;
                    priceText = priceText.Replace("\"", "").Replace(" ", "");
                }
                catch
                {
                    Console.WriteLine("Unable to grub price!");
                    priceText = "";
                }
                return(priceText);
            }

            // Reads the model from the first <span itemprop="model">, echoes it to the
            // console and returns it unchanged. A missing element is not handled and throws.
            string grubModel()
            {
                string modelText = document.DocumentNode.SelectSingleNode("//span[@itemprop=\"model\"]").InnerText;

                Console.WriteLine(modelText);
                return(modelText);
            }

            // Reads the product title from the first <h1 id="prodtitle">. The raw text is
            // echoed to the console; the returned value has leading and trailing
            // whitespace removed. A missing element is not handled and throws.
            string grubName()
            {
                string rawTitle = document.DocumentNode.SelectSingleNode("//h1[@id=\"prodtitle\"]").InnerText;

                Console.WriteLine(rawTitle);
                return(Regex.Replace(rawTitle, @"^\s+|\s+$", ""));
            }

            // Builds the absolute image URL: the site prefix followed by the src attribute
            // of the first <img class="image_0">. The URL is echoed to the console.
            // A missing element or attribute is not handled here and throws.
            string grubImgTemp()
            {
                HtmlNode imgNode  = document.DocumentNode.SelectSingleNode("//img[@class=\"image_0\"]");
                string   srcValue = imgNode.Attributes["src"].Value;
                string   imgLink  = "https://www.amplifier.ru" + srcValue;

                Console.WriteLine(imgLink);
                return(imgLink);
            }

            ///////////
            string brand = grubBrand();   // manufacturer text

            // Image file name: the model text with whitespace and '/' removed, plus ".png".
            string imageTemp = Regex.Replace(grubModel(), @"\s|\/", "");
            string image     = string.Concat(imageTemp, ".png");

            // Decoder applied to the specs text further down.
            Transliterate trans = new Transliterate();

            //Console.WriteLine(trans.MyDecoding(body));   //
            // Best-effort wrapper around grubImgTemp(): returns its result, or prints
            // "Unable to grub img" and returns "" when it throws.
            string imageAddr()
            {
                string imgUrl;

                try
                {
                    imgUrl = grubImgTemp();
                }
                catch
                {
                    Console.WriteLine("Unable to grub img");
                    imgUrl = "";
                }
                return(imgUrl);
            }

            // The value is not read again in this method; the call still logs the image URL.
            string imageAddress = imageAddr();

            // Assemble the CSV record from the scraped values.
            CsvLine ToCsv = new CsvLine();

            ToCsv.name         = grubName();
            ToCsv.model        = grubModel();
            ToCsv.price        = grubPrice().Replace(" ", "").Replace("р.", "");
            ToCsv.categories   = "Микрофоны"; // category name (Russian for "Microphones")
            ToCsv.quantity     = 2;
            ToCsv.manufacturer = brand;

            // grubId yields null when the description <div> is absent; use an empty
            // description in that case instead of failing on desc.InnerHtml.
            if (desc == null)
            {
                ToCsv.description = "";
            }
            else if (feat != null)
            {
                // Description followed by a "features" heading and the features markup.
                ToCsv.description = desc.InnerHtml + "\n<h3>Особенности</h3>\n" + feat.InnerHtml;
            }
            else
            {
                ToCsv.description = desc.InnerHtml;
            }

            // Specs() always returns a string (possibly empty), so it is evaluated once
            // and needs no null check.
            ToCsv.attributes       = trans.MyDecoding(Specs());
            ToCsv.attributes_group = attr_group();
            ToCsv.options          = "";
            ToCsv.option_type      = "";
            ToCsv.images           = "/catalog/mics/" + image;
            // Re-runs Grub for <addr>, discards what that call returns, and hands back
            // the ToCsv record built by the enclosing call.
            // NOTE(review): nothing in this method calls lister — confirm it is still needed.
            CsvLine lister(string addr)
            {
                _ = Grub(addr);
                return(ToCsv);
            }

            return(ToCsv);
        }
コード例 #4
0
        public virtual string[] Grub()
        {
            string[] temp = new string[9];        // nine result slots, filled at the end of the method

            var pageContent = LoadPage(addr);     // page markup for the configured address
            var document    = new HtmlDocument(); // parsed form used by every lookup below

            document.LoadHtml(pageContent);

            // Returns the first node matched by NodeByIdXPath + id + "\"]"
            // (null when SelectSingleNode finds no match).
            HtmlNode grubId(string id)
            {
                string idQuery = NodeByIdXPath + id + "\"]";

                return(document.DocumentNode.SelectSingleNode(idQuery));
            }

            ////// Full specs table below
            // "name : value" lines collected from the specification table. Three page
            // layouts are tried in turn; each later one runs only when the previous
            // attempt threw. Lines added before a failure stay in the list.
            List <string> grubSpecs = new List <string>();

            {
                try
                {
                    // Layout 1: separate name and value cells under the table's <tbody>.
                    var tableBody  = document.DocumentNode.SelectSingleNode(TableNodeXPath).SelectSingleNode("tbody");
                    var nameCells  = tableBody.SelectNodes(SpecNameNodeXPath);
                    var valueCells = tableBody.SelectNodes(SpecValueNodeXPath);

                    for (int row = 0; row < nameCells.Count; row++)
                    {
                        string cleanName  = Regex.Replace(nameCells[row].InnerText, @"^\s+|\s+$|\n", "");
                        string cleanValue = Regex.Replace(valueCells[row].InnerText, @"^\s+|\s+$|\n", "");
                        grubSpecs.Add(cleanName + " : " + cleanValue);
                    }
                }
                catch
                {
                    try
                    {
                        // Layout 2: only <h5> headings; each becomes a "features" line.
                        var tableBody = document.DocumentNode.SelectSingleNode(TableNodeXPath).SelectSingleNode("tbody");
                        var headings  = tableBody.SelectNodes("tr/td/h5");

                        for (int row = 0; row < headings.Count; row++)
                        {
                            grubSpecs.Add("Особенности : " + Regex.Replace(headings[row].InnerText, @"^\s+|\s+$|\n", ""));
                        }
                    }
                    catch
                    {
                        try
                        {
                            // Layout 3 (labelled "grandpianos" in the original): each matched
                            // node holds "name:value" text and is split on the colon.
                            var tableRoot = document.DocumentNode.SelectSingleNode(TableNodeXPath);
                            var specLines = tableRoot.SelectNodes(SpecNameNodeXPath);

                            for (int row = 0; row < specLines.Count; row++)
                            {
                                string[] parts     = Regex.Split(specLines[row].InnerText, @"\:");
                                string   namePart  = parts[0];
                                string   valuePart = parts[1];
                                grubSpecs.Add(Regex.Replace(namePart, @"^\s+|\s+$|\n", "") + " : " + Regex.Replace(valuePart, @"^\s+|\s+$|\n", ""));
                            }
                        }
                        catch
                        {
                            Console.WriteLine("No specs!");
                        }
                    }
                }
            }
            // Joins the collected spec lines into one string in which every line,
            // including the last, is followed by "\n".
            // Returns "" (never null) when no specs were collected.
            string Specs()
            {
                if (grubSpecs.Count == 0)
                {
                    return("");
                }

                // string.Join builds the text in one pass instead of re-allocating per line.
                return(string.Join("\n", grubSpecs) + "\n");
            }

            // Builds the attribute-group column: "\n " followed by AttrGroup, repeated
            // once per collected spec line. Returns "" when there are no specs.
            string attr_group()
            {
                // Named differently from the method's result array `temp` to avoid shadowing it.
                string groups = "";

                // Only the number of specs matters here, not their content.
                for (int n = 0; n < grubSpecs.Count; n++)
                {
                    groups += "\n " + AttrGroup;
                }
                return(groups);
            }

            // Returns the inner text of the node selected by BrandNodeXPath.
            // When the node is missing (or the lookup throws) a message is printed and ""
            // is returned, so a page without a brand no longer aborts the whole scrape.
            string grubBrand()
            {
                HtmlNode nodeBrand = null;

                try
                {
                    nodeBrand = document.DocumentNode.SelectSingleNode(BrandNodeXPath);
                }
                catch (Exception e)
                {
                    Console.WriteLine(e.Message);
                }

                if (nodeBrand == null)
                {
                    // A second lookup with the same XPath cannot succeed where the first one
                    // found nothing, and dereferencing the missing node while reporting the
                    // failure would throw; report and fall back to "" directly.
                    Console.WriteLine("Too bad, no brand found");
                    return("");
                }
                return(nodeBrand.InnerText);
            }

            // Maps the class attribute of the node at AvailXPath to a stock quantity string.
            //   "label_action success"  -> "2"
            //   "green"                 -> "2" only when the node text is exactly
            //                              "есть в наличии" (in stock), otherwise "0"
            //   anything else           -> "0" (includes "label_action warning",
            //                              "label_action danger", "red" and a missing
            //                              class attribute, reported as "None")
            // The class value is echoed to the console. Any exception prints
            // "Failed to get avail" and yields "0".
            string Avail()
            {
                try
                {
                    HtmlNode availNode  = document.DocumentNode.SelectSingleNode(AvailXPath);
                    string   availClass = availNode.GetAttributeValue("class", "None");

                    Console.WriteLine(availClass);

                    if (availClass == "label_action success")
                    {
                        return("2");
                    }
                    if (availClass == "green")
                    {
                        return(availNode.InnerText == "есть в наличии" ? "2" : "0");
                    }
                    return("0");
                }
                catch
                {
                    Console.WriteLine("Failed to get avail");
                    return("0");
                }
            }

            // Looks up the description node via grubId(DescNodeXPath); any exception
            // raised by the lookup is turned into null. Every call repeats the lookup.
            HtmlNode desc()
            {
                HtmlNode descriptionNode;

                try
                {
                    descriptionNode = grubId(DescNodeXPath);
                }
                catch
                {
                    descriptionNode = null;
                }
                return(descriptionNode);
            }

            // Same as desc(), but for the features node identified by FeaturesNodeXpath.
            HtmlNode featMethod()
            {
                HtmlNode featuresNode;

                try
                {
                    featuresNode = grubId(FeaturesNodeXpath);
                }
                catch
                {
                    featuresNode = null;
                }
                return(featuresNode);
            }

            HtmlNode feat = featMethod();

            // Extracts the price text from the node at PriceNodeXPath. The parsing rule is
            // chosen by SiteAddress. Any exception prints "Unable to grub price!" and
            // yields "".
            string grubPrice()
            {
                bool fromAmplifier   = SiteAddress == "https://www.amplifier.ru";
                bool fromGuitarWorld = !fromAmplifier && SiteAddress == "https://www.guitar-world.ru";

                try
                {
                    var priceNode = document.DocumentNode.SelectSingleNode(PriceNodeXPath);

                    if (fromAmplifier)
                    {
                        // Node text with double quotes and spaces removed.
                        return(priceNode.InnerText.Replace("\"", "").Replace(" ", ""));
                    }

                    if (fromGuitarWorld)
                    {
                        int childCount = priceNode.ChildNodes.Count;

                        if (childCount == 1)
                        {
                            // One child (no discount): whole node text, minus whitespace and "руб.".
                            return(Regex.Replace(priceNode.InnerText, @"\s|руб\.", ""));
                        }
                        if (childCount == 3)
                        {
                            // Three children (discount): the price is read from the second child.
                            return(Regex.Replace(priceNode.FirstChild.NextSibling.InnerText, @"\s|руб\.", ""));
                        }
                        return("");
                    }

                    // Every other site (the original names grandpianos.ru): child nodes are
                    // removed from the document before the node text is read.
                    // NOTE(review): removing all children may leave no text to read —
                    // confirm against the PriceNodeXPath used for this site.
                    if (priceNode.FirstChild != null)
                    {
                        priceNode.RemoveAllChildren();
                    }
                    return(priceNode.InnerText.Replace("\"", "").Replace(" ", ""));
                }
                catch
                {
                    Console.WriteLine("Unable to grub price!");
                    return("");
                }
            }

            // Returns the inner text of the node selected by ModelNodeXPath, unchanged.
            // A missing node is not handled here and throws.
            string grubModel()
            {
                return(document.DocumentNode.SelectSingleNode(ModelNodeXPath).InnerText);
            }

            // Returns the inner text of the node selected by NameNodeXPath with leading
            // and trailing whitespace removed. A missing node is not handled and throws.
            string grubName()
            {
                string rawName = document.DocumentNode.SelectSingleNode(NameNodeXPath).InnerText;

                return(Regex.Replace(rawName, @"^\s+|\s+$", ""));
            }

            ///////////
            string        brand        = grubBrand();                               // manufacturer text
            string        scrapedModel = grubModel();                               // looked up once, reused below
            string        imageTemp    = Regex.Replace(scrapedModel, @"\s|\/", ""); // model without whitespace and '/'
            string        image        = imageTemp + ".png";
            Transliterate trans        = new Transliterate();                       // decoder applied to the specs text

            // Result layout: 0 name, 1 model, 2 price, 3 brand, 4 description HTML,
            // 5 decoded specs, 6 attribute groups, 7 image file name, 8 availability.
            temp[0] = grubName();
            temp[1] = scrapedModel;
            temp[2] = Regex.Replace(grubPrice(), @"\s|р\.", "");
            temp[3] = brand;

            // desc() repeats its document lookup on every call, so query it once.
            HtmlNode descNode = desc();
            if (descNode == null)
            {
                temp[4] = "";
            }
            else if (feat != null)
            {
                // Description followed by a "features" heading and the features markup.
                temp[4] = descNode.InnerHtml + "\n<h3>Особенности</h3>\n" + feat.InnerHtml;
            }
            else
            {
                temp[4] = descNode.InnerHtml;
            }

            // Specs() always returns a string (possibly empty), so it is evaluated once
            // and needs no null check.
            temp[5] = trans.MyDecoding(Specs());
            temp[6] = attr_group();
            temp[7] = image;
            temp[8] = Avail();
            return(temp);
        }