示例#1
0
        /// <summary>
        /// Collects advertisements from the feed at <paramref name="url"/>, registers them in the
        /// database and lists them in the form. All control updates go through <c>Invoke</c>.
        /// </summary>
        /// <remarks>
        /// For a Farpost feed the method creates a working folder under
        /// <c>General.FARPOST_FOLDER</c>, gathers the feed page(s), skips ads whose price and area
        /// match the stored record, stores the remaining ones, creates one sub-folder per ad and
        /// finally renames the working folder to include the ad count and the comment.
        /// For a Cian feed only the collected links are listed.
        /// The database connection is closed and the start button / status label are restored on
        /// every exit path, including exceptions.
        /// </remarks>
        /// <param name="url">Feed address; everything from the first '#' onwards is discarded.</param>
        private void ParsePage(string url)
        {
            // NOTE(review): "dd.MM" order and the 12-hour "hh" specifier are kept exactly as the
            // original wrote them; confirm this is the format the page-info table expects.
            const string pageInfoStampFormat = "yyyy.dd.MM hh:mm:ss";

            Action resetUi = () =>
            {
                startButton.Text    = "Начать";
                startButton.Enabled = true;
                statusLabel.Text    = General.STATUS_DEFAULT;
            };
            Action<string> setStatus = text => Invoke(new Action(() =>
            {
                statusLabel.Text = text;
            }));

            try
            {
                if (!_db.Connect())
                {
                    MessageBox.Show(General.ERROR_DB_CONNECT);
                    return;
                }

                // Cut off '#' and everything after it, if present.
                var fragmentIdx = url.IndexOf('#');
                if (fragmentIdx > -1)
                {
                    url = url.Remove(fragmentIdx);
                }
                Invoke(new Action(() =>
                {
                    textBox1.Text = url;
                }));

                var  lastId   = _db.GetLastID() + 1;
                var  siteType = General.GetSiteType(url);
                uint cnt      = 0;

                if (siteType == SiteType.Farpost)
                {
                    _archiveFlag = url.Contains("status=archive");
                    var arcSuffix  = _archiveFlag ? "_arc" : string.Empty;
                    var singleType = Farpost.GetTypeByLink(url);
                    var dateTime   = DateTime.Now.ToString("yyyyMMdd_HHmmss");
                    var workingDir = General.FARPOST_FOLDER + dateTime + "_" + singleType + arcSuffix + "\\";
                    // Replace on an empty string yields an empty string, so no separate empty check is needed.
                    var comment    = CommentBox.Text.Replace(" ", "_");
                    Directory.CreateDirectory(workingDir);

                    var feedFilePrefix = workingDir + "L_" + dateTime + "_" + arcSuffix + singleType;
                    var saveTo         = feedFilePrefix + ".html";
                    var typeInt        = _db.GetAdvTypeByString(singleType);

                    // A URL that already names a page is fetched as-is; otherwise the page count
                    // is derived from the number of items the feed reports.
                    var pages = 1;
                    if (!(url.Contains("?page=") || url.Contains("&page=")))
                    {
                        pages = GetFeedPageCount(Farpost.GetItemsCount(url));
                    }

                    List <Advertisement> advsData;
                    if (pages == 1)
                    {
                        setStatus("Собираем информацию с ленты...");
                        // A null result is treated as "nothing collected" instead of crashing on .Count.
                        advsData = Farpost.GetDataFromFeed(url, saveTo) ?? new List <Advertisement>();
                        _db.InsertPageInfo(saveTo, typeInt, lastId, lastId + advsData.Count - 1, DateTime.Now.ToString(pageInfoStampFormat));
                    }
                    else
                    {
                        setStatus("Собираем информацию со всех страниц ленты (0/" + pages + ")...");
                        advsData = new List <Advertisement>();
                        for (var page = 1; page <= pages; page++)
                        {
                            if (_stopFlag)
                            {
                                break;
                            }
                            saveTo = feedFilePrefix + "_" + page + ".html";
                            var pageAds = Farpost.GetDataFromFeed(AppendQueryParam(url, "page=" + page), saveTo);
                            if (pageAds != null)
                            {
                                advsData.AddRange(pageAds);
                            }
                            _db.InsertPageInfo(saveTo, typeInt, lastId, lastId + advsData.Count - 1, DateTime.Now.ToString(pageInfoStampFormat));
                            setStatus("Собираем информацию со всех страниц ленты (" + page + "/" + pages + ")...");
                            if (page != pages)
                            {
                                General.SmallDelay();
                            }
                        }
                    }

                    if (_stopFlag)
                    {
                        return;
                    }
                    if (advsData.Count == 0)
                    {
                        MessageBox.Show("Не удалось собрать объявления с указаной страницы.");
                        return;
                    }

                    cnt = 0;
                    Invoke(new Action(() =>
                    {
                        listBox1.Items.Clear();
                    }));
                    setStatus("Обработка данных...");

                    var unchangedAds = new List <Advertisement>();
                    var updatedIds   = new HashSet <int>();
                    var trigger      = checkBox1.Checked;
                    int newId        = lastId;
                    foreach (var candidate in advsData)
                    {
                        if (_stopFlag)
                        {
                            break;
                        }
                        var existingId = _db.GetIdByNumber(candidate.Number);
                        if (existingId != -1) // an ad with this number is already stored
                        {
                            // If price and area are unchanged, the ad is dropped from the insert list.
                            var stored    = _db.GetAdvPdata(existingId);
                            var unchanged = General.GetNumbers(stored.Price) == General.GetNumbers(candidate.Price) &&
                                            General.GetNumbers(stored.Square) == General.GetNumbers(candidate.Square);
                            // NOTE(review): with checkBox1 checked the original also required the stored
                            // town id to be -1 AND 0 at the same time, which can never hold, so in that
                            // mode nothing was ever skipped. That effective behavior is kept; confirm the
                            // intended town condition before changing it.
                            if (!trigger && unchanged)
                            {
                                unchangedAds.Add(candidate);
                                continue;
                            }
                            updatedIds.Add(newId);
                        }

                        _db.InsertRecord();
                        newId++;
                    }
                    foreach (var item in unchangedAds)
                    {
                        advsData.Remove(item);
                    }

                    // Final name of the working folder. The same value is stored with every ad so the
                    // database points at the folder that actually exists after the rename below.
                    // TrimEnd (not Trim) keeps any leading backslashes of the base path intact.
                    var finalDir = workingDir.TrimEnd('\\') + "_" + advsData.Count + "_" + comment;

                    if (advsData.Count > 0)
                    {
                        foreach (var advData in advsData)
                        {
                            if (_stopFlag)
                            {
                                break;
                            }
                            Invoke(new Action(() =>
                            {
                                listBox1.Items.Add(advData.Link);
                                countLabel.Text = "Кол-во объявлений: " + ++cnt;
                            }));

                            // Reset per ad so an ad without a district annotation does not inherit
                            // the district of the previous one.
                            var district = String.Empty;
                            if (advData.Annotations != null)
                            {
                                foreach (string annotation in advData.Annotations)
                                {
                                    if (annotation.Contains("р-н"))
                                    {
                                        district = annotation;
                                        break;
                                    }
                                }
                            }

                            var adDir  = workingDir + lastId + "_" + advData.Number + "_" + DateTime.Now.ToString("yyyyddMM_hhmmss") + arcSuffix;
                            int isNew  = updatedIds.Contains(lastId) ? 0 : 1;
                            int source = _archiveFlag ? 2 : 1;
                            _db.UpdateAdvData(lastId, isNew, source, advData.Number, advData.Link, "www.farpost.ru", finalDir, advData.CurLink, advData.Title, advData.Price, advData.Square, advData.Geo, advData.City, district, advData.Views, typeInt);
                            _db.InsertSecData(lastId);
                            Directory.CreateDirectory(adDir);
                            lastId++;
                        }
                        if (singleType.Contains("КН_") && !(url.Contains("&flatType") || url.Contains("?flatType")))
                        {
                            UpdateFarpostCommercialTypes(url);
                        }
                    }

                    Directory.Move(workingDir, finalDir);
                    _lastSaveToPath = finalDir;
                }
                else if (siteType == SiteType.Cian)
                {
                    var links = Cian.GetLinksFromFeed(url);
                    if (links == null)
                    {
                        MessageBox.Show("Не удалось собрать ссылки на объявления с указаной страницы.");
                        return;
                    }
                    cnt = 0;
                    Invoke(new Action(() =>
                    {
                        listBox1.Items.Clear();
                    }));
                    foreach (var link in links)
                    {
                        if (string.IsNullOrEmpty(link))
                        {
                            continue;
                        }
                        Invoke(new Action(() =>
                        {
                            listBox1.Items.Add(link);
                            countLabel.Text = "Кол-во объявлений: " + ++cnt;
                        }));
                        General.Delay();
                    }
                }
            }
            finally
            {
                // Single cleanup point for every exit path above.
                _db.Close();
                Invoke(resetUi);
            }
        }

        /// <summary>
        /// Re-queries the feed once per commercial sub-type and stores that sub-type for every ad
        /// returned for it.
        /// </summary>
        /// <param name="url">Feed address without a <c>flatType</c> filter.</param>
        private void UpdateFarpostCommercialTypes(string url)
        {
            var commercialTypes = new string[][] {
                new [] { "outlet", "Торговая точка" },
                new [] { "manufacture", "Производство" },
                new [] { "office", "Офис" },
                new [] { "storage", "Склад" },
                new [] { "etc", "Другое" },
            };
            foreach (var commercialType in commercialTypes)
            {
                if (_stopFlag)
                {
                    break;
                }
                var typeUrl = AppendQueryParam(url, "flatType%5B%5D=" + commercialType[0]);
                var pages   = GetFeedPageCount(Farpost.GetItemsCount(typeUrl));

                // A fresh list per sub-type: ads collected for one sub-type must never be
                // relabelled with the next one.
                var typeAds = new List <Advertisement>();
                if (pages != 1)
                {
                    for (var page = 1; page <= pages; page++)
                    {
                        if (_stopFlag)
                        {
                            break;
                        }
                        var pageAds = Farpost.GetDataFromFeed(AppendQueryParam(typeUrl, "page=" + page), null);
                        if (pageAds != null)
                        {
                            typeAds.AddRange(pageAds);
                        }
                        if (page != pages)
                        {
                            General.SmallDelay();
                        }
                    }
                }
                else
                {
                    typeAds = Farpost.GetDataFromFeed(typeUrl, null) ?? typeAds;
                }
                foreach (var typeAd in typeAds)
                {
                    _db.UpdateComType(typeAd.Number, commercialType[1]);
                }
            }
        }

        /// <summary>
        /// Number of feed pages needed for <paramref name="itemsCount"/> items, never less than one.
        /// </summary>
        /// <remarks>
        /// Uses floating-point division so the ceiling is real: an exact multiple of the page size
        /// no longer yields an extra, empty page.
        /// </remarks>
        private static int GetFeedPageCount(double itemsCount)
        {
            // 50 is the divisor the original code used; presumably the feed's page size — TODO confirm.
            const int itemsPerPage = 50;
            return Math.Max(1, (int)Math.Ceiling(itemsCount / itemsPerPage));
        }

        /// <summary>
        /// Appends <paramref name="parameter"/> to <paramref name="url"/> with '&amp;' when the URL
        /// already contains a '?', otherwise with '?'.
        /// </summary>
        private static string AppendQueryParam(string url, string parameter)
        {
            return url + (url.Contains("?") ? "&" : "?") + parameter;
        }
示例#2
0
        /// <summary>
        /// Processes the saved advertisement pages found in the folders listed in <c>textBox2</c>
        /// (separated by ';') and writes the extracted fields to the database.
        /// </summary>
        /// <remarks>
        /// Only sub-folders whose leading numeric id lies between <c>fromBox.Value</c> and
        /// <c>toBox.Value</c> are processed. For each one the saved page is looked up as
        /// <c>&lt;name&gt;.html</c>, <c>&lt;name&gt;_arch.html</c> or <c>&lt;name&gt;_udalen.html</c>
        /// next to the sub-folder. The controls are re-enabled and the database connection is closed
        /// when the method finishes, including when an exception escapes.
        /// </remarks>
        private void ParseAdvs()
        {
            Action enableControls = () =>
            {
                statusLabel.Text      = STATUS_BAR_DEFAULT;
                uploadButton.Enabled  = true;
                exploreButton.Enabled = true;
                parseButton.Enabled   = true;
                exportButton.Enabled  = true;
                textBox2.Enabled      = true;
                fromBox.Enabled       = true;
                toBox.Enabled         = true;
            };

            if (!_db.Connect())
            {
                MessageBox.Show(General.ERROR_DB_CONNECT);
                Invoke(enableControls);
                _db.Close();
                return;
            }

            try
            {
                var  targets = textBox2.Text.Split(';');
                // The range is read once; it is not re-read while the folders are being processed.
                var  fromId  = fromBox.Value;
                var  toId    = toBox.Value;
                var  def     = (int)(toId - fromId) + 1;
                uint i       = 0;

                foreach (var target in targets)
                {
                    if (_stopFlag)
                    {
                        break;
                    }
                    // Skip empty entries (e.g. a trailing ';') and folders that do not exist
                    // instead of letting Directory.GetDirectories throw.
                    if (!Directory.Exists(target))
                    {
                        continue;
                    }

                    var source       = target.Contains("_arc") ? 2 : 1;
                    var pathElements = target.Split('\\');
                    var nameParts    = pathElements[pathElements.Length - 1].Split('_');
                    // The ad type is taken from the 3rd and 4th '_'-separated parts of the folder
                    // name; a folder name with fewer parts simply has no type.
                    var type         = nameParts.Length >= 4 ? nameParts[2] + "_" + nameParts[3] : string.Empty;

                    foreach (var dir in Directory.GetDirectories(target))
                    {
                        if (_stopFlag)
                        {
                            break;
                        }
                        var dirSegments = dir.Split('\\');
                        var singleName  = dirSegments[dirSegments.Length - 1];
                        if (!int.TryParse(singleName.Split('_')[0], out int id) || id < fromId || id > toId)
                        {
                            i++;
                            continue;
                        }

                        // Locate the saved page: plain, archived or removed variant, in that order.
                        string leaf = null;
                        foreach (var suffix in new[] { ".html", "_arch.html", "_udalen.html" })
                        {
                            if (File.Exists(target + "\\" + singleName + suffix))
                            {
                                leaf = singleName + suffix;
                                break;
                            }
                        }
                        if (leaf == null)
                        {
                            i++;
                            continue;
                        }
                        var fileName = target + "\\" + leaf;

                        // The markers are matched against the file name only, so a parent folder
                        // whose path happens to contain "arch" or "udalen" cannot change the status.
                        if (leaf.Contains("udalen"))
                        {
                            _db.UpdateStatus(id, 'u', source);
                            i++;
                            continue;
                        }

                        // Pages of 25600 bytes or less are renamed to the "_udalen" variant and
                        // recorded with status 'u'.
                        if (new FileInfo(fileName).Length <= 25600)
                        {
                            File.Move(fileName, target + "\\" + singleName + "_udalen.html");
                            _db.UpdateStatus(id, 'u', source);
                            i++;
                            continue;
                        }

                        var status = leaf.Contains("arch") ? 'a' : '-';
                        _db.UpdateStatus(id, status, source);

                        var doc = new HtmlAgilityPack.HtmlDocument();
                        doc.LoadHtml(File.ReadAllText(fileName));
                        var restrictions = General.ExtractValueFromDoc(SiteType.Farpost, "restrictions", doc);
                        var agency       = General.ExtractValueFromDoc(SiteType.Farpost, "agency", doc);
                        var rooms        = General.ExtractValueFromDoc(SiteType.Farpost, "rooms", doc);
                        var description  = General.ExtractValueFromDoc(SiteType.Farpost, "description", doc);
                        _db.UpdateAdvData(id, restrictions, string.Empty, agency, rooms, description);

                        // "added" is split on ',' into a time part ("HH:mm") and a date part.
                        var added = General.ExtractValueFromDoc(SiteType.Farpost, "added", doc);
                        if (!String.IsNullOrEmpty(added))
                        {
                            var addedArr = added.Split(',');
                            if (addedArr.Length >= 2)
                            {
                                // Relative dates are resolved against the creation time of the saved page.
                                var savedDateTime = File.GetCreationTime(fileName);
                                var hour          = 0;
                                var minute        = 0;
                                var dateTime      = DateTime.MinValue;

                                var timeArr = addedArr[0].Trim().Split(':');
                                if (timeArr.Length >= 2)
                                {
                                    int.TryParse(timeArr[0], out hour);
                                    int.TryParse(timeArr[1], out minute);
                                }

                                var date = addedArr[1].Trim().ToLower();
                                if (date == "сегодня")
                                {
                                    dateTime = new DateTime(savedDateTime.Year, savedDateTime.Month, savedDateTime.Day, hour, minute, 0);
                                    date     = "Сегодня";
                                }
                                else if (date == "вчера")
                                {
                                    // DateTime is immutable: AddDays returns the shifted value,
                                    // so it has to be assigned back.
                                    dateTime = new DateTime(savedDateTime.Year, savedDateTime.Month, savedDateTime.Day, hour, minute, 0).AddDays(-1);
                                    date     = "Вчера";
                                }
                                else
                                {
                                    var format = date.Split(' ').Length == 2 ? "d MMMM" : "d MMMM yyyy";
                                    DateTime.TryParseExact(date, format, new CultureInfo("ru-RU"), DateTimeStyles.None, out dateTime);
                                }
                                _db.UpdateDate(id, dateTime, date);
                            }
                        }

                        var username = General.ExtractValueFromDoc(SiteType.Farpost, "username", doc);
                        var phone    = General.ExtractValueFromDoc(SiteType.Farpost, "phone", doc);
                        var email    = General.ExtractValueFromDoc(SiteType.Farpost, "email", doc);
                        var advsStr  = General.GetNumbers(General.ExtractValueFromDoc(SiteType.Farpost, "advs", doc));
                        int.TryParse(advsStr, out int advs);
                        _db.UpdateUserInfo(id, username, phone, email, advs);

                        var address  = General.ExtractValueFromDoc(SiteType.Farpost, "address", doc);
                        var cadastre = General.ExtractValueFromDoc(SiteType.Farpost, "cadastreNumber", doc);
                        var city     = General.ExtractValueFromDoc(SiteType.Farpost, "city", doc);
                        _db.UpdateAddr(id, address, string.Empty, string.Empty, string.Empty, string.Empty, cadastre, city);

                        // The ad number is the second '_'-separated part of the saved file name.
                        var advNumber = leaf.Split('_')[1];

                        var knownId = _db.GetIdByNumber(advNumber);
                        if (knownId != -1) // an ad with this number is already stored
                        {
                            var known   = _db.GetAdvPdata(knownId);
                            var current = _db.GetAdvPdata(id);
                            if (General.GetNumbers(known.Price) != General.GetNumbers(current.Price) ||
                                General.GetNumbers(known.Square) != General.GetNumbers(current.Square))
                            {
                                _db.UpdateIsNew(id, 0);
                            }
                        }
                        else
                        {
                            _db.UpdateIsNew(id, 1);
                        }

                        if (type == "КН_ар")
                        {
                            var url     = _db.GetUrlById(id);
                            var comType = "n/a";
                            if (url.Contains("/market/"))
                            {
                                comType = "Торговая точка";
                            }
                            else if (url.Contains("/terminal/"))
                            {
                                comType = "Складское помещение";
                            }
                            else if (url.Contains("/workroom/"))
                            {
                                comType = "Производственное помещение";
                            }
                            else if (url.Contains("/office/"))
                            {
                                // NOTE(review): the literal is misspelled ("помещенине"); left as-is
                                // because rows already stored may carry this exact value.
                                comType = "Офисное помещенине";
                            }
                            _db.UpdateComTypeSec(advNumber, comType);
                            // The original set this directly inside an empty catch; going through
                            // Invoke performs the same change on the thread that owns the control.
                            // NOTE(review): confirm disabling "stop" here is intended.
                            Invoke(new Action(() =>
                            {
                                stopButton.Enabled = false;
                            }));
                        }
                        else if (type == "КН_пр" && !string.IsNullOrEmpty(rooms))
                        {
                            _db.UpdateComTypeSec(advNumber, char.ToUpper(rooms[0]) + rooms.Substring(1));
                        }

                        var progress = "Обработка объявления #" + id + " [" + i + "/" + def + "]";
                        Invoke(new Action(() =>
                        {
                            statusLabel.Text = progress;
                        }));
                        i++;
                    }
                }
            }
            finally
            {
                Invoke(new Action(() =>
                {
                    enableControls();
                    stopButton.Enabled = false;
                }));
                _db.Close();
            }
        }