Example #1
0
        /// <summary>
        /// Lazily reads poll protocols from the underlying record reader, producing one protocol per record.
        /// </summary>
        /// <param name="regionResolver">Used to look up (or create) the region named in each record.</param>
        /// <returns>A deferred sequence; records are consumed from the reader as the sequence is enumerated.</returns>
        /// <exception cref="InvalidDataException">
        /// The commission cell is not a valid Int32, or a non-blank image cell is not an absolute URI.
        /// </exception>
        public IEnumerable<PollProtocol> LoadProtocols(IRegionResolver regionResolver)
        {
            while (Reader.ReadNextRecord())
            {
                // The region is resolved before the commission number is validated.
                Region region = regionResolver.GetOrCreate(Reader[ColumnIndexRegion]);

                string commissionText = Reader[ColumnIndexComission];
                Int32 commissionNumber;
                if (!Int32.TryParse(commissionText, out commissionNumber))
                {
                    throw new InvalidDataException("Can't parse as Int32: " + commissionText + ". Row #" + Reader.CurrentRecordIndex);
                }

                // Result cells occupy consecutive columns starting at FirstResultColumnIndex.
                var votes = new int[ResultColumnsCount];
                for (int slot = 0; slot < votes.Length; slot++)
                {
                    votes[slot] = Int32.Parse(Reader[FirstResultColumnIndex + slot]);
                }

                // Image URIs are optional: the list stays null when no image column is configured.
                List<PollProtocolImage> images = null;
                if (FirstImageUriColumnIndex >= 0)
                {
                    images = new List<PollProtocolImage>();
                    int column = FirstImageUriColumnIndex;
                    while (column < Reader.FieldCount)
                    {
                        string cell = Reader[column];
                        column++;

                        // The first blank cell ends the run of image columns.
                        if (string.IsNullOrWhiteSpace(cell))
                        {
                            break;
                        }

                        Uri parsed;
                        if (!Uri.TryCreate(cell, UriKind.Absolute, out parsed))
                        {
                            throw new InvalidDataException("Can't parse as Uri: " + cell + ". Row #" + Reader.CurrentRecordIndex);
                        }

                        images.Add(new PollProtocolImage { Uri = parsed.ToString() });
                    }
                }

                yield return new PollProtocol
                {
                    Region = region,
                    Comission = commissionNumber,
                    Results = votes,
                    Images = images
                };
            }
        }
        /// <summary>
        /// Lazily extracts poll protocols from the HTML held in <c>m_pageContent</c>.
        /// </summary>
        /// <remarks>
        /// Every <c>h3[@class='uik']</c> heading is treated as one protocol. A heading whose data cannot be
        /// located or parsed is reported through <c>m_logger</c> and skipped; it does not stop the enumeration.
        /// </remarks>
        /// <param name="regionResolver">Used to look up (or create) the region named by the <c>h2[@class='oblast']</c> heading.</param>
        /// <returns>A deferred sequence of the protocols that were parsed successfully.</returns>
        public IEnumerable<PollProtocol> LoadProtocols(IRegionResolver regionResolver)
        {
            // The tr[@class='obs'] row holds one leading cell (skipped) followed by the vote counts.
            const int resultsCount = 25;
            const int expectedCellCount = resultsCount + 1;

            var htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(m_pageContent);

            var reUikNumber = new Regex(@"\d+", RegexOptions.Compiled | RegexOptions.Multiline);

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so a page without any commission headings must be handled explicitly.
            var uikHeads = htmlDoc.DocumentNode.SelectNodes("//h3[@class='uik']");
            if (uikHeads == null)
            {
                m_logger.LogWarning("Can't find any comission headers in the html (h3[@class='uik'])");
                yield break;
            }

            foreach (HtmlNode headUik in uikHeads)
            {
                // NOTE(review): without a positional predicate this may pick the first h2 in document
                // order rather than the nearest preceding one - confirm on pages with several regions.
                var regionNode = headUik.SelectSingleNode("preceding-sibling::h2[@class='oblast']");
                var uikText = headUik.InnerText;
                if (regionNode == null)
                {
                    m_logger.LogError("Can't find region node in the html");
                    continue;
                }

                var match = reUikNumber.Match(uikText);
                if (!match.Success)
                {
                    m_logger.LogError("Can't parse comission number: " + uikText);
                    continue;
                }
                int comissionNum;
                if (!Int32.TryParse(match.Value, out comissionNum))
                {
                    m_logger.LogError("Can't parse parse comission number as integer: " + match.Value);
                    continue;
                }

                string regionName = regionNode.InnerText;
                Region region = regionResolver.GetOrCreate(regionName);

                // Results
                var tableResults = headUik.SelectSingleNode("following-sibling::table[@class='observers_data']");
                if (tableResults == null)
                {
                    m_logger.LogError(String.Format("{0}/{1}: Can't find table with protocols results (table[@class='observers_data'])", regionName, comissionNum));
                    continue;
                }
                var trAltResults = tableResults.SelectSingleNode("descendant::tr[@class='obs']");
                if (trAltResults == null)
                {
                    m_logger.LogError(String.Format("{0}/{1}: Can't find table row with alternative protocol results (tr[@class='obs'])", regionName, comissionNum));
                    continue;
                }

                // A row without any td cells makes SelectNodes return null; treat it as zero cells.
                var tdResults = trAltResults.SelectNodes("td");
                int cellCount = tdResults == null ? 0 : tdResults.Count;
                if (cellCount != expectedCellCount)
                {
                    m_logger.LogError(String.Format("{0}/{1}: Number of columns in row with protocol result doesn't equal expected value (26), but equals to {2}", regionName, comissionNum, cellCount));
                    continue;
                }

                var results = new int[resultsCount];
                bool votesParsed = true;
                for (int i = 1; i < expectedCellCount; i++)
                {
                    string votesRaw = tdResults[i].InnerText;
                    int votes;
                    if (!Int32.TryParse(votesRaw, out votes))
                    {
                        // Report the text that failed to parse, not the number of cells.
                        m_logger.LogError(String.Format("{0}/{1}: Can't parse votes count as integer: {2}", regionName, comissionNum, votesRaw));
                        votesParsed = false;
                        break;
                    }
                    results[i - 1] = votes;
                }
                if (!votesParsed)
                    continue;

                // Images:
                var imagesNode = headUik.SelectSingleNode("following-sibling::div[@class='photo_names']");
                if (imagesNode == null)
                {
                    m_logger.LogError(String.Format("{0}/{1}: Can't find div with protocol images (div[@class='photo_names'])", regionName, comissionNum));
                    continue;
                }
                var images = new List<PollProtocolImage>();
                var imageAnchorNodes = imagesNode.SelectNodes("descendant::a");
                if (imageAnchorNodes == null || imageAnchorNodes.Count == 0)
                {
                    // Missing images are only a warning: the protocol is still returned with an empty list.
                    m_logger.LogWarning(String.Format("{0}/{1}: Can't find hyperlinks to protocol images (a)", regionName, comissionNum));
                }
                else
                {
                    foreach (HtmlNode anchorNode in imageAnchorNodes)
                    {
                        string uri = anchorNode.GetAttributeValue("href", "");
                        // remove uri parameters (everything after "?"); the char overload searches ordinally
                        var idx = uri.IndexOf('?');
                        if (idx > -1)
                            uri = uri.Substring(0, idx);

                        // Image bytes are attached only when the uri is present in m_imageFilesMap.
                        string filePath;
                        byte[] imageBytes = null;
                        if (m_imageFilesMap.TryGetValue(uri, out filePath))
                            imageBytes = File.ReadAllBytes(filePath);

                        images.Add(new PollProtocolImage
                        {
                            Uri = uri,
                            Image = imageBytes
                        });
                    }
                }

                yield return new PollProtocol
                {
                    Region = region,
                    Comission = comissionNum,
                    Images = images,
                    Results = results
                };
            }
        }