Ejemplo n.º 1
0
        /// <summary>
        /// Reads every file returned by <c>Directory.GetFiles(dataPath)</c>, loads its content as
        /// HTML and converts it with <c>AnimalRecord.HtmlDocumentToAnimalRecord</c>.
        /// </summary>
        /// <param name="dataPath">Directory whose files are read.</param>
        /// <returns>
        /// The records that were converted successfully. Files for which the conversion returns
        /// <c>null</c> are skipped, so the list may be shorter than the number of files.
        /// </returns>
        public static List <AnimalRecord> ParseRecords(string dataPath)
        {
            string[] files = Directory.GetFiles(dataPath);

            List <AnimalRecord> records = new List <AnimalRecord>();

            foreach (string f in files)
            {
                string html;

                // The using block releases the file handle even when ReadToEnd throws;
                // a manual Close() call would be skipped in that case.
                using (StreamReader reader = new StreamReader(f))
                {
                    html = reader.ReadToEnd();
                }

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);

                AnimalRecord r = AnimalRecord.HtmlDocumentToAnimalRecord(doc);
                if (r != null)
                {
                    records.Add(r);
                }
            }
            return(records);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads the named files below <paramref name="dataPath"/>, loads each one as HTML and
        /// converts it with <c>AnimalRecord.HtmlDocumentToAnimalRecord</c>.
        /// </summary>
        /// <param name="dataPath">Directory that contains the files.</param>
        /// <param name="filenames">File names to read, processed in the given order.</param>
        /// <returns>
        /// The records that were converted successfully. Files for which the conversion returns
        /// <c>null</c> are skipped, so the list may be shorter than <paramref name="filenames"/>.
        /// </returns>
        public static List <AnimalRecord> ParseRecords(string dataPath, string[] filenames)
        {
            List <AnimalRecord> records = new List <AnimalRecord>();

            foreach (string name in filenames)
            {
                // Path.Combine inserts the platform's directory separator and does not double it
                // when dataPath already ends with one (a hard-coded "\\" only works on Windows).
                // Note: a rooted file name is used as-is instead of being appended to dataPath.
                string path = Path.Combine(dataPath, name);

                string html;

                // The using block releases the file handle even when ReadToEnd throws.
                using (StreamReader reader = new StreamReader(path))
                {
                    html = reader.ReadToEnd();
                }

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);

                AnimalRecord r = AnimalRecord.HtmlDocumentToAnimalRecord(doc);
                if (r != null)
                {
                    records.Add(r);
                }
            }
            return(records);
        }
        //Parser

        /// <summary>
        /// Builds an <see cref="AnimalRecord"/> from a parsed HTML report by reading the elements
        /// with the fixed "cphWorkArea_*" ids and the page title.
        /// </summary>
        /// <param name="d">Parsed HTML document of one report.</param>
        /// <returns>
        /// The populated record, or <c>null</c> when an expected element is missing or a value
        /// cannot be parsed. On failure <c>errCount</c> is incremented and one line describing
        /// the error is written to the console.
        /// </returns>
        public static AnimalRecord HtmlDocumentToAnimalRecord(HtmlAgilityPack.HtmlDocument d)
        {
            // ReSharper disable UseObjectOrCollectionInitializer
            AnimalRecord r = new AnimalRecord();

            // ReSharper restore UseObjectOrCollectionInitializer

            try
            {
                //Title
                r.ReportTitle = d.DocumentNode.SelectSingleNode("//head/title").InnerText.Trim();

                //Animal Subsection
                // The catch block below prints r.ANum, so it is only populated in the error
                // message for failures that happen after this assignment.
                r.ANum        = d.GetElementbyId("cphWorkArea_lblAnimalNumber").InnerText.Replace("Animal:", "").Trim();
                r.DatePrinted =
                    DateTime.Parse(d.GetElementbyId("cphWorkArea_lblPrintStamp").InnerText.Replace("Printed:", ""));

                //Animal Details Subsection
                r.ARN           = d.GetElementbyId("cphWorkArea_lblARN").InnerText.Trim();
                r.Name          = d.GetElementbyId("cphWorkArea_lblName").InnerText.Trim();
                r.AnimalSpecies = d.GetElementbyId("cphWorkArea_lblSpecies").InnerText.Trim();

                // Comma-separated description: type first, optional subtype second (only taken
                // when there are at least five parts), and color, size, weight as the last three.
                string[] description = d.GetElementbyId("cphWorkArea_lblLine1").InnerText.Split(',');
                r.AnimalType    = description[0].Trim();
                r.AnimalSubType = description.Length >= 5 ? description[1].Trim() : "No Subtype Listed";
                r.Color         = description[description.Length - 3].Trim();
                r.Size          = description[description.Length - 2].Trim();

                // The regex only ever yields digits with an optional '.' decimal point, so the
                // number is parsed with the invariant culture; the current culture could treat
                // '.' as a group separator. -1 marks a weight that could not be read.
                string weightText = Regex.Match(description[description.Length - 1].Trim(), @"\d+\.?\d*").Value;
                double weight;
                bool   weightOk = double.TryParse(weightText,
                                                  System.Globalization.NumberStyles.Float,
                                                  System.Globalization.CultureInfo.InvariantCulture,
                                                  out weight);
                r.WeightInLbs = weightOk ? weight : -1;

                r.AnimalGender          = d.GetElementbyId("cphWorkArea_lblGender").InnerText.Trim();
                r.AgeRangeWhenCollected = ParseAgeRangeWhenCollected(d.GetElementbyId("cphWorkArea_lblAgeGroup").InnerText.Trim());

                var line2 = d.GetElementbyId("cphWorkArea_lblLine2");
                if (line2.ChildNodes.Count > 2) //Normal Case
                {
                    // Child 0 = age text, child 2 = date of birth, child 4 = spayed flag.
                    r.AgeAtReportPrint = line2.ChildNodes[0].InnerText.Trim();
                    r.Spayed           = string.Equals(line2.ChildNodes[4].InnerText.Trim(), "yes",
                                                       StringComparison.OrdinalIgnoreCase);
                    r.DateOfBirth = DateTime.Parse(line2.ChildNodes[2].InnerText.Trim());
                }
                else
                {
                    // Short form: no age / date of birth, the spayed flag is child 1.
                    r.AgeAtReportPrint = "Unknown";
                    r.Spayed           = string.Equals(line2.ChildNodes[1].InnerText.Trim(), "yes",
                                                       StringComparison.OrdinalIgnoreCase);
                    r.DateOfBirth = DateTime.MinValue;
                }

                // Anything other than "none" (case-insensitive) counts as declawed.
                r.Declawed = !string.Equals(d.GetElementbyId("cphWorkArea_lblLine3").ChildNodes[1].InnerText.Trim(),
                                            "none", StringComparison.OrdinalIgnoreCase);
                r.Bitten   = d.GetElementbyId("cphWorkArea_lblLine4").ChildNodes[1].InnerText.Trim();

                // Intake time: the first <td> content that follows the second occurrence of the
                // ward name in the location table's markup.
                var locationTable = d.GetElementbyId("cphWorkArea_dgLocation");
                var aroundWardName = locationTable.InnerHtml.Split(new[] { "APA!! Parvo-Dog" }, StringSplitOptions.None);
                var cells          = aroundWardName[2].Split(new[] { "<td>", "</td>" }, StringSplitOptions.RemoveEmptyEntries);
                r.IntakeDateTime = DateTime.Parse(cells[0]);

                r.OutcomeType = d.GetElementbyId("cphWorkArea_RepeaterOutcome_hlkOutcomeType_0").InnerText;
                r.DiedOfParvo = r.OutcomeType == "Died" &&
                                d.GetElementbyId("cphWorkArea_RepeaterOutcome_txOutcomeSubType_0").InnerText ==
                                "Parvo Virus";

                r.Adopted = r.OutcomeType == "Adoption";

                // Discharge time: taken from child 3 of the node immediately before the first
                // location-table child that mentions the ward. If no child matches, the string
                // stays empty and DateTime.Parse below fails, which rejects the record.
                var timeInWardString = "";
                for (int i = 0; i < locationTable.ChildNodes.Count; i++)
                {
                    if (locationTable.ChildNodes[i].InnerText.Contains("APA!! Parvo-Dog"))
                    {
                        timeInWardString = locationTable.ChildNodes[i - 1].ChildNodes[3].InnerText;
                        break;
                    }
                }

                r.ParvoICUDischargeDateTime = DateTime.Parse(timeInWardString);
                r.TimeInParvoICU            = r.ParvoICUDischargeDateTime.Subtract(r.IntakeDateTime);
            }
            catch (Exception ex)
            {
                // Best-effort parsing: any missing element or malformed value rejects the whole
                // record. The exception type and message are logged so the cause is not lost.
                errCount++;
                Console.WriteLine("Error A#=" + r.ANum + " this is error " + errCount +
                                  " (" + ex.GetType().Name + ": " + ex.Message + ")");
                return(null);
            }

            return(r);
        }