예제 #1
0
        /// <summary>
        /// Fills <paramref name="data"/> from the text nodes found under every
        /// <c>div[@class='fiche_bloc']</c> element of <paramref name="xeFiche"/>.
        /// </summary>
        /// <remarks>
        /// The trimmed text nodes are processed in document order as a small state machine:
        /// a text equal (case-insensitively) to a known label selects the field that the
        /// following text(s) belong to; any other text is stored according to the current state.
        /// Single-valued fields reset the state to <c>unknow</c> after one value, whereas
        /// activities, sectors and address lines keep accumulating until the next label.
        /// The first text that reaches the fallback branch becomes <c>data.name</c>; later
        /// ones are appended to <c>data.unknowInfos</c>.
        /// </remarks>
        /// <param name="data">Record to populate. Its collections are used as-is and are not created here.</param>
        /// <param name="xeFiche">Element under which the <c>fiche_bloc</c> divs are searched.</param>
        private static void GetNewDataFicheBloc(Unea_DetailCompany1 data, XXElement xeFiche)
        {
            bool          firstText = true;
            Unea_TextType textType  = Unea_TextType.unknow;

            foreach (XText xtext in xeFiche.XPathElements(".//div[@class='fiche_bloc']").DescendantTextNodes())
            {
                string text = __trimFunc2(xtext.Value);
                if (text == "")
                {
                    continue;
                }

                // fiche_bloc no 1
                if (text.Equals("Activités", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.activity;
                }
                else if (text.Equals("Région - Département", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.location;
                }
                else if (text.Equals("Filières Métiers UNEA", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.sector;
                }
                // fiche_bloc no 2
                else if (text.Equals("Présentation de l'Entreprise Adaptée", StringComparison.InvariantCultureIgnoreCase))
                {
                    // Section title only: it carries no value of its own.
                    textType = Unea_TextType.unknow;
                }
                else if (text.Equals("Présentation", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.presentation;
                }
                else if (text.Equals("Principaux clients", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.client;
                }
                else if (text.Equals("Dirigeant", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.leader;
                }
                else if (text.Equals("Nombre de salariés", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.employeNumber;
                }
                else if (text.Equals("Chiffre d'affaire de l'année écoulée", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.lastYearRevenue;
                }
                else if (text.Equals("Certification", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.certification;
                }
                else if (text.Equals("Numéro SIRET", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.siret;
                }
                else if (text.Equals("Photos", StringComparison.InvariantCultureIgnoreCase))
                {
                    // The images live in the <p> siblings that follow the paragraph holding the label.
                    foreach (XXElement xeImage in new XXElement(xtext.Parent).XPathElements("ancestor::p/following-sibling::p//img"))
                    {
                        string url = xeImage.XPathValue("@src");
                        if (url == null)
                        {
                            // NOTE(review): XPathValue presumably returns null when the attribute is
                            // missing (confirm); a null key must not reach the dictionary lookup.
                            Trace.CurrentTrace.WriteLine("warning photo without src after \"{0}\"", text);
                            continue;
                        }
                        if (!data.photos.ContainsKey(url))
                        {
                            data.photos.Add(url, null);
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("warning photo already exists \"{0}\"", url);
                        }
                    }
                    // novalues: ignore every following text until the next known label.
                    textType = Unea_TextType.novalues;
                }
                else if (text.Equals("Documents téléchargeables", StringComparison.InvariantCultureIgnoreCase))
                {
                    // Walk the following <p> siblings, stopping at the paragraph that starts the
                    // "Photos" section so its content is not mistaken for documents.
                    foreach (XXElement xeParagraph in new XXElement(xtext.Parent)
                             .XPathElements("ancestor::p/following-sibling::p")
                             .TakeWhile(e => !e.XElement.Value.StartsWith("Photos", StringComparison.InvariantCultureIgnoreCase)))
                    {
                        XXElement xeLink = xeParagraph.XPathElement(".//a", writeError: false);
                        if (xeLink.XElement == null)
                        {
                            // Paragraph without a link: nothing to record.
                            continue;
                        }
                        string url  = xeLink.XPathValue("@href");
                        string name = __trimFunc2(xeLink.XPathValue(".//text()"));
                        if (url == null)
                        {
                            // NOTE(review): same assumption as for photos - a link without href
                            // would otherwise be used as a null dictionary key.
                            Trace.CurrentTrace.WriteLine("warning download document without href \"{0}\"", name);
                            continue;
                        }
                        if (!data.downloadDocuments.ContainsKey(url))
                        {
                            data.downloadDocuments.Add(url, new Unea_Document()
                            {
                                name = name, url = url
                            });
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("warning download document already exists \"{0}\" \"{1}\"", name, url);
                        }
                    }
                    // novalues so that the link texts (e.g. Plaquette_AEA.pdf) do not end up in unknowInfos.
                    textType = Unea_TextType.novalues;
                }
                // fiche_bloc no 3
                else if (text.Equals("Nous contacter", StringComparison.InvariantCultureIgnoreCase))
                {
                    // Section title only: it carries no value of its own.
                    textType = Unea_TextType.unknow;
                }
                else if (text.Equals("Adresse", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.address;
                }
                else if (text.Equals("Téléphone", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.phone;
                }
                else if (text.Equals("Fax", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.fax;
                }
                else if (text.Equals("Adresse e-mail", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.email;
                }
                else if (text.Equals("Site internet", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.webSite;
                }
                else
                {
                    // Not a label: store the text according to the label seen last.
                    switch (textType)
                    {
                    // fiche_bloc no 1
                    case Unea_TextType.activity:
                        if (!data.activities.ContainsKey(text))
                        {
                            data.activities.Add(text, null);
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("warning activity already exists \"{0}\"", text);
                        }
                        break;

                    case Unea_TextType.location:
                        data.location = text;
                        textType      = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.sector:
                        if (!data.sectors.ContainsKey(text))
                        {
                            data.sectors.Add(text, null);
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("warning sector already exists \"{0}\"", text);
                        }
                        break;

                    // fiche_bloc no 2
                    case Unea_TextType.presentation:
                        data.presentation = text;
                        textType          = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.client:
                        data.clients = text;
                        textType     = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.leader:
                        data.leader = text;
                        textType    = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.employeNumber:
                        int employeNumber;
                        if (int.TryParse(text, out employeNumber))
                        {
                            data.employeNumber = employeNumber;
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("error unknow employe number \"{0}\"", text);
                        }
                        textType = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.lastYearRevenue:
                        data.lastYearRevenue = text;
                        textType             = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.certification:
                        data.certification = text;
                        textType           = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.siret:
                        data.siret = text;
                        textType   = Unea_TextType.unknow;
                        break;

                    // fiche_bloc no 3
                    case Unea_TextType.address:
                        // The address may span several text nodes: join them with a space.
                        if (data.address == null)
                        {
                            data.address = text;
                        }
                        else
                        {
                            data.address += " " + text;
                        }
                        break;

                    case Unea_TextType.phone:
                        data.phone = text;
                        textType   = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.fax:
                        data.fax = text;
                        textType = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.email:
                        data.email = text;
                        textType   = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.webSite:
                        data.webSite = text;
                        textType     = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.novalue:
                        // Swallow exactly one text, then go back to the unknown state.
                        // No label in this method selects this state; kept for completeness.
                        textType = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.novalues:
                        // Swallow every text until the next known label.
                        break;

                    default:
                        if (firstText)
                        {
                            // The first unclassified text is taken as the company name.
                            data.name = text;
                            firstText = false;
                        }
                        else
                        {
                            data.unknowInfos.Add(text);
                        }
                        break;
                    }
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Builds one <c>Unea_HeaderCompany</c> per <c>div[@class = 'ctn_result']</c> element
        /// of the loaded document.
        /// </summary>
        /// <remarks>
        /// For each result the two detail urls and the name are read from the
        /// <c>ctn_result-header</c> div, then the texts of the
        /// <c>ctn_result-content clearfix</c> div are processed in order: a text equal
        /// (case-insensitively) to a known label selects the field that the following text
        /// belongs to. Activities accumulate until the next label; the other fields take a
        /// single value and reset the state. Unclassified texts go to <c>unknowInfos</c>.
        /// </remarks>
        /// <returns>The headers found, in document order; an empty array when there is none.</returns>
        protected override Unea_HeaderCompany[] GetData()
        {
            XXElement xeSource = new XXElement(GetXmlDocument().Root);
            string    url      = Url;
            // <div class="ctn_result">
            IEnumerable <XXElement>   xeHeaders = xeSource.XPathElements("//div[@class = 'ctn_result']");
            List <Unea_HeaderCompany> headers   = new List <Unea_HeaderCompany>();

            foreach (XXElement xeHeader in xeHeaders)
            {
                Unea_HeaderCompany header = new Unea_HeaderCompany();
                header.sourceUrl       = url;
                header.loadFromWebDate = DateTime.Now;

                // <div class="ctn_result-header">
                XXElement xe = xeHeader.XPathElement(".//div[@class='ctn_result-header']");

                // <div class="lien"><a href="http://www.unea.fr/union-nationale-entreprises-adaptees/annuaire-unea/71/4583/ACCAA TAKTIM.htm" target="_blank"><strong>></strong> Voir la fiche détaillée</a></div>
                header.urlDetail2 = zurl.GetUrl(url, xe.ExplicitXPathValue(".//div[@class = 'lien']//a/@href"));

                // <iframe src="detail.asp?id=4583" width="420" height="800" frameborder="0" scrolling="auto" marginheight="0" marginwidth="0"></iframe>
                header.urlDetail1 = zurl.GetUrl(url, xe.ExplicitXPathValue(".//iframe/@src"));

                // <h4><a href="http://www.unea.fr/union-nationale-entreprises-adaptees/annuaire-unea/71/4583/ACCAA TAKTIM.htm"  target="_blank">&nbsp;</a><span>|</span> ACCAA TAKTIM</h4>
                // The company name is the last text of the header div.
                header.name = xe.DescendantTexts().Select(__trimFunc2).LastOrDefault();

                // <div class="ctn_result-content clearfix">
                // ...
                // <p>
                // <strong>Activités:</strong> TRAVAUX PAYSAGERS<br>PROPRETE<br>PRESTATION DE SERVICES<br>SOUS TRAITANCE INDUSTRIELLE<br>MECANIQUE<br>AUTOMOBILE<br>METALLURGIE<br />
                // <strong>Région - Département:</strong> Alsace - HAUT RHIN (68)<br />
                // <strong>Téléphone:</strong> 0389570210&nbsp;&nbsp;&nbsp;&nbsp;
                // <strong>Fax:</strong> 0389571761&nbsp;&nbsp;&nbsp;&nbsp;
                // <strong>Adresse e-mail:</strong>
                // <a href="mailto:[email protected]">[email protected]</a>
                // </p>
                // </div>
                Unea_TextType textType = Unea_TextType.unknow;
                foreach (string s in xeHeader.XPathElements(".//div[@class = 'ctn_result-content clearfix']").DescendantTexts().Select(__trimFunc2))
                {
                    // Skip texts that are empty once trimmed: stored as a value they would
                    // consume the pending label (e.g. whitespace between a label and its link)
                    // and push the real value into unknowInfos.
                    // NOTE(review): whether DescendantTexts() can yield such texts is not
                    // visible here; the guard is a no-op if it cannot.
                    if (string.IsNullOrEmpty(s))
                    {
                        continue;
                    }

                    if (s.Equals("Activités", StringComparison.InvariantCultureIgnoreCase))
                    {
                        textType = Unea_TextType.activity;
                    }
                    else if (s.Equals("Région - Département", StringComparison.InvariantCultureIgnoreCase))
                    {
                        textType = Unea_TextType.location;
                    }
                    else if (s.Equals("Téléphone", StringComparison.InvariantCultureIgnoreCase))
                    {
                        textType = Unea_TextType.phone;
                    }
                    else if (s.Equals("Fax", StringComparison.InvariantCultureIgnoreCase))
                    {
                        textType = Unea_TextType.fax;
                    }
                    else if (s.Equals("Adresse e-mail", StringComparison.InvariantCultureIgnoreCase))
                    {
                        textType = Unea_TextType.email;
                    }
                    else
                    {
                        // Not a label: store the text according to the label seen last.
                        switch (textType)
                        {
                        case Unea_TextType.activity:
                            // Duplicate activities are ignored silently.
                            if (!header.activities.ContainsKey(s))
                            {
                                header.activities.Add(s, null);
                            }
                            break;

                        case Unea_TextType.location:
                            header.location = s;
                            textType        = Unea_TextType.unknow;
                            break;

                        case Unea_TextType.phone:
                            header.phone = s;
                            textType     = Unea_TextType.unknow;
                            break;

                        case Unea_TextType.fax:
                            header.fax = s;
                            textType   = Unea_TextType.unknow;
                            break;

                        case Unea_TextType.email:
                            header.email = s;
                            textType     = Unea_TextType.unknow;
                            break;

                        default:
                            header.unknowInfos.Add(s);
                            break;
                        }
                    }
                }

                headers.Add(header);
            }
            return(headers.ToArray());
        }
예제 #3
0
        /// <summary>
        /// Builds a <c>Unea_DetailCompany2</c> from the <c>div[@class='ctn_content-article']</c>
        /// element of the loaded document.
        /// </summary>
        /// <remarks>
        /// The name and presentation come from the first texts outside of any
        /// <c>script</c> or <c>table</c> element. The remaining fields come from the text nodes
        /// of the tables, processed in order: a text equal (case-insensitively) to a known
        /// label selects the field that the following text(s) belong to. Activities, sectors
        /// and address lines accumulate until the next label; the other fields take a single
        /// value and reset the state. Photos are the <c>img</c> elements found under
        /// <c>table//td/a</c>.
        /// </remarks>
        /// <returns>The populated record.</returns>
        protected override Unea_DetailCompany2 GetData()
        {
            XXElement           xeSource = new XXElement(GetXmlDocument().Root);
            Unea_DetailCompany2 data     = new Unea_DetailCompany2();

            data.sourceUrl       = Url;
            data.loadFromWebDate = DateTime.Now;

            // <div class='ctn_content-article'>
            XXElement xeContent = xeSource.XPathElement(".//div[@class='ctn_content-article']");

            // Texts of the article, skipping <script> and <table> elements. The enumerator is
            // disposed as soon as the leading texts have been read.
            using (IEnumerator <string> texts = xeContent.DescendantTexts(node => (!(node is XElement) || (((XElement)node).Name != "script" && ((XElement)node).Name != "table")) ? XNodeFilter.SelectNode : XNodeFilter.SkipNode).Select(__trimFunc2).GetEnumerator())
            {
                // <h1>
                // <img src="http://unea.griotte.biz/BaseDocumentaire/Docs/Public/4017/LOGOAmpouleC.JPG" style='border-width:2px;border-color:#5593C9;' height='60px' />
                // <span>Entreprise Adapt&eacute;e</span><br />
                // ALSACE ENTREPRISE ADAPTEE
                // </h1>
                // The first text is the <span> caption; the name is the second one.
                if (texts.MoveNext() && texts.MoveNext())
                {
                    data.name = texts.Current;
                }

                // <h2>ALSACE ENTREPRISE ADAPTEE est implant&eacute;e sur les sites de Colmar et Mulhouse avec un effectif de 106 salari&eacute;s, avec les activit&eacute;s sous-traitance : assemblage de pi&egrave;ces, cintrage de tuyaux, montage complexe, ainsi qu'une activit&eacute; prestation de service en espaces verts, m&eacute;nage et transport.</h2>
                if (texts.MoveNext())
                {
                    data.presentation = texts.Current;
                }
            }

            Unea_TextType textType = Unea_TextType.unknow;

            foreach (XText xtext in xeContent.XPathElements(".//table").DescendantTextNodes())
            {
                string text = __trimFunc2(xtext.Value);
                if (text == "")
                {
                    continue;
                }
                if (text.Equals("NOS ACTIVITES", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.activity;
                }
                else if (text.Equals("FILIERES METIER UNEA", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.sector;
                }
                else if (text.Equals("DOCUMENTS TÉLÉCHARGEABLES", StringComparison.InvariantCultureIgnoreCase))
                {
                    // The links are in the <ul> siblings that follow the element holding the label.
                    foreach (XXElement xeLink in new XXElement(xtext.Parent).XPathElements("following-sibling::ul//a"))
                    {
                        string url  = xeLink.XPathValue("@href");
                        string name = __trimFunc2(xeLink.XPathValue(".//text()"));
                        if (url == null)
                        {
                            // NOTE(review): XPathValue presumably returns null when the attribute is
                            // missing (confirm); a null key must not reach the dictionary lookup.
                            Trace.CurrentTrace.WriteLine("warning download document without href \"{0}\"", name);
                            continue;
                        }
                        if (!data.downloadDocuments.ContainsKey(url))
                        {
                            data.downloadDocuments.Add(url, new Unea_Document()
                            {
                                name = name, url = url
                            });
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("warning download document already exists \"{0}\" \"{1}\"", name, url);
                        }
                    }
                    // novalues so that the link texts (e.g. Plaquette_AEA.pdf) do not end up in unknowInfos.
                    textType = Unea_TextType.novalues;
                }
                else if (text.Equals("NOUS CONTACTER", StringComparison.InvariantCultureIgnoreCase))
                {
                    // The switch below has no case for novalue, so texts following this title
                    // fall into the default branch (unknowInfos) until the next label.
                    textType = Unea_TextType.novalue;
                }
                else if (text.Equals("ADRESSE", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.address;
                }
                else if (text.Equals("TELEPHONE", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.phone;
                }
                else if (text.Equals("FAX", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.fax;
                }
                else if (text.Equals("EMAIL", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.email;
                }
                else if (text.Equals("SITE", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.webSite;
                }
                else if (text.Equals("QUI SOMMES NOUS", StringComparison.InvariantCultureIgnoreCase))
                {
                    // Same handling as "NOUS CONTACTER" above.
                    textType = Unea_TextType.novalue;
                }
                else if (text.Equals("DIRIGEANT", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.leader;
                }
                else if (text.Equals("NOMBRE DE SALARIÉS", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.employeNumber;
                }
                else if (text.Equals("CHIFFRE D'AFFAIRE DE L'ANNÉE ÉCOULÉE", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.lastYearRevenue;
                }
                else if (text.Equals("NUMÉRO SIRET", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.siret;
                }
                else if (text.Equals("CERTIFICATION", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.certification;
                }
                else if (text.Equals("PRINCIPAUX CLIENTS", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.client;
                }
                else
                {
                    // Not a label: store the text according to the label seen last.
                    switch (textType)
                    {
                    case Unea_TextType.activity:
                        if (!data.activities.ContainsKey(text))
                        {
                            data.activities.Add(text, null);
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("warning activity already exists \"{0}\"", text);
                        }
                        break;

                    case Unea_TextType.sector:
                        if (!data.sectors.ContainsKey(text))
                        {
                            data.sectors.Add(text, null);
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("warning sector already exists \"{0}\"", text);
                        }
                        break;

                    case Unea_TextType.address:
                        // The address may span several text nodes: join them with a space.
                        if (data.address == null)
                        {
                            data.address = text;
                        }
                        else
                        {
                            data.address += " " + text;
                        }
                        break;

                    case Unea_TextType.phone:
                        data.phone = text;
                        textType   = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.fax:
                        data.fax = text;
                        textType = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.email:
                        data.email = text;
                        textType   = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.webSite:
                        data.webSite = text;
                        textType     = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.leader:
                        data.leader = text;
                        textType    = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.employeNumber:
                        int employeNumber;
                        if (int.TryParse(text, out employeNumber))
                        {
                            data.employeNumber = employeNumber;
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("error unknow employe number \"{0}\"", text);
                        }
                        textType = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.lastYearRevenue:
                        // A lone currency sign means that no amount was given.
                        if (text != "€")
                        {
                            data.lastYearRevenue = text;
                        }
                        textType = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.siret:
                        data.siret = text;
                        textType   = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.certification:
                        data.certification = text;
                        textType           = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.client:
                        data.clients = text;
                        textType     = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.novalues:
                        // Swallow every text until the next known label.
                        break;

                    default:
                        data.unknowInfos.Add(text);
                        break;
                    }
                }
            }

            foreach (XXElement xeImage in xeContent.XPathElements(".//table//td/a/img"))
            {
                string url = xeImage.XPathValue("@src");
                if (url == null)
                {
                    // NOTE(review): same assumption as for the document links - an image
                    // without src would otherwise be used as a null dictionary key.
                    Trace.CurrentTrace.WriteLine("warning photo without src in \"{0}\"", data.sourceUrl);
                    continue;
                }
                if (!data.photos.ContainsKey(url))
                {
                    data.photos.Add(url, null);
                }
                else
                {
                    Trace.CurrentTrace.WriteLine("warning photo already exists \"{0}\"", url);
                }
            }

            return(data);
        }