Esempio n. 1
0
        /// <summary>
        /// Builds a job post from one search-result node. A fresh Id is always assigned;
        /// the remaining fields are filled only when the node contains a link accepted by IsValid.
        /// </summary>
        /// <param name="node">HTML node of a single result; its descendants are queried with XPath.</param>
        public JobPost(HtmlNode node)
        {
            Id = Guid.NewGuid();

            // The first anchor that passes IsValid is treated as the job link.
            var link = HTMLHandler.GetValidLinks(node.SelectNodes(".//a[@href]"), IsValid).FirstOrDefault();
            if (link == null)
            {
                // No usable link: leave every other property at its default.
                return;
            }

            JobName = link.InnerText;
            // The href is appended to the configured "Domain" app setting to form the job URL.
            JobUrl  = new Uri(System.Configuration.ConfigurationManager.AppSettings["Domain"] + link.GetAttributeValue("href", ""));

            // Each XPath query is run once and its result reused, instead of querying
            // the node a second time after the null check. A property is only assigned
            // when its node exists, so missing nodes leave the property untouched.
            HtmlNode companyNode = node.SelectSingleNode(".//span[contains(@class,'company')]//span");
            if (companyNode != null)
            {
                Company = companyNode.InnerText.RemoveAllNextLineCharacters();
            }

            HtmlNode locationNode = node.SelectSingleNode(".//span[contains(@class,'location')]//span");
            if (locationNode != null)
            {
                JobLocation = locationNode.InnerText.RemoveAllNextLineCharacters();
            }

            HtmlNode salaryNode = node.SelectSingleNode(".//td[contains(@class, 'snip')]//nobr");
            if (salaryNode != null)
            {
                Salary = salaryNode.InnerText.RemoveAllNextLineCharacters();
            }

            HtmlNode summaryNode = node.SelectSingleNode(".//span[contains(@class, 'summary')]");
            if (summaryNode != null)
            {
                this.JobDescription = summaryNode.InnerText.RemoveAllNextLineCharacters();
            }

            // The post date is the moment of construction (local time), not a date read from the page.
            JobPostDate = DateTime.Now;
            IsIndeed    = IndeedHandler.IsIndeedInsideLink(JobUrl);
        }
        /// <summary>
        /// Asynchronously writes text to the ".wik" file for an article inside dirPath,
        /// creating the file or overwriting an existing one.
        /// </summary>
        /// <param name="title">Title of the article; after HTMLHandler.ReplaceColons it is used as the file name</param>
        /// <param name="text">Text to be in the file</param>
        /// <returns>A task that completes once the text has been written and flushed</returns>
        public static async Task WriteTextToFile(string title, string text)
        {
            title = HTMLHandler.ReplaceColons(title);
            //Get the path of the file
            string fileName = Path.Combine(dirPath, (title + ".wik"));

            // The method used to be marked async while doing a blocking File.WriteAllText.
            // The stream is opened for asynchronous I/O (last argument) so the awaits
            // below actually yield. FileMode.Create keeps the create-or-overwrite
            // behavior, and StreamWriter's default encoding is UTF-8 without a BOM.
            using (var stream = new FileStream(fileName, FileMode.Create, FileAccess.Write, FileShare.Read, 4096, true))
            using (var writer = new StreamWriter(stream))
            {
                await writer.WriteAsync(text);
                // Flush explicitly so Dispose does not have to flush synchronously.
                await writer.FlushAsync();
            }
        }
        /// <summary>
        /// Asynchronously reads the whole ".wik" file for an article from dirPath.
        /// </summary>
        /// <param name="title">Title of the article file to get text from; passed through HTMLHandler.ReplaceColons to form the file name</param>
        /// <returns>String that contains all the text from the file</returns>
        public static async Task <string> GetHTMLTextFromFile(string title)
        {
            title = HTMLHandler.ReplaceColons(title);
            //Get the path of the file
            string fileName = Path.Combine(dirPath, (title + ".wik"));

            // The method used to be marked async while doing a blocking File.ReadAllText.
            // The stream is opened for asynchronous I/O (last argument) so the read
            // below actually yields. StreamReader defaults to UTF-8 with byte-order-mark
            // detection.
            using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, true))
            using (var reader = new StreamReader(stream))
            {
                return await reader.ReadToEndAsync();
            }
        }
Esempio n. 4
0
    /// <summary>
    /// Crawls UrlToCrawl: turns every result row on the page into a JobPost, saves it
    /// (with its detail when the post is an Indeed one), then processes the entries
    /// of JobLinks. The thread sleeps for sleepingTime between steps.
    /// </summary>
    public void DoTheCrawl()
    {
        var page = HTMLHandler.GetHtml(this.UrlToCrawl.AbsoluteUri);

        // Every div whose class contains both 'result' and 'row' is one job entry.
        HtmlNodeCollection resultRows = page.DocumentNode.SelectNodes("//div[contains(@class,'result') and contains(@class, 'row')]");

        if (resultRows != null)
        {
            foreach (HtmlNode row in resultRows)
            {
                JobPost post = new JobPost(row);
                post.Category = this.JobCategory;
                post.Title    = this.JobTitle;

                // Posts without a name (no valid link was found in the row) are skipped.
                if (!string.IsNullOrEmpty(post.JobName))
                {
                    if (!post.IsIndeed)
                    {
                        //TODO: get outside link and update jobPost
                        // Replace the URL with the final redirect target, then save the post only.
                        string finalUrl = post.JobUrl.AbsoluteUri.GetFinalRedirect();
                        post.JobUrl = new Uri(finalUrl);
                        post.Save();
                    }
                    else
                    {
                        //TODO: get indeed data
                        var      detailPage  = HTMLHandler.GetHtml(post.JobUrl.AbsoluteUri);
                        HtmlNode summaryNode = detailPage.DocumentNode.SelectSingleNode("//span[contains(@id,'job_summary')]");

                        // Post and detail are saved together, and only when a non-empty summary exists.
                        if (summaryNode != null && summaryNode.InnerHtml.Length > 0)
                        {
                            var detail = new JobDetail(post.Id, summaryNode.InnerHtml);
                            post.Save();
                            detail.Save();
                        }
                    }

                    Console.WriteLine("{0} - {1}", post.JobName, post.JobLocation);
                }

                // Pause after every row, whether or not it produced a post.
                Thread.Sleep(sleepingTime);
            }
        }

        // NOTE: JobLinks is not populated in this method; the assignment below is disabled.
        //JobLinks = GetValidLinks(doc.DocumentNode.SelectNodes("//a[@href]"));

        foreach (LinkObject jobLink in JobLinks)
        {
            ProcessLink(jobLink);
            Thread.Sleep(sleepingTime);
        }

        Thread.Sleep(sleepingTime);
    }
Esempio n. 5
0
        /// <summary>
        /// Creates the server: sets up an HttpListener for plain HTTP on the given port
        /// (plus HTTPS on 443 when the configured public address is the quickstream.me
        /// domain) and instantiates the handler table and the built-in handlers.
        /// </summary>
        /// <param name="port">Port used for the plain HTTP prefix.</param>
        public AsyncHTTPServer(ushort port)
        {
            string httpPrefix = "http://+:" + port + "/";

            m_listener = new HttpListener();
            m_listener.Prefixes.Add(httpPrefix);

            /* Add HTTPS listener only on domain public address */
            var  publicAddress  = Config <string> .GetInstance()["PUBLIC_ADDRESS"];
            bool onPublicDomain = publicAddress.Contains("://quickstream.me");
            if (onPublicDomain)
            {
                m_listener.Prefixes.Add("https://+:443/");
            }

            // Handler table starts empty; the error and HTML handlers are created here.
            m_handlers    = new Dictionary <string, IServable>();
            m_404Handler  = new Error404Handler();
            m_500Handler  = new Error500Handler();
            m_HTMLHandler = new HTMLHandler();
        }
        /// <summary>
        /// Downloads the HTML of an article through APIServices and saves it to the
        /// article's ".wik" file inside dirPath, creating or overwriting the file.
        /// </summary>
        /// <param name="title">The title of the article to get the html from; after HTMLHandler.ReplaceColons it is also the name of the file to be saved to</param>
        /// <returns>A task that completes once the HTML has been written and flushed</returns>
        public static async Task SaveHTMLFileToStorage(string title)
        {
            //Call the API service to get the HTML text from wikipedia
            string HTMLText = await APIServices.GetAllHTMLFromWikipediaArticle(title);

            //Get the path to the file where it will be stored
            title = HTMLHandler.ReplaceColons(title);
            string fileName = Path.Combine(dirPath, (title + ".wik"));

            // Write asynchronously instead of the previous blocking File.WriteAllText.
            // The stream is opened for asynchronous I/O (last argument). FileMode.Create
            // keeps the create-or-overwrite behavior, and StreamWriter's default
            // encoding is UTF-8 without a BOM.
            using (var stream = new FileStream(fileName, FileMode.Create, FileAccess.Write, FileShare.Read, 4096, true))
            using (var writer = new StreamWriter(stream))
            {
                await writer.WriteAsync(HTMLText);
                // Flush explicitly so Dispose does not have to flush synchronously.
                await writer.FlushAsync();
            }
        }
Esempio n. 7
0
        ///////////////
        // INITIATOR //
        ///////////////

        #region
        /// <summary>
        /// Entry point of the download process: logs the start, resets the download
        /// state, marks the progress labels orange, runs the HTML dig (and the artist
        /// setup when applicable), cleans up and logs completion. Returns immediately
        /// when the download was cancelled or the download directory fails verification.
        /// </summary>
        public static void beginDownload()//rename this after cleanup
        {
            // Cancellation is checked first; the directory is only verified when not cancelled.
            if (checkifDownloadCancelled() || !verifyDownloadDirectory())
            {
                return;
            }

            // UI updates and log output are queued through BeginInvoke on the form's controls.
            Program.mainForm.lbOutput.BeginInvoke(new Action(() =>
            {
                Logger.logDownload("DOWNLOAD PROCESS COMMENCED");
            }));

            numMaxDownload = 0;

            // setup
            setExistingFiles();
            resetDownloadLinks();
            trimArtistList();

            Program.mainForm.lblDownloadingFile.BeginInvoke(new Action(() =>
            {
                Program.mainForm.lblDownloadingFile.ForeColor = System.Drawing.Color.Orange;
            }));

            Program.mainForm.lbDownloadProgress.BeginInvoke(new Action(() =>
            {
                Program.mainForm.lbDownloadProgress.ForeColor = System.Drawing.Color.Orange;
            }));

            // download
            HTMLHandler.dig();

            // BETA UNSUPPORTED: artist setup is skipped while faIsInBeta is set,
            // and also when downloadAfterEachParse is set.
            if (!HTMLHandler.faIsInBeta && !downloadAfterEachParse)
            {
                artistSetup();
            }

            downloadCompleteCleanup();

            Program.mainForm.lbOutput.BeginInvoke(new Action(() =>
            {
                Logger.logDownload("YOU'RE DONE SON");
            }));
        }