Ejemplo n.º 1
0
        /// <summary>
        /// Fetches the HTML behind <paramref name="URL"/> and returns the words found in its text
        /// content, i.e. the characters located after a '&gt;' and before the next '&lt;'.
        /// </summary>
        /// <remarks>
        /// Line feeds, carriage returns and tabs are treated as spaces. A word ends at a space, at the
        /// start of the next tag, or at the end of the document. Every word is lower-cased with the
        /// invariant culture. Each time the literal text "script" is met, the script flag is set to the
        /// result of <c>IsScript</c>, called with the character that immediately precedes that text
        /// (a space when there is none); no text is collected while the flag is set.
        /// </remarks>
        /// <param name="Creator">TXTCreating object used to obtain the HTML through its TakeHTML method</param>
        /// <param name="URL">URL of the HTML to analyse</param>
        /// <returns>List of lower-cased words, or null when no HTML could be obtained</returns>
        public List <string> WordSearching(TXTCreating Creator, string URL)
        {
            const string ScriptMarker = "script";

            string HTML = Creator.TakeHTML(URL);

            // Callers receive null (not an empty list) when the page could not be obtained.
            if (HTML == null)
            {
                return(null);
            }

            // Normalise every whitespace separator to a plain space so that a single test ends a word.
            HTML = HTML.Replace('\n', ' ').Replace('\r', ' ').Replace('\t', ' ');

            List <string> lstWords = new List <string>();
            bool          takeChr  = false; // true while we are outside of a tag
            bool          scripted = false; // true while IsScript reports that we are inside a script
            string        word     = "";

            for (int x = 0; x < HTML.Length; ++x)
            {
                char currentChr  = HTML[x];
                bool endOfWord   = false;
                int  markerStart = x - ScriptMarker.Length + 1;

                // Does the text ending at the current character spell "script"?
                if (currentChr == 't' && markerStart >= 0 &&
                    string.CompareOrdinal(HTML, markerStart, ScriptMarker, 0, ScriptMarker.Length) == 0)
                {
                    // NOTE(review): presumably '<' opens and '/' closes a script block - confirm against IsScript.
                    char before = markerStart > 0 ? HTML[markerStart - 1] : ' ';
                    scripted = IsScript(before);
                }

                if (currentChr == '>')
                {
                    takeChr = true;
                }
                else if (currentChr == '<' && takeChr)
                {
                    // A tag starts: the text run is over, so the pending word must be committed here.
                    // Otherwise "<li>one</li><li>two</li>" would produce the single word "onetwo".
                    takeChr   = false;
                    endOfWord = true;
                }
                else if (takeChr && !scripted)
                {
                    if (currentChr == ' ')
                    {
                        endOfWord = true;
                    }
                    else
                    {
                        word += currentChr;
                    }
                }

                // Also commit on the very last character so a trailing word is not lost.
                if ((endOfWord || x == HTML.Length - 1) && word != "")
                {
                    // Invariant casing keeps the result independent of the current thread culture.
                    lstWords.Add(word.ToLowerInvariant());
                    word = "";
                }
            }

            return(lstWords);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Fetches the HTML behind <paramref name="URL"/> and returns the value of every <c>href</c>
        /// attribute met between an opening "&lt;a" and the matching "&lt;/a&gt;".
        /// </summary>
        /// <remarks>
        /// The attribute value may be enclosed in double quotes, in single quotes, or be unquoted, and
        /// white space is allowed around the '=' sign. Text that merely starts with "href" (for example
        /// <c>hreflang</c>) is ignored because it is not followed by '='. Values are returned exactly as
        /// written in the document: they are neither decoded nor resolved against the page URL.
        /// </remarks>
        /// <param name="Creator">TXTCreating object used to obtain the HTML through its TakeHTML method</param>
        /// <param name="URL">URL of the HTML to analyse</param>
        /// <returns>List of links in document order, or null when no HTML could be obtained</returns>
        public List <string> Finder(TXTCreating Creator, string URL)
        {
            const string HrefName  = "href";
            const string AnchorEnd = "</a>";

            string HTML = Creator.TakeHTML(URL);

            // Callers receive null (not an empty list) when the page could not be obtained.
            if (HTML == null)
            {
                return(null);
            }

            List <string> lstLinks = new List <string>();
            bool          isLink   = false; // true between "<a" and "</a>"

            for (int x = 0; x < HTML.Length; ++x)
            {
                char current = HTML[x];

                // NOTE(review): "<a" also matches other tags starting with 'a' (<abbr>, <area>, ...);
                // kept as in the original so that the set of scanned regions does not change.
                if (current == 'a' && x > 0 && HTML[x - 1] == '<')
                {
                    isLink = true;
                }
                else if (current == '>' && x >= AnchorEnd.Length - 1 &&
                         string.CompareOrdinal(HTML, x - AnchorEnd.Length + 1, AnchorEnd, 0, AnchorEnd.Length) == 0)
                {
                    isLink = false;
                }

                // Only go further when the text ending at the current character spells "href" inside an anchor.
                if (!isLink || current != 'f' || x < HrefName.Length - 1 ||
                    string.CompareOrdinal(HTML, x - HrefName.Length + 1, HrefName, 0, HrefName.Length) != 0)
                {
                    continue;
                }

                // Expect: optional white space, '=', optional white space, then the value.
                int pos = x + 1;

                while (pos < HTML.Length && char.IsWhiteSpace(HTML[pos]))
                {
                    ++pos;
                }

                if (pos >= HTML.Length || HTML[pos] != '=')
                {
                    continue; // "hreflang", the word "href" in running text, ...
                }

                ++pos;

                while (pos < HTML.Length && char.IsWhiteSpace(HTML[pos]))
                {
                    ++pos;
                }

                if (pos >= HTML.Length)
                {
                    continue;
                }

                char opener = HTML[pos];

                if (opener == '"' || opener == '\'')
                {
                    // Quoted value: everything up to the matching quote, which may be an empty string.
                    int closing = HTML.IndexOf(opener, pos + 1);

                    if (closing >= 0)
                    {
                        lstLinks.Add(HTML.Substring(pos + 1, closing - pos - 1));

                        // Resume right after the closing quote so the value itself is never re-scanned
                        // (a URL containing "href" used to corrupt the captured link).
                        x = closing;
                    }
                }
                else
                {
                    // Unquoted value: runs up to the next white space or the end of the tag.
                    int end = pos;

                    while (end < HTML.Length && !char.IsWhiteSpace(HTML[end]) && HTML[end] != '>')
                    {
                        ++end;
                    }

                    if (end > pos)
                    {
                        lstLinks.Add(HTML.Substring(pos, end - pos));
                        x = end - 1; // the terminating character is examined by the next iteration
                    }
                }
            }

            return(lstLinks);
        }