예제 #1
0
        /// <summary>
        /// Builds a <c>Company</c> from an HTML fragment by reading values out of its "tr" tag.
        /// </summary>
        /// <param name="input">HTML text handed to the <c>HTMLSearchResult</c> constructor.</param>
        /// <returns>
        /// A <c>Company</c> constructed from the trimmed "td" text selected with argument 2,
        /// with <c>Name</c> and <c>PaperCode</c> filled in.
        /// </returns>
        private Company CompanyNames(string input)
        {
            HTMLSearchResult page = new HTMLSearchResult(input);

            // Paper code: the value cut out after "id=" (delimited by '"') in the "tr" tag's attribute text.
            string rowAttributes = page.GetTagData("tr").TAGAttribute;
            string code = HtmlHelper.CutOffValueFromQuotas(rowAttributes, "id=", '"');

            // Name: content of the "a" tag found inside the row's "td"
            // (presumably the first "td" -- the one-argument overload is not visible here).
            string companyName = page.GetTagData("tr").GetTagData("td").GetTagData("a").TAGData;

            // Identifier: trimmed content of the "td" requested with argument 2.
            string companyId = page.GetTagData("tr").GetTagData("td", 2).TAGData.Trim();

            return new Company(companyId)
            {
                Name = companyName,
                PaperCode = code
            };
        }
예제 #2
0
 /// <summary>
 /// Wraps the given text in a freshly created <c>HTMLSearchResult</c>.
 /// </summary>
 /// <param name="str">Text stored in the new object's <c>sSearchResult</c> member.</param>
 /// <returns>The new <c>HTMLSearchResult</c> instance.</returns>
 private HTMLSearchResult Result(string str)
 {
     return new HTMLSearchResult { sSearchResult = str };
 }
예제 #3
0
        /// <summary>
        /// Extracts the content of the Nth occurrence of the specified tag from the given HTML text.
        /// </summary>
        /// <remarks>
        /// The text is scanned left to right. Once a start tag matching <paramref name="sSearchTag"/>
        /// is seen, nested tags are tracked on <c>TAGStack</c> so that only the end tag at nesting
        /// level 0 closes the occurrence. "script" elements are skipped as a whole, and
        /// input/link/br/meta/img are never pushed because they normally have no end tag.
        /// Any exception raised while scanning is written to the console and an empty result is
        /// returned (best-effort behavior kept from the original implementation).
        /// </remarks>
        /// <param name="sFileData">The HTML text to search.</param>
        /// <param name="sSearchTag">Name of the tag to search for; compared case-insensitively.</param>
        /// <param name="nOccurance">1-based index of the occurrence to return.</param>
        /// <returns>
        /// An <c>HTMLSearchResult</c> whose <c>sSearchResult</c> is the text from one character past the
        /// position where <c>ReadTillEndOfTag</c> stopped on the matching start tag up to the character
        /// before the matching end tag, or an empty string when the occurrence was not found or
        /// scanning failed. <c>sTAGAttribute</c> is the value last produced by <c>ReadTillEndOfTag</c>
        /// while looking for a start tag.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="sFileData"/> or <paramref name="sSearchTag"/> is null or empty.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="nOccurance"/> is less than 1.
        /// </exception>
        public HTMLSearchResult GetTagData(string sFileData, string sSearchTag, int nOccurance)
        {
            #region General_Variables_&_Validation
            string sTAGData = "";
            int nStartPos = -1;
            int nEndPos = -1;

            TAGStack = new Stack<string>();
            int nFoundOccurance = 0; // number of completed occurrences of the search tag seen so far
            int nLevel = 0;          // nesting depth inside the current occurrence; mirrors TAGStack.Count
            bool bFound = false;     // true while we are inside an occurrence of the search tag

            // Check for null before touching Length so callers get an argument exception
            // instead of a NullReferenceException. ArgumentNullException is kept for the
            // empty-string case so existing catch blocks continue to work.
            if (sFileData == null || sFileData.Length < 1)
            {
                throw new ArgumentNullException("sFileData", "File Data cannot be null or blank string");
            }
            if (sSearchTag == null || sSearchTag.Length < 1)
            {
                throw new ArgumentNullException("sSearchTag", "Search Tag cannot be null or blank string");
            }
            if (nOccurance < 1)
            {
                throw new ArgumentOutOfRangeException("nOccurance", "The occurance number cannot be less than one.");
            }
            //--------------------START THE SEARCH-----------------
            #endregion
            try
            {
                // Positions past this limit are too close to the end for the search tag to fit.
                int nLen = sFileData.Length - sSearchTag.Length + 1;
                for (int i = 0; i < nLen; i++)
                {
                    // Only '<' followed by at least one more character can start a tag.
                    // The bounds check matters when the search tag is one character long
                    // and the text ends with '<'.
                    if (sFileData[i] != '<' || i + 1 >= sFileData.Length)
                    {
                        continue;
                    }

                    char cNext = sFileData[i + 1];
                    if (cNext == '!')
                    {
                        // "<!" sequences are not treated as tags; scanning resumes at the next character.
                        continue;
                    }
                    bool bIsEndTag = (cNext == '/');

                    if (bFound == false)
                    {
                        if (!bIsEndTag) // found some start tag
                        {
                            i++;
                            sTAGAttribute = ""; // class member, returned with the result
                            string sTAGName = ReadTillEndOfTag(sFileData, ref i, out sTAGAttribute);
                            if (string.Compare(sSearchTag, sTAGName, true) == 0)
                            {
                                ShowPos(sSearchTag, nLevel, true);
                                bFound = true;
                                // Assumes ReadTillEndOfTag leaves i on the closing '>' of the start tag.
                                nStartPos = i + 1;
                            }
                        }
                        else // end tag found while outside any occurrence
                        {
                            string sEndTAG = "";
                            i += 2;
                            while (sFileData[i] != '>')
                            {
                                sEndTAG += sFileData[i++];
                            }

                            if (String.Compare(sEndTAG, sSearchTag, true) == 0 && nLevel == 0)
                            {
                                // Caught by the handler below: the message is printed and an empty result returned.
                                throw (new Exception("The Start tag was not found, however its end tag was found"));
                            }
                        }
                    }
                    else
                    {
                        if (!bIsEndTag) // nested start tag inside the current occurrence
                        {
                            i++;
                            string sTagAttribute;
                            string sTAGName = ReadTillEndOfTag(sFileData, ref i, out sTagAttribute);
                            if (String.Compare(sTAGName, "script", true) == 0)
                            {
                                // Skip the whole script body so markup-like text inside it is not parsed.
                                int nScriptEnd = sFileData.IndexOf("</script>", i + 1, StringComparison.OrdinalIgnoreCase);
                                if (nScriptEnd < 0)
                                {
                                    // Unterminated script: nothing more can be matched reliably.
                                    break;
                                }
                                // Stop on the '>' of "</script>" so the loop's i++ lands on the
                                // character right after it instead of skipping that character.
                                i = nScriptEnd + 8;
                            }
                            else if (String.Compare(sTAGName, "input", true) != 0
                                  && String.Compare(sTAGName, "link", true) != 0
                                  && String.Compare(sTAGName, "br", true) != 0
                                  && String.Compare(sTAGName, "meta", true) != 0
                                  && String.Compare(sTAGName, "img", true) != 0)
                            {
                                TAGStack.Push(sTAGName);
                                nLevel++;
                                ShowPos(sTAGName, nLevel, true);
                            }
                        }
                        else // end tag found inside the current occurrence
                        {
                            int nLastCharPos = i - 1; // last character before the "</"
                            string sEndTAG = "";
                            i += 2;
                            while (sFileData[i] != '>')
                            {
                                sEndTAG += sFileData[i++];
                            }
                            ShowPos(sEndTAG, nLevel, false);

                            if (String.Compare(sEndTAG, sSearchTag, true) == 0 && nLevel == 0)
                            {
                                nFoundOccurance++;
                                nEndPos = nLastCharPos;
                                bFound = false;

                                if (nFoundOccurance == nOccurance)
                                {
                                    sTAGData = sFileData.Substring(nStartPos, nEndPos - nStartPos + 1);
                                    break;
                                }
                            }
                            else if (TAGStack.Count > 0)
                            {
                                // Close the innermost open tag only when the names match;
                                // a mismatched end tag leaves the stack and level untouched.
                                string sPopedTag = TAGStack.Pop();
                                if (string.Compare(sPopedTag, sEndTAG, true) != 0)
                                {
                                    TAGStack.Push(sPopedTag);
                                }
                                else
                                {
                                    nLevel--;
                                }
                            }
                            // else: stray end tag with nothing open (e.g. "</input>") -- ignored
                            // rather than aborting the search with an empty-stack exception.
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // Best-effort parsing: report the problem and fall through with whatever was collected.
                Console.Write(e.Message);
            }

            HTMLSearchResult result = new HTMLSearchResult();
            result.sSearchResult = sTAGData;
            result.sTAGAttribute = sTAGAttribute;
            return result;
        }