/// <summary>
/// Builds a Company from an HTML fragment by querying its "tr", "td" and "a" tags.
/// </summary>
/// <param name="input">HTML text handed to the HTMLSearchResult constructor.</param>
/// <returns>
/// A Company constructed from the trimmed data of the "td" lookup with index 2, with Name taken
/// from the "a" tag data and PaperCode taken from the "tr" tag attribute text.
/// </returns>
private Company CompanyNames(string input)
{
    HTMLSearchResult rowSource = new HTMLSearchResult(input);

    // Paper code: pulled out of the row's attribute text around "id=" and the '"' delimiter.
    // NOTE(review): exact extraction rules live in HtmlHelper.CutOffValueFromQuotas - confirm there.
    string rowAttributes = rowSource.GetTagData("tr").TAGAttribute;
    string paperCode = HtmlHelper.CutOffValueFromQuotas(rowAttributes, "id=", '"');

    // Company name: data of the "a" tag reached through tr -> td -> a.
    string companyName = rowSource.GetTagData("tr").GetTagData("td").GetTagData("a").TAGData;

    // Company id: trimmed data of the "td" lookup with index 2 inside the row.
    string companyId = rowSource.GetTagData("tr").GetTagData("td", 2).TAGData.Trim();

    return new Company(companyId)
    {
        Name = companyName,
        PaperCode = paperCode
    };
}
/// <summary>
/// Creates a new HTMLSearchResult whose sSearchResult is set to the given string.
/// </summary>
/// <param name="str">Text to store in the new result's sSearchResult.</param>
/// <returns>The newly created HTMLSearchResult.</returns>
private HTMLSearchResult Result(string str)
{
    return new HTMLSearchResult { sSearchResult = str };
}
// Returns the HTML found between the start tag and the matching end tag of a given element.
/// <summary>
/// Extracts the Nth occurrence of the specified tag from the given HTML data. Occurrences nested
/// inside an earlier occurrence of the same tag are not counted separately.
/// </summary>
/// <param name="sFileData">The HTML page data to search.</param>
/// <param name="sSearchTag">Name of the tag to search for; compared case-insensitively.</param>
/// <param name="nOccurance">The Nth occurrence to search for (1-based).</param>
/// <returns>
/// An HTMLSearchResult whose sSearchResult holds the text between the matching start and end tags
/// (an empty string when the occurrence is not found or scanning stops early) and whose
/// sTAGAttribute holds the attribute text produced by the last ReadTillEndOfTag call made while
/// looking for a start tag.
/// </returns>
/// <exception cref="ArgumentNullException">sFileData or sSearchTag is null or empty.</exception>
/// <exception cref="ArgumentOutOfRangeException">nOccurance is less than one.</exception>
public HTMLSearchResult GetTagData(string sFileData, string sSearchTag, int nOccurance)
{
    string sTAGData = "";
    int nStartPos = -1;
    int nFoundOccurance = 0; // number of complete occurrences of the search tag seen so far
    int nLevel = 0;          // nesting depth inside the occurrence currently being read
    bool bFound = false;     // true while we are between a matching start tag and its end tag

    // The open-tag stack is reset before validation, exactly as the original did.
    TAGStack = new Stack<string>();

    if (string.IsNullOrEmpty(sFileData))
    {
        throw new ArgumentNullException("sFileData", "File Data cannot be null or blank string");
    }
    if (string.IsNullOrEmpty(sSearchTag))
    {
        throw new ArgumentNullException("sSearchTag", "Search Tag cannot be null or blank string");
    }
    if (nOccurance < 1)
    {
        throw new ArgumentOutOfRangeException("nOccurance", "The occurance number cannot be less than one.");
    }

    try
    {
        int nFileLen = sFileData.Length;
        // Positions past this limit cannot start a tag long enough to hold the search tag name.
        int nScanLimit = nFileLen - sSearchTag.Length + 1;

        for (int i = 0; i < nScanLimit; i++)
        {
            // Only '<' followed by at least one more character can open a tag.
            if (sFileData[i] != '<' || i + 1 >= nFileLen)
            {
                continue;
            }

            char cAfterBracket = sFileData[i + 1];
            if (cAfterBracket == '!')
            {
                continue; // comment / doctype style markup is ignored
            }

            if (cAfterBracket != '/')
            {
                // ---- start tag ----
                i++; // step onto the first character of the tag name

                if (!bFound)
                {
                    sTAGAttribute = ""; // class member, reported back through the result
                    string sTAGName = ReadTillEndOfTag(sFileData, ref i, out sTAGAttribute);
                    if (string.Equals(sSearchTag, sTAGName, StringComparison.OrdinalIgnoreCase))
                    {
                        ShowPos(sSearchTag, nLevel, true);
                        bFound = true;
                        // NOTE(review): assumes ReadTillEndOfTag leaves i on the '>' that closes
                        // the start tag, so the content begins at i + 1 - confirm.
                        nStartPos = i + 1;
                    }
                }
                else
                {
                    string sNestedAttribute;
                    string sNestedName = ReadTillEndOfTag(sFileData, ref i, out sNestedAttribute);

                    if (string.Equals(sNestedName, "script", StringComparison.OrdinalIgnoreCase))
                    {
                        // Script bodies may contain '<', so jump straight to the closing tag.
                        int nSearchFrom = i + 1 <= nFileLen ? i + 1 : nFileLen;
                        int nScriptEnd = sFileData.IndexOf("</script>", nSearchFrom, StringComparison.OrdinalIgnoreCase);
                        if (nScriptEnd < 0)
                        {
                            break; // unterminated script: nothing more can be parsed
                        }
                        // Land on the '>' of "</script>" (offset 8); the loop increment then
                        // moves to the first character after it instead of skipping that one.
                        i = nScriptEnd + 8;
                    }
                    else if (!IsVoidElement(sNestedName))
                    {
                        TAGStack.Push(sNestedName);
                        nLevel++;
                        ShowPos(sNestedName, nLevel, true);
                    }
                }
            }
            else
            {
                // ---- end tag ----
                int nLastCharPos = i - 1; // last character before the "</"
                int nNameStart = i + 2;   // i + 1 < nFileLen, so this is at most nFileLen
                int nClosePos = sFileData.IndexOf('>', nNameStart);
                if (nClosePos < 0)
                {
                    break; // unterminated end tag: stop scanning
                }
                string sEndTAG = sFileData.Substring(nNameStart, nClosePos - nNameStart);
                i = nClosePos; // loop increment continues after the '>'

                bool bClosesSearchTag =
                    nLevel == 0 && string.Equals(sEndTAG, sSearchTag, StringComparison.OrdinalIgnoreCase);

                if (!bFound)
                {
                    if (bClosesSearchTag)
                    {
                        // Same message the original reported through its catch block.
                        Console.Write("The Start tag was not found, however its end tag was found");
                        break;
                    }
                    continue;
                }

                ShowPos(sEndTAG, nLevel, false);

                if (bClosesSearchTag)
                {
                    nFoundOccurance++;
                    bFound = false;
                    if (nFoundOccurance == nOccurance)
                    {
                        sTAGData = sFileData.Substring(nStartPos, nLastCharPos - nStartPos + 1);
                        break;
                    }
                }
                else if (TAGStack.Count > 0
                         && string.Equals(TAGStack.Peek(), sEndTAG, StringComparison.OrdinalIgnoreCase))
                {
                    // End tag closes the innermost open tag.
                    TAGStack.Pop();
                    nLevel--;
                }
                // Any other end tag (mismatched or stray) is ignored, leaving the depth unchanged.
            }
        }
    }
    catch (Exception e)
    {
        // Best effort: anything unexpected (for example from ReadTillEndOfTag) is reported and
        // whatever was collected so far is returned, as before.
        Console.Write(e.Message);
    }

    HTMLSearchResult result = new HTMLSearchResult();
    result.sSearchResult = sTAGData;
    result.sTAGAttribute = sTAGAttribute;
    return result;
}

// HTML void elements: they have no end tag, so they must never be pushed on the open-tag stack.
private static readonly string[] VoidElementNames =
{
    "area", "base", "br", "col", "embed", "hr", "img", "input",
    "link", "meta", "param", "source", "track", "wbr"
};

// Returns true when sTagName names an HTML void element (case-insensitive comparison).
private static bool IsVoidElement(string sTagName)
{
    foreach (string sVoidName in VoidElementNames)
    {
        if (string.Equals(sVoidName, sTagName, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }
    return false;
}