Esempio n. 1
0
        /// <summary>
        /// Splits the inner markup of a table element into rows and collects them in a
        /// <see cref="ParsedTable"/>.
        /// </summary>
        /// <param name="mainString">
        /// The markup that appeared between an opening table tag and its matching closing tag.
        /// </param>
        /// <returns>
        /// A table holding one row per <c>&lt;tr&gt;</c> element found; the cells of each row are
        /// whatever <c>parseHtmlAsTableRow</c> returns for that row's inner markup. The table has no
        /// rows when no <c>&lt;tr&gt;</c> is present.
        /// </returns>
        private static ParsedTable parseHtmlAsTable(string mainString)
        {
            ParsedTable table            = new ParsedTable();
            char[]      trimChars        = { ' ', '\t', '\r', '\n' };
            int         searchStartIndex = 0;

            while (true)
            {
                // A row opens either with a bare "<tr>" or with "<tr " followed by attributes;
                // take whichever occurs first.
                int bareTagIndex = mainString.IndexOf("<tr>", searchStartIndex, StringComparison.OrdinalIgnoreCase);
                int attrTagIndex = mainString.IndexOf("<tr ", searchStartIndex, StringComparison.OrdinalIgnoreCase);
                int rowStartIndex;
                if (bareTagIndex == -1)
                {
                    rowStartIndex = attrTagIndex;
                }
                else if (attrTagIndex == -1)
                {
                    rowStartIndex = bareTagIndex;
                }
                else
                {
                    rowStartIndex = Math.Min(bareTagIndex, attrTagIndex);
                }
                if (rowStartIndex == -1)
                {
                    break;
                }

                // The char overload of IndexOf is ordinal; the string overload used previously is
                // culture-sensitive, which is not appropriate for scanning markup.
                int openingTagEnd = mainString.IndexOf('>', rowStartIndex);

                // NOTE(review): findMatchingEndTag is defined elsewhere. This code treats its result
                // as the index at which the closing "</tr>" starts - confirm what it returns when
                // no closing tag exists.
                int rowEndIndex = findMatchingEndTag(mainString, "tr", rowStartIndex);

                // Malformed markup (unterminated opening tag, or no usable closing tag position)
                // would make Substring throw; stop and keep the rows parsed so far.
                if (openingTagEnd == -1 || rowEndIndex <= openingTagEnd || rowEndIndex > mainString.Length)
                {
                    break;
                }

                int           rowIndex     = table.addRow();
                string        rowMarkup    = mainString.Substring(openingTagEnd + 1, rowEndIndex - openingTagEnd - 1).Trim(trimChars);
                List <object> objectsInRow = parseHtmlAsTableRow(rowMarkup);
                foreach (object cell in objectsInRow)
                {
                    table.addColumn(rowIndex, cell);
                }

                // Resume after the closing tag. Clamp to the string length because IndexOf throws
                // when the start index lies beyond the end of the string.
                searchStartIndex = Math.Min(rowEndIndex + "</tr>".Length, mainString.Length);
            }

            return table;
        }
Esempio n. 2
0
        /// <summary>
        /// Splits a piece of markup into its top-level tables and the plain text around them.
        /// </summary>
        /// <param name="mainString">The markup to parse. May be null or empty.</param>
        /// <param name="encapsulateInTable">
        /// Only consulted when exactly one object was found and it is not a
        /// <see cref="ParsedTable"/>: when true that object is wrapped in a table with a single row
        /// and a single column, when false it is returned as is.
        /// </param>
        /// <returns>
        /// <c>null</c> when <paramref name="mainString"/> is null, empty, consists only of spaces,
        /// tabs, carriage returns and line feeds, or yields no objects; the single object found
        /// (subject to <paramref name="encapsulateInTable"/>); or, when several objects were found,
        /// a <see cref="ParsedTable"/> with one row whose columns are those objects in document order.
        /// Text fragments are strings with surrounding whitespace trimmed; fragments that are empty
        /// after trimming are skipped.
        /// </returns>
        public static Object ParseHtmlIntoTables(string mainString, bool encapsulateInTable)
        {
            if (string.IsNullOrEmpty(mainString))
            {
                return null;
            }

            char[] trimChars = { ' ', '\t', '\r', '\n' };
            mainString = mainString.Trim(trimChars);
            if (mainString.Length == 0)
            {
                return null;
            }

            List<object> objectsOnPage = new List<object>();
            int searchStartIndex = 0;

            while (true)
            {
                // A table opens either with a bare "<table>" or with "<table " followed by
                // attributes; take whichever occurs first.
                int bareTagIndex = mainString.IndexOf("<table>", searchStartIndex, StringComparison.OrdinalIgnoreCase);
                int attrTagIndex = mainString.IndexOf("<table ", searchStartIndex, StringComparison.OrdinalIgnoreCase);
                int tableStartIndex;
                if (bareTagIndex == -1)
                {
                    tableStartIndex = attrTagIndex;
                }
                else if (attrTagIndex == -1)
                {
                    tableStartIndex = bareTagIndex;
                }
                else
                {
                    tableStartIndex = Math.Min(bareTagIndex, attrTagIndex);
                }
                if (tableStartIndex == -1)
                {
                    break;
                }

                // The char overload of IndexOf is ordinal; the string overload used previously is
                // culture-sensitive, which is not appropriate for scanning markup.
                int openingTagEnd = mainString.IndexOf('>', tableStartIndex);

                // NOTE(review): findMatchingEndTag is defined elsewhere. This code treats its result
                // as the index at which the closing "</table>" starts - confirm what it returns
                // when no closing tag exists.
                int tableEndIndex = findMatchingEndTag(mainString, "table", tableStartIndex);

                // Malformed markup (unterminated opening tag, or no usable closing tag position)
                // would make Substring throw; stop here so the remainder is emitted as trailing text.
                if (openingTagEnd == -1 || tableEndIndex <= openingTagEnd || tableEndIndex > mainString.Length)
                {
                    break;
                }

                if (tableStartIndex > searchStartIndex)
                {
                    string leadingText = mainString.Substring(searchStartIndex, tableStartIndex - searchStartIndex).Trim(trimChars);

                    // Skip whitespace-only gaps between tables, consistent with how the trailing
                    // text is handled after the loop.
                    if (leadingText.Length > 0)
                    {
                        objectsOnPage.Add(leadingText);
                        Debug.Assert(leadingText.IndexOf("</td>", StringComparison.OrdinalIgnoreCase) == -1);
                        Debug.Assert(leadingText.IndexOf("</tr>", StringComparison.OrdinalIgnoreCase) == -1);
                        Debug.Assert(leadingText.IndexOf("</table>", StringComparison.OrdinalIgnoreCase) == -1);
                    }
                }

                string tableMarkup = mainString.Substring(openingTagEnd + 1, tableEndIndex - openingTagEnd - 1).Trim(trimChars);
                objectsOnPage.Add(parseHtmlAsTable(tableMarkup));

                // Resume after the closing tag. Clamp to the string length because IndexOf and
                // Substring throw when the start index lies beyond the end of the string.
                searchStartIndex = Math.Min(tableEndIndex + "</table>".Length, mainString.Length);
            }

            string endString = mainString.Substring(searchStartIndex).Trim(trimChars);
            if (!string.IsNullOrEmpty(endString))
            {
                objectsOnPage.Add(endString);
                Debug.Assert(endString.IndexOf("</td>", StringComparison.OrdinalIgnoreCase) == -1);
                Debug.Assert(endString.IndexOf("</tr>", StringComparison.OrdinalIgnoreCase) == -1);
                Debug.Assert(endString.IndexOf("</table>", StringComparison.OrdinalIgnoreCase) == -1);
            }

            if (objectsOnPage.Count == 0)
            {
                return null;
            }

            if (objectsOnPage.Count == 1)
            {
                object single = objectsOnPage[0];
                if (single is ParsedTable || !encapsulateInTable)
                {
                    return single;
                }
            }

            // Either several objects were found, or a lone non-table object must be wrapped:
            // place everything side by side in the first row of a new table.
            ParsedTable wrapper = new ParsedTable();
            wrapper.addRow();
            foreach (object item in objectsOnPage)
            {
                wrapper.addColumn(0, item);
            }
            return wrapper;
        }