Exemple #1
0
 /// <summary>
 /// Writes a labelled text dump of this table to <paramref name="sw"/>, recursing into
 /// any cell that is itself a <see cref="ParsedTable"/>.
 /// </summary>
 /// <param name="prefix">
 /// Label printed for this table. When null or empty, cell labels are just "i,j";
 /// otherwise they are "prefix,i,j", where i and j are the outer and inner list positions.
 /// </param>
 /// <param name="sw">Writer that receives the output lines.</param>
 public void PrintTable(string prefix, TextWriter sw)
 {
     bool hasPrefix = !string.IsNullOrEmpty(prefix);
     sw.WriteLine("[" + prefix + "] START TABLE");

     for (int outer = 0; outer < content.Count; outer++)
     {
         List<object> cells = content[outer];
         for (int inner = 0; inner < cells.Count; inner++)
         {
             // The label identifies the cell by its position, qualified by the parent label if any.
             string position = outer + "," + inner;
             string label = hasPrefix ? prefix + "," + position : position;

             object cell = cells[inner];
             if (cell is ParsedTable)
             {
                 // Nested tables print themselves, using this cell's label as their prefix.
                 ((ParsedTable)cell).PrintTable(label, sw);
                 continue;
             }

             sw.WriteLine("[" + label + "] START CONTENT");
             sw.WriteLine(cell == null ? "NULL" : cell.ToString());
             sw.WriteLine("[" + label + "] END CONTENT");
         }
     }

     sw.WriteLine("[" + prefix + "] END TABLE");
 }
Exemple #2
0
        /// <summary>
        /// Splits the inner markup of a table element into rows and builds a
        /// <see cref="ParsedTable"/> from them.
        /// </summary>
        /// <param name="mainString">
        /// Markup that occurred immediately between a table's opening tag and its matching
        /// "&lt;/table&gt;"; this method only separates it into rows.
        /// </param>
        /// <returns>
        /// A table holding one row per "&lt;tr" opening tag found while scanning forward; each
        /// row's cells are whatever parseHtmlAsTableRow returns for that row's inner markup.
        /// </returns>
        private static ParsedTable parseHtmlAsTable(string mainString)
        {
            ParsedTable table            = new ParsedTable();
            int         searchStartIndex = 0;

            while (true)
            {
                // A row opens with either a bare "<tr>" or a "<tr " that carries attributes.
                int i1 = mainString.IndexOf("<tr>", searchStartIndex, StringComparison.OrdinalIgnoreCase);
                int i2 = mainString.IndexOf("<tr ", searchStartIndex, StringComparison.OrdinalIgnoreCase);
                if (i1 == -1 && i2 == -1)
                {
                    break;
                }

                // Use whichever opening tag comes first; -1 means that form was not found.
                int trsi;
                if (i1 == -1)
                {
                    trsi = i2;
                }
                else if (i2 == -1)
                {
                    trsi = i1;
                }
                else
                {
                    trsi = Math.Min(i1, i2);
                }

                // The char overload of IndexOf is an ordinal search. The string overload is
                // culture-sensitive and may report a match that starts on an ignorable
                // character, which would shift the Substring offsets below.
                int openingRowTagEnd = mainString.IndexOf('>', trsi);

                // NOTE(review): findMatchingEndTag is defined elsewhere; this code assumes it returns
                // the index of the matching "</tr>" and does not handle a "not found" result — confirm.
                int trei = findMatchingEndTag(mainString, "tr", trsi);

                int           rowIndex     = table.addRow();
                string        subString    = mainString.Substring(openingRowTagEnd + 1, trei - openingRowTagEnd - 1).Trim(' ', '\t', '\r', '\n');
                List <object> objectsInRow = parseHtmlAsTableRow(subString);
                foreach (object o in objectsInRow)
                {
                    table.addColumn(rowIndex, o);
                }

                // Resume scanning just past this row's closing tag.
                searchStartIndex = trei + "</tr>".Length;
            }

            return(table);
        }
Exemple #3
0
 /// <summary>
 /// Walks a path of index pairs through nested tables and returns the text of the cell reached.
 /// </summary>
 /// <param name="parsedTable">Table the path starts from.</param>
 /// <param name="indices">
 /// Flat list of index pairs; each pair is passed to the table indexer as
 /// <c>table[indices[i], indices[i + 1]]</c> (presumably row then column — confirm against
 /// the indexer). Every pair except the last must address a nested <see cref="ParsedTable"/>.
 /// </param>
 /// <returns>
 /// The cell's <c>ToString()</c> text with "&amp;nbsp;" replaced by a space and then trimmed,
 /// or null when the addressed cell (or its text) is null.
 /// </returns>
 /// <exception cref="ArgumentNullException"><paramref name="indices"/> is null.</exception>
 /// <exception cref="InvalidOperationException"><paramref name="indices"/> has an odd length.</exception>
 /// <exception cref="InvalidCastException">A non-final pair addresses a cell that is not a table.</exception>
 public static string GetValue(ParsedTable parsedTable, int[] indices)
 {
     if (indices == null)
         throw new ArgumentNullException("indices");
     if (indices.Length % 2 != 0)
         throw new InvalidOperationException("indices must contain an even number of entries (one pair per nesting level).");

     object input = parsedTable;
     for (int i = 0; i < indices.Length; i += 2)
     {
         // Each step must start from a table; the cast fails for any other non-null cell.
         ParsedTable table = (ParsedTable)input;
         input = table[indices[i], indices[i + 1]];
     }

     // Cells can be null (PrintTable prints them as "NULL"); report that as null
     // rather than failing with a NullReferenceException.
     if (input == null)
         return null;
     string inputVal = input.ToString();
     if (inputVal == null)
         return null;

     return inputVal.Replace("&nbsp;", " ").Trim();
 }
Exemple #4
0
        /// <summary>
        /// Splits markup into the text found outside table elements and one
        /// <see cref="ParsedTable"/> per table element, in document order.
        /// </summary>
        /// <param name="mainString">Markup to parse; surrounding spaces, tabs, CR and LF are ignored.</param>
        /// <param name="encapsulateInTable">
        /// Only matters when exactly one non-table item is found: true wraps it in a
        /// one-row, one-column table, false returns the item itself.
        /// </param>
        /// <returns>
        /// null when the input is null, empty, or yields no items; the single item when there is
        /// exactly one (wrapped as described above); otherwise a one-row table with one column per item.
        /// </returns>
        public static Object ParseHtmlIntoTables(string mainString, bool encapsulateInTable)
        {
            if (string.IsNullOrEmpty(mainString))
            {
                return null;
            }
            mainString = mainString.Trim(' ', '\t', '\r', '\n');
            if (mainString.Length == 0)
            {
                // Nothing but whitespace: there is nothing to scan and no item to return.
                return null;
            }

            List<object> objectsOnPage = new List<object>();
            int searchStartIndex = 0;
            while (true)
            {
                // A table opens with either a bare "<table>" or a "<table " that carries attributes.
                int i1 = mainString.IndexOf("<table>", searchStartIndex, StringComparison.OrdinalIgnoreCase);
                int i2 = mainString.IndexOf("<table ", searchStartIndex, StringComparison.OrdinalIgnoreCase);
                if (i1 == -1 && i2 == -1)
                {
                    break;
                }

                // Use whichever opening tag comes first; -1 means that form was not found.
                int tsi;
                if (i1 == -1)
                {
                    tsi = i2;
                }
                else if (i2 == -1)
                {
                    tsi = i1;
                }
                else
                {
                    tsi = Math.Min(i1, i2);
                }

                // The char overload of IndexOf is an ordinal search. The string overload is
                // culture-sensitive and may report a match that starts on an ignorable
                // character, which would shift the Substring offsets below.
                int openingTableTagEnd = mainString.IndexOf('>', tsi);

                // NOTE(review): findMatchingEndTag is defined elsewhere; this code assumes it returns
                // the index of the matching "</table>" and does not handle a "not found" result — confirm.
                int tei = findMatchingEndTag(mainString, "table", tsi);

                if (tsi > searchStartIndex)
                {
                    // Text between the previous table (or the start) and this table.
                    // NOTE(review): unlike the trailing text below, this is added even when it trims
                    // to an empty string. Kept as-is because dropping it would shift column indices.
                    string subTextString = mainString.Substring(searchStartIndex, tsi - searchStartIndex).Trim(' ', '\t', '\r', '\n');
                    objectsOnPage.Add(subTextString);
                    Debug.Assert(subTextString.IndexOf("</td>", StringComparison.OrdinalIgnoreCase) == -1);
                    Debug.Assert(subTextString.IndexOf("</tr>", StringComparison.OrdinalIgnoreCase) == -1);
                    Debug.Assert(subTextString.IndexOf("</table>", StringComparison.OrdinalIgnoreCase) == -1);
                }

                string subString = mainString.Substring(openingTableTagEnd + 1, tei - openingTableTagEnd - 1).Trim(' ', '\t', '\r', '\n');
                objectsOnPage.Add(parseHtmlAsTable(subString));

                // Resume scanning just past this table's closing tag.
                searchStartIndex = tei + "</table>".Length;
            }

            // Whatever follows the last table (or the whole input when no table was found).
            string endString = mainString.Substring(searchStartIndex).Trim(' ', '\t', '\r', '\n');
            if (!string.IsNullOrEmpty(endString))
            {
                objectsOnPage.Add(endString);
                Debug.Assert(endString.IndexOf("</td>", StringComparison.OrdinalIgnoreCase) == -1);
                Debug.Assert(endString.IndexOf("</tr>", StringComparison.OrdinalIgnoreCase) == -1);
                Debug.Assert(endString.IndexOf("</table>", StringComparison.OrdinalIgnoreCase) == -1);
            }

            if (objectsOnPage.Count == 0)
            {
                return null;
            }
            if (objectsOnPage.Count == 1)
            {
                object only = objectsOnPage[0];
                if (only is ParsedTable || !encapsulateInTable)
                {
                    return only;
                }
            }

            // Several items, or a single non-table item that must be wrapped: one row, one column per item.
            ParsedTable wrapper = new ParsedTable();
            wrapper.addRow();
            foreach (object o in objectsOnPage)
            {
                wrapper.addColumn(0, o);
            }
            return wrapper;
        }