// Parses a downloaded QED-style HTML page and hands every table row it recognizes to insertEarthquake().
//
// The file is read line by line with a small state machine:
//   find the table header -> find "<tr" -> read "<td" lines until "</tr" -> find the next "<tr" ...
//   and "</table" (seen while looking for a row) ends the parsing.
// Every "<td" line is reduced to plain text by stripTags(); a link found on the row is captured in retStr.
// Cells 0..4 are stored in infos[0..4] as they arrive (presumably time, lat, lng, depth, magnitude as in
// the other parsers of this file - confirm against insertEarthquake()). When the sixth cell (index 5)
// arrives its text goes to infos[6], infos[5] is blanked, infos[7] receives the row's link resolved
// against url, and the row is inserted.
//
//   url      - address the page was downloaded from; base for relative links, quoted in the warning dialog
//   filename - local file that holds the page
//   source   - stored in infos[8] of every row
//
// Returns true if at least one row was inserted. Failures while reading or parsing are reported through
// LibSys.StatusBar.Error() instead of being thrown. If more than 10 lines were read and nothing was
// inserted, a message box warns about a possible format problem.
private static bool processQed(string url, string filename, string source)
{
    const int StateFindHeader = 0;   // look for the header of the info table
    const int StateFindRow = 1;      // look for "<tr" (or the end of the table)
    const int StateReadCells = 2;    // read lines with "<td"
    const int StateDone = 99;        // nothing left to parse

    LibSys.StatusBar.Trace("IP: process qed filename=" + filename);

    int eqCount = 0;
    int lineno = 0;

    try
    {
        int state = StateFindHeader;
        int tdcount = 0;
        string[] infos = null;
        RetStr retStr = new RetStr();

        // "using" closes the reader on every exit path, including exceptions
        using(StreamReader stream = new StreamReader(filename))
        {
            string line;
            while((line = stream.ReadLine()) != null)
            {
                lineno++;
                try
                {
                    // markup tokens are matched ordinally and case-insensitively so the result
                    // does not depend on the current culture
                    switch(state)
                    {
                        case StateFindHeader:
                            if(line.StartsWith("<td headers=\"t7\"> ", StringComparison.OrdinalIgnoreCase))
                            {
                                state = StateFindRow;
                            }
                            break;

                        case StateFindRow:
                            if(line.StartsWith("</table", StringComparison.OrdinalIgnoreCase))
                            {
                                state = StateDone;
                            }
                            else if(line.StartsWith("<tr", StringComparison.OrdinalIgnoreCase))
                            {
                                // start collecting a fresh row
                                state = StateReadCells;
                                tdcount = 0;
                                retStr.str = null;
                                infos = new string[9];
                                infos[8] = source;
                            }
                            break;

                        case StateReadCells:
                            if(line.StartsWith("</tr", StringComparison.OrdinalIgnoreCase))
                            {
                                state = StateFindRow;
                            }
                            else if(line.StartsWith("<td", StringComparison.OrdinalIgnoreCase))
                            {
                                string info = stripTags(line, retStr);
                                if(tdcount < infos.Length)
                                {
                                    infos[tdcount] = info;
                                    if(tdcount == 5)
                                    {
                                        // the sixth cell completes the row
                                        infos[5] = "";
                                        infos[6] = info;
                                        infos[7] = ResolveUrl(url, retStr);
                                        insertEarthquake(infos);
                                        eqCount++;
                                    }
                                }
                                else
                                {
                                    // wrong number of columns in the table - look for another row
                                    state = StateFindRow;
                                }
                                tdcount++;
                            }
                            break;

                        case StateDone:
                            // lines are still counted so that the totals below cover the whole file
                            break;
                    }
                }
                catch (Exception ee)
                {
                    // a bad line must not abort the rest of the file
                    LibSys.StatusBar.Error("process qed: file=" + filename + " line=" + lineno + " " + ee.Message);
                }
            }
        }
    }
    catch (Exception eee)
    {
        LibSys.StatusBar.Error("process qed: " + eee.Message);
    }

    LibSys.StatusBar.Trace("OK: process qed filename=" + filename + " lines=" + lineno + " earthquakes=" + eqCount);

    // warn of possible format problem:
    if(lineno > 10 && eqCount == 0)
    {
        string str = "URL " + url + " was parsed to 0 earthquakes";
        MessageBox.Show(str, possibleFormatError, MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
    }

    return eqCount > 0;
}
// -------------------------------------------------------------------------------------------------------
// the following method processes CSV feeds from http://earthquake.usgs.gov/eqcenter/recenteqsww/catalogs/
// the data looks like this:
//     Src,Eqid,Version,Datetime,Lat,Lon,Magnitude,Depth,NST
//     ci,14216812,1,"March 13, 2006 03:05:33 GMT",36.0260,-117.7626,1.3,2.80,11
//     nc,51169078,1,"March 13, 2006 03:04:10 GMT",38.4913,-122.2845,1.3,4.80, 9
//     ak,00058271,5,"March 13, 2006 02:23:40 GMT",60.0904,-148.3955,3.3,5.00,19
// or, with the newer date format:
//     Src,Eqid,Version,Datetime,Lat,Lon,Magnitude,Depth,NST
//     nc,51194079,1,"Sunday, January 6, 2008 06:08:28 UTC",38.8253,-122.7992,1.7,2.70,23
//     nc,51194078,1,"Sunday, January 6, 2008 05:39:34 UTC",38.7757,-122.7150,1.1,2.30,14
//     ci,10298345,1,"Sunday, January 6, 2008 05:23:28 UTC",34.5043,-116.5191,1.4,4.00,27
//
// Every data line (longer than 10 characters and not the "Src," header) is split by parseCsv() and
// passed to insertEarthquake() as a 9-element array.
//
//   url      - address the feed was downloaded from; only quoted in the warning dialog
//   filename - local file that holds the feed
//   source   - combined with the upper-cased network code into infos[8] ("source-NETWORK")
//
// Returns true if at least one line was inserted. Failures while reading or parsing are reported through
// LibSys.StatusBar.Error() instead of being thrown. If more than 10 lines were read and nothing was
// inserted, a message box warns about a possible format problem.
private static bool processBulletinCsv(string url, string filename, string source)
{
    // columns Src..Depth (indexes 0..7) are read below
    const int MinFields = 8;

    // fragments removed from the Datetime column, in this order, before the value is stored
    string[] datetimeNoise = {
        " GMT", " UTC",
        "Monday, ", "Tuesday, ", "Wednesday, ", "Thursday, ", "Friday, ", "Saturday, ", "Sunday, "
    };

    LibSys.StatusBar.Trace("IP: process bulletin filename=" + filename);

    int eqCount = 0;
    int lineno = 0;

    try
    {
        // "using" closes the reader on every exit path, including exceptions
        using(StreamReader stream = new StreamReader(filename))
        {
            string line;
            while((line = stream.ReadLine()) != null)
            {
                lineno++;
                try
                {
                    // skip the column header and empty lines etc.
                    if(line.StartsWith("src,", StringComparison.OrdinalIgnoreCase) || line.Length <= 10)
                    {
                        continue;
                    }

                    string[] split = parseCsv(line);
                    if(split == null || split.Length < MinFields)
                    {
                        // report a short row explicitly instead of through an out-of-range exception
                        LibSys.StatusBar.Error("process bulletin-csv: file=" + filename + " line=" + lineno
                            + " expected at least " + MinFields + " fields, found " + (split == null ? 0 : split.Length));
                        continue;
                    }

                    // invariant upper-casing: network codes such as "ci" must not turn into
                    // culture-specific letters (e.g. the Turkish dotted capital I)
                    string network = split[0].ToUpperInvariant();

                    // "US" events live under the worldwide pages, everything else under the US pages
                    string site = (network == "US") ? "recenteqsww" : "recenteqsus";
                    string equrl = "http://earthquake.usgs.gov/eqcenter/" + site + "/Quakes/" + split[0] + split[1] + ".php";

                    string when = split[3];
                    foreach(string fragment in datetimeNoise)
                    {
                        when = when.Replace(fragment, "");
                    }

                    string[] infos = new string[9];
                    infos[0] = when;                            // Time UTC
                    infos[1] = split[4];                        // lat
                    infos[2] = split[5];                        // lng
                    infos[3] = split[7];                        // depth
                    infos[4] = split[6];                        // magn
                    infos[5] = "";                              // quality
                    infos[6] = split[6] + " from " + network;   // comment
                    infos[7] = equrl;
                    infos[8] = source + "-" + network;

                    insertEarthquake(infos);
                    eqCount++;
                }
                catch (Exception ee)
                {
                    // a bad line must not abort the rest of the file
                    LibSys.StatusBar.Error("process bulletin-csv: file=" + filename + " line=" + lineno + " " + ee);
                }
            }
        }
    }
    catch (Exception eee)
    {
        LibSys.StatusBar.Error("process bulletin-csv: " + eee.Message);
    }

    LibSys.StatusBar.Trace("OK: filename=" + filename + " lines=" + lineno + " earthquakes=" + eqCount);

    // warn of possible format problem:
    if(lineno > 10 && eqCount == 0)
    {
        string str = "URL " + url + " was parsed to 0 earthquakes";
        MessageBox.Show(str, possibleFormatError, MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
    }

    return eqCount > 0;
}
// Turns the link captured in retStr.str into an absolute URL, using sUrl (the address of the page the
// link was found on) as the base:
//   - no captured link (retStr or retStr.str is null)  -> null
//   - link already starts with http:// or https://      -> returned unchanged
//   - link starts with "//"                             -> the scheme of sUrl is put in front of it
//   - link starts with "/"                              -> appended to the "scheme://host" part of sUrl
//   - anything else                                     -> appended to the directory part of sUrl
// A query string or fragment on sUrl is ignored, so slashes inside it are not mistaken for path
// separators. When sUrl is null the link is returned as it is.
private static string ResolveUrl(string sUrl, RetStr retStr)
{
    if(retStr == null || retStr.str == null)
    {
        return null;
    }

    string link = retStr.str;

    // schemes are case-insensitive; compare ordinally so the result does not depend on the culture
    if(sUrl == null
        || link.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
        || link.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
    {
        return link;
    }

    // the query string and fragment of the base play no part in resolving a link against it
    string baseUrl = sUrl;
    int cut = baseUrl.IndexOfAny(new char[] { '?', '#' });
    if(cut >= 0)
    {
        baseUrl = baseUrl.Substring(0, cut);
    }

    // schemeSep is the position of the "//" that follows the scheme, hostStart the first character after it
    int schemeSep = baseUrl.IndexOf("//", StringComparison.Ordinal);
    int hostStart = (schemeSep < 0) ? 0 : schemeSep + 2;

    if(link.StartsWith("//", StringComparison.Ordinal))
    {
        // protocol-relative link: reuse "http:" (or whatever the base uses)
        return (schemeSep < 0) ? link : baseUrl.Substring(0, schemeSep) + link;
    }

    if(link.StartsWith("/", StringComparison.Ordinal))
    {
        // root-relative link: keep only "scheme://host"; a base without any path is already just that
        int pathStart = baseUrl.IndexOf('/', hostStart);
        string root = (pathStart < 0) ? baseUrl : baseUrl.Substring(0, pathStart);
        return root + link;
    }

    int lastSlash = baseUrl.LastIndexOf('/');
    if(lastSlash < hostStart)
    {
        // the base has no path at all ("http://host"), so the only slashes belong to "//";
        // without a scheme and without a slash there is no directory to prepend
        return (schemeSep < 0) ? link : baseUrl + "/" + link;
    }

    // document-relative link: replace the last path segment of the base
    return baseUrl.Substring(0, lastSlash + 1) + link;
}
// here is an example of line we need to clean up:
//   <td headers="t7" align="left" valign="top"><font face="helvetica"><a href="020205051243.html" onMouseOver="window.status='<REN> NEVADA 02/02/05 05:12:43'"><REN> NEVADA</a></font></td>
// retStr returns link, if located in the line
//
// Removes everything between '<' and the matching '>' from the line (a '>' inside a double-quoted
// attribute value does not end the tag), decodes a few HTML entities, collapses runs of spaces and
// trims the result. The first double-quoted value of a tag that starts with "<a href=" is stored in
// retStr.str. A tag that is never closed is left in the text.
//
//   line   - one line of HTML
//   retStr - receives the link; left untouched when the line has none
//
// Returns the cleaned text, or "" if anything goes wrong (the failure is reported through
// LibSys.StatusBar.Error()).
private static string stripTags(string line, RetStr retStr)
{
    string ret = "";
    string tmp = line;
    bool inTag = false;
    bool inQuotes = false;
    bool isHref = false;
    int tag0 = 0;       // position of the '<' that opened the current tag
    int quote0 = 0;     // position of the '"' that opened the current quoted value
    int cur = 0;

    try
    {
        // Each pass consumes exactly one character of the input (deletions only remove text that has
        // already been visited), so the input length bounds the work. The budget is kept only as a
        // guard should sbDelete() misbehave; the former fixed budget of 400 silently stopped the
        // stripping on longer lines.
        int budget = line.Length + 1;

        while(cur < tmp.Length && budget-- > 0)
        {
            char curCh = tmp[cur];

            if(inTag && inQuotes)
            {
                // look for ending quote
                if(curCh == '"')
                {
                    inQuotes = false;
                    if(isHref)
                    {
                        retStr.str = tmp.Substring(quote0 + 1, cur - quote0 - 1);
                        isHref = false;     // only the first quoted value of the tag is the link
                    }
                    // sbDelete(s, from, to) is assumed to remove s[from..to-1] - confirm against its definition
                    tmp = sbDelete(tmp, quote0, cur + 1);
                    cur = quote0 - 1;       // the increment below continues right after the removed text
                }
            }
            else if(inTag)
            {
                // look for quotes or end of tag
                if(curCh == '"')
                {
                    inQuotes = true;
                    quote0 = cur;
                }
                else if(curCh == '>')
                {
                    inTag = false;
                    tmp = sbDelete(tmp, tag0, cur + 1);
                    cur = tag0 - 1;
                }
            }
            else if(curCh == '<')
            {
                // look for tag
                inTag = true;
                tag0 = cur;
                // compare in place instead of copying and lower-casing the rest of the line
                isHref = string.Compare(tmp, cur, "<a href=", 0, 8, StringComparison.OrdinalIgnoreCase) == 0;
            }

            cur++;
        }

        // NOTE(review): in the reviewed copy of this file the entity literals below had been
        // HTML-decoded (e.g. Replace(""", "\"")), so their names were reconstructed from the
        // replacement values; the upper-case variants stand in for three duplicated entries.
        // Confirm against version control.
        tmp = tmp.Replace("&quot;", "\"");
        tmp = tmp.Replace("&nbsp;", " ");
        tmp = tmp.Replace("&NBSP;", " ");
        tmp = tmp.Replace("&lt;", "<");
        tmp = tmp.Replace("&LT;", "<");
        tmp = tmp.Replace("&gt;", ">");
        tmp = tmp.Replace("&GT;", ">");
        // "&amp;" goes last so that text such as "&amp;gt;" becomes "&gt;" and is not decoded twice
        tmp = tmp.Replace("&amp;", "&");

        // collapse runs of spaces into a single space
        while(tmp.IndexOf("  ", StringComparison.Ordinal) >= 0)
        {
            tmp = tmp.Replace("  ", " ");
        }

        ret = tmp.Trim();
    }
    catch (Exception e)
    {
        LibSys.StatusBar.Error("stripTags() cur=" + cur + " e=" + e.Message + " line=" + line);
    }

    return ret;
}
// Parses a downloaded page that lists earthquakes as text rows inside a <PRE> section and hands every
// row that starts with "map" (after stripTags() removed the markup) to insertEarthquake().
//
// Lines before the first line starting with "<PRE>" are only scanned for the text
// "Hawaiian Standard Time", which switches the timezone suffix from "PDT" to "HST". Parsing stops at
// the first line starting with "</PRE>". A row is split on single spaces after its first four
// characters; the fields are used as magnitude, date, time, lat, lng, depth, and everything after
// them becomes the comment.
//
//   url      - address the page was downloaded from; base for relative links, quoted in the warning dialog
//   filename - local file that holds the page
//   source   - stored in infos[8] of every row
//
// Returns true if at least one row was inserted. Failures while reading or parsing are reported through
// LibSys.StatusBar.Error() instead of being thrown. If more than 10 lines were read and nothing was
// inserted, a message box warns about a possible format problem.
private static bool processQuakes0(string url, string filename, string source)
{
    const int StateFindPre = 0;     // look for "<PRE>", and also try to determine the timezone
    const int StateReadRows = 1;    // read rows until "</PRE>"
    const int StateDone = 99;       // nothing left to parse
    const int MinFields = 6;        // fields 0..5 are read below, the rest is the comment

    LibSys.StatusBar.Trace("IP: process quakes0 filename=" + filename);

    int eqCount = 0;
    int lineno = 0;
    string timezone = "PDT";        // Earthquake() knows to parse "PDT", "HST". Assume PDT for now.

    try
    {
        int state = StateFindPre;
        RetStr retStr = new RetStr();

        // "using" closes the reader on every exit path, including exceptions
        using(StreamReader stream = new StreamReader(filename))
        {
            string line;
            while((line = stream.ReadLine()) != null)
            {
                lineno++;
                try
                {
                    switch(state)
                    {
                        case StateFindPre:
                            if(line.IndexOf("Hawaiian Standard Time", StringComparison.Ordinal) != -1)
                            {
                                timezone = "HST";
                            }
                            if(line.StartsWith("<PRE>", StringComparison.Ordinal))
                            {
                                state = StateReadRows;
                            }
                            break;

                        case StateReadRows:
                            if(line.StartsWith("</PRE>", StringComparison.Ordinal))
                            {
                                state = StateDone;
                            }
                            else
                            {
                                retStr.str = null;
                                string info = stripTags(line, retStr);
                                if(info.StartsWith("map", StringComparison.OrdinalIgnoreCase))
                                {
                                    // drop "map" and the character that follows it
                                    string row = (info.Length > 4) ? info.Substring(4) : "";
                                    string[] split = row.Split(' ');

                                    if(split.Length < MinFields)
                                    {
                                        // report a short row explicitly instead of through an out-of-range exception
                                        LibSys.StatusBar.Error("process quakes0: file=" + filename + " line=" + lineno
                                            + " expected at least " + MinFields + " fields after 'map', found " + split.Length);
                                    }
                                    else
                                    {
                                        string comment = "";
                                        if(split.Length > 6)
                                        {
                                            comment = String.Join(" ", split, 6, split.Length - 6).Trim();
                                        }

                                        string[] infos = new string[9];
                                        infos[0] = split[1] + " " + split[2] + " " + timezone;  // Earthquake() knows to parse some timezones
                                        infos[1] = split[3];                                    // lat
                                        infos[2] = split[4];                                    // lng
                                        infos[3] = split[5];                                    // depth
                                        infos[4] = split[0];                                    // magn
                                        infos[5] = "";                                          // quality
                                        infos[6] = comment;
                                        infos[7] = ResolveUrl(url, retStr);
                                        infos[8] = source;

                                        insertEarthquake(infos);
                                        eqCount++;
                                    }
                                }
                            }
                            break;

                        case StateDone:
                            // lines are still counted so that the totals below cover the whole file
                            break;
                    }
                }
                catch (Exception ee)
                {
                    // a bad line must not abort the rest of the file
                    LibSys.StatusBar.Error("process quakes0: file=" + filename + " line=" + lineno + " " + ee.Message);
                }
            }
        }
    }
    catch (Exception eee)
    {
        LibSys.StatusBar.Error("process quakes0: " + eee.Message);
    }

    LibSys.StatusBar.Trace("OK: process quakes0 filename=" + filename + " lines=" + lineno + " earthquakes=" + eqCount);

    // warn of possible format problem:
    if(lineno > 10 && eqCount == 0)
    {
        string str = "URL " + url + " was parsed to 0 earthquakes";
        MessageBox.Show(str, possibleFormatError, MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
    }

    return eqCount > 0;
}