// Runs sqlRequest and collapses the returned rows into one tab-separated line per
// (date, page name) group, written to fileTo (the file is overwritten, not appended).
//
// Columns read from each row: 0 = page name, 2 = date, 3 = troll flag ("1" marks a troll row),
// 4 = country (an empty string is stored under "NS"), 5 = the value recorded for that row
// (presumably a count; non-troll values must parse as Int32 because they are summed).
//
// Each output line is: date (yyyy-MM-dd), page name, every stored value in dictionary order
// ("troll", "NS", then the countries from Common.getCountriesDB, then any country seen in the
// data that was not in that list), and finally the sum of all values except "troll".
//
// NOTE(review): a group is closed when the date or page name changes between consecutive rows,
// so the query is presumably expected to return rows ordered by page and date - confirm.
public static void BringRowsTogetherAggregatedComment(String fileTo, String sqlRequest)
{
    // "using" guarantees the file handle is released even if the query or a parse throws.
    using (StreamWriter sw = new StreamWriter(fileTo, false))
    {
        DataTable dt = PostGrePlugIn.getTablePostGre(sqlRequest);
        List<String> countries = PostGrePlugIn.DataTableToList(PostGrePlugIn.getTablePostGre(Common.getCountriesDB));

        DateTime date = new DateTime();
        String pagename = "";
        Dictionary<String, String> values = CreateZeroedGroupValues(countries);
        bool firstFlag = true;

        foreach (DataRow dr in dt.Rows)
        {
            DateTime rowDate = DateTime.Parse(dr[2].ToString());
            String rowPage = dr[0].ToString().Trim();

            // A change of date or page closes the group collected so far.
            if (!firstFlag && (rowDate != date || rowPage != pagename))
            {
                sw.WriteLine(FormatGroupLine(date, pagename, values));
                values = CreateZeroedGroupValues(countries);
            }

            firstFlag = false;
            date = rowDate;
            pagename = rowPage;

            if (dr[3].ToString() == "1")
            {
                values["troll"] = dr[5].ToString();
                continue;
            }

            if (dr[4].ToString() == "")
            {
                values["NS"] = dr[5].ToString();
                continue;
            }

            // The indexer adds a new column when the country is not in the preloaded list.
            values[dr[4].ToString().Trim()] = dr[5].ToString();
        }

        // The last group has no following row to trigger its output, so emit it here.
        // Without this the final (date, page) group was silently missing from the file.
        if (!firstFlag)
        {
            sw.WriteLine(FormatGroupLine(date, pagename, values));
        }
    }
}

// Builds a fresh value table for one group: "troll", "NS" and every country, all set to "0".
private static Dictionary<String, String> CreateZeroedGroupValues(List<String> countries)
{
    Dictionary<String, String> values = new Dictionary<String, String>();
    values.Add("troll", "0");
    values.Add("NS", "0");
    foreach (String c in countries)
    {
        values.Add(c, "0");
    }
    return values;
}

// Formats one output line: date, page name, all stored values, then the total of the non-troll values.
private static String FormatGroupLine(DateTime date, String pagename, Dictionary<String, String> values)
{
    StringBuilder line = new StringBuilder();
    line.Append(date.ToString("yyyy-MM-dd")).Append('\t').Append(pagename);

    Int32 totalnontroll = 0;
    foreach (KeyValuePair<String, String> entry in values)
    {
        line.Append('\t').Append(entry.Value);
        if (entry.Key != "troll")
        {
            totalnontroll += Int32.Parse(entry.Value);
        }
    }

    line.Append('\t').Append(totalnontroll.ToString());
    return line.ToString();
}
// Streams through the wiki XML dump at Common.wikixmlFile and collects, for every revision
// element, one tab-separated "edit" line plus a per-editor summary entry. Both buffers are
// handed to ReleaseFile together with Common.editsFile / Common.editorsFile.
//
// file: not used by this method; the input path is always Common.wikixmlFile.
//       NOTE(review): this looks unintentional, but it is kept so existing callers see no change.
//
// Edit line fields, in order: page name, page id, in-group flag (1 when the page title, after
// QuotedPrintable encoding, is contained in Common.interestPages), CleanExactTime(timestamp),
// day (yyyy-MM-dd), ip, user name, user id, country, troll, typetroll, distance, comment.
// Anonymous revisions keep name "NA" / id -1; registered ones keep ip "NA" / country "NA".
//
// Editor entry layout: [0] = number of revisions seen for the editor, [1] = number of those on
// in-group pages, [2] = the tab-separated identity fields captured at the editor's first revision.
//
// An XmlException ends parsing with a console message; whatever was collected so far is still
// handed to ReleaseFile.
//
// NOTE(review): the inner skip loops advance the reader until a given element name shows up.
// A revision whose contributor element has neither an ip nor a username child would make them
// run on into following revisions - verify against the dump format in use.
public static void ParseXMLwikiForEdits(String file)
{
    // XmlReader.Create(TextReader) does not close its input by default, so both are disposed here.
    using (StreamReader wikiInput = new StreamReader(Common.wikixmlFile, Encoding.UTF8))
    using (XmlReader XMLreader = XmlReader.Create(wikiInput))
    {
        PostGrePlugIn.openConnection();
        Console.OutputEncoding = Encoding.Unicode;

        // Opening with append = false and closing immediately resets both output files.
        using (new StreamWriter(Common.editsFile, false, Encoding.UTF8)) { }
        using (new StreamWriter(Common.editorsFile, false, Encoding.UTF8)) { }

        List<String> editsStore = new List<String>();
        Dictionary<String, List<Object>> editorsStore = new Dictionary<String, List<Object>>();

        Boolean flagReadTitle = false;
        Boolean readRevisionMode = false;
        String pageName = "";
        String pageID = "";
        String editorID = "";
        String comment = "";

        try
        {
            while (XMLreader.Read())
            {
                if (XMLreader.Name == "title" && XMLreader.NodeType == XmlNodeType.Element)
                {
                    flagReadTitle = true;
                    continue;
                }
                if (XMLreader.Name == "title" && XMLreader.NodeType == XmlNodeType.EndElement)
                {
                    flagReadTitle = false;
                    continue;
                }
                if (XMLreader.Name == "page" && XMLreader.NodeType == XmlNodeType.Element)
                {
                    continue;
                }
                if (XMLreader.Name == "page" && XMLreader.NodeType == XmlNodeType.EndElement)
                {
                    // Leaving the page: the in-group marker must not leak into the next page.
                    readRevisionMode = false;
                    continue;
                }
                if (XMLreader.Name == "text"
                    && (XMLreader.NodeType == XmlNodeType.Element || XMLreader.NodeType == XmlNodeType.EndElement))
                {
                    continue;
                }

                if (XMLreader.NodeType == XmlNodeType.Text && flagReadTitle)
                {
                    pageName = XMLreader.Value;
                    if (Common.interestPages.Contains(QuotedPrintable.EncodeQuotedPrintable(pageName)))
                    {
                        readRevisionMode = true;
                    }

                    // Skip ahead to the first id element after the title and take its text as the page id.
                    while (XMLreader.Name != "id")
                    {
                        XMLreader.Read();
                    }
                    XMLreader.Read();
                    pageID = XMLreader.Value;
                    flagReadTitle = false;
                }

                if (XMLreader.Name == "revision" && XMLreader.NodeType == XmlNodeType.Element)
                {
                    String timeString = "";
                    DateTime day = new DateTime();
                    String ip = "NA";
                    String country = "NA";
                    Byte troll = 0;
                    Int32 typetroll = -1;
                    String name = "NA";
                    Int32 nameID = -1;
                    Double distance = -1;
                    Byte inGroup = 0;
                    if (readRevisionMode)
                    {
                        inGroup = 1;
                    }

                    while (!(XMLreader.NodeType == XmlNodeType.EndElement && XMLreader.Name == "revision"))
                    {
                        XMLreader.Read();

                        if (XMLreader.NodeType == XmlNodeType.Element && XMLreader.Name == "timestamp")
                        {
                            XMLreader.Read();
                            timeString = XMLreader.Value;
                            day = DateTime.Parse(timeString);
                            XMLreader.Read();
                        }

                        if (XMLreader.NodeType == XmlNodeType.Element && XMLreader.Name == "contributor")
                        {
                            while (XMLreader.Name != "ip" && XMLreader.Name != "username")
                            {
                                XMLreader.Read();
                            }

                            if (XMLreader.Name == "ip")
                            {
                                // Anonymous edit: the IP doubles as the editor key and is looked up in the database.
                                XMLreader.Read();
                                ip = editorID = XMLreader.Value;
                                country = PostGrePlugIn.DataTableToList(PostGrePlugIn.getTablePostGre(PostGrePlugIn.ReturnIpQuery("countryipranges", ip, "country")))[0];
                                PostGrePlugIn.GetTrollResults(PostGrePlugIn.getTablePostGre(PostGrePlugIn.ReturnIpQuery("troll_bases", ip, " c1,c2,c3,c4, distance ")), out troll, out distance, out typetroll);
                            }
                            else
                            {
                                // Registered edit: the numeric user id is the editor key.
                                XMLreader.Read();
                                name = XMLreader.Value.Replace(@"\", "_slash_");
                                while (XMLreader.Name != "id")
                                {
                                    XMLreader.Read();
                                }
                                XMLreader.Read();
                                nameID = Int32.Parse(XMLreader.Value);
                                editorID = nameID.ToString();
                            }

                            while (XMLreader.Name != "contributor")
                            {
                                XMLreader.Read();
                            }
                            // Two reads past the contributor end tag; a comment element is expected there if present.
                            XMLreader.Read();
                            XMLreader.Read();
                            if (XMLreader.Name == "comment")
                            {
                                XMLreader.Read();
                                // Newlines, backslashes and tabs would break the tab-separated output.
                                comment = XMLreader.Value.Replace('\n', ' ');
                                comment = comment.Replace(@"\", " ");
                                comment = comment.Replace('\t', ' ');
                            }
                            else
                            {
                                comment = "";
                            }
                            break;
                        }
                    }

                    List<Object> editorEntry;
                    if (!editorsStore.TryGetValue(editorID, out editorEntry))
                    {
                        editorEntry = new List<Object>();
                        editorEntry.Add(1);
                        editorEntry.Add(1 * inGroup);
                        editorEntry.Add(ip + "\t" + name + "\t" + nameID + "\t" + country + "\t" + troll.ToString() + "\t" + typetroll.ToString() + "\t" + distance.ToString());
                        editorsStore.Add(editorID, editorEntry);
                    }
                    else
                    {
                        editorEntry[0] = Int32.Parse(editorEntry[0].ToString()) + 1;
                        // Count in-group edits from the in-group counter itself. The previous formula
                        // derived it from the total ([0] * inGroup + 1), which reset it to 1 on every
                        // out-of-group edit and overshot on in-group ones.
                        editorEntry[1] = Int32.Parse(editorEntry[1].ToString()) + inGroup;
                    }

                    editsStore.Add(pageName + "\t" + pageID + "\t" + inGroup.ToString() + "\t" + CleanExactTime(timeString) + "\t" + day.ToString("yyyy-MM-dd") + "\t" + ip + "\t" + name + "\t" + nameID + "\t" + country + "\t" + troll.ToString() + "\t" + typetroll.ToString() + "\t" + distance.ToString() + "\t" + comment);

                    // Hand both buffers over in batches and continue with whatever ReleaseFile returns.
                    if (editsStore.Count > 10000)
                    {
                        editorsStore = ReleaseFile(Common.editorsFile, editorsStore);
                        editsStore = ReleaseFile(Common.editsFile, editsStore);
                    }
                }
            }
        }
        catch (XmlException ex)
        {
            Console.WriteLine("Ended abnormally..." + ex);
        }

        // Hand over the remainder: without this, everything collected after the last
        // 10000-edit batch was dropped when the method returned.
        if (editorsStore.Count > 0)
        {
            ReleaseFile(Common.editorsFile, editorsStore);
        }
        if (editsStore.Count > 0)
        {
            ReleaseFile(Common.editsFile, editsStore);
        }
    }
}
// Refreshes Common.interestPages from the result of the Common.getPagesSQL query, then
// runs the wiki XML parse (ParseXML.ParseXMLwikiForEdits) with Common.wikixmlFile.
public static void CreateEditsAndEditorsTables()
{
    // The set must be in place before parsing starts, because the parser consults
    // Common.interestPages for every page title it reads.
    DataTable pagesOfInterest = PostGrePlugIn.getTablePostGre(Common.getPagesSQL);
    Common.interestPages = PostGrePlugIn.DataTableToHashSet(pagesOfInterest);

    ParseXML.ParseXMLwikiForEdits(Common.wikixmlFile);
}