/// <summary>
/// Reads the elements of a DBLP XML file, collects the raw objects, raw links and their
/// attributes, and dumps them into two CSV files written next to the input file:
/// <c>dblpXml + ".lnk.csv"</c> (one row per link) and <c>dblpXml + ".obj.csv"</c>
/// (one row per object). Progress marks are written to the console while working.
/// </summary>
/// <param name="dblpXml">Path of the DBLP XML file to read; also the prefix of the two output files.</param>
/// <returns>
/// The <c>DblpContext</c> created at the start of the method. Nothing in this method
/// populates it; only the CSV files carry the loaded data.
/// </returns>
/// <exception cref="System.ArgumentException">
/// Two parsed objects (or two parsed links) share the same Id, so the dictionary insert fails.
/// </exception>
private static DblpContext LoadDblpXml(string dblpXml)
{
    var context = new DblpContext();
    var objs = new Dictionary<Int32, RawObject>();
    var links = new Dictionary<Int32, RawLink>();
    var objAtts = new HashSet<String>();
    var lnkAtts = new HashSet<String>();
    var progress = 0;
    var skippedAttributes = 0;

    // Pass 1: read raw objects, raw links and raw attributes; attach each attribute
    // to the link or object it refers to.
    using (var f = System.IO.File.OpenText(dblpXml))
    {
        foreach (var e in GetXmlElements(f))
        {
            progress++;
            var obj = ParseObject(e);
            var lnk = ParseLink(e);
            var att = ParseAttribute(e);

            if (obj != null)
            {
                objs.Add(obj.Id, obj);
            }

            if (lnk != null)
            {
                links.Add(lnk.Id, lnk);
            }

            if (att != null)
            {
                switch (att.Key)
                {
                    // These keys describe a link; every other key describes an object.
                    case "in-year":
                    case "in-number":
                    case "in-volume":
                    case "month":
                    case "pages":
                    case "in-proceedings":
                    case "link-type":
                        RawLink targetLink;
                        if (links.TryGetValue(att.ObjectId, out targetLink))
                        {
                            targetLink.Attributes.Add(att.Key, att.Value);
                            lnkAtts.Add(att.Key);
                        }
                        else
                        {
                            // The attribute refers to a link that has not been read:
                            // skip it instead of dereferencing null.
                            skippedAttributes++;
                        }
                        break;

                    default:
                        RawObject targetObject;
                        if (objs.TryGetValue(att.ObjectId, out targetObject))
                        {
                            targetObject.Attributes.Add(att.Key, att.Value);
                            objAtts.Add(att.Key);
                        }
                        else
                        {
                            // Same as above, for an attribute of an unknown object.
                            skippedAttributes++;
                        }
                        break;
                }

                // "/" is only emitted when the 1000th element happens to be an attribute.
                if (progress % 1000 == 0)
                {
                    Console.Write("/");
                }
            }

            if (progress % 10000 == 0)
            {
                Console.Write(".");
            }
        }
        // No explicit Close(): leaving the using block disposes the reader.
    }

    Console.WriteLine();
    if (skippedAttributes > 0)
    {
        Console.WriteLine("Skipped {0} attribute(s) that reference an unknown link or object", skippedAttributes);
    }

    // Pass 2: write the result in two files: links, then objects.
    Console.WriteLine("Writing the Link File");
    progress = 0;
    var lnkAttsList = lnkAtts.ToList(); // fixed column order shared by the header and every row
    using (var lnkFile = System.IO.File.CreateText(dblpXml + ".lnk.csv"))
    {
        lnkFile.Write("LinkId, From, To");
        foreach (var column in lnkAttsList)
        {
            lnkFile.Write("," + column);
        }
        lnkFile.WriteLine();

        foreach (var lnk in links.Values)
        {
            // Count rows inside the loop so the progress dots are actually printed.
            progress++;
            if (progress % 10000 == 0)
            {
                Console.Write(".");
            }

            lnkFile.Write("{0}, {1}, {2}", lnk.Id, lnk.From, lnk.To);
            foreach (var column in lnkAttsList)
            {
                // NOTE(review): link values are written verbatim, whereas object values get
                // "," replaced by "~" below; a link value containing a comma would shift
                // the columns - confirm whether that can happen.
                lnkFile.Write(lnk.Attributes.ContainsKey(column) ? "," + lnk.Attributes[column] : ",");
            }
            lnkFile.WriteLine();
        }
    }

    Console.WriteLine();
    Console.WriteLine("Writing Object Files");
    var objAttsList = objAtts.ToList();
    progress = 0;
    using (var objFile = System.IO.File.CreateText(dblpXml + ".obj.csv"))
    {
        objFile.Write("Id");
        foreach (var column in objAttsList)
        {
            objFile.Write("," + column.Replace(",", "~"));
        }
        objFile.WriteLine();

        foreach (var obj in objs.Values)
        {
            progress++;
            if (progress % 10000 == 0)
            {
                Console.Write("~");
            }

            objFile.Write("{0}", obj.Id);
            foreach (var column in objAttsList)
            {
                // Commas inside names and values become "~" so they cannot break the CSV columns.
                objFile.Write(obj.Attributes.ContainsKey(column) ? "," + obj.Attributes[column].Replace(",", "~") : ",");
            }
            objFile.WriteLine();
        }
    }

    return context;
}
/// <summary>
/// Reads the elements of a DBLP XML file, collects the raw objects, raw links and their
/// attributes, and dumps them into two CSV files written next to the input file:
/// <c>dblpXml + ".lnk.csv"</c> (one row per link) and <c>dblpXml + ".obj.csv"</c>
/// (one row per object). Progress marks are written to the console while working.
/// </summary>
/// <param name="dblpXml">Path of the DBLP XML file to read; also the prefix of the two output files.</param>
/// <returns>
/// The <c>DblpContext</c> created at the start of the method. Nothing in this method
/// populates it; only the CSV files carry the loaded data.
/// </returns>
/// <exception cref="System.ArgumentException">
/// Two parsed objects (or two parsed links) share the same Id, so the dictionary insert fails.
/// </exception>
private static DblpContext LoadDblpXml(string dblpXml)
{
    var context = new DblpContext();
    var objs = new Dictionary<Int32, RawObject>();
    var links = new Dictionary<Int32, RawLink>();
    var objAtts = new HashSet<String>();
    var lnkAtts = new HashSet<String>();
    var progress = 0;
    var skippedAttributes = 0;

    // Pass 1: read raw objects, raw links and raw attributes; attach each attribute
    // to the link or object it refers to.
    using (var f = System.IO.File.OpenText(dblpXml))
    {
        foreach (var e in GetXmlElements(f))
        {
            progress++;
            var obj = ParseObject(e);
            var lnk = ParseLink(e);
            var att = ParseAttribute(e);

            if (obj != null)
            {
                objs.Add(obj.Id, obj);
            }

            if (lnk != null)
            {
                links.Add(lnk.Id, lnk);
            }

            if (att != null)
            {
                switch (att.Key)
                {
                    // These keys describe a link; every other key describes an object.
                    case "in-year":
                    case "in-number":
                    case "in-volume":
                    case "month":
                    case "pages":
                    case "in-proceedings":
                    case "link-type":
                        RawLink targetLink;
                        if (links.TryGetValue(att.ObjectId, out targetLink))
                        {
                            targetLink.Attributes.Add(att.Key, att.Value);
                            lnkAtts.Add(att.Key);
                        }
                        else
                        {
                            // The attribute refers to a link that has not been read:
                            // skip it instead of dereferencing null.
                            skippedAttributes++;
                        }
                        break;

                    default:
                        RawObject targetObject;
                        if (objs.TryGetValue(att.ObjectId, out targetObject))
                        {
                            targetObject.Attributes.Add(att.Key, att.Value);
                            objAtts.Add(att.Key);
                        }
                        else
                        {
                            // Same as above, for an attribute of an unknown object.
                            skippedAttributes++;
                        }
                        break;
                }

                // "/" is only emitted when the 1000th element happens to be an attribute.
                if (progress % 1000 == 0)
                {
                    Console.Write("/");
                }
            }

            if (progress % 10000 == 0)
            {
                Console.Write(".");
            }
        }
        // No explicit Close(): leaving the using block disposes the reader.
    }

    Console.WriteLine();
    if (skippedAttributes > 0)
    {
        Console.WriteLine("Skipped {0} attribute(s) that reference an unknown link or object", skippedAttributes);
    }

    // Pass 2: write the result in two files: links, then objects.
    Console.WriteLine("Writing the Link File");
    progress = 0;
    var lnkAttsList = lnkAtts.ToList(); // fixed column order shared by the header and every row
    using (var lnkFile = System.IO.File.CreateText(dblpXml + ".lnk.csv"))
    {
        lnkFile.Write("LinkId, From, To");
        foreach (var column in lnkAttsList)
        {
            lnkFile.Write("," + column);
        }
        lnkFile.WriteLine();

        foreach (var lnk in links.Values)
        {
            // Count rows inside the loop so the progress dots are actually printed.
            progress++;
            if (progress % 10000 == 0)
            {
                Console.Write(".");
            }

            lnkFile.Write("{0}, {1}, {2}", lnk.Id, lnk.From, lnk.To);
            foreach (var column in lnkAttsList)
            {
                // NOTE(review): link values are written verbatim, whereas object values get
                // "," replaced by "~" below; a link value containing a comma would shift
                // the columns - confirm whether that can happen.
                lnkFile.Write(lnk.Attributes.ContainsKey(column) ? "," + lnk.Attributes[column] : ",");
            }
            lnkFile.WriteLine();
        }
    }

    Console.WriteLine();
    Console.WriteLine("Writing Object Files");
    var objAttsList = objAtts.ToList();
    progress = 0;
    using (var objFile = System.IO.File.CreateText(dblpXml + ".obj.csv"))
    {
        objFile.Write("Id");
        foreach (var column in objAttsList)
        {
            objFile.Write("," + column.Replace(",", "~"));
        }
        objFile.WriteLine();

        foreach (var obj in objs.Values)
        {
            progress++;
            if (progress % 10000 == 0)
            {
                Console.Write("~");
            }

            objFile.Write("{0}", obj.Id);
            foreach (var column in objAttsList)
            {
                // Commas inside names and values become "~" so they cannot break the CSV columns.
                objFile.Write(obj.Attributes.ContainsKey(column) ? "," + obj.Attributes[column].Replace(",", "~") : ",");
            }
            objFile.WriteLine();
        }
    }

    return context;
}