}

// XML

/// <summary>
/// Loads an <see cref="XDict"/> from an XML file.
/// </summary>
/// <param name="file">File to load; passed unchanged to <c>Utils.LoadXml</c>.</param>
/// <returns>
/// The object returned by <c>Utils.LoadXml</c>, after <c>Reindex()</c> has been
/// called on it.
/// </returns>
public static XDict Load(String file)
{
    XDict loaded = Utils.LoadXml<XDict>(file);

    // The index is refreshed before the dictionary is handed to the caller.
    loaded.Reindex();

    return loaded;
}
/// <summary>
/// Parses a BGL file into a new <see cref="XDict"/>.
/// </summary>
/// <remarks>
/// The file is expected to start with a 6-byte header: a 4-byte signature
/// (0x12340001 or 0x12340002, big-endian) followed by a 2-byte big-endian
/// offset at which the gzip data begins. The gzip data is read twice from that
/// offset: once by <c>parseMetaData</c> and, after both encodings have been
/// resolved, once more by <c>ParseUnzipped</c>.
/// </remarks>
/// <param name="file">Path of the file to open for reading.</param>
/// <returns>The dictionary passed through <c>FillHeader</c> and <c>ParseUnzipped</c>.</returns>
/// <exception cref="FileLoadException">
/// The file is shorter than 6 bytes, does not start with the BGL signature,
/// or its gzip offset is smaller than 6.
/// </exception>
/// <exception cref="InvalidDataException">
/// No source or destination encoding is available after the metadata pass.
/// </exception>
public XDict Parse(string file)
{
    XDict dict = new XDict();
    FillHeader(dict);

    // Read
    using (Stream s = new FileStream(file, FileMode.Open, FileAccess.Read))
    {
        // Stream.Read may return fewer bytes than requested, so keep reading
        // until the 6-byte header is complete or the stream ends.
        byte[] buf = new byte[6];
        int pos = 0;
        while (pos < buf.Length)
        {
            int read = s.Read(buf, pos, buf.Length - pos);
            if (read <= 0)
            {
                break;
            }

            pos += read;
        }

        // First four bytes: BGL signature 0x12340001 or 0x12340002 (big-endian).
        // The fourth signature byte is buf[3]; buf[4] already belongs to the
        // gzip offset. Reject the file when the signature does NOT match.
        bool hasSignature = pos == buf.Length
            && buf[0] == 0x12
            && buf[1] == 0x34
            && buf[2] == 0x00
            && (buf[3] == 0x01 || buf[3] == 0x02);
        if (!hasSignature)
        {
            throw new FileLoadException("Invalid file: no BGL signature: " + file);
        }

        // Bytes 4-5: big-endian offset of the gzip data. It cannot point into
        // the 6-byte header itself.
        int gzipHeaderPos = (buf[4] << 8) | buf[5];
        if (gzipHeaderPos < 6)
        {
            throw new FileLoadException("No gzip ptr");
        }

        // First pass over the gzip data: metadata only.
        // NOTE(review): the GZipInputStream wrappers are deliberately left
        // undisposed here; disposing one would presumably also close `s`,
        // which is still needed for the second pass - confirm before changing.
        s.Seek(gzipHeaderPos, SeekOrigin.Begin);
        parseMetaData(new GZipInputStream(s));

        // An encoding that is already set wins; otherwise fall back to the
        // encoding name (presumably filled in by parseMetaData - confirm).
        if (this.SrcEncoding == null)
        {
            if (string.IsNullOrWhiteSpace(this.SrcEnc))
            {
                throw new InvalidDataException("Failed to detect source encoding in BGL file. Please provide encoding via proper constructor.");
            }

            // if SrcEncoding not set at command line
            this.SrcEncoding = Encoding.GetEncoding(this.SrcEnc);
        }

        if (this.DstEncoding == null)
        {
            if (string.IsNullOrWhiteSpace(this.DstEnc))
            {
                throw new InvalidDataException("Failed to detect destination encoding in BGL file. Please provide encoding via proper constructor.");
            }

            // if DstEncoding not set at command line
            this.DstEncoding = Encoding.GetEncoding(this.DstEnc);
        }

        // Second pass over the same gzip data: the actual entries.
        s.Seek(gzipHeaderPos, SeekOrigin.Begin);
        ParseUnzipped(new GZipInputStream(s), dict);
    }

    return dict;
}
/// <summary>
/// Parses a BGL file into a new <see cref="XDict"/>.
/// </summary>
/// <remarks>
/// The file is expected to start with a 6-byte header: a 4-byte signature
/// (0x12340001 or 0x12340002, big-endian) followed by a 2-byte big-endian
/// offset at which the gzip data begins. The gzip data is read twice from that
/// offset: once by <c>parseMetaData</c> and, after both encodings have been
/// resolved, once more by <c>ParseUnzipped</c>.
/// </remarks>
/// <param name="file">Path of the file to open for reading.</param>
/// <returns>The dictionary passed through <c>FillHeader</c> and <c>ParseUnzipped</c>.</returns>
/// <exception cref="FileLoadException">
/// The file is shorter than 6 bytes, does not start with the BGL signature,
/// or its gzip offset is smaller than 6.
/// </exception>
/// <exception cref="InvalidDataException">
/// No source or destination encoding is available after the metadata pass.
/// </exception>
public XDict Parse(string file)
{
    XDict dict = new XDict();
    FillHeader(dict);

    // Read
    using (Stream s = new FileStream(file, FileMode.Open, FileAccess.Read))
    {
        // Stream.Read may return fewer bytes than requested, so keep reading
        // until the 6-byte header is complete or the stream ends.
        byte[] buf = new byte[6];
        int pos = 0;
        while (pos < buf.Length)
        {
            int read = s.Read(buf, pos, buf.Length - pos);
            if (read <= 0)
            {
                break;
            }

            pos += read;
        }

        // First four bytes: BGL signature 0x12340001 or 0x12340002 (big-endian).
        // The fourth signature byte is buf[3]; buf[4] already belongs to the
        // gzip offset. Reject the file when the signature does NOT match.
        bool hasSignature = pos == buf.Length
            && buf[0] == 0x12
            && buf[1] == 0x34
            && buf[2] == 0x00
            && (buf[3] == 0x01 || buf[3] == 0x02);
        if (!hasSignature)
        {
            throw new FileLoadException("Invalid file: no BGL signature: " + file);
        }

        // Bytes 4-5: big-endian offset of the gzip data. It cannot point into
        // the 6-byte header itself.
        int gzipHeaderPos = (buf[4] << 8) | buf[5];
        if (gzipHeaderPos < 6)
        {
            throw new FileLoadException("No gzip ptr");
        }

        // First pass over the gzip data: metadata only.
        // NOTE(review): the GZipInputStream wrappers are deliberately left
        // undisposed here; disposing one would presumably also close `s`,
        // which is still needed for the second pass - confirm before changing.
        s.Seek(gzipHeaderPos, SeekOrigin.Begin);
        parseMetaData(new GZipInputStream(s));

        // An encoding that is already set wins; otherwise fall back to the
        // encoding name (presumably filled in by parseMetaData - confirm).
        if (this.SrcEncoding == null)
        {
            if (string.IsNullOrWhiteSpace(this.SrcEnc))
            {
                throw new InvalidDataException("Failed to detect source encoding in BGL file. Please provide encoding via proper constructor.");
            }

            // if SrcEncoding not set at command line
            this.SrcEncoding = Encoding.GetEncoding(this.SrcEnc);
        }

        if (this.DstEncoding == null)
        {
            if (string.IsNullOrWhiteSpace(this.DstEnc))
            {
                throw new InvalidDataException("Failed to detect destination encoding in BGL file. Please provide encoding via proper constructor.");
            }

            // if DstEncoding not set at command line
            this.DstEncoding = Encoding.GetEncoding(this.DstEnc);
        }

        // Second pass over the same gzip data: the actual entries.
        s.Seek(gzipHeaderPos, SeekOrigin.Begin);
        ParseUnzipped(new GZipInputStream(s), dict);
    }

    return dict;
}
/// <summary>
/// Writes every entry of <paramref name="dict"/> to a UTF-8 HTML file using
/// <c>idx:entry</c> / <c>idx:orth</c> / <c>idx:infl</c> markup, and reports
/// progress on the console while doing so.
/// </summary>
/// <remarks>
/// The progress line is redrawn at the cursor position the console had when the
/// method was entered. On success the console foreground colour is left set to
/// magenta after the final message is printed.
/// </remarks>
/// <param name="dict">Dictionary whose <c>Entries</c> are written out.</param>
/// <param name="outputPath">Path of the HTML file to create or overwrite.</param>
internal static void GenerateHtml(XDict dict, string outputPath)
{
    // Read the dictionary and console state before touching the output file.
    int numOfEntries = dict.Entries.Count;
    int curEntry = 1;
    int top = Console.CursorTop;
    int left = Console.CursorLeft;

    // `using` guarantees the writer is flushed and closed even when an entry
    // or a console call throws part-way through.
    using (StreamWriter outFileStream = new StreamWriter(outputPath, false, System.Text.Encoding.UTF8))
    {
        //header
        outFileStream.WriteLine("<html>\n\t<head>\n\t\t<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" />\n\t\t<title>Babylon Dictionary</title>\n\t</head>\n\t<body>");

        foreach (XDictEntry entry in dict.Entries)
        {
            // Overwrite the previous progress line instead of scrolling.
            Console.SetCursorPosition(left, top);
            Console.Write("Entry #{0}/{1} ({2})...", curEntry, numOfEntries, ((double)curEntry / numOfEntries).ToString("P"));
            curEntry++;

            // NOTE(review): Word.Text, comment.Text and definition.Text are
            // written without HTML/attribute escaping. Definitions may carry
            // markup on purpose, so this is left unchanged - confirm whether
            // headwords and inflection values need escaping.
            outFileStream.WriteLine("\t\t<idx:entry name=\"dic\">");
            outFileStream.WriteLine("\t\t\t\t<idx:orth>" + entry.Word.Text);

            if (entry.Comments.Count > 0)
            {
                outFileStream.WriteLine("\t\t\t\t<idx:infl>");
                foreach (XWordInfo comment in entry.Comments)
                {
                    // Blank comments produce no inflection form.
                    if (!string.IsNullOrWhiteSpace(comment.Text))
                    {
                        outFileStream.WriteLine("\t\t\t\t\t<idx:iform value=\"" + comment.Text + "\" />");
                    }
                }
                outFileStream.WriteLine("\t\t\t\t</idx:infl>");
            }

            outFileStream.WriteLine("\t\t\t</idx:orth>");

            foreach (XWordInfo definition in entry.Definitions)
            {
                outFileStream.WriteLine("\t\t\t<p><ul><li><blockquote>" + definition.Text + "</blockquote></li></ul></p>");
            }

            outFileStream.WriteLine("\t\t</idx:entry>");
        }

        outFileStream.WriteLine("\t</body>\n</html>");
    }

    Console.ForegroundColor = ConsoleColor.Magenta;
    Console.WriteLine("\n\nGenerated HTML file \"{0}\".", Path.GetFileName(outputPath));
}
/// <summary>
/// Reads blocks from <paramref name="s"/> until <c>BglBlock.Read</c> returns
/// <c>null</c>, adding one entry to <paramref name="dict"/> for every block of
/// type 1 or 10.
/// </summary>
/// <param name="s">Stream the blocks are read from.</param>
/// <param name="dict">Dictionary that receives the parsed entries.</param>
void ParseUnzipped(Stream s, XDict dict)
{
    for (BglBlock current = BglBlock.Read(s); current != null; current = BglBlock.Read(s))
    {
        // Only block types 1 and 10 are turned into entries; all other
        // blocks are read and discarded.
        bool isEntryBlock = current.Type == 1 || current.Type == 10;
        if (!isEntryBlock)
        {
            continue;
        }

        dict.AddEntry(ParseEntry(current));
    }
}
/// <summary>
/// Placeholder for setting header fields on <paramref name="dict"/>.
/// Currently does nothing: every assignment in the body is commented out.
/// </summary>
/// <param name="dict">Dictionary whose header would be filled in; not modified at present.</param>
protected void FillHeader(XDict dict)
{
    // Disabled header assignments, kept for reference:
    //dict.Source = "BCL: " + _bclName;
    //dict.Url = @"http://www.babylon.com/define/122/English-Thai-Dictionary.html";
    //dict.Comment = "License unknown. Content is user created, and the website screams 'free dictionary', but BCL format is closed, no public spec.";
}