/// <summary>
/// Command-line entry point for imposis2xml. With at least two arguments it
/// converts args[0] to args[1] (opening args[2] as the log file first when a
/// third argument is given). With fewer than two arguments it prints the
/// syntax text and then converts a hard-coded pair of paths.
/// </summary>
/// <param name="args">infile, xmlfile and optional logname</param>
static void Main(string[] args)
{
    Logit.useConsole = true;

    string inputPath;
    string outputPath;
    bool haveBothFileNames = !(args.Length < 2);

    if (haveBothFileNames)
    {
        if (args.Length > 2)
        {
            Logit.OpenFile(args[2]);
        }
        inputPath = args[0];
        outputPath = args[1];
        Console.WriteLine("imposis2xml " + inputPath + " " + outputPath);
    }
    else
    {
        Console.WriteLine(@"Syntax: imposis2xml.exe infile xmlfile [logname] infile = file written by mod2imp.exe or an OSIS file like the KJV2006 example xmlfile = xml file for further parsing logname = name of log text file");
        // Fallback paths used when the command line is incomplete.
        inputPath = @"C:\Users\Kahunapule\Documents\tmp\Wycliffe.imp";
        outputPath = @"C:\Users\Kahunapule\Documents\tmp\Wycliffe.usfx";
    }

    // Both branches finish the same way: convert, then close the log.
    RecoverOsisData converter = new RecoverOsisData();
    converter.readImpOsis(inputPath, outputPath);
    Logit.CloseFile();
}
/// <summary>
/// Calls ReadUsx on every file in usxDir whose extension is ".usx"
/// (ignoring case), then recurses into each subdirectory.
/// </summary>
/// <param name="usxDir">Directory to scan</param>
/// <returns>
/// false if an exception was caught while scanning this directory or any
/// directory below it; true otherwise
/// </returns>
protected bool ScanUsxDir(string usxDir)
{
    bool result = true;
    try
    {
        DirectoryInfo dir = new DirectoryInfo(usxDir);
        foreach (FileInfo f in dir.GetFiles())
        {
            // File extensions are identifiers, not linguistic text, so compare
            // them ordinally instead of with the current culture's rules.
            if (string.Equals(f.Extension, ".usx", StringComparison.OrdinalIgnoreCase))
            {
                ReadUsx(f.FullName);
            }
        }
        foreach (DirectoryInfo di in dir.GetDirectories())
        {
            string fullName = Path.Combine(usxDir, di.Name);
            if (Directory.Exists(fullName))
            {
                // Accumulate failures from lower levels without stopping the scan.
                result &= ScanUsxDir(fullName);
            }
        }
    }
    catch (Exception ex)
    {
        Logit.WriteError("Error converting USX files in " + usxDir + " to USFX.");
        Logit.WriteError(ex.Message);
        return(false);
    }
    return(result);
}
/// <summary>
/// Click handler for the convert button. Opens the log file, installs
/// WriteToStatusListBox as Logit.GUIWriteString, logs a notice that the
/// program is unfinished, and closes the log again.
/// </summary>
/// <param name="sender">event source (unused)</param>
/// <param name="e">event arguments (unused)</param>
private void convertButton_Click(object sender, System.EventArgs e)
{
    const string logFileName = "WordSendLog.txt";
    const string notice = "Sorry, this program is still under construction.";

    Logit.OpenFile(logFileName);
    Logit.GUIWriteString = WriteToStatusListBox;
    Logit.WriteLine(notice);
    Logit.CloseFile();
}
/// <summary>
/// Copy a node from the current XmlTextReader object to the given XmlTextWriter object.
/// Elements are copied with all of their attributes; an empty element is closed
/// immediately. Insignificant white space is deliberately not copied. CDATA
/// sections are copied as CDATA so that their text is not lost. Any other node
/// type is only reported through Logit and otherwise ignored.
/// </summary>
/// <param name="xw">the XmlTextWriter object to write to</param>
public void CopyNode(XmlTextWriter xw)
{
    switch (NodeType)
    {
        case XmlNodeType.Element:
            xw.WriteStartElement(Name);
            xw.WriteAttributes(this, true);
            if (IsEmptyElement)
            {
                // No EndElement node will follow an empty element, so close it now.
                xw.WriteEndElement();
            }
            break;
        case XmlNodeType.EndElement:
            xw.WriteEndElement();
            break;
        case XmlNodeType.Text:
            xw.WriteString(Value);
            break;
        case XmlNodeType.CDATA:
            // Previously fell through to the default case, dropping the text.
            xw.WriteCData(Value);
            break;
        case XmlNodeType.SignificantWhitespace:
            xw.WriteWhitespace(Value);
            break;
        case XmlNodeType.Whitespace:
            // You could insert xw.WriteWhitespace(Value); to preserve
            // insignificant white space, but it either adds bloat or
            // messes up formatting.
            break;
        case XmlNodeType.Attribute:
            xw.WriteAttributeString(Name, Value);
            break;
        case XmlNodeType.ProcessingInstruction:
            xw.WriteProcessingInstruction(Name, Value);
            break;
        case XmlNodeType.XmlDeclaration:
            xw.WriteStartDocument(true);
            break;
        default:
            Logit.WriteLine("Doing NOTHING with type=" + NodeType.ToString() + " Name=" + Name + " Value=" + Value); // DEBUG
            break;
    }
}
/// <summary>
/// Flush the hash table to an XML file ("save"). Any existing file is first
/// moved to a backup with the "bak" extension (replacing an older backup).
/// The output is an "ini" root element holding one "entry" element, with
/// "key" and "value" children, per hash table entry.
/// </summary>
/// <returns>true iff success (including a successful close of the file)</returns>
public bool Write()
{
    bool result = false;
    string bakFileName = Path.ChangeExtension(fileName, "bak");
    try
    {
        Utils.DeleteFile(bakFileName);
        if (File.Exists(fileName))
        {
            File.Move(fileName, bakFileName);
        }
        using (XmlTextWriter xml = new XmlTextWriter(fileName, System.Text.Encoding.UTF8))
        {
            xml.WriteStartDocument();
            xml.Formatting = Formatting.Indented;
            xml.WriteStartElement("ini");
            IDictionaryEnumerator enu = hashTbl.GetEnumerator();
            while (enu.MoveNext())
            {
                xml.WriteStartElement("entry");
                xml.WriteElementString("key", (string)enu.Key);
                xml.WriteElementString("value", (string)enu.Value);
                xml.WriteEndElement();	// entry
            }
            xml.WriteEndElement();	// ini
            xml.WriteEndDocument();
        }
        // Only report success once the writer has been flushed and closed;
        // a failure while closing now lands in the catch block with result == false.
        result = true;
    }
    catch (System.Exception ex)
    {
        Logit.WriteError("Can't write to " + fileName);
        // Keep the underlying reason, as the other converters in this project do.
        Logit.WriteError(ex.Message);
    }
    return(result);
}
/// <summary>
/// Convert all USX files with .usx extensions in the given usxDir and directories
/// below it to USFX in usfxFile. (Scanning directories below it is to
/// allow a pure unzip of an ETEN DBL bundle to be put into the USX directory.)
/// </summary>
/// <param name="usxDir">Directory containing .usx files</param>
/// <param name="usfxFile">path and file name of USFX file to write</param>
/// <returns>
/// true if the directory scan reported success and no exception was thrown;
/// false otherwise
/// </returns>
public bool Convert(string usxDir, string usfxFile)
{
    bool result;
    try
    {
        processedUsxBooks = string.Empty;
        scrp = new Scriptures();
        scrp.OpenUsfx(usfxFile);
        // ScanUsxDir catches its own exceptions and reports them through its
        // return value, so that value has to be passed on to the caller.
        result = ScanUsxDir(usxDir);
        scrp.CloseUsfx();
    }
    catch (Exception ex)
    {
        Logit.WriteError("Error converting USX files in " + usxDir + " to " + usfxFile);
        Logit.WriteError(ex.Message);
        return(false);
    }
    return(result);
}
/// <summary>
/// Figure out which lemma index file to write to. Trailing non-digit
/// characters are stripped (down to a minimum length of 2), the number after
/// the first character is divided by 1000, and 6 is added when the first
/// character is 'H'.
/// </summary>
/// <param name="theWord">Strong's number (starting with capital H or G)</param>
/// <returns>
/// index into array of files; 0 when theWord is null or empty, has no
/// parsable number, or yields an index outside 0..LEMMASIZE-1
/// </returns>
protected int HashLemma(string theWord)
{
    int hash = 0;
    if (String.IsNullOrEmpty(theWord))
    {
        // Nothing to index on; the unguarded character access used to throw here.
        return(0);
    }
    // Test the length first so the character access is always in range.
    while ((theWord.Length > 2) && (!Char.IsDigit(theWord[theWord.Length - 1])))
    {
        theWord = theWord.Substring(0, theWord.Length - 1);
    }
    if (!Int32.TryParse(theWord.Substring(1), out hash))
    {
        return(0);
    }
    hash = hash / 1000;
    if (theWord[0] == 'H')
    {
        hash += 6;
    }
    // The result indexes the lemma file array, which MakeLemmaIndex allocates
    // with LEMMASIZE entries; a signed number could also produce a negative index.
    if ((hash < 0) || (hash >= LEMMASIZE))
    {
        Logit.WriteError("Bad Strong's number: " + theWord);
        hash = 0;
    }
    return(hash);
}
/// <summary>
/// Create an index of Strong's numbers (corresponding to the lemma or root word lexicon entry number).
/// NOTE: Call MakeJsonIndex immediately before calling MakeLemmaIndex.
/// This method reads includedVerses and writes to sqlConcordance, both of which
/// MakeJsonIndex sets up, and it closes sqlConcordance when it is done.
/// Exceptions are logged through Logit.WriteError and not rethrown. All files
/// opened here, the XML reader and sqlConcordance are closed on every exit path.
/// </summary>
/// <param name="lemmaTextFile">XML file whose "v" elements carry b, c and v attributes and the lemma text</param>
/// <param name="lemmaDir">Directory that receives the _G?000.json and _H?000.json index files</param>
public void MakeLemmaIndex(string lemmaTextFile, string lemmaDir)
{
    string oneWord;
    string bookCode;
    searchTextXml = new XmlTextReader(lemmaTextFile);
    wordCollection = new Hashtable(19999);
    StreamWriter[] lemmaFiles = null;
    int i, j, lineLength;
    char ch;
    char defaultSourceLanguage = 'H';
    Utils.EnsureDirectory(lemmaDir);
    BibleBookRecord br;
    try
    {
        try
        {
            // Read all references to Strong's numbers into wordCollection hash table.
            while (searchTextXml.Read())
            {
                if ((searchTextXml.NodeType == XmlNodeType.Element) && (searchTextXml.Name == "v"))
                {
                    bookCode = fileHelper.GetNamedAttribute(searchTextXml, "b");
                    currentBook = bookInfo.getShortCode(bookCode);
                    br = (BibleBookRecord)bookInfo.books[bookCode];
                    // Bare numbers get an H prefix in testament "o" books, G otherwise.
                    if (br.testament == "o")
                    {
                        defaultSourceLanguage = 'H';
                    }
                    else
                    {
                        defaultSourceLanguage = 'G';
                    }
                    currentChapter = fileHelper.GetNamedAttribute(searchTextXml, "c");
                    startVerse = currentVerse = fileHelper.GetNamedAttribute(searchTextXml, "v");
                    // Verse numbers might be verse bridges, like "20-22" or simple numbers, like "20".
                    i = currentVerse.IndexOf('-');
                    if (i > 0)
                    {
                        startVerse = startVerse.Substring(0, i);
                    }
                    verseID = currentBook + currentChapter + "_" + startVerse;
                    if (!Logit.ShowStatus("Creating lemma index " + verseID))
                    {
                        // Cancelled; the finally block releases everything.
                        return;
                    }
                    searchTextXml.Read();
                    if (includedVerses.Contains(verseID) && (searchTextXml.NodeType == XmlNodeType.Text))
                    {
                        string s = searchTextXml.Value;
                        for (i = 0; i < s.Length; i++)
                        {
                            if (!Char.IsWhiteSpace(s[i]))
                            {
                                if (word.Length == 0)
                                {
                                    if (Char.IsDigit(s[i]))
                                    {
                                        word.Append(defaultSourceLanguage);
                                    }
                                }
                                word.Append(s[i]);
                            }
                            else
                            {
                                AddWordToLemma();
                            }
                        }
                        AddWordToLemma();
                    }
                }
            }
            searchTextXml.Close();

            // Write search index with fewer files.
            bool[] commaNeeded = new bool[LEMMASIZE];   // Boolean variables are created with value "false"
            lemmaFiles = new StreamWriter[LEMMASIZE];
            char srcLang = 'G';
            for (i = 0, j = 0; i < LEMMASIZE; i++, j++)
            {
                // The first six files are the G series; numbering restarts for H.
                if (i == 6)
                {
                    srcLang = 'H';
                    j = 0;
                }
                lemmaFiles[i] = new StreamWriter(Path.Combine(lemmaDir, "_" + srcLang + j.ToString() + "000.json"), false, Encoding.UTF8);
                lemmaFiles[i].Write("{\n");
            }

            foreach (DictionaryEntry de in wordCollection)
            {
                oneWord = (string)de.Key;
                int hash = HashLemma(oneWord);
                string longString = (string)de.Value;
                sqlConcordance.WriteLine("INSERT INTO {0} VALUES (\"{1}\",\"{2}\");", concTableName, oneWord, longString);
                // Turn the comma separated list into quoted JSON array items,
                // breaking the line once it grows past 100 characters.
                StringBuilder sb = new StringBuilder();
                lineLength = 26 + oneWord.Length;
                for (i = 0; i < longString.Length; i++)
                {
                    ch = longString[i];
                    if (ch == ',')
                    {
                        sb.Append("\",");
                        lineLength += 2;
                        if (lineLength > 100)
                        {
                            sb.Append("\n");
                            lineLength = 0;
                        }
                        sb.Append("\"");
                        lineLength++;
                    }
                    else
                    {
                        sb.Append(ch);
                        lineLength++;
                    }
                }
                if (commaNeeded[hash])
                {
                    lemmaFiles[hash].Write(",\n");
                }
                lemmaFiles[hash].Write("\"{0}\":[\"{1}\"]", oneWord, sb.ToString());
                commaNeeded[hash] = true;
                if (!Logit.ShowStatus("Writing lemma index " + oneWord))
                {
                    // Cancelled; the finally block releases everything.
                    return;
                }
            }
            for (i = 0; i < LEMMASIZE; i++)
            {
                lemmaFiles[i].Write("}\n");
                lemmaFiles[i].Close();
            }
            sqlConcordance.WriteLine("UNLOCK TABLES;");
            sqlConcordance.Close();
        }
        finally
        {
            // Runs on success, cancel and failure. Closing an already closed
            // reader or writer is harmless, so no bookkeeping is needed.
            searchTextXml.Close();
            if (lemmaFiles != null)
            {
                foreach (StreamWriter lemmaFile in lemmaFiles)
                {
                    if (lemmaFile != null)
                    {
                        lemmaFile.Close();
                    }
                }
            }
            if (sqlConcordance != null)
            {
                sqlConcordance.Close();
            }
        }
    }
    catch (Exception ex)
    {
        Logit.WriteError(ex.Message);
    }
}
/// <summary>
/// Create an index file to speed searches in Browser Bible.
/// Also starts the SQL concordance file: sqlConcordance is opened here and is
/// left open on return (MakeLemmaIndex writes to it and closes it).
/// The word index files and the XML reader are closed on every exit path.
/// </summary>
/// <param name="verseTextFile">Name of XML file with normalized search text by verse.</param>
/// <param name="searchDir">Name of directory to write search files into.</param>
/// <param name="sqlFile">Name of the SQL file to create.</param>
public void MakeJsonIndex(string verseTextFile, string searchDir, string sqlFile)
{
    string oneWord;
    searchTextXml = new XmlTextReader(verseTextFile);
    wordCollection = new Hashtable(400009);
    StreamWriter[] wordFiles = null;
    // NOTE(review): b32 is never used in this method; kept in case the
    // constructor matters elsewhere - confirm and remove.
    base32string b32 = new base32string();
    int i, lineLength;
    char ch;
    Utils.EnsureDirectory(searchDir);
    includedVerses = new HashSet<string>();
    sqlConcordance = new StreamWriter(sqlFile, false, Encoding.UTF8);
    concTableName = Path.GetFileNameWithoutExtension(sqlFile);
    try
    {
        // Write SQL file preamble
        sqlConcordance.WriteLine(@"USE sofia; DROP TABLE IF EXISTS sofia.{0}; CREATE TABLE {0} ( keyWord VARCHAR(128) COLLATE UTF8_GENERAL_CI NOT NULL, verseList TEXT NOT NULL) ENGINE=MyISAM; LOCK TABLES {0} WRITE;", concTableName);

        // Read the verse list
        while (searchTextXml.Read())
        {
            if ((searchTextXml.NodeType == XmlNodeType.Element) && (searchTextXml.Name == "v"))
            {
                currentBook = bookInfo.getShortCode(fileHelper.GetNamedAttribute(searchTextXml, "b"));
                currentChapter = fileHelper.GetNamedAttribute(searchTextXml, "c");
                startVerse = currentVerse = fileHelper.GetNamedAttribute(searchTextXml, "v");
                // Verse numbers might be verse bridges, like "20-22" or simple numbers, like "20".
                i = currentVerse.IndexOf('-');
                if (i > 0)
                {
                    startVerse = startVerse.Substring(0, i);
                }
                verseID = currentBook + currentChapter + "_" + startVerse;
                if (!Logit.ShowStatus("Creating word index " + verseID))
                {
                    // Cancelled; the finally block closes the reader.
                    return;
                }
                searchTextXml.Read();
                if (searchTextXml.NodeType == XmlNodeType.Text)
                {
                    // Only verses with non-blank text count as included.
                    if (searchTextXml.Value.Trim().Length > 0)
                    {
                        includedVerses.Add(verseID);
                    }
                    IndexWords(searchTextXml.Value);
                }
            }
        }
        searchTextXml.Close();

        // Write search index with fewer files.
        bool[] commaNeeded = new bool[HASHSIZE];
        wordFiles = new StreamWriter[HASHSIZE];
        for (i = 0; i < HASHSIZE; i++)
        {
            wordFiles[i] = new StreamWriter(Path.Combine(searchDir, "_" + i.ToString() + ".json"));
            wordFiles[i].Write("{\n");
        }

        foreach (DictionaryEntry de in wordCollection)
        {
            oneWord = (string)de.Key;
            if (oneWord.Length > 0)
            {
                int hash = HashWord(oneWord);
                string longString = (string)de.Value;
                sqlConcordance.WriteLine("INSERT INTO {0} VALUES (\"{1}\",\"{2}\");", concTableName, oneWord, longString);
                // Turn the comma separated list into quoted JSON array items,
                // breaking the line once it grows past 100 characters.
                StringBuilder sb = new StringBuilder();
                lineLength = 26 + oneWord.Length;
                for (i = 0; i < longString.Length; i++)
                {
                    ch = longString[i];
                    if (ch == ',')
                    {
                        sb.Append("\",");
                        lineLength += 2;
                        if (lineLength > 100)
                        {
                            sb.Append("\n");
                            lineLength = 0;
                        }
                        sb.Append("\"");
                        lineLength++;
                    }
                    else
                    {
                        sb.Append(ch);
                        lineLength++;
                    }
                }
                // Only words that start with a letter go into the JSON files;
                // every non-empty word goes into the SQL file above.
                if (Char.IsLetter(oneWord[0]))
                {
                    if (commaNeeded[hash])
                    {
                        wordFiles[hash].Write(",\n");
                    }
                    wordFiles[hash].Write("\"{0}\":[\"{1}\"]", oneWord, sb.ToString());
                    commaNeeded[hash] = true;
                }
                if (!Logit.ShowStatus("Writing word index " + oneWord))
                {
                    // Cancelled; the finally block closes the word files.
                    return;
                }
            }
        }
        for (i = 0; i < HASHSIZE; i++)
        {
            wordFiles[i].Write("}\n");
            wordFiles[i].Close();
        }
    }
    finally
    {
        // Runs on success, cancel and failure. Closing twice is harmless.
        // sqlConcordance is intentionally not closed here.
        searchTextXml.Close();
        if (wordFiles != null)
        {
            foreach (StreamWriter wordFile in wordFiles)
            {
                if (wordFile != null)
                {
                    wordFile.Close();
                }
            }
        }
    }
}
/// <summary>
/// Command-line entry point for sf2word. Parses switches and file
/// specifications, opens the log, optionally prints the banner and syntax
/// text, loads the XML job options, reads every input file specification
/// and writes the result as WordML.
/// </summary>
/// <param name="args">switches (introduced by - or /) and input file specifications</param>
static void Main(string[] args)
{
    bool bannerWanted = true;
    string outputFileName = "";
    string logFileName = "WordSendLog.txt";
    string jobOptionsFileName = Environment.GetEnvironmentVariable("APPDATA") + "\\SIL\\WordSend\\joboptions.xml";
    string templateFileName = "";
    ArrayList inputSpecs = new ArrayList(127);
    Logit.useConsole = true;

    // Scan the command line. The index is passed by reference to GetOption.
    int argIndex;
    for (argIndex = 0; argIndex < args.Length; argIndex++)
    {
        string arg = args[argIndex];
        if (String.IsNullOrEmpty(arg))
        {
            continue;
        }

        bool isSwitch = (arg.Length > 1) && ((arg[0] == '-') || (arg[0] == '/'));
        if (!isSwitch)
        {
            inputSpecs.Add(args[argIndex]);
            continue;
        }

        // command line switch: take action
        switch (Char.ToLower(arg[1]))
        {
            case 'j':   // Job options file name
                jobOptionsFileName = SFConverter.GetOption(ref argIndex, args);
                break;
            case 'n':   // No banner display
                bannerWanted = false;
                break;
            case 'o':   // Set output file name
                outputFileName = SFConverter.GetOption(ref argIndex, args);
                break;
            case 't':   // Set template file name
                templateFileName = SFConverter.GetOption(ref argIndex, args);
                break;
            case 'l':   // Set log name
                logFileName = SFConverter.GetOption(ref argIndex, args);
                break;
            case '?':
            case 'h':
            case '-':
                bannerWanted = true;
                break;
            default:
                Logit.WriteLine("Unrecognized command line switch: " + args[argIndex]);
                break;
        }
    }

    Logit.OpenFile(logFileName);
    Logit.WriteLine("\nWordSend project sf2word compiled " + Version.date);
    if (bannerWanted)
    {
        Logit.WriteLine("");
        Logit.WriteLine(Version.copyright);
        Logit.WriteLine("");
        Logit.WriteLine(Version.contact);
        Logit.WriteLine(@" Syntax: sf2word [-o Output] [-j Job] [-t Template] [-n] [-?] [-l logname] filespec(s) Output = output WordML file name Job = name of XML job options file created with usfm2word.exe. Template = example WordML document with the required style definitions -n = don't display copyright and banner information. -? = cancel previous /n and show this information. 
-l = set log file name (default is WordSendLog.txt in the current directory) filespec = SFM file specification to read If a JobOptions file is specified, and that file contains the input file specifications, then filespec(s) need not be specified on the command line. If filespec(s) or output files are specified on the command line, then the command line overrides those specifications in the job options file. You may use / instead of - to introduce switches. Do not use / or - as the first character of a file name. ");
    }

    // Read XML job options file. Note that this file is only read
    // from in the command line program, but may be
    // written to from the Windows UI version. It is reasonable to
    // set up the options the way you want them in the Windows UI
    // version of the program, then read them with this command line
    // version. We convert some command line arguments to entries
    // in this class for consistency in the handling of options
    // between the command line and Windows UI versions.
    SFConverter.jobIni = new XMLini(jobOptionsFileName);
    SFConverter.jobIni.WriteString("TemplateName", templateFileName);
    if (outputFileName == "")
    {
        outputFileName = SFConverter.jobIni.ReadString("outputFileName", "Output.xml");
    }
    if (templateFileName != "")
    {
        SFConverter.jobIni.WriteString("templateName", templateFileName);
    }

    // Fall back to the file specifications stored in the job options.
    if (inputSpecs.Count < 1)
    {
        int storedSpecCount = SFConverter.jobIni.ReadInt("numSfmFiles", 0);
        for (int specIndex = 0; specIndex < storedSpecCount; specIndex++)
        {
            inputSpecs.Add(SFConverter.jobIni.ReadString("sfmFile" + specIndex.ToString(), "*.sfm"));
        }
    }

    if (inputSpecs.Count >= 1)
    {
        // We don't really have a need for application options in the
        // command line program, but if we did, they would go in the
        // following place:
        // SFConverter.appIni = new XMLini(Environment.GetEnvironmentVariable("APPDATA")+"\\SIL\\WordSend\\sf2word.xml");

        // Here we instantiate the object that does most of the work.
        SFConverter.scripture = new Scriptures();

        Logit.WriteLine("Job options: " + jobOptionsFileName);
        Logit.WriteLine("Output file: " + outputFileName);

        // Read the input USFM files into internal data structures.
        foreach (string inputSpec in inputSpecs)
        {
            SFConverter.ProcessFilespec(inputSpec);
        }

        // Write out the WordML file.
        SFConverter.scripture.WriteToWordML(outputFileName);
    }
    else
    {
        Logit.WriteLine("Nothing to do. No input files specified.");
    }
    Logit.CloseFile();
}
/// <summary>
/// Read an imp or OSIS file and convert at least the main canonical text to USFX.
/// A file whose first line starts with "$$$" is treated as imp and is first
/// converted to an intermediate XML file named outfile + ".tMpXmL"; a file
/// whose first line starts with an XML 1.0 declaration is parsed directly.
/// Anything else (including an empty file) is reported as an error.
/// Exceptions are logged through Logit.WriteError and not rethrown; the input
/// reader and the USFX output are closed on every exit path.
/// </summary>
/// <param name="infile">input imp or OSIS file name</param>
/// <param name="outfile">output USFX file name</param>
public void readImpOsis(string infile, string outfile)
{
    string line;
    string inname = infile;
    string bcv = String.Empty;
    string bk;
    int currentChapter = 0;
    int c = 0;
    int v = 0;
    string id = String.Empty;
    string type = String.Empty;
    string osisID = String.Empty;
    string eID = String.Empty;
    string lemma = String.Empty;
    string morph = String.Empty;
    string marker = String.Empty;
    string src = String.Empty;
    string savlm = String.Empty;
    string n = String.Empty;
    string who = String.Empty;
    XmlTextReader openedReader = null;  // the reader created by this call, for cleanup
    bool usfxOpen = false;              // true while the USFX output still needs closing
    try
    {
        // Peek at the first line to decide what kind of file this is.
        using (StreamReader sr = new StreamReader(infile, Encoding.UTF8))
        {
            line = sr.ReadLine();
        }
        if (line == null)
        {
            // Empty file: ReadLine returns null, which used to be dereferenced.
            Logit.WriteError("I don't know what to do with this file: " + infile);
            return;
        }
        line = line.TrimStart();
        if (line.StartsWith("$$$"))
        {
            inname = outfile + ".tMpXmL";
            Logit.WriteLine("Converting imp file " + infile + " to imp xml file " + inname);
            ImpOsis2Xml(infile, inname);
        }
        else if (!line.StartsWith("<?xml version=\"1.0\""))
        {
            Logit.WriteError("I don't know what to do with this file: " + infile);
            return;
        }

        xr = new XmlTextReader(inname);
        openedReader = xr;
        holyBooks.OpenUsfx(outfile);
        usfxOpen = true;
        Logit.WriteLine("Converting from " + inname + " to USFX file " + outfile);
        while (xr.Read())
        {
            // A verse start is held back until the next node that is not a
            // milestone, so the verse marker lands inside a paragraph.
            if ((delayedVerse > 0) && (xr.Name != "milestone"))
            {
                if (!inParagraph)
                {
                    StartNewParagraph("p");
                }
                // Re-checked on purpose: delayedVerse is a field, and the
                // paragraph helper above may have consumed it.
                if (delayedVerse > 0)
                {
                    WriteEmptyElementWithAttributes("v", "id", delayedVerse.ToString());
                    delayedVerse = 0;
                }
                RestartWJ();
                delayedVerse = 0;
            }

            if (xr.NodeType == XmlNodeType.Element)
            {
                id = GetNamedAttribute("id");
                type = GetNamedAttribute("type");
                osisID = GetNamedAttribute("osisID");
                eID = GetNamedAttribute("eID");
                lemma = GetNamedAttribute("lemma");
                morph = GetNamedAttribute("morph");
                marker = GetNamedAttribute("marker");
                who = GetNamedAttribute("who");
                src = GetNamedAttribute("src");
                savlm = GetNamedAttribute("savlm");
                n = GetNamedAttribute("n");
                sfm = GetNamedAttribute("sfm");
                switch (xr.Name)
                {
                    case "header":
                        SkipElement();
                        break;
                    case "impxml":
                        Logit.WriteLine("Parsing " + inname + " as imp xml.");
                        break;
                    case "osis":
                        Logit.WriteLine("Parsing " + inname + " as OSIS");
                        break;
                    case "book":
                        StartBook(id);
                        break;
                    case "chapter":
                        bcv = ParseOsisId(osisID, out bk, out c, out v);
                        EndParagraph();
                        WriteEmptyElementWithAttributes("c", "id", c.ToString());
                        currentChapter = c;
                        break;
                    case "v":
                        WriteEmptyElementWithAttributes("v", "id", id);
                        break;
                    case "p":
                        StartNewParagraph("p", sfm);
                        break;
                    case "verse":
                        if (eID.Length > 0)
                        {
                            // Verse end milestone.
                            SuspendWJ();
                            WriteEmptyElementWithAttributes("ve");
                        }
                        else
                        {
                            bcv = ParseOsisId(osisID, out bk, out c, out v);
                            if (c != currentChapter)
                            {
                                EndParagraph();
                                WriteEmptyElementWithAttributes("c", "id", c.ToString());
                                currentChapter = c;
                            }
                            delayedVerse = v;
                        }
                        break;
                    case "transChange":
                        holyBooks.xw.WriteStartElement("add");
                        break;
                    case "div":
                        switch (type)
                        {
                            case "book":
                                bcv = ParseOsisId(osisID, out bk, out c, out v);
                                StartBook(bk);
                                break;
                            case "colophon":
                                StartNewParagraph("p", "ie");
                                break;
                        }
                        break;
                    case "milestone":
                        if ((type == "x-extra-p") || (type == "x-p"))
                        {
                            SuspendWJ();
                            StartNewParagraph("p");
                            if (marker.Length > 0)
                            {
                                holyBooks.xw.WriteString(marker + " ");
                            }
                        }
                        break;
                    case "w":
                        lemma = (lemma + " " + savlm).Trim();
                        morph = (morph + " " + src).Trim();
                        // Remembered in a field so the matching end tag knows
                        // whether a <w> start tag was actually written.
                        wHasContent = (!xr.IsEmptyElement) && ((lemma.Length + morph.Length) > 0);
                        if (wHasContent)
                        {
                            holyBooks.xw.WriteStartElement("w");
                            if (lemma.Length > 0)
                            {
                                holyBooks.xw.WriteAttributeString("s", lemma.Replace("strong:", ""));
                            }
                            if (morph.Length > 0)
                            {
                                holyBooks.xw.WriteAttributeString("m", morph.Trim());
                            }
                            // No empty-element handling is needed here:
                            // wHasContent is only true for non-empty elements.
                        }
                        else
                        {
                            // Otherwise, don't bother with the tag, because we
                            // really don't know semantically what it means.
                            Logit.WriteLine("Warning: empty <w> element ignored at " + bcv);
                        }
                        break;
                    case "title":
                        switch (type)
                        {
                            case "main":
                                StartNewParagraph("mt");
                                break;
                            case "psalm":
                                StartNewParagraph("d");
                                break;
                            case "acrostic":
                                StartNewParagraph("s");
                                break;
                            case "chapter":
                                if (!inParagraph)
                                {
                                    StartNewParagraph("p");
                                }
                                SkipElement();
                                break;
                        }
                        break;
                    case "note":
                        // type="study"
                        if (type == "study")
                        {
                            holyBooks.xw.WriteStartElement("f");
                            holyBooks.xw.WriteAttributeString("caller", "+");
                        }
                        else
                        {
                            SkipElement();
                        }
                        break;
                    case "divineName":
                        holyBooks.xw.WriteStartElement("nd");
                        break;
                    case "foreign":
                        holyBooks.xw.WriteStartElement("tl");
                        if (n.Length > 0)
                        {
                            holyBooks.xw.WriteString(" " + n + " ");
                        }
                        break;
                    case "ip":
                        holyBooks.xw.WriteStartElement("p");
                        holyBooks.xw.WriteAttributeString("sfm", "ip");
                        break;
                    case "q":
                        if (marker != String.Empty)
                        {
                            Console.WriteLine("Unsupported marker in <q> at {0}: \"{1}\"", bcv, marker);
                        }
                        if (who == "Jesus")
                        {
                            StartWJ();
                        }
                        break;
                }
            }
            else if (xr.NodeType == XmlNodeType.EndElement)
            {
                switch (xr.Name)
                {
                    case "w":
                        if (wHasContent)
                        {
                            holyBooks.xw.WriteEndElement();
                        }
                        break;
                    case "book":
                        EndBook();
                        currentChapter = 0;
                        break;
                    case "divineName":
                    case "transChange":
                    case "foreign":
                    case "note":
                    case "ip":
                        holyBooks.xw.WriteEndElement();
                        break;
                    case "verse":
                        // <ve />
                        SuspendWJ();
                        WriteEmptyElementWithAttributes("ve");
                        break;
                    case "q":
                        EndWJ();
                        break;
                    case "title":
                        EndParagraph();
                        break;
                    case "p":
                        EndParagraph();
                        break;
                }
            }
            else if (xr.NodeType == XmlNodeType.Text)
            {
                holyBooks.xw.WriteString(xr.Value);
            }
            else if (xr.NodeType == XmlNodeType.Whitespace)
            {
                holyBooks.xw.WriteWhitespace(xr.Value);
            }
        }
        // Cleared first so a failing close is not retried in the finally block.
        usfxOpen = false;
        holyBooks.CloseUsfx();
        xr.Close();
    }
    catch (Exception ex)
    {
        Logit.WriteError("Error at " + bcv);
        Logit.WriteError(ex.Message);
        Logit.WriteError(ex.StackTrace);
    }
    finally
    {
        // Release the input reader even when conversion failed part way.
        if (openedReader != null)
        {
            openedReader.Close();
        }
        // Best effort: do not leave the output file open after a failure.
        if (usfxOpen)
        {
            try
            {
                holyBooks.CloseUsfx();
            }
            catch (Exception closeEx)
            {
                Logit.WriteError(closeEx.Message);
            }
        }
    }
}
/// <summary>
/// Reads a USFX file node by node and writes its content for SILE, opening an
/// output file through OpenSileFile for each book that is to be included.
/// NOTE(review): end-element and text handling still call the Mosis-named
/// helpers (WriteMosisEndElement, mosis.WriteString) rather than sileFile,
/// so this conversion looks unfinished - confirm before relying on the output.
/// </summary>
/// <param name="usfxFileName">Name of the USFX file to read</param>
/// <param name="sileDirectory">Directory stored in sileDir for the output</param>
/// <returns>
/// true when the whole file was read; false when a book element has no usable
/// id or names an unknown book
/// </returns>
public bool ConvertUsfxToSile(string usfxFileName, string sileDirectory)
{
    bool result = false;
    sileDir = sileDirectory;
    usfx = new XmlTextReader(usfxFileName);
    try
    {
        usfx.WhitespaceHandling = WhitespaceHandling.All;
        while (usfx.Read())
        {
            Logit.ShowStatus("converting to SILE " + cv);
            if (usfx.NodeType == XmlNodeType.Element)
            {
                // GetAttribute returns null for a missing attribute.
                level = usfx.GetAttribute("level");
                style = usfx.GetAttribute("style");
                sfm = usfx.GetAttribute("sfm");
                caller = usfx.GetAttribute("caller");
                id = usfx.GetAttribute("id");
                switch (usfx.Name)
                {
                    case "languageCode":
                        SkipElement();
                        break;
                    case "book":
                        // Reset all per-book state.
                        currentBookHeader = currentBookTitle = String.Empty;
                        toc1 = toc2 = toc3 = String.Empty;
                        inToc1 = inToc2 = inToc3 = false;
                        currentChapter = currentChapterPublished = currentChapterAlternate = String.Empty;
                        currentVerse = currentVersePublished = currentVerseAlternate = String.Empty;
                        titleWritten = false;
                        chapterWritten = false;
                        // A missing id used to raise a NullReferenceException here.
                        bool bookIdUsable = (id != null) && (id.Length > 2);
                        if (bookIdUsable)
                        {
                            currentBookAbbrev = id;
                            bookRecord = (BibleBookRecord)bookInfo.books[currentBookAbbrev];
                        }
                        if ((bookRecord == null) || !bookIdUsable)
                        {
                            Logit.WriteError("Cannot process unknown book: " + currentBookAbbrev);
                            return(false);
                        }
                        if ((bookRecord.testament == "a") && !globe.projectOptions.includeApocrypha)
                        {
                            SkipElement();
                        }
                        else if (!globe.projectOptions.allowedBookList.Contains(bookRecord.tla)) // Check for presence of book in bookorder.txt
                        {
                            SkipElement();
                        }
                        else
                        {
                            // We have a book we want to write out.
                            OpenSileFile();
                        }
                        break;
                    case "fe":  // End note. Rarely used, fortunately, but in the standards. Treat as regular footnote.
                    case "f":   // footnote
                        if (!usfx.IsEmptyElement)
                        {
                            // "-" suppresses the caller; "+" or none asks for a generated one.
                            if (caller == "-")
                            {
                                caller = String.Empty;
                            }
                            else if ((caller == "+") || (String.IsNullOrEmpty(caller)))
                            {
                                caller = footnoteMark.Marker();
                            }
                            sileFile.WriteStartElement("f");
                            sileFile.WriteAttributeString("caller", caller);
                        }
                        break;
                    case "x":   // Cross references
                        if (!usfx.IsEmptyElement)
                        {
                            if (caller == "-")
                            {
                                caller = String.Empty;
                            }
                            else if ((caller == "+") || (String.IsNullOrEmpty(caller)))
                            {
                                caller = xrefMark.Marker();
                            }
                            sileFile.WriteStartElement("x");
                            sileFile.WriteAttributeString("caller", caller);
                        }
                        break;
                    case "ide":
                    case "fm":  // Should not actually be in any field texts. Safe to skip.
                    case "idx": // Peripherals - Back Matter Index
                        SkipElement();
                        break;
                    case "ie":  // Introduction end
                        SkipElement();
                        break;
                    case "id":
                        if (id != currentBookAbbrev)
                        {
                            Logit.WriteError("Book ID in <id> and <book> do not match: " + currentBookAbbrev + " is not " + id);
                        }
                        SkipElement();  // Strip out comment portion.
                        break;
                    case "toc": // Table of Contents entries
                        if (String.IsNullOrEmpty(level) || (level == "1"))
                        {
                            inToc1 = true;
                        }
                        else if (level == "2")
                        {
                            inToc2 = true;
                        }
                        else if (level == "3")
                        {
                            inToc3 = true;
                        }
                        else
                        {
                            SkipElement();
                        }
                        break;
                    case "rem": // Comment; not part of the actual text
                        SkipElement();
                        break;
                    case "h":
                        currentBookHeader = ReadElementText().Trim();
                        break;
                    case "c":
                        currentChapter = id;
                        currentChapterPublished = fileHelper.LocalizeDigits(currentChapter);
                        currentChapterAlternate = String.Empty;
                        currentVerse = currentVersePublished = currentVerseAlternate = String.Empty;
                        currentChapterPublished = chapterLabel + fileHelper.LocalizeDigits(ReadElementText().Trim());
                        chapterWritten = false;
                        break;
                    case "cl":
                        // Before the first chapter this sets the label used for
                        // all chapters; afterwards it names the current chapter.
                        if (currentChapter == String.Empty)
                        {
                            chapterLabel = ReadElementText().Trim() + " ";
                        }
                        else
                        {
                            currentChapterPublished = ReadElementText().Trim();
                        }
                        break;
                    case "cp":
                        if (!usfx.IsEmptyElement)
                        {
                            currentChapterPublished = ReadElementText().Trim();
                        }
                        break;
                    case "v":
                        PrintChapter();
                        currentVersePublished = fileHelper.LocalizeDigits(id);
                        currentVerse = id.Replace("\u200F", ""); // Strip out RTL character
                        currentVerseAlternate = "";
                        if (!usfx.IsEmptyElement)
                        {
                            // Element text, when present, overrides the published verse number.
                            usfx.Read();
                            if (usfx.NodeType == XmlNodeType.Text)
                            {
                                currentVersePublished = fileHelper.LocalizeDigits(usfx.Value.Trim());
                            }
                            if (usfx.NodeType != XmlNodeType.EndElement)
                            {
                                usfx.Read();
                            }
                        }
                        break;
                    case "va":  // Not supported by The Sword Project
                        SkipElement();
                        break;
                    case "vp":
                        // This feature is not supported by The Sword Project.
                        SkipElement();
                        break;
                    case "periph":
                        SkipElement();
                        break;
                    case "cs":  // Rare or new character style: don't know what it should be, so throw away tag & keep text.
                        break;
                    case "gw":  // Do nothing. Not sure what to do with glossary words, yet.
                    case "xt":  // Do nothing.
                    case "ft":  // Ignore. It does nothing useful, but is an artifact of USFM exclusive character styles.
                        break;
                    case "usfx":
                        // Nothing to do, here.
                        break;
                    case "dc":
                    case "xdc":
                    case "fdc":
                        if (!globe.projectOptions.includeApocrypha)
                        {
                            SkipElement();
                        }
                        break;
                    default:
                        // Pass any other element through with the attributes it carries.
                        sileFile.WriteStartElement(usfx.Name);
                        if (id != null)
                        {
                            sileFile.WriteAttributeString("id", id);
                        }
                        if (caller != null)
                        {
                            sileFile.WriteAttributeString("caller", caller);
                        }
                        if (level != null)
                        {
                            sileFile.WriteAttributeString("level", level);
                        }
                        if (sfm != null)
                        {
                            sileFile.WriteAttributeString("sfm", sfm);
                        }
                        if (style != null)
                        {
                            sileFile.WriteAttributeString("style", style);
                        }
                        if (usfx.IsEmptyElement)
                        {
                            sileFile.WriteEndElement();
                        }
                        break;
                }
            }
            else if (usfx.NodeType == XmlNodeType.EndElement)
            {
                if (inToc1 || inToc2)
                {
                    if (usfx.Name == "toc")
                    {
                        inToc2 = inToc1 = false;
                    }
                    else if (inToc1 && usfx.Name == "it")
                    {
                        toc1 += "</hi></seg>";
                    }
                    else
                    {
                        Logit.WriteLine("Warning: " + usfx.Name + " end markup in title at " + currentBookAbbrev + " not written to OSIS file");
                    }
                }
                else
                {
                    switch (usfx.Name)
                    {
                        case "w":
                        case "zw":
                            if (inStrongs)
                            {
                                WriteMosisEndElement();
                                inStrongs = false;
                            }
                            break;
                        case "wj":
                            WriteMosisEndElement(); // q
                            break;
                        case "book":
                            EndLineGroup();
                            EndCurrentVerse();
                            EndCurrentChapter();
                            EndIntroduction();
                            EndMajorSection();
                            WriteMosisEndElement(); // div type="book"
                            CheckElementLevel(3, "closed book");
                            break;
                        case "bdit":
                            WriteMosisEndElement(); // hi italic
                            WriteMosisEndElement(); // hi bold
                            break;
                        case "p":
                            if (itemLevel > 0)
                            {
                                itemLevel--;
                            }
                            CheckMinimumLevel(5, "Ending " + usfx.Name + " " + osisVerseId);
                            inNote = false;
                            if (eatPoetryLineEnd)
                            {
                                eatPoetryLineEnd = false;
                            }
                            else
                            {
                                WriteMosisEndElement();
                            }
                            break;
                        case "q":
                            if (eatPoetryLineEnd)
                            {
                                eatPoetryLineEnd = false;
                            }
                            else
                            {
                                WriteMosisEndElement();
                            }
                            break;
                        case "ref":
                            if (inReference)
                            {
                                WriteMosisEndElement(); // reference
                                inReference = false;
                            }
                            break;
                        case "fe":
                        case "f":
                        case "x":
                            if (inNote)
                            {
                                inNote = false;
                                WriteMosisEndElement(); // End of note
                            }
                            break;
                        case "add":
                            if (!inNote)
                            {
                                WriteMosisEndElement();
                            }
                            break;
                        case "qs":
                            if (inLineGroup)
                            {
                                WriteMosisEndElement();
                                inPoetryLine = false;
                            }
                            break;
                        case "bd":
                        case "bk":
                        case "cl":
                        case "d":
                        case "dc":
                        case "em":
                        case "fk":
                        case "fp":
                        case "fq":
                        case "fqa":
                        case "fr":
                        case "fv":
                        case "k":
                        case "no":
                        case "pn":
                        case "qac":
                        case "qt":
                        case "r":
                        case "rq":
                        case "s":
                        case "sc":
                        case "sig":
                        case "sls":
                        case "table":
                        case "tc":
                        case "tcr":
                        case "th":
                        case "thr":
                        case "tl":
                        case "tr":
                        case "xo":
                        case "ord":
                            // case "xq": Not useful for Sword modules.
                            WriteMosisEndElement(); // note, hi, reference, title, l, transChange, etc.
                            break;
                        case "it":
                            if (!inStrongs)
                            {
                                WriteMosisEndElement();
                            }
                            break;
                        case "nd":
                            WriteMosisEndElement(); // divineName
                            WriteMosisEndElement(); // seg
                            break;
                        case "xk":
                        case "fl":
                        case "zcr":
                        case "zcb":
                        case "zcg":
                        case "zcy":
                            // not supported.
                            break;
                        // A "toc" end tag cannot reach this switch while
                        // inToc1 or inToc2 is set; it is handled above.
                    }
                }
            }
            else if (((usfx.NodeType == XmlNodeType.Text) || (usfx.NodeType == XmlNodeType.SignificantWhitespace) || (usfx.NodeType == XmlNodeType.Whitespace)) && !ignore)
            {
                if (inToc1)
                {
                    toc1 = toc1 + usfx.Value;
                }
                else if (inToc2)
                {
                    toc2 = toc2 + usfx.Value;
                }
                else
                {
                    mosis.WriteString(usfx.Value);
                }
            }
        }
        // The whole document was read without hitting an unknown book.
        result = true;
    }
    finally
    {
        // Release the input file on every exit path, including the early return.
        usfx.Close();
    }
    return(result);
}
/// <summary>
/// Reads a USFX file and prepares it for full text search (or concordance generation)
/// by extracting only the canonical text within verses (and the canonical Psalm titles,
/// which are prepended to verse 1 text), stripping out all formatting, footnotes, etc.,
/// and normalizing all white space to single spaces. These verse text strings are put
/// into an XML file with one "v" element per verse, with book, chapter, and verse given
/// in attributes b, c, and v, respectively.
/// Two companion files are opened next to the verse file, with the extensions
/// ".vpltxt" and ".lemma".
/// </summary>
/// <param name="usfxFileName">Name of the USFX file to extract canonical text from</param>
/// <param name="verseFileName">Name of XML unformatted verse text only file</param>
/// <returns>true if the whole file was processed; false if Logit.ShowStatus reported
/// that processing should stop, or if an exception was caught (its message is logged)</returns>
public bool Filter(string usfxFileName, string verseFileName)
{
    string level = String.Empty;
    string style = String.Empty;
    string sfm = String.Empty;
    string caller = String.Empty;
    string id = String.Empty;
    string strongs = String.Empty;
    verseText = new StringBuilder();
    lemmaText = new StringBuilder();
    bool result = false;
    try
    {
        utf8encoding = new UTF8Encoding(false);
        vplFile = new StreamWriter(Path.ChangeExtension(verseFileName, ".vpltxt"), false, utf8encoding);
        lemmaFile = new XmlTextWriter(Path.ChangeExtension(verseFileName, ".lemma"), utf8encoding);
        lemmaFile.Formatting = Formatting.Indented;
        lemmaFile.WriteStartDocument();
        lemmaFile.WriteStartElement("lemmaFile");
        usfx = new XmlTextReader(usfxFileName);
        usfx.WhitespaceHandling = WhitespaceHandling.All;
        verseFile = new XmlTextWriter(verseFileName, utf8encoding);
        verseFile.Formatting = Formatting.Indented;
        verseFile.WriteStartDocument();
        verseFile.WriteStartElement("verseFile");
        while (usfx.Read())
        {
            if (!Logit.ShowStatus("extracting search text " + currentPlace))
            {
                // Stop requested; the finally block releases the open files.
                return(false);
            }
            if (usfx.NodeType == XmlNodeType.Element)
            {
                level = fileHelper.GetNamedAttribute(usfx, "level");
                style = fileHelper.GetNamedAttribute(usfx, "style");
                sfm = fileHelper.GetNamedAttribute(usfx, "sfm");
                caller = fileHelper.GetNamedAttribute(usfx, "caller");
                id = fileHelper.GetNamedAttribute(usfx, "id");
                switch (usfx.Name)
                {
                    case "book":
                        currentChapter = String.Empty;
                        currentVerse = String.Empty;
                        // Look the book up BEFORE touching any of its members, so an
                        // unknown id is reported and skipped instead of throwing.
                        BibleBookRecord knownBook = null;
                        if (id.Length == 3)
                        {
                            currentBook = id;
                            knownBook = (BibleBookRecord)bookInfo.books[currentBook];
                        }
                        if (knownBook == null)
                        {
                            Logit.WriteError("Cannot process unknown book: " + id);
                            SkipElement();
                        }
                        else
                        {
                            bookRecord = knownBook;
                            osisBook = bookRecord.osisName;
                            BibleWorksBook = bookRecord.bibleworksCode;
                            if (bookRecord.testament == "x")
                            {   // Skip peripherals.
                                SkipElement();
                            }
                        }
                        currentPlace = currentBook;
                        break;
                    case "id":
                        if (id != currentBook)
                        {
                            Logit.WriteError("Book ID in <id> and <book> do not match; " + currentBook + " is not " + id);
                        }
                        SkipElement(); // Strip out comment portion.
                        break;
                    case "h":
                        usfx.Read();
                        if (usfx.NodeType == XmlNodeType.Text)
                        {
                            bookRecord.vernacularShortName = usfx.Value.Trim();
                        }
                        break;
                    case "toc":
                        usfx.Read();
                        if (usfx.NodeType == XmlNodeType.Text)
                        {
                            if (level == "1")
                            {
                                bookRecord.vernacularLongName = usfx.Value.Trim();
                            }
                            else if (level == "2")
                            {
                                // Prefer the shorter of the existing short name and the toc2 text.
                                string sn = usfx.Value.Trim();
                                if ((bookRecord.vernacularShortName.Length < 2) || (sn.Length < bookRecord.vernacularShortName.Length))
                                {
                                    bookRecord.vernacularShortName = sn;
                                }
                            }
                        }
                        break;
                    case "c":
                        EndVerse(); // In case file lacks <ve /> elements.
                        currentChapter = id;
                        currentVerse = String.Empty;
                        currentPlace = currentBook + "_" + currentChapter;
                        SkipElement(); // Doesn't skip chapter, just the published chapter number, if present.
                        break;
                    case "v":
                        EndVerse(); // In case file lacks <ve /> elements.
                        inVerse = true;
                        currentVerse = id;
                        currentPlace = currentBook + "_" + currentChapter + "_" + currentVerse;
                        SkipElement(); // Just in case there is a published verse number present.
                        break;
                    case "ve":
                        EndVerse();
                        break;
                    case "b": // blank line
                    case "optionalLineBreak":
                    case "qs":
                    case "th":
                    case "thr":
                    case "tc":
                    case "tcr":
                        if (inVerse)
                        {
                            verseText.Append(' ');
                        }
                        break;
                    case "d": // Make canonical psalm titles searchable
                        inPsalmTitle = true;
                        break;
                    case "add":
                        verseText.Append("[");
                        break;
                    case "nd":
                        //verseText.Append("{");
                        break;
                    case "languageCode":
                    case "f": // footnote
                    case "fe": // End note. Rarely used, fortunately, but in the standards.
                    case "x": // Cross references
                    case "glo":
                    case "ide":
                    case "fig": // figure
                    case "fdc":
                    case "fm": // Should not actually be in any field texts. Safe to skip.
                    case "idx": // Peripherals - Back Matter Index
                    case "ie": // Introduction end
                    case "iex": // Introduction explanatory or bridge text
                    case "fp":
                    case "rem": // Comment; not part of the actual text
                    case "cl":
                    case "ca":
                    case "vp":
                    case "periph":
                    case "milestone":
                    case "rq":
                    case "s":
                        SkipElement();
                        break;
                    case "w":
                        strongs = fileHelper.GetNamedAttribute(usfx, "s");
                        if (!String.IsNullOrEmpty(strongs))
                        {
                            lemmaText.Append(strongs + " ");
                        }
                        break;
                    case "p":
                        if (sfm.StartsWith("i"))
                        {
                            SkipElement();
                        }
                        else
                        {
                            switch (sfm)
                            {
                                case "cd":
                                case "intro":
                                case "hr": // Horizontal rule not supported. Try a line break.
                                case "ib":
                                case "im":
                                case "imq":
                                case "imi":
                                case "ip":
                                case "ipi":
                                case "ipq":
                                case "ipr":
                                case "mt":
                                case "keyword":
                                case "iq":
                                case "imte":
                                case "imt":
                                case "is":
                                case "iot":
                                case "ior":
                                case "io":
                                case "ili":
                                case "r":
                                    SkipElement();
                                    break;
                            }
                        }
                        break;
                }
            }
            else if (usfx.NodeType == XmlNodeType.EndElement)
            {
                switch (usfx.Name)
                {
                    case "book":
                        EndVerse(); // In case file lacks <ve /> elements.
                        currentBook = currentChapter = currentVerse = String.Empty;
                        break;
                    case "d":
                        inPsalmTitle = false;
                        break;
                    case "add":
                        verseText.Append("]");
                        break;
                    case "nd":
                        // verseText.Append("}");
                        break;
                }
            }
            else if (usfx.NodeType == XmlNodeType.Text)
            {
                if (inVerse || inPsalmTitle)
                {
                    verseText.Append(usfx.Value);
                }
            }
            else if ((usfx.NodeType == XmlNodeType.SignificantWhitespace) || (usfx.NodeType == XmlNodeType.Whitespace))
            {
                // Any run of white space inside a verse or psalm title becomes one space.
                if (inVerse || inPsalmTitle)
                {
                    verseText.Append(" ");
                }
            }
        }
        Logit.ShowStatus("writing " + verseFileName);
        verseFile.WriteEndElement(); // verseFile
        lemmaFile.WriteEndElement(); // lemmaFile
        verseFile.Close();
        lemmaFile.Close();
        vplFile.Close();
        usfx.Close();
        result = true;
    }
    catch (Exception ex)
    {
        Logit.WriteError(ex.Message);
    }
    finally
    {
        if (!result)
        {
            // Cancelled or failed part way through: do not leave the files locked.
            CloseFilterFiles();
        }
    }
    return(result);
}

/// <summary>
/// Closes the reader and writers used by Filter() after a cancelled or failed run.
/// A failure while closing is logged rather than thrown.
/// </summary>
private void CloseFilterFiles()
{
    try
    {
        if (verseFile != null)
        {
            verseFile.Close();
        }
        if (lemmaFile != null)
        {
            lemmaFile.Close();
        }
        if (vplFile != null)
        {
            vplFile.Close();
        }
        if (usfx != null)
        {
            usfx.Close();
        }
    }
    catch (Exception ex)
    {
        Logit.WriteError(ex.Message);
    }
}
/// <summary>
/// Read a USX file, convert to USFX, and append to a USFX file.
/// The USX nodes are read with the usx reader and the corresponding USFX
/// elements are written to scrp.xw.
/// </summary>
/// <param name="UsxFileName">Name of one USX file to read</param>
/// <returns>true iff the conversion worked; false if the book code was already
/// listed in processedUsxBooks or if an exception was caught (and logged)</returns>
protected bool ReadUsx(string UsxFileName)
{
    int charNesting = 0;        // open <char> elements outside notes
    int noteCharNesting = 0;    // open <char> elements inside notes
    string style;
    string number;
    string code;
    string sfm;
    string level;
    string caller;
    string loc;
    string closed;
    string thisBook = String.Empty;
    string thisChapter = String.Empty;
    string thisVerse = String.Empty;
    bool badNoteCharSyntaxUsed = false;
    bool inNote = false;
    try
    {
        usx = new XmlTextReader(UsxFileName);
        usx.WhitespaceHandling = WhitespaceHandling.Significant;
        while (usx.Read())
        {
            if (usx.NodeType == XmlNodeType.Element)
            {
                style = GetAnAttribute("style");
                number = GetAnAttribute("number");
                code = GetAnAttribute("code");
                caller = GetAnAttribute("caller");
                closed = GetAnAttribute("closed");
                loc = GetAnAttribute("loc");
                switch (usx.Name)
                {   // TODO: Handle: rem, cl, cp, ca, va, vp
                    case "usx":
                        // Ignore this one and use </usx> to close the <book> tag.
                        break;
                    case "book":
                        // In usfx, <book> is a container around a book.
                        // In usx, <book> encompasses only the \id line
                        if (processedUsxBooks.Contains(code))
                        {
                            // Skipping book because we read it already in another canon set.
                            // The reader is closed by the finally block.
                            return(false);
                        }
                        processedUsxBooks += code + " "; // Keep track of books already processed.
                        scrp.xw.WriteStartElement("book");
                        scrp.xw.WriteAttributeString("id", code);
                        scrp.xw.WriteStartElement("id");
                        scrp.xw.WriteAttributeString("id", code);
                        thisBook = code;
                        thisChapter = thisVerse = "0";
                        CloseEmptyElement();
                        break;
                    case "chapter":
                        scrp.xw.WriteStartElement(style);
                        scrp.xw.WriteAttributeString("id", number);
                        thisChapter = number;
                        thisVerse = "0";
                        CloseEmptyElement();
                        break;
                    case "verse":
                        number = number.Replace(',', '-'); // Paratext allows comma or dash as a separator in verse ranges.
                        scrp.xw.WriteStartElement(style);
                        scrp.xw.WriteAttributeString("id", number);
                        thisVerse = number;
                        CloseEmptyElement();
                        break;
                    case "note":
                        scrp.xw.WriteStartElement(style);
                        scrp.xw.WriteAttributeString("caller", caller);
                        scrp.xw.WriteAttributeString("sfm", style);
                        badNoteCharSyntaxUsed = false;
                        inNote = true;
                        CloseEmptyElement();
                        break;
                    case "char":
                        scrp.xw.WriteStartElement(style);
                        if (!usx.IsEmptyElement)
                        {
                            if (inNote)
                            {
                                noteCharNesting++;
                            }
                            else
                            {
                                charNesting++;
                            }
                        }
                        if ((closed == "false") && (usx.IsEmptyElement))
                        {
                            // The element just written stays open; it is closed when the note ends.
                            badNoteCharSyntaxUsed = true;
                            Logit.WriteError("Empty unclosed char element at " + thisBook + " " + thisChapter + ":" + thisVerse);
                        }
                        else
                        {
                            CloseEmptyElement();
                        }
                        break;
                    case "table":
                        scrp.xw.WriteStartElement("table");
                        CloseEmptyElement();
                        break;
                    case "row":
                        scrp.xw.WriteStartElement(style);
                        CloseEmptyElement();
                        break;
                    case "cell":
                        scrp.xw.WriteStartElement(style);
                        CloseEmptyElement();
                        break;
                    case "para":
                        // Split a trailing digit off the style: it becomes the level attribute.
                        // NOTE(review): a <para> without a style attribute fails on the index below
                        // if GetAnAttribute returns an empty string - confirm against GetAnAttribute.
                        level = String.Empty;
                        sfm = style;
                        int lastDigitIndex = style.Length - 1;
                        if (char.IsDigit(style[lastDigitIndex]))
                        {
                            level = style.Substring(lastDigitIndex);
                            sfm = style.Substring(0, lastDigitIndex);
                        }
                        switch (sfm)
                        {
                            case "h":
                                scrp.xw.WriteStartElement("h");
                                break;
                            case "toc":
                                if (level == String.Empty)
                                {
                                    level = "1";
                                }
                                scrp.xw.WriteStartElement("toc");
                                scrp.xw.WriteAttributeString("level", level);
                                break;
                            case "p":
                            case "q":
                            case "d":
                            case "s":
                            case "mt":
                                scrp.xw.WriteStartElement(sfm);
                                if (!String.IsNullOrEmpty(level))
                                {
                                    scrp.xw.WriteAttributeString("level", level);
                                }
                                break;
                            case "restore":
                                // Discard this paragraph: it is a useless comment, not USFM, meaningless for publishing, and deprecated in current Paratext use
                                if (!usx.IsEmptyElement)
                                {
                                    bool stillMore = true;
                                    while (stillMore && !(usx.NodeType == XmlNodeType.EndElement))
                                    {
                                        stillMore = usx.Read();
                                    }
                                }
                                break;
                            default:
                                scrp.xw.WriteStartElement("p");
                                scrp.xw.WriteAttributeString("sfm", sfm);
                                if (!String.IsNullOrEmpty(level))
                                {
                                    scrp.xw.WriteAttributeString("level", level);
                                }
                                break;
                        }
                        CloseEmptyElement();
                        break;
                    case "figure":
                        // USX figure attributes become child elements in USFX.
                        scrp.xw.WriteStartElement(style);
                        string s = GetAnAttribute("desc");
                        scrp.xw.WriteElementString("description", s);
                        s = GetAnAttribute("file");
                        scrp.xw.WriteElementString("catalog", s);
                        s = GetAnAttribute("size");
                        scrp.xw.WriteElementString("size", s);
                        s = GetAnAttribute("loc");
                        scrp.xw.WriteElementString("location", s);
                        s = GetAnAttribute("copy");
                        scrp.xw.WriteElementString("copyright", s);
                        s = GetAnAttribute("ref");
                        scrp.xw.WriteElementString("reference", s);
                        if (!usx.IsEmptyElement)
                        {
                            usx.Read();
                            if (usx.NodeType == XmlNodeType.Text)
                            {
                                scrp.xw.WriteElementString("caption", usx.Value);
                            }
                            else if (usx.NodeType == XmlNodeType.EndElement)
                            {
                                scrp.xw.WriteEndElement();
                                if (usx.Name != "figure")
                                {
                                    Logit.WriteError("Unexpected tag after figure: " + usx.Name);
                                }
                            }
                            else
                            {
                                Logit.WriteError("Unexpected node type reading caption of figure!");
                            }
                        }
                        CloseEmptyElement();
                        break;
                    case "optbreak":
                        scrp.xw.WriteStartElement("optionalLineBreak");
                        CloseEmptyElement();
                        break;
                    case "ref":
                        // Only write the reference when a usable target was produced;
                        // the matching end tag applies the same length test.
                        reftgt = usxLoc2usfxTgt(loc);
                        if (reftgt.Length > 6)
                        {
                            scrp.xw.WriteStartElement("ref");
                            scrp.xw.WriteAttributeString("tgt", reftgt);
                        }
                        break;
                    default:
                        Logit.WriteError("Unrecognized USX element name: " + usx.Name);
                        break;
                }
            }
            else if (usx.NodeType == XmlNodeType.EndElement)
            {
                if (usx.Name == "ref")
                {
                    if (reftgt.Length > 6)
                    {
                        scrp.xw.WriteEndElement();
                    }
                }
                else
                {
                    if (usx.Name == "char")
                    {
                        if (inNote)
                        {
                            noteCharNesting--;
                        }
                        else
                        {
                            charNesting--;
                        }
                    }
                    if ((noteCharNesting < 0) || (charNesting < 0))
                    {
                        Logit.WriteError(String.Format("Unexpected char nesting value: {0} normal {1} in notes", charNesting, noteCharNesting));
                    }
                    if (usx.Name == "note")
                    {
                        // Every note end leaves note context, not only notes that used the bad syntax.
                        inNote = false;
                        if (badNoteCharSyntaxUsed)
                        {
                            scrp.xw.WriteEndElement(); // Close the character style started with a milestone. Yukky syntax.
                            badNoteCharSyntaxUsed = false;
                        }
                    }
                    scrp.xw.WriteEndElement();
                }
            }
            else if ((usx.NodeType == XmlNodeType.SignificantWhitespace) || (usx.NodeType == XmlNodeType.Whitespace) || (usx.NodeType == XmlNodeType.Text))
            {
                scrp.xw.WriteString(usx.Value);
            }
        }
    }
    catch (Exception ex)
    {
        Logit.WriteError("Error reading " + UsxFileName);
        Logit.WriteError(ex.Message);
        return(false);
    }
    finally
    {
        // Release the input file on every path, including errors.
        if (usx != null)
        {
            usx.Close();
        }
    }
    return(true);
}
/// <summary>
/// Write an SQL file for MySQL from the VPL XML search text file.
/// The script drops and recreates one table named after the SQL file
/// (with '-' replaced by '_'), inserts one row per "v" element that has text,
/// and finally adds a FULLTEXT index on verseText.
/// </summary>
/// <param name="verseFileName">Name of the XML verse per line file.</param>
/// <param name="translationId">Bible translation ID (not used by this method)</param>
/// <param name="sqlName">Name of the SQL file to write.</param>
public void WriteSearchSql(string verseFileName, string translationId, string sqlName)
{
    Hashtable verseDupCheck = new Hashtable(64007);
    string tableName = Path.GetFileNameWithoutExtension(sqlName).Replace('-', '_');
    string book, bk, ch, vs, startVerse, endVerse, verseID, verseText, canon_order;
    int i;
    int dup = 0;
    XmlTextReader searchTextXml = new XmlTextReader(verseFileName);
    StreamWriter sqlFile = null;
    try
    {
        sqlFile = new StreamWriter(sqlName, false, System.Text.Encoding.UTF8);
        sqlFile.WriteLine("USE sofia;");
        sqlFile.WriteLine("DROP TABLE IF EXISTS sofia.{0};", tableName);
        sqlFile.WriteLine(@"CREATE TABLE {0} ( verseID VARCHAR(16) NOT NULL PRIMARY KEY, canon_order VARCHAR(12) NOT NULL, book VARCHAR(3) NOT NULL, chapter VARCHAR(3) NOT NULL, startVerse VARCHAR(3) NOT NULL, endVerse VARCHAR(3) NOT NULL, verseText TEXT CHARACTER SET UTF8 NOT NULL) ENGINE=MyISAM;", tableName);
        sqlFile.WriteLine("LOCK TABLES {0} WRITE;", tableName);
        while (searchTextXml.Read())
        {
            if ((searchTextXml.NodeType == XmlNodeType.Element) && (searchTextXml.Name == "v"))
            {
                // Capture this while still positioned on the element itself.
                bool emptyVerseElement = searchTextXml.IsEmptyElement;
                book = fileHelper.GetNamedAttribute(searchTextXml, "b");
                bk = bookInfo.getShortCode(book);
                ch = fileHelper.GetNamedAttribute(searchTextXml, "c");
                vs = endVerse = startVerse = fileHelper.GetNamedAttribute(searchTextXml, "v");
                // Verse numbers might be verse bridges, like "20-22" or simple numbers, like "20".
                i = vs.IndexOf('-');
                if (i > 0)
                {
                    startVerse = startVerse.Substring(0, i);
                    endVerse = vs.Substring(i + 1);
                }
                verseID = bk + ch + "_" + startVerse;
                canon_order = ((BibleBookRecord)bookInfo.books[book]).sortOrder.ToString("000") + "_" + ch + "_" + startVerse;
                if (verseDupCheck[verseID] != null)
                {
                    // Keep the primary key unique by appending a running duplicate counter.
                    Logit.WriteError("Duplicate verse ID: " + verseID);
                    dup++;
                    verseID = verseID + "_" + dup.ToString();
                }
                verseDupCheck[verseID] = vs;
                if (!emptyVerseElement)
                {
                    // An empty <v/> has no content node; reading ahead would swallow
                    // whatever follows it, possibly the next verse element.
                    searchTextXml.Read();
                    if (searchTextXml.NodeType == XmlNodeType.Text)
                    {
                        verseText = searchTextXml.Value;
                        // Escape backslashes first, then double quotes, so the text
                        // cannot terminate or corrupt the quoted SQL literal.
                        string escapedText = verseText.Replace("\\", "\\\\").Replace("\"", "\\\"");
                        sqlFile.WriteLine("INSERT INTO {0} VALUES (\"{1}\",\"{2}\",\"{3}\",\"{4}\",\"{5}\",\"{6}\",\"{7}\");", tableName, verseID, canon_order, book, ch, startVerse, endVerse, escapedText);
                    }
                }
            }
        }
        sqlFile.WriteLine("ALTER TABLE {0} ADD FULLTEXT(verseText);", tableName);
        sqlFile.WriteLine("UNLOCK TABLES;");
    }
    finally
    {
        // Release both files even if reading or writing failed part way through.
        searchTextXml.Close();
        if (sqlFile != null)
        {
            sqlFile.Close();
        }
    }
}
/// <summary>
/// Command line entry point. Scans the arguments for switches (-n, -o, -i, -l,
/// -h/-?/--) and up to two positional file names, opens the log, optionally
/// prints the banner and help text, and otherwise calls
/// SFConverter.scripture.ExtractUSFX(inName, outName).
/// </summary>
/// <param name="args">command line arguments</param>
static void Main(string[] args)
{
    bool bannerWanted = true;
    bool helpWanted = false;
    string logName = "WordSendLog.txt";
    string inName = "";
    string outName = "";
    string jobOptionsName = Environment.GetEnvironmentVariable("APPDATA") + "\\SIL\\WordSend\\joboptions.xml";
    SFConverter.jobIni = new XMLini(jobOptionsName);
    Logit.useConsole = true;

    // Scan the command line
    for (int i = 0; i < args.Length; i++)
    {
        string arg = args[i];
        if ((arg == null) || (arg.Length == 0))
        {
            continue;
        }

        bool isSwitch = (arg.Length > 1) && ((arg[0] == '-') || (arg[0] == '/'));
        if (!isSwitch)
        {
            // Positional arguments: first the input name, then the output name;
            // anything beyond that is a usage error.
            if (inName == "")
            {
                inName = arg;
            }
            else if (outName == "")
            {
                outName = arg;
            }
            else
            {
                bannerWanted = true;
                helpWanted = true;
            }
            continue;
        }

        // command line switch: take action
        switch (Char.ToLower(arg[1]))
        {
            case 'n': // No banner display
                bannerWanted = false;
                break;
            case 'o': // Set output file name
                outName = SFConverter.GetOption(ref i, args);
                break;
            case 'i': // Set input file name
                inName = SFConverter.GetOption(ref i, args);
                break;
            case 'l': // Set log name
                logName = SFConverter.GetOption(ref i, args);
                break;
            case '?':
            case 'h':
            case '-':
                bannerWanted = true;
                helpWanted = true;
                break;
            default:
                Logit.WriteLine("Unrecognized command line switch: " + arg);
                break;
        }
    }

    SFConverter.scripture = new Scriptures();
    Logit.OpenFile(logName);
    if (bannerWanted)
    {
        Logit.WriteLine("\nWordSend project extract_usfx compiled " + Version.date);
        Logit.WriteLine("");
        Logit.WriteLine(Version.copyright);
        Logit.WriteLine("");
        Logit.WriteLine(Version.contact);
    }

    // Both file names are required.
    bool fileNameMissing = (outName.Length < 1) || (inName.Length < 1);
    if (fileNameMissing)
    {
        helpWanted = true;
    }

    if (helpWanted)
    {
        Logit.WriteLine(@" Syntax: sf2word [-l logname] [-h] [-n] [-i] inputfile [-o] outputfile logname = log file name (default is WordSendLog.txt) -h = show this information then exit -n = supress banner inputfile = Microsoft Word XML file that has embedded USFX outputfile = name of USFX file to write ");
        Logit.CloseFile();
        return;
    }

    Logit.WriteLine("Auxilliary files read.");
    SFConverter.scripture.ExtractUSFX(inName, outName);
    Logit.WriteLine("Done.");
    Logit.CloseFile();
}
/// <summary>
/// Write navigational links to get to another chapter from here.
/// Sets prevChapterLink and nextChapterLink from chapterFileList, builds the
/// "tnav" list markup into navButtons and writes it to htm. Any exception is
/// logged through Logit instead of being propagated.
/// </summary>
protected override void WriteNavButtons()
{
    int i;
    int prevChapIndex = 0;
    prevChapterLink = String.Empty;
    nextChapterLink = String.Empty;
    int chapNumSize;
    try
    {
        string formatString = FormatString(out chapNumSize);
        // The result is not used here; the call is kept in case it has side effects.
        FirstChapterFile(formatString);
        string thisBookName = currentBookHeader;
        if (currentBookHeader.Trim().Length == 0)
        {
            if (currentBookAbbrev == "CPR")
            {
                thisBookName = "^";
            }
            else
            {
                thisBookName = String.Empty;
            }
        }
        CheckHomeLink();
        if (bookListIndex >= 0)
        {
            if (currentChapter.Trim().Length == 0)
            {
                if (chapterNumber < 0)
                {
                    currentChapter = "0";
                }
                else
                {
                    currentChapter = chapterNumber.ToString();
                }
            }
            // Find this chapter's file in the list; its neighbours are the previous/next links.
            string chFile;
            int nextChapIndex = -1;
            for (i = 0; (i < chapterFileList.Count) && (nextChapIndex < 0); i++)
            {
                chFile = (string)chapterFileList[i];
                int cn;
                // The length test keeps Substring from throwing on a short file name,
                // which would otherwise suppress the whole navigation bar.
                if ((!String.IsNullOrEmpty(chFile)) && chFile.StartsWith(currentBookAbbrev) &&
                    (chFile.Length >= chapNumSize) &&
                    (int.TryParse(chFile.Substring(chFile.Length - chapNumSize), out cn)))
                {
                    if (cn == chapterNumber)
                    {
                        nextChapIndex = i + 1;
                        prevChapIndex = i - 1;
                    }
                }
            }
            if ((nextChapIndex >= chapterFileList.Count) || (nextChapIndex < 0))
            {
                // Past the last chapter (or chapter not found): wrap to the first file.
                nextChapIndex = 0;
            }
            nextChapterLink = String.Format("{0}.htm", (string)chapterFileList[nextChapIndex]);
            if ((prevChapIndex >= 0) && (prevChapIndex < chapterFileList.Count))
            {
                prevChapterLink = String.Format("{0}.htm", (string)chapterFileList[prevChapIndex]);
            }
            else
            {
                prevChapterLink = "index.htm";
            }
        }
        else
        {
            nextChapterLink = String.Format("{0}.htm", (string)chapterFileList[0]);
        }
        // The arrow glyphs are written as character entities so the markup stays well formed.
        if (currentBookAbbrev != "CPR")
        {
            navButtons = String.Format(@"<ul class='tnav'> <li><a href='index.htm'>{0}</a></li> <li><a href='{1}'>&lt;</a></li> <li><a href='{2}.htm'>{3}</a></li> <li><a href='{4}'>&gt;</a></li> </ul>",
                thisBookName, prevChapterLink, currentBookAbbrev, fileHelper.LocalizeDigits(currentChapter), nextChapterLink);
        }
        else
        {
            navButtons = String.Format(@"<ul class='tnav'> <li><a href='index.htm'>{0}</a></li> <li><a href='{1}'>&lt;</a></li> <li><a href='{2}'>&gt;</a></li> </ul>",
                thisBookName, prevChapterLink, nextChapterLink);
        }
        htm.WriteLine(navButtons);
    }
    catch (Exception ex)
    {
        Logit.WriteError("ERROR in usfx2MobileHtml::WriteNavButtons():");
        Logit.WriteError(ex.Message);
    }
}
/// <summary>
/// Loads key/value pairs from an XML file into hashTbl. Text found inside a
/// "key" element and a "value" element is accumulated, and the pair is stored
/// when the enclosing "entry" element ends.
/// </summary>
/// <param name="fName">name of the XML file to read</param>
/// <returns>true if the file exists and was read to the end without an
/// exception; false otherwise (a read failure is logged)</returns>
private bool ReadIniFile(string fName)
{
    if (!File.Exists(fName))
    {
        return false;
    }

    XmlTextReader reader = null;
    string key = null;
    string val = null;
    string currentElement = null;
    bool loaded = false;
    try
    {
        reader = new XmlTextReader(fName);
        reader.WhitespaceHandling = WhitespaceHandling.None;
        reader.MoveToContent();
        while (reader.Read())
        {
            XmlNodeType kind = reader.NodeType;
            if (kind == XmlNodeType.Element)
            {
                currentElement = reader.Name;
                if (currentElement == "entry")
                {
                    // A new entry starts with an empty key and value.
                    key = "";
                    val = "";
                }
            }
            else if ((kind == XmlNodeType.Text) || (kind == XmlNodeType.EntityReference))
            {
                // Content is appended to whichever of key/value was opened last.
                if (currentElement == "key")
                {
                    key += reader.Value;
                }
                else if (currentElement == "value")
                {
                    val += reader.Value;
                }
            }
            else if ((kind == XmlNodeType.EndElement) && (reader.Name == "entry"))
            {
                hashTbl[key] = val;
            }
        }
        loaded = true;
    }
    catch
    {
        Logit.WriteError("Bad input format in file " + fName + "; using defaults.");
    }
    finally
    {
        if (reader != null)
        {
            reader.Close();
        }
    }
    return loaded;
}
/// <summary>
/// Command line entry point. Parses the switches -o, -i, -l, -t and the help
/// switches, accepts one optional positional input file name, then either
/// prints the usage text or runs usfxToMosisConverter.ConvertUsfxToMosis on
/// the chosen input and output files.
/// </summary>
/// <param name="args">command line arguments</param>
static void Main(string[] args)
{
    int i;
    bool showBanner = false;
    string logName = "usfx2mosislog.txt";
    string inName = "usfx.xml";
    string outName = "mosis.xml";
    string translationId = String.Empty;
    // inName starts out as a default, so an explicit flag is needed to know
    // whether the user has supplied an input file name yet.
    bool inNameGiven = false;
    Logit.useConsole = true;
    for (i = 0; i < args.Length; i++)
    {   // Scan the command line
        string s = args[i];
        if ((s != null) && (s.Length > 0))
        {
            if ((s[0] == '-') && (s.Length > 1))
            {   // command line switch: take action
                switch (Char.ToLower(s[1]))
                {
                    case 'o': // Set output file name
                        outName = SFConverter.GetOption(ref i, args);
                        break;
                    case 'i': // Set input file name
                        inName = SFConverter.GetOption(ref i, args);
                        inNameGiven = true;
                        break;
                    case 'l': // Set log name
                        logName = SFConverter.GetOption(ref i, args);
                        break;
                    case 't':
                        translationId = SFConverter.GetOption(ref i, args);
                        break;
                    case '-':
                    case 'h':
                    case '?':
                    case '/':
                        showBanner = true;
                        break;
                    default:
                        Logit.WriteLine("Unrecognized command line switch: " + args[i]);
                        showBanner = true;
                        break;
                }
            }
            else if (!inNameGiven && (s[0] != '-'))
            {
                // First positional argument is the input file, as the usage text promises.
                inName = s;
                inNameGiven = true;
            }
        }
    }
    Logit.OpenFile(logName);
    if (showBanner)
    {
        Logit.WriteLine("");
        Logit.WriteLine("This is part of Haiola open source software.");
        Logit.WriteLine("Please see http://haiola.org for copyright information.");
        // NOTE(review): the usage text names "usfx2usfm" and USFM although this tool logs
        // "output MOSIS" - confirm the intended wording before changing it.
        Logit.WriteLine(@" Syntax: usfx2usfm [-t translationId] [-o Output] [-?] [-l logname] [-i] [inputfile] translationId = unique translation identifier default is Ethnologue language code specified in usfx file Output = output USFM path and file name suffix default is mosis.xml logname is name of log file to write, default is usfx2mosislog.txt -h or -? = show this information. inputfile = name of USFX file to convert to USFM default is usfx.xml File names with embedded spaces must be surrounded by quotes. Do not use - as the first character of a path or file name. ");
    }
    else
    {
        Logit.WriteLine(DateTime.Now.ToString());
        Logit.WriteLine("Input USFX: " + inName + "; output MOSIS: " + outName);
        usfxToMosisConverter toMosis = new usfxToMosisConverter();
        toMosis.translationId = translationId;
        toMosis.revisionDateTime = DateTime.Now;
        // No project metadata is available on the command line; start with empty values.
        toMosis.languageCode = String.Empty;
        toMosis.vernacularTitle = toMosis.contentCreator = toMosis.contentContributor = String.Empty;
        toMosis.englishDescription = toMosis.lwcDescription = toMosis.printPublisher = String.Empty;
        toMosis.ePublisher = toMosis.languageName = toMosis.dialect = String.Empty;
        toMosis.vernacularLanguageName = toMosis.copyrightNotice = String.Empty;
        toMosis.rightsNotice = String.Empty;
        toMosis.langCodes = new LanguageCodeInfo();
        toMosis.ConvertUsfxToMosis(inName, outName);
    }
    Logit.CloseFile();
}
/// <summary>
/// Builds the "browserBible" output for the current project: recreates the
/// browserBible directory, copies CSS, cover and font files into it, configures
/// a usfx2BrowserBible converter from globe.projectOptions, converts usfx.xml
/// to HTML and then writes the JSON and lemma search indexes.
/// Returns without doing anything when the project has no FCBH ID or when the
/// language or translation ID is shorter than three characters.
/// </summary>
public void WriteTheModule()
{
    string about; // licenseHtml or contents of about.inc in input project directory
    if (String.IsNullOrEmpty(globe.projectOptions.fcbhId))
    {
        return;
    }
    int i;
    if ((globe.projectOptions.languageId.Length < 3) || (globe.projectOptions.translationId.Length < 3))
    {
        return;
    }
    string UsfxPath = Path.Combine(globe.outputProjectDirectory, "usfx");
    string browserBiblePath = Path.Combine(globe.outputProjectDirectory, "browserBible");
    string browserBibleCssFileName;
    // Using latin.css for ALL projects marked as having a Latin script, regardless of exclusive use of common characters or not per Ken Bitgood 20 April 2016.
    // Reversed 15 Dec 2018 due to problems with NASB display.
    if (globe.projectOptions.commonChars)
    {
        browserBibleCssFileName = "latin.css";
    }
    else
    {
        browserBibleCssFileName = globe.projectOptions.customCssFileName;
    }
    Utils.EnsureDirectory(browserBiblePath);
    // (A second "missing FCBHID" check used to sit here; it could never run
    // because the method already returned above when the ID is empty.)
    if (!Directory.Exists(UsfxPath))
    {
        MessageBox.Show(UsfxPath + " not found!", "ERROR");
        return;
    }
    if (Directory.Exists(browserBiblePath))
    {
        // Delete any old files in this directory.
        Utils.DeleteDirectory(browserBiblePath);
    }
    Utils.EnsureDirectory(browserBiblePath);
    string browserBibleCss = Path.Combine(browserBiblePath, browserBibleCssFileName);
    Utils.DeleteFile(browserBibleCss);
    // Always get the browser Bible CSS file from BibleConv/browserBiblecss/ with the same file name as the current custom CSS file name used for simple HTML.
    string browserBiblecssDir = Path.Combine(globe.dataRootDir, "browserBiblecss");
    string specialCss = Path.Combine(browserBiblecssDir, browserBibleCssFileName);
    if (File.Exists(specialCss))
    {
        File.Copy(specialCss, browserBibleCss, true);
    }
    string fallbackCss = Path.Combine(browserBiblecssDir, "fallback.css");
    if (File.Exists(fallbackCss))
    {
        // Overwrite is allowed so a custom CSS that is itself named fallback.css cannot make this throw.
        File.Copy(fallbackCss, Path.Combine(browserBiblePath, "fallback.css"), true);
    }
    string aboutFile = Path.Combine(globe.inputProjectDirectory, "about.inc");
    if (File.Exists(aboutFile))
    {
        using (StreamReader sr = new StreamReader(aboutFile))
        {
            about = globe.expandPercentEscapes(sr.ReadToEnd());
        }
    }
    else
    {
        about = globe.copyrightPermissionsStatement();
    }
    string sqlDir = Path.Combine(globe.outputProjectDirectory, "sql");
    Utils.EnsureDirectory(sqlDir);
    usfx2BrowserBible toBrowserBible;
    toBrowserBible = new usfx2BrowserBible();
    toBrowserBible.projectOptions = globe.projectOptions;
    toBrowserBible.projectOutputDir = globe.outputProjectDirectory;
    toBrowserBible.stripPictures = true;
    DateTime srcDate = globe.sourceDate; // Bring local to avoid potential exception in marshall-by-reference class
    toBrowserBible.indexDateStamp = "This module was generated by <a href='http://eBible.org'>eBible.org</a> on " +
        DateTime.UtcNow.ToString("d MMM yyyy") + " from source files dated " + srcDate.ToString("d MMM yyyy") + ".";
    if (!String.IsNullOrEmpty(certified))
    {
        // NOTE(review): machine-specific absolute path; this throws if the file is missing.
        string cert;
        using (StreamReader sr = new StreamReader("/home/kahunapule/sync/doc/Electronic Scripture Publishing/ebible_certified_sm.b64"))
        {
            cert = sr.ReadToEnd();
        }
        File.Copy(certified, Path.Combine(browserBiblePath, "eBible.org_certified.jpg"), true);
        // The link around the badge image is now closed.
        toBrowserBible.indexDateStamp = String.Format("{0}<br /><a href='http://eBible.org/certified/' target='_blank'><img src='data:image/png;base64,{1}'></a>",
            toBrowserBible.indexDateStamp, cert);
    }
    toBrowserBible.CrossRefToFilePrefixMap = globe.projectOptions.CrossRefToFilePrefixMap;
    string usfxFilePath = Path.Combine(UsfxPath, "usfx.xml");
    string orderFile = Path.Combine(globe.inputProjectDirectory, "bookorder.txt");
    if (!File.Exists(orderFile))
    {
        orderFile = SFConverter.FindAuxFile("bookorder.txt");
    }
    // Build the JSON fragment "numbers":["0",...,"150"], with each number passed through LocalizeDigits.
    StringBuilder localNumbers = new StringBuilder("\"numbers\":[");
    for (i = 0; i < 150; i++)
    {
        localNumbers.Append("\"" + fileHelper.LocalizeDigits(i.ToString()) + "\",");
    }
    localNumbers.Append("\"" + fileHelper.LocalizeDigits("150") + "\"],");
    toBrowserBible.country = globe.projectOptions.country;
    toBrowserBible.countryCode = globe.projectOptions.countryCode;
    toBrowserBible.bookInfo.ReadPublicationOrder(orderFile);
    toBrowserBible.MergeXref(Path.Combine(globe.inputProjectDirectory, "xref.xml"));
    toBrowserBible.sourceLink = globe.expandPercentEscapes("<a href=\"http://%h/%t\">%v</a>");
    toBrowserBible.textDirection = globe.projectOptions.textDir;
    toBrowserBible.languageNameInEnglish = globe.projectOptions.languageNameInEnglish;
    toBrowserBible.languageNameInVernacular = globe.projectOptions.languageName;
    toBrowserBible.traditionalAbbreviation = globe.projectOptions.translationTraditionalAbbreviation;
    toBrowserBible.englishDescription = globe.projectOptions.EnglishDescription;
    toBrowserBible.customCssName = browserBibleCssFileName;
    toBrowserBible.numbers = localNumbers.ToString();
    toBrowserBible.fcbhAudioNt = globe.projectOptions.fcbhAudioNT;
    toBrowserBible.fcbhAudioOt = globe.projectOptions.fcbhAudioOT;
    toBrowserBible.fcbhDramaNt = globe.projectOptions.fcbhDramaNT;
    toBrowserBible.fcbhDramaOt = globe.projectOptions.fcbhDramaOT;
    toBrowserBible.fcbhPortion = globe.projectOptions.fcbhAudioPortion;
    toBrowserBible.coverName = Path.GetFileName(globe.preferredCover);
    string coverPath = Path.Combine(browserBiblePath, toBrowserBible.coverName);
    File.Copy(globe.preferredCover, coverPath, true);
    string covertnpng = Path.Combine(browserBiblePath, "covertn.png");
    string covertnb64 = Path.Combine(browserBiblePath, "covertn.b64");
    // NOTE(review): the paths are passed unquoted; confirm how RunCommand handles paths containing spaces.
    fileHelper.RunCommand(String.Format("shrinkcover {0} {1} {2}", coverPath, covertnpng, covertnb64));
    toBrowserBible.b64CoverName = covertnb64;
    if ((globe.er != null) && (globe.er.countries != null))
    {
        toBrowserBible.countries = globe.er.countries;
    }
    if (globe.projectOptions.commonChars)
    {
        toBrowserBible.preferredFont = "latin";
    }
    else
    {
        toBrowserBible.preferredFont = globe.projectOptions.fontFamily;
    }
    toBrowserBible.shortTitle = globe.projectOptions.shortTitle;
    toBrowserBible.fcbhId = globe.projectOptions.fcbhId;
    toBrowserBible.dialectCode = globe.projectOptions.languageId + globe.projectOptions.dialect + globe.projectOptions.script;
    toBrowserBible.script = globe.projectOptions.script;
    string sqlTranslationId = globe.projectOptions.translationId.Replace("-", "_");
    toBrowserBible.sqlTableName = sqlTranslationId + "_isvl";
    toBrowserBible.sqlFileName = Path.Combine(sqlDir, globe.projectOptions.translationId + "_isvl.sql");
    toBrowserBible.langCodes = globe.languageCodes;
    toBrowserBible.xrefCall.SetMarkers(globe.projectOptions.xrefCallers);
    toBrowserBible.footNoteCall.SetMarkers(globe.projectOptions.footNoteCallers);
    toBrowserBible.redistributable = globe.projectOptions.redistributable;
    toBrowserBible.projectInputDir = globe.inputProjectDirectory;
    toBrowserBible.ConvertUsfxToHtml(usfxFilePath, browserBiblePath,
        globe.projectOptions.vernacularTitle,
        globe.projectOptions.languageId,
        globe.projectOptions.translationId,
        globe.projectOptions.chapterLabel,
        globe.projectOptions.psalmLabel,
        "<a href='copyright.htm'>" + usfxToHtmlConverter.EscapeHtml(globe.shortCopyrightMessage) + "</a>",
        globe.expandPercentEscapes(globe.projectOptions.homeLink),
        globe.expandPercentEscapes(globe.projectOptions.footerHtml),
        globe.expandPercentEscapes(globe.projectOptions.indexHtml),
        about,
        globe.projectOptions.ignoreExtras,
        globe.projectOptions.goText);
    // Build the search indexes from the files in the project's "search" output directory.
    string searchDir = Path.Combine(globe.outputProjectDirectory, "search");
    ci = new CreateIndex();
    ci.MakeJsonIndex(Path.Combine(searchDir, "verseText.xml"),
        Path.Combine(browserBiblePath, "index"),
        Path.Combine(sqlDir, sqlTranslationId + "_conc.sql"));
    ci.MakeLemmaIndex(Path.Combine(searchDir, "verseText.lemma"),
        Path.Combine(browserBiblePath, "indexlemma"));
    // Copy the project's font in the three formats used: ttf, woff, eot.
    string fontsDir = Path.Combine(browserBiblePath, "fonts");
    fileHelper.EnsureDirectory(fontsDir);
    string fontSource = Path.Combine(globe.dataRootDir, "fonts");
    string fontName = globe.projectOptions.fontFamily.ToLower().Replace(' ', '_');
    fileHelper.CopyFile(Path.Combine(fontSource, fontName + ".ttf"), Path.Combine(fontsDir, fontName + ".ttf"));
    fileHelper.CopyFile(Path.Combine(fontSource, fontName + ".woff"), Path.Combine(fontsDir, fontName + ".woff"));
    fileHelper.CopyFile(Path.Combine(fontSource, fontName + ".eot"), Path.Combine(fontsDir, fontName + ".eot"));
    // The thumbnail files were only intermediates for the converter.
    Utils.DeleteFile(covertnpng);
    Utils.DeleteFile(covertnb64);
    Logit.CloseFile();
    if (Logit.loggedError)
    {
        globe.projectOptions.lastRunResult = false;
    }
    if (Logit.loggedWarning)
    {
        globe.projectOptions.warningsFound = true;
    }
}
/// <summary>
/// Copies BookNames.xml from the source directory to the output project directory, then
/// preprocesses each candidate USFM file in <paramref name="SourceDir"/> into a freshly
/// emptied "extendedusfm" directory under the output project directory.
/// Files with known non-Scripture extensions, editor backup files (trailing "~") and a few
/// well-known auxiliary text files are skipped.
/// </summary>
/// <param name="SourceDir">Directory holding the source USFM files (and possibly BookNames.xml).</param>
public void PreprocessUsfmFiles(string SourceDir)
{
    // Extensions (upper case, with leading dot) that are never USFM input.
    string[] ignoredExtensions =
    {
        ".BAK", ".LDS", ".SSF", ".DBG", ".WDL", ".STY", ".XML", ".HTM", ".KB2", ".HTML", ".CSS",
        ".SWP", ".ID", ".DIC", ".LDML", ".JSON", ".VRS", ".INI", ".CSV", ".TSV", ".CCT"
    };
    // Auxiliary file names, all lower case because they are compared with the lower-cased name.
    // (The original compared against "hyphenatedWords.txt", which could never match.)
    string[] ignoredNames =
    {
        "autocorrect.txt", "tmp.txt", "changes.txt", "hyphenatedwords.txt",
        "wordboundariesoutput.txt", "printdraftchanges.txt"
    };

    // First, copy BookNames.xml for ready reference. We will update it later.
    string bookNamesCopy = Path.Combine(outputProjectDirectory, "BookNames.xml");
    string bookNamesSource = Path.Combine(SourceDir, "BookNames.xml");
    if (File.Exists(bookNamesCopy))
    {
        File.Delete(bookNamesCopy);
    }
    if (File.Exists(bookNamesSource))
    {
        File.Copy(bookNamesSource, bookNamesCopy, true);
    }

    // Now, get on with preprocessing the USFM files.
    Logit.GUIWriteString = GUIWriteString;
    Logit.OpenFile(Path.Combine(outputProjectDirectory, "preprocesslog.txt"));
    string bookId;
    string UsfmDir = Path.Combine(outputProjectDirectory, "extendedusfm");
    if (!Directory.Exists(SourceDir))
    {
        WriteLine("ERROR: " + SourceDir + " not found!");
        return;
    }

    // Start with an EMPTY USFM directory to avoid problems with old files
    Utils.DeleteDirectory(UsfmDir);
    fileHelper.EnsureDirectory(UsfmDir);
    string[] inputFileNames = Directory.GetFiles(SourceDir);
    if (inputFileNames.Length == 0)
    {
        WriteLine("ERROR: No files found in " + SourceDir);
        return;
    }

    foreach (string inputFile in inputFileNames)
    {
        string filename = Path.GetFileName(inputFile);
        // Invariant case folding keeps the filters working regardless of the UI culture
        // (e.g. ".ini" must upper-case to ".INI" even under a Turkish locale).
        string lowerName = filename.ToLowerInvariant();
        string fileType = Path.GetExtension(filename).ToUpperInvariant();

        bool isIgnored = (Array.IndexOf(ignoredExtensions, fileType) >= 0)
            || inputFile.EndsWith("~")
            || (Array.IndexOf(ignoredNames, lowerName) >= 0);
        if (isIgnored)
        {
            continue;
        }

        ShowStatus("preprocessing " + filename);
        Application.DoEvents();
        // NOTE(review): fAllRunning looks like a cancellation flag — confirm in fileHelper.
        if (!fileHelper.fAllRunning)
        {
            break;
        }

        string outputFileName = MakeUpUsfmFileName(inputFile, out bookId) + ".usfm";
        if (outputFileName.Length < 8)
        {
            // Too short to contain a made-up name plus ".usfm": no usable \id line was found.
            // Stray .txt files are common, so only complain about other file types.
            if (fileType != ".TXT")
            {
                Logit.WriteLine("No proper \\id line found in " + inputFile);
            }
        }
        else if (projectOptions.allowedBookList.Contains(bookId))
        {
            string outputFilePath = Path.Combine(UsfmDir, outputFileName);
            PreprocessOneFile(inputFile, projectOptions.preprocessingTables, outputFilePath);
        }
    }
}
/// <summary>
/// Command line entry point for usfx2usfm: parses the switches, opens the log file,
/// converts the given USFX file to USFM, and optionally prints the usage banner.
/// </summary>
/// <param name="args">Command line arguments (see the syntax text printed below).</param>
static void Main(string[] args)
{
    int i;
    bool showBanner = false;
    string logName = "WordSendLog.txt";
    string inName = "";
    string outName = ".sfm";
    Logit.useConsole = true;

    for (i = 0; i < args.Length; i++)
    {   // Scan the command line
        string s = args[i];
        if ((s != null) && (s.Length > 0))
        {
            if ((s[0] == '-') && (s.Length > 1))
            {   // command line switch: take action
                switch (Char.ToLower(s[1]))
                {
                    case 'o':   // Set output file name
                        outName = SFConverter.GetOption(ref i, args);
                        break;
                    case 'i':   // Set input file name
                        inName = SFConverter.GetOption(ref i, args);
                        break;
                    case 'l':   // Set log name
                        logName = SFConverter.GetOption(ref i, args);
                        break;
                    case '-':
                    case 'h':
                    case '?':
                        showBanner = true;
                        break;
                    default:
                        Logit.WriteLine("Unrecognized command line switch: " + args[i]);
                        showBanner = true;
                        break;
                }
            }
            else if (inName == "")
            {
                // First bare argument is taken as the input file name.
                inName = args[i];
            }
        }
    }

    Logit.OpenFile(logName);
    Logit.WriteLine("\nWordSend project usfx2usfm compiled " + Version.date);
    if (inName == "")
    {
        // Nothing to convert: show the usage text instead.
        showBanner = true;
    }
    else
    {
        Logit.WriteLine("Input file name is " + inName + "; output suffx is " + outName);

        // Here we instantiate the object that does most of the work.
        SFConverter.scripture = new Scriptures();

        // Write out the USFM file
        SFConverter.scripture.USFXtoUSFM(inName, Path.GetDirectoryName(outName), Path.GetFileName(outName));
    }

    if (showBanner)
    {
        Logit.WriteLine("");
        Logit.WriteLine(Version.copyright);
        Logit.WriteLine("");
        Logit.WriteLine(Version.contact);
        Logit.WriteLine(@" Syntax: usfx2usfm [-o Output] [-?] [-l logname] [-i] inputfile Output = output USFM path and file name suffix default is .sfm book code will be added, i. e. pdg.sfm -> MATpdg.sfm and subdir\eng.sfm -> subdir\MATeng.sfm logname is name of log file to write, default is WordSendLog.txt -h or -? 
= show this information. inputfile = name of USFX file to convert to USFM File names with embedded spaces must be surrounded by quotes. Do not use - as the first character of a path or file name. ");
    }

    // Close the log on every path, not only when the banner was shown.
    Logit.CloseFile();
}
/// <summary>
/// Write title.json and info.json into htmDir.
/// title.json holds the identifying metadata of the translation; info.json holds the same
/// metadata plus audio identifiers, the list of books present (names, codes and, when all
/// are shorter than three characters, abbreviations) and the list of chapter ids.
/// Any exception is logged through Logit.WriteError rather than propagated.
/// </summary>
protected override void WriteContentsPage()
{
    int i;
    bool needComma;
    ChapterInfo ci;
    if (shortTitle == string.Empty)
    {
        shortTitle = englishDescription;
    }
    string lastBook = string.Empty;

    // countries is a space-separated list; turn it into the body of a JSON string array.
    string countryList = "[\"" + countries.Replace(" ", "\",\"") + "\"],\n";
    string countryArray = "\"countries\":" + countryList;
    string allCountriesArray = "\"allcountries\":" + countryList;

    try
    {
        if (String.IsNullOrEmpty(traditionalAbbreviation))
        {
            traditionalAbbreviation = translationIdentifier;
        }

        // The using blocks guarantee both files are closed even if a write fails part way.
        using (StreamWriter titlejson = new StreamWriter(Path.Combine(htmDir, "title.json")))
        {
            titlejson.Write("{\n");
            titlejson.Write("\"id\":\"{0}\",\n", translationIdentifier);
            titlejson.Write("\"type\":\"bible\",\n");
            titlejson.Write("\"name\":\"{0}\",\n", translationName);
            titlejson.Write("\"nameEnglish\":\"{0}\",\n", shortTitle);
            if (projectOptions.hasStrongs)
            {
                titlejson.Write("\"hasLemma\":true,\n");
            }
            else
            {
                titlejson.Write("\"hasLemma\":false,\n");
            }
            if (projectOptions.redistributable)
            {
                titlejson.Write("\"redistributable\":true,\n");
            }
            else
            {
                titlejson.Write("\"redistributable\":false,\n");
            }
            titlejson.Write("\"abbr\":\"{0}\",\n", traditionalAbbreviation);
            titlejson.Write("\"dir\":\"{0}\",\n", textDirection);
            titlejson.Write("\"lang\":\"{0}\",\n", languageIdentifier);
            titlejson.Write("\"country\":\"{0}\",\n", country);
            titlejson.Write(countryArray);
            titlejson.Write(allCountriesArray);
            titlejson.Write("\"langNameEnglish\":\"{0}\",\n", languageNameInEnglish);
            titlejson.Write("\"langName\":\"{0}\",\n", languageNameInVernacular);
            titlejson.Write("\"fontClass\":\"{0}\"\n", fontClass);
            titlejson.Write("}\n");
        }

        using (StreamWriter infojson = new StreamWriter(Path.Combine(htmDir, "info.json")))
        {
            infojson.Write("{\n");
            infojson.Write("\"id\":\"{0}\",\n", translationIdentifier);
            infojson.Write("\"haiola_id\":\"{0}\",\n", translationIdentifier);
            infojson.Write("\"fcbh_id\":\"{0}\",\n", fcbhId);
            infojson.Write("\"type\":\"bible\",\n");
            infojson.Write("\"name\":\"{0}\",\n", translationName);
            infojson.Write("\"nameEnglish\":\"{0}\",\n", shortTitle);
            if (projectOptions.hasStrongs)
            {
                infojson.Write("\"hasLemma\":true,\n");
            }
            else
            {
                infojson.Write("\"hasLemma\":false,\n");
            }
            infojson.Write("\"abbr\":\"{0}\",\n", traditionalAbbreviation);
            infojson.Write("\"dir\":\"{0}\",\n", textDirection);
            infojson.Write("\"lang\":\"{0}\",\n", languageIdentifier);
            infojson.Write("\"langName\":\"{0}\",\n", languageNameInVernacular);
            infojson.Write("\"langNameEnglish\":\"{0}\",\n", languageNameInEnglish);
            infojson.Write("\"fontClass\":\"{0}\",\n", fontClass);
            infojson.Write("\"script\":\"{0}\",\n", script);
            infojson.Write("\"dialectCode\":\"{0}\",\n", dialectCode);
            infojson.Write("\"audioDirectory\":\"{0}\",\n", fcbhId);
            infojson.Write("\"fcbh_drama_nt\":\"{0}\",\n", fcbhDramaNt);
            infojson.Write("\"fcbh_drama_ot\":\"{0}\",\n", fcbhDramaOt);
            infojson.Write("\"fcbh_audio_nt\":\"{0}\",\n", fcbhAudioNt);
            infojson.Write("\"fcbh_audio_ot\":\"{0}\",\n", fcbhAudioOt);
            infojson.Write("\"fcbh_portion\":\"{0}\",\n", fcbhPortion);
            infojson.Write(numbers + "\n");
            infojson.Write("\"country\":\"{0}\",\n", country);
            infojson.Write("\"countryCode\":\"{0}\",\n", countryCode);
            infojson.Write(countryArray);
            infojson.Write(allCountriesArray);
            infojson.Write("\"stylesheet\":\"{0}\",\n", customCssName);
            infojson.Write("\"timeGenerated\":\"{0}\",\n", DateTime.UtcNow.ToString("s"));
            if (projectOptions.redistributable)
            {
                infojson.Write("\"redistributable\":true,\n");
            }
            else
            {
                infojson.Write("\"redistributable\":false,\n");
            }

            // Vernacular short names of the books that are present and have chapters.
            needComma = false;
            infojson.Write("\"divisionNames\":[");
            for (i = 0; i < bookInfo.publishArrayCount; i++)
            {
                if (bookInfo.publishArray[i].IsPresent && (bookInfo.publishArray[i].chapterFiles != null) && (bookInfo.publishArray[i].chaptersFound.Count > 0))
                {   // This book is in the input files and contains at least one character of text.
                    if (needComma)
                    {
                        infojson.Write(",");
                    }
                    infojson.Write("\"{0}\"", bookInfo.publishArray[i].vernacularShortName);
                    needComma = true;
                }
            }
            infojson.Write("],\n");

            // Short codes of the same books, in the same order.
            needComma = false;
            infojson.Write("\"divisions\":[");
            for (i = 0; i < bookInfo.publishArrayCount; i++)
            {
                if (bookInfo.publishArray[i].IsPresent && (bookInfo.publishArray[i].chapterFiles != null) && (bookInfo.publishArray[i].chaptersFound.Count > 0))
                {   // This book is in the input files and contains at least one character of text.
                    if (needComma)
                    {
                        infojson.Write(",");
                    }
                    infojson.Write("\"{0}\"", bookInfo.publishArray[i].shortCode);
                    needComma = true;
                }
            }
            infojson.Write("],\n");

            // divisionAbbreviations is emitted only if every abbreviation is under 3 characters,
            // so build it in memory and discard it as soon as a longer one is seen.
            bool hasShortAbbreviations = true;
            StringBuilder sb = new StringBuilder("\"divisionAbbreviations\":[");
            needComma = false;
            for (i = 0; i < bookInfo.publishArrayCount; i++)
            {
                if (bookInfo.publishArray[i].IsPresent && (bookInfo.publishArray[i].chapterFiles != null) && (bookInfo.publishArray[i].chaptersFound.Count > 0))
                {
                    string abbr = bookInfo.publishArray[i].vernacularAbbreviation;
                    if (abbr.Length >= 3)
                    {
                        hasShortAbbreviations = false;
                        break;
                    }
                    if (needComma)
                    {
                        sb.Append(",");
                    }
                    sb.Append(String.Format("\"{0}\"", abbr));
                    needComma = true;
                }
            }
            sb.Append("],");
            if (hasShortAbbreviations)
            {
                infojson.Write(sb.ToString() + "\n");
            }

            // Chapter ids, with a line break whenever the two-character book prefix changes.
            needComma = false;
            infojson.Write("\"sections\":[");
            for (i = 0; i < bookInfo.allChapters.Count; i++)
            {
                ci = (ChapterInfo)bookInfo.allChapters[i];
                if ((ci != null) && (ci.chapterId.Length > 2))
                {
                    string bookPrefix = ci.chapterId.Substring(0, 2);
                    if (needComma)
                    {
                        infojson.Write(",");
                        if (bookPrefix != lastBook)
                        {
                            infojson.Write("\n");
                        }
                    }
                    infojson.Write("\"{0}\"", ci.chapterId);
                    // Track state only for entries actually written: a skipped entry must not
                    // produce a leading comma, and a null or short id must not be dereferenced.
                    needComma = true;
                    lastBook = bookPrefix;
                }
            }
            infojson.Write("]\n}\n");
        }
    }
    catch (Exception err)
    {
        Logit.WriteError(err.Message + " writing contents files title.json and info.json in " + htmDir);
        Logit.WriteError(err.StackTrace);
    }
}
/// <summary>
/// Turns a Crosswire mod2imp.exe "OSIS" output into an XML file.
/// Lines starting with "$$$" are location markers and are consumed to open/close book and
/// introduction wrappers; "&lt;verse" lines are copied through after updating the current book;
/// every other non-blank line is copied with div tags removed and chapter tags adjusted.
/// Exceptions are logged through Logit.WriteError rather than propagated.
/// </summary>
/// <param name="infile">File generated with mod2imp.exe</param>
/// <param name="outfile">Intermediate XML file</param>
public void ImpOsis2Xml(string infile, string outfile)
{
    string line;
    string trimmedLine;
    int c, v;
    string ch = "0";
    string vs = "0";
    string lastBookAbbrev = String.Empty;
    currentBookAbbrev = String.Empty;
    bool inBook = false;
    bool inIntro = false;
    try
    {
        // using blocks close both files even when parsing throws part way through.
        using (StreamReader sr = new StreamReader(infile, Encoding.UTF8))
        using (StreamWriter sw = new StreamWriter(outfile, false, Encoding.UTF8))
        {
            sw.WriteLine("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
            sw.WriteLine("<impxml>");

            // Reading inside the loop condition also copes with an empty input file.
            while ((line = sr.ReadLine()) != null)
            {
                // A bare "&c." (old-style "etc.") is not well-formed XML; escape the ampersand.
                line = line.Replace("&c.", "&amp;c.");
                trimmedLine = line.Trim();
                if (trimmedLine.Length == 0)
                {
                    continue;
                }

                if (trimmedLine.StartsWith("$$$"))
                {
                    // Location marker line; markers containing '[' are ignored entirely.
                    if (!trimmedLine.Contains("["))
                    {
                        currentBookAbbrev = ParseImpLocation(trimmedLine, out ch, out vs);
                        if (lastBookAbbrev != currentBookAbbrev)
                        {
                            if (inBook)
                            {
                                sw.WriteLine("</book>");
                            }
                            sw.WriteLine("<book id=\"{0}\">", currentBookAbbrev);
                            lastBookAbbrev = currentBookAbbrev;
                            inBook = true;
                        }
                        // Verse "0" marks introductory material: wrap it in an "ip" paragraph.
                        if ((!String.IsNullOrEmpty(vs)) && (vs == "0") && !inIntro)
                        {
                            sw.WriteLine("<p sfm=\"ip\">");
                            inIntro = true;
                        }
                        if ((!String.IsNullOrEmpty(vs)) && (vs != "0") && inIntro)
                        {
                            sw.WriteLine("</p>");
                            inIntro = false;
                        }
                    }
                }
                else if (trimmedLine.StartsWith("<verse"))
                {
                    // Only the book abbreviation is needed here; chapter and verse are discarded.
                    string osisId = ExtractOsisId(line);
                    ParseOsisId(osisId, out currentBookAbbrev, out c, out v);
                    if (lastBookAbbrev != currentBookAbbrev)
                    {
                        if (inBook)
                        {
                            sw.WriteLine("</book>");
                        }
                        sw.WriteLine("<book id=\"{0}\">", currentBookAbbrev);
                        lastBookAbbrev = currentBookAbbrev;
                        inBook = true;
                    }
                    sw.WriteLine(line);
                }
                else
                {
                    // Drop <div ...> start tags, close the paragraph at a chapter end milestone,
                    // strip the "Strong:" lemma prefix, and open a paragraph after each
                    // remaining chapter tag.
                    string temp = System.Text.RegularExpressions.Regex.Replace(line, "<div [^>]*>", "");
                    temp = System.Text.RegularExpressions.Regex.Replace(temp, "<chapter [^>]*eID=[^>]*>", "</p>");
                    temp = System.Text.RegularExpressions.Regex.Replace(temp, "lemma=\"Strong:", "lemma=\"");
                    temp = System.Text.RegularExpressions.Regex.Replace(temp, "(<chapter [^>]*>)", "$1<p>");
                    sw.WriteLine(temp);
                }
            }

            if (inBook)
            {
                sw.WriteLine("</book>");
            }
            sw.WriteLine("</impxml>");
        }
    }
    catch (Exception ex)
    {
        Logit.WriteError(ex.Message);
    }
}
/// <summary>
/// Command line entry point for usfm2usfx: parses the switches, opens the log file,
/// reads every USFM file matching the given file specifications and writes one USFX file.
/// </summary>
/// <param name="args">Command line arguments (see the syntax text printed below).</param>
static void Main(string[] args)
{
    int i;
    bool showBanner = true;
    string logName = "usfm2usfxlog.txt";
    string outName = "";
    ArrayList fileSpecs = new ArrayList(127);
    Logit.useConsole = true;

    for (i = 0; i < args.Length; i++)
    {   // Scan the command line
        string s = args[i];
        if ((s != null) && (s.Length > 0))
        {
            if (((s[0] == '-') || (s[0] == '/')) && (s.Length > 1))
            {   // command line switch: take action
                switch (Char.ToLower(s[1]))
                {
                    case 'n':   // No banner display
                        showBanner = false;
                        break;
                    case 'o':   // Set output file name
                        outName = SFConverter.GetOption(ref i, args);
                        break;
                    case 'l':   // Set log name
                        logName = SFConverter.GetOption(ref i, args);
                        break;
                    case '-':
                    case 'h':
                    case '?':
                        showBanner = true;
                        break;
                    default:
                        Logit.WriteLine("Unrecognized command line switch: " + args[i]);
                        showBanner = true;
                        break;
                }
            }
            else
            {
                // Anything that is not a switch is an input file specification.
                fileSpecs.Add(args[i]);
            }
        }
    }

    Logit.OpenFile(logName);
    if (showBanner)
    {
        Logit.WriteLine("This is part of Haiola open source software.");
        Logit.WriteLine("Please see http://haiola.org for copyright information.");
        Logit.WriteLine(@" Syntax: usfm2usfx [-o Output] [-n] [-l logname] [-?] filespec(s) Output = output USFX file name -n = don't display copyright and banner information. logname = log file name (default is usfm2usfxlog.txt) -? = cancel previous /n and show this information. filespec = SFM file specification(s) to read. Wild cards are OK. You may use / instead of - to introduce switches. Do not use either of those two characters as the first character of a file name. ");
    }

    if (outName == "")
    {
        outName = "output.usfx.xml";
    }

    if (fileSpecs.Count < 1)
    {
        Logit.WriteLine("Nothing to do. No input files specified.");
    }
    else
    {
        // Instantiate the object that does most of the work.
        SFConverter.scripture = new Scriptures();

        // Read the input USFM files into internal data structures.
        for (i = 0; i < fileSpecs.Count; i++)
        {
            SFConverter.ProcessFilespec((string)fileSpecs[i]);
        }

        // Write out the USFX file.
        SFConverter.scripture.WriteUSFX(outName);
    }

    // Pair the OpenFile above so the log is closed before the process exits.
    Logit.CloseFile();
}
/// <summary>
/// Reads a Paratext .ssf file and imports selected configuration items into our own Options object.
/// Does nothing if the file does not exist. After a successful read the options are saved
/// with projectOptions.Write(). Exceptions are logged through Logit.WriteError, not propagated.
/// </summary>
/// <param name="projectOptions">Options object to update with data in the Paratext .ssf file.</param>
/// <param name="ssfFileName">Full path to the Paratext .ssf file to read.</param>
public void ReadParatextSsf(Options projectOptions, string ssfFileName)
{
    string elementName, setting;
    try
    {
        if (!File.Exists(ssfFileName))
        {
            return;
        }

        XmlTextReader ssf = new XmlTextReader(ssfFileName);
        try
        {
            ssf.WhitespaceHandling = WhitespaceHandling.Significant;
            ssf.MoveToContent();
            while (ssf.Read())
            {
                // Only non-empty elements other than the ScriptureText wrapper carry settings.
                if ((ssf.NodeType != XmlNodeType.Element) || (ssf.Name == "ScriptureText") || ssf.IsEmptyElement)
                {
                    continue;
                }

                elementName = ssf.Name;
                ssf.Read();  // Get content of element
                if ((ssf.NodeType != XmlNodeType.Text) || String.IsNullOrEmpty(ssf.Value))
                {
                    continue;
                }

                setting = ssf.Value;
                switch (elementName)
                {
                    case "Encoding":
                        // 65001 is the code page number for UTF-8.
                        if (setting != "65001")
                        {
                            Logit.WriteLine("Warning: Paratext encoding is not Unicode UTF-8 (" + setting + ") in " + ssfFileName);
                        }
                        break;
                    case "EthnologueCode":
                        // Only fill in the language id when ours is missing or too short;
                        // a null id counts as missing instead of aborting the whole import.
                        if (String.IsNullOrEmpty(projectOptions.languageId) || (projectOptions.languageId.Length < 3))
                        {
                            projectOptions.languageId = setting;
                        }
                        break;
                    case "RangeIndicator":  // verse range separator
                        projectOptions.rangeSeparator = setting;
                        break;
                    case "SequenceIndicator":
                        projectOptions.multiRefSameChapterSeparator = setting;
                        break;
                    case "ChapterVerseSeparator":
                        projectOptions.CVSeparator = setting;
                        break;
                    case "ChapterRangeSeparator":
                        projectOptions.multiRefDifferentChapterSeparator = setting;
                        break;
                    case "BookSequenceSeparator":
                        projectOptions.BookSequenceSeparator = setting;
                        break;
                    case "ChapterNumberSeparator":
                        projectOptions.ChapterNumberSeparator = setting;
                        break;
                    case "BookSourceForMarkerXt":
                        projectOptions.BookSourceForMarkerXt = setting;
                        break;
                    case "BookSourceForMarkerR":
                        projectOptions.BookSourceForMarkerR = setting;
                        break;
                    case "Guid":
                        projectOptions.paratextGuid = setting;
                        break;
                }
            }
        }
        finally
        {
            // Release the file handle even when the XML is malformed and Read() throws.
            ssf.Close();
        }

        projectOptions.Write();
    }
    catch (Exception ex)
    {
        Logit.WriteError("Error reading Paratext options file " + ssfFileName + ": " + ex.Message);
        Logit.WriteError(ex.StackTrace);
    }
}