Esempio n. 1
0
        /// <summary>
        /// Starts a USFX book: calls EndBook(), then writes the opening "book"
        /// element and a complete "id" element to the shared XML writer.
        /// </summary>
        /// <param name="tla">
        /// Book identifier. Written as the "id" attribute of both elements and
        /// used to look up the book record whose name becomes the id text.
        /// </param>
        protected void StartBook(string tla)
        {
            EndBook();

            var writer = holyBooks.xw;

            // <book id="tla"> is left open; only the nested <id> is closed below.
            writer.WriteStartElement("book");
            writer.WriteAttributeString("id", tla);

            writer.WriteStartElement("id");
            writer.WriteAttributeString("id", tla);

            BibleBookRecord record = holyBooks.bkInfo.BkRec(tla);
            if (record != null)
            {
                // The book name follows the id, separated by a single space.
                writer.WriteString(" " + record.name);
            }

            writer.WriteEndElement(); // id
            inBook = true;
        }
Esempio n. 2
0
        /// <summary>
        /// Builds the link target for a chapter file named "{tla}.{chapter}.htm",
        /// where tla comes from the book record registered under
        /// <paramref name="bookAbbrev"/>, and passes that file name to the
        /// single-argument MainFileLinkTarget overload.
        /// </summary>
        /// <param name="bookAbbrev">Key used to look up the book record in bookInfo.books.</param>
        /// <param name="chapter">Chapter component of the file name.</param>
        /// <returns>Whatever the single-argument overload returns for the file name.</returns>
        protected override string MainFileLinkTarget(string bookAbbrev, string chapter)
        {
            BibleBookRecord record = (BibleBookRecord)bookInfo.books[bookAbbrev];
            string chapterFileName = string.Format("{0}.{1}.htm", record.tla, chapter);

            return MainFileLinkTarget(chapterFileName);
        }
Esempio n. 3
0
        /// <summary>
        /// Reads a USFX file node by node and writes the elements that are not
        /// filtered out to the SILE output opened by OpenSileFile().
        /// </summary>
        /// <param name="usfxFileName">Path of the USFX file to read.</param>
        /// <param name="sileDirectory">Directory stored in sileDir for the SILE output.</param>
        /// <returns>
        /// true when the whole document was read; false when a book with a
        /// missing or unknown id stopped the conversion (an error is logged).
        /// </returns>
        /// <remarks>
        /// The reader is always closed, including on the early-exit and
        /// exception paths. Exceptions are not caught here.
        /// </remarks>
        public bool ConvertUsfxToSile(string usfxFileName, string sileDirectory)
        {
            sileDir = sileDirectory;
            usfx    = new XmlTextReader(usfxFileName);
            try
            {
                usfx.WhitespaceHandling = WhitespaceHandling.All;
                return ConvertUsfxNodesToSile();
            }
            finally
            {
                // Release the input file handle no matter how the loop ended.
                usfx.Close();
            }
        }

        /// <summary>
        /// Node loop for ConvertUsfxToSile: dispatches on each start element,
        /// end element and text node of the already opened usfx reader.
        /// </summary>
        /// <returns>true if the reader reached the end of the document; false if an unknown book was met.</returns>
        private bool ConvertUsfxNodesToSile()
        {
            // NOTE(review): the end-element and text branches below still call the
            // mosis/OSIS writer helpers rather than sileFile. This looks like an
            // unfinished port from the OSIS converter; confirm before relying on output.
            while (usfx.Read())
            {
                Logit.ShowStatus("converting to SILE " + cv);
                if (usfx.NodeType == XmlNodeType.Element)
                {
                    // GetAttribute returns null for attributes that are not present.
                    level  = usfx.GetAttribute("level");
                    style  = usfx.GetAttribute("style");
                    sfm    = usfx.GetAttribute("sfm");
                    caller = usfx.GetAttribute("caller");
                    id     = usfx.GetAttribute("id");
                    switch (usfx.Name)
                    {
                    case "languageCode":
                        SkipElement();
                        break;

                    case "book":
                        currentBookHeader = currentBookTitle = String.Empty;
                        toc1           = toc2 = toc3 = String.Empty;
                        inToc1         = inToc2 = inToc3 = false;
                        currentChapter = currentChapterPublished = currentChapterAlternate = String.Empty;
                        currentVerse   = currentVersePublished = currentVerseAlternate = String.Empty;
                        titleWritten   = false;
                        chapterWritten = false;

                        // A missing id attribute is treated like any other unknown book
                        // instead of throwing a NullReferenceException.
                        bool hasBookId = (id != null) && (id.Length > 2);
                        if (hasBookId)
                        {
                            currentBookAbbrev = id;
                            bookRecord        = (BibleBookRecord)bookInfo.books[currentBookAbbrev];
                        }
                        if ((bookRecord == null) || !hasBookId)
                        {
                            // Report the id actually found in the file; currentBookAbbrev may
                            // still hold the previous book when the id was rejected.
                            Logit.WriteError("Cannot process unknown book: " + id);
                            return(false);
                        }
                        if ((bookRecord.testament == "a") && !globe.projectOptions.includeApocrypha)
                        {
                            SkipElement();
                        }
                        else if (!globe.projectOptions.allowedBookList.Contains(bookRecord.tla))    // Check for presence of book in bookorder.txt
                        {
                            SkipElement();
                        }
                        else
                        {       // We have a book we want to write out.
                            OpenSileFile();
                        }
                        break;

                    case "fe":      // End note. Rarely used, fortunately, but in the standards. Treat as regular footnote.
                    case "f":       //  footnote
                        if (!usfx.IsEmptyElement)
                        {
                            // "-" suppresses the caller; "+" or no caller asks for a generated mark.
                            if (caller == "-")
                            {
                                caller = String.Empty;
                            }
                            else if ((caller == "+") || (String.IsNullOrEmpty(caller)))
                            {
                                caller = footnoteMark.Marker();
                            }
                            sileFile.WriteStartElement("f");
                            sileFile.WriteAttributeString("caller", caller);
                        }
                        break;

                    case "x":       // Cross references
                        if (!usfx.IsEmptyElement)
                        {
                            if (caller == "-")
                            {
                                caller = String.Empty;
                            }
                            else if ((caller == "+") || (String.IsNullOrEmpty(caller)))
                            {
                                caller = xrefMark.Marker();
                            }
                            sileFile.WriteStartElement("x");
                            sileFile.WriteAttributeString("caller", caller);
                        }
                        break;

                    case "ide":
                    case "fm":      // Should not actually be in any field texts. Safe to skip.
                    case "idx":     // Peripherals - Back Matter Index
                        SkipElement();
                        break;

                    case "ie":      // Introduction end
                        SkipElement();
                        break;

                    case "id":
                        if (id != currentBookAbbrev)
                        {
                            Logit.WriteError("Book ID in <id> and <book> do not match: " + currentBookAbbrev + " is not " + id);
                        }
                        SkipElement();      // Strip out comment portion.
                        break;

                    case "toc":     // Table of Contents entries
                        if (String.IsNullOrEmpty(level) || (level == "1"))
                        {
                            inToc1 = true;
                        }
                        else if (level == "2")
                        {
                            inToc2 = true;
                        }
                        else if (level == "3")
                        {
                            inToc3 = true;
                        }
                        else
                        {
                            SkipElement();
                        }
                        break;

                    case "rem":     // Comment; not part of the actual text
                        SkipElement();
                        break;

                    case "h":
                        currentBookHeader = ReadElementText().Trim();
                        break;

                    case "c":
                        currentChapter          = id;
                        currentChapterPublished = fileHelper.LocalizeDigits(currentChapter);
                        currentChapterAlternate = String.Empty;
                        currentVerse            = currentVersePublished = currentVerseAlternate = String.Empty;
                        currentChapterPublished = chapterLabel + fileHelper.LocalizeDigits(ReadElementText().Trim());
                        chapterWritten          = false;
                        break;

                    case "cl":
                        // Before the first chapter the label applies to every chapter;
                        // afterwards it replaces the published label of the current one.
                        if (currentChapter == String.Empty)
                        {
                            chapterLabel = ReadElementText().Trim() + " ";
                        }
                        else
                        {
                            currentChapterPublished = ReadElementText().Trim();
                        }
                        break;

                    case "cp":
                        if (!usfx.IsEmptyElement)
                        {
                            currentChapterPublished = ReadElementText().Trim();
                        }
                        break;

                    case "v":
                        PrintChapter();
                        currentVersePublished = fileHelper.LocalizeDigits(id);
                        currentVerse          = id.Replace("\u200F", ""); // Strip out RTL character
                        currentVerseAlternate = "";
                        if (!usfx.IsEmptyElement)
                        {
                            // Element content, when present, overrides the published verse number.
                            usfx.Read();
                            if (usfx.NodeType == XmlNodeType.Text)
                            {
                                currentVersePublished = fileHelper.LocalizeDigits(usfx.Value.Trim());
                            }
                            if (usfx.NodeType != XmlNodeType.EndElement)
                            {
                                usfx.Read();
                            }
                        }
                        break;

                    case "va":      // Not supported by The Sword Project
                        SkipElement();
                        break;

                    case "vp":
                        SkipElement();

                        /* This feature is not supported by The Sword Project.
                         * if (!usfx.IsEmptyElement)
                         * {
                         *  usfx.Read();
                         *  if (usfx.NodeType == XmlNodeType.Text)
                         *  {
                         *      currentVersePublished = usfx.Value.Trim();
                         *      if (currentVersePublished.Length > 0)
                         *      {
                         *          vpeID = StartId();
                         *          StartMosisElement("verse");
                         *          mosis.WriteAttributeString("osisID", osisVerseId);
                         *          mosis.WriteAttributeString("sID", verseeID);
                         *          mosis.WriteAttributeString("n", currentVersePublished);
                         *          WriteMosisEndElement();    // verse
                         *      }
                         *  }
                         * }
                         */
                        break;

                    case "periph":
                        SkipElement();
                        break;

                    case "cs":      // Rare or new character style: don't know what it should be, so throw away tag & keep text.
                        break;

                    case "gw":      // Do nothing. Not sure what to do with glossary words, yet.
                    case "xt":      // Do nothing.
                    case "ft":
                        // Ignore. It does nothing useful, but is an artifact of USFM exclusive character styles.
                        break;

                    case "usfx":
                        // Nothing to do, here.
                        break;

                    case "dc":
                    case "xdc":
                    case "fdc":
                        if (!globe.projectOptions.includeApocrypha)
                        {
                            SkipElement();
                        }
                        break;

                    default:
                        // Any other element is copied through with the attributes it carried.
                        sileFile.WriteStartElement(usfx.Name);
                        if (id != null)
                        {
                            sileFile.WriteAttributeString("id", id);
                        }
                        if (caller != null)
                        {
                            sileFile.WriteAttributeString("caller", caller);
                        }
                        if (level != null)
                        {
                            sileFile.WriteAttributeString("level", level);
                        }
                        if (sfm != null)
                        {
                            sileFile.WriteAttributeString("sfm", sfm);
                        }
                        if (style != null)
                        {
                            sileFile.WriteAttributeString("style", style);
                        }
                        if (usfx.IsEmptyElement)
                        {
                            // No end tag will follow for an empty element, so close it now.
                            sileFile.WriteEndElement();
                        }
                        break;
                    }
                }
                else if (usfx.NodeType == XmlNodeType.EndElement)
                {
                    if (inToc1 || inToc2)
                    {
                        if (usfx.Name == "toc")
                        {
                            inToc2 = inToc1 = false;
                        }
                        else if (inToc1 && usfx.Name == "it")
                        {
                            toc1 += "</hi></seg>";
                        }
                        else
                        {
                            Logit.WriteLine("Warning: " + usfx.Name + " end markup in title at " + currentBookAbbrev + " not written to OSIS file");
                        }
                    }
                    else
                    {
                        switch (usfx.Name)
                        {
                        case "w":
                        case "zw":
                            if (inStrongs)
                            {
                                WriteMosisEndElement();
                                inStrongs = false;
                            }
                            break;

                        case "wj":
                            WriteMosisEndElement();        // q
                            break;

                        case "book":
                            EndLineGroup();
                            EndCurrentVerse();
                            EndCurrentChapter();
                            EndIntroduction();
                            EndMajorSection();
                            WriteMosisEndElement();      // div type="book"
                            CheckElementLevel(3, "closed book");
                            break;

                        case "bdit":
                            WriteMosisEndElement();        // hi italic
                            WriteMosisEndElement();        // hi bold
                            break;

                        case "p":
                            if (itemLevel > 0)
                            {
                                itemLevel--;
                            }
                            CheckMinimumLevel(5, "Ending " + usfx.Name + " " + osisVerseId);
                            inNote = false;
                            if (eatPoetryLineEnd)
                            {
                                eatPoetryLineEnd = false;
                            }
                            else
                            {
                                WriteMosisEndElement();
                            }
                            break;

                        case "q":
                            if (eatPoetryLineEnd)
                            {
                                eatPoetryLineEnd = false;
                            }
                            else
                            {
                                WriteMosisEndElement();
                            }
                            break;

                        case "ref":
                            if (inReference)
                            {
                                WriteMosisEndElement();     // reference
                                inReference = false;
                            }
                            break;

                        case "fe":
                        case "f":
                        case "x":
                            if (inNote)
                            {
                                inNote = false;
                                WriteMosisEndElement();     // End of note
                            }
                            break;

                        case "add":
                            if (!inNote)
                            {
                                WriteMosisEndElement();
                            }
                            break;

                        case "qs":
                            if (inLineGroup)
                            {
                                WriteMosisEndElement();
                                inPoetryLine = false;
                            }

                            break;

                        case "bd":
                        case "bk":
                        case "cl":
                        case "d":
                        case "dc":
                        case "em":
                        case "fk":
                        case "fp":
                        case "fq":
                        case "fqa":
                        case "fr":
                        case "fv":
                        case "k":
                        case "no":
                        case "pn":
                        case "qac":
                        case "qt":
                        case "r":
                        case "rq":
                        case "s":
                        case "sc":
                        case "sig":
                        case "sls":
                        case "table":
                        case "tc":
                        case "tcr":
                        case "th":
                        case "thr":
                        case "tl":
                        case "tr":
                        case "xo":
                        case "ord":
                            // case "xq": Not useful for Sword modules.
                            WriteMosisEndElement();        // note, hi, reference, title, l, transChange, etc.
                            break;

                        case "it":
                            if (!inStrongs)
                            {
                                WriteMosisEndElement();
                            }
                            break;

                        case "nd":
                            WriteMosisEndElement();     // divineName
                            WriteMosisEndElement();     // seg
                            break;

                        case "xk":
                        case "fl":
                        case "zcr":
                        case "zcb":
                        case "zcg":
                        case "zcy":
                            // not supported.
                            break;

                            /* Can't get to this case (caught in "if" above)
                             * case "toc":
                             * inToc2 = inToc1 = false;
                             * break;*/
                        }
                    }
                }
                else if (((usfx.NodeType == XmlNodeType.Text) || (usfx.NodeType == XmlNodeType.SignificantWhitespace) || (usfx.NodeType == XmlNodeType.Whitespace)) && !ignore)
                {
                    // Text inside a toc element is accumulated; all other text is written out.
                    if (inToc1)
                    {
                        toc1 = toc1 + usfx.Value;
                    }
                    else if (inToc2)
                    {
                        toc2 = toc2 + usfx.Value;
                    }
                    else
                    {
                        mosis.WriteString(usfx.Value);
                    }
                }
            }

            // The whole document was read without hitting an unknown book.
            return(true);
        }
Esempio n. 4
0
        /// <summary>
        /// Reads a USFX file and prepares it for full text search (or concordance generation)
        /// by extracting only the canonical text within verses (and the canonical Psalm titles,
        /// which are prepended to verse 1 text), stripping out all formatting, footnotes, etc.,
        /// and normalizing all white space to single spaces. These verse text strings are put
        /// into an XML file with one "v" element per verse, with book, chapter, and verse given
        /// in attributes b, c, and v, respectively.
        /// Two sibling files are opened next to the verse file, using the same name with the
        /// extensions ".vpltxt" and ".lemma".
        /// </summary>
        /// <param name="usfxFileName">Name of the USFX file to extract canonical text from</param>
        /// <param name="verseFileName">Name of XML unformatted verse text only file</param>
        /// <returns>
        /// true when the whole file was processed and all outputs were closed; false when
        /// Logit.ShowStatus asked to stop or an exception was caught and logged.
        /// </returns>
        public bool Filter(string usfxFileName, string verseFileName)
        {
            string level   = String.Empty;
            string style   = String.Empty;
            string sfm     = String.Empty;
            string caller  = String.Empty;
            string id      = String.Empty;
            string strongs = String.Empty;

            verseText = new StringBuilder();
            lemmaText = new StringBuilder();
            bool result = false;

            // Forget anything left from an earlier run so that the failure cleanup
            // below only ever touches files opened by this call.
            vplFile   = null;
            lemmaFile = null;
            verseFile = null;
            usfx      = null;

            try
            {
                utf8encoding         = new UTF8Encoding(false);
                vplFile              = new StreamWriter(Path.ChangeExtension(verseFileName, ".vpltxt"), false, utf8encoding);
                lemmaFile            = new XmlTextWriter(Path.ChangeExtension(verseFileName, ".lemma"), utf8encoding);
                lemmaFile.Formatting = Formatting.Indented;
                lemmaFile.WriteStartDocument();
                lemmaFile.WriteStartElement("lemmaFile");
                usfx = new XmlTextReader(usfxFileName);
                usfx.WhitespaceHandling = WhitespaceHandling.All;
                verseFile            = new XmlTextWriter(verseFileName, utf8encoding);
                verseFile.Formatting = Formatting.Indented;
                verseFile.WriteStartDocument();
                verseFile.WriteStartElement("verseFile");
                while (usfx.Read())
                {
                    if (!Logit.ShowStatus("extracting search text " + currentPlace))
                    {
                        // Stop requested; the finally block closes the files.
                        return(false);
                    }
                    if (usfx.NodeType == XmlNodeType.Element)
                    {
                        level  = fileHelper.GetNamedAttribute(usfx, "level");
                        style  = fileHelper.GetNamedAttribute(usfx, "style");
                        sfm    = fileHelper.GetNamedAttribute(usfx, "sfm");
                        caller = fileHelper.GetNamedAttribute(usfx, "caller");
                        id     = fileHelper.GetNamedAttribute(usfx, "id");

                        switch (usfx.Name)
                        {
                        case "book":
                            currentChapter = String.Empty;
                            currentVerse   = String.Empty;

                            // Look the book up first; only touch the record once we know it exists.
                            BibleBookRecord knownBook = null;
                            if ((id != null) && (id.Length == 3))
                            {
                                currentBook = id;
                                knownBook   = (BibleBookRecord)bookInfo.books[currentBook];
                            }
                            if (knownBook == null)
                            {
                                Logit.WriteError("Cannot process unknown book: " + id);
                                SkipElement();
                            }
                            else
                            {
                                bookRecord     = knownBook;
                                osisBook       = bookRecord.osisName;
                                BibleWorksBook = bookRecord.bibleworksCode;
                                if (bookRecord.testament == "x")
                                {       // Skip peripherals.
                                    SkipElement();
                                }
                            }
                            currentPlace = currentBook;
                            break;

                        case "id":
                            if (id != currentBook)
                            {
                                Logit.WriteError("Book ID in <id> and <book> do not match; " + currentBook + " is not " + id);
                            }
                            SkipElement();      // Strip out comment portion.
                            break;

                        case "h":
                            // An empty <h/> has no content node; reading ahead would swallow
                            // whatever node follows it.
                            if (!usfx.IsEmptyElement)
                            {
                                usfx.Read();
                                if (usfx.NodeType == XmlNodeType.Text)
                                {
                                    bookRecord.vernacularShortName = usfx.Value.Trim();
                                }
                            }
                            break;

                        case "toc":
                            if (!usfx.IsEmptyElement)
                            {
                                usfx.Read();
                                if (usfx.NodeType == XmlNodeType.Text)
                                {
                                    if (level == "1")
                                    {
                                        bookRecord.vernacularLongName = usfx.Value.Trim();
                                    }
                                    else if (level == "2")
                                    {
                                        // Keep the shortest usable short name seen so far.
                                        string sn = usfx.Value.Trim();
                                        string currentShortName = bookRecord.vernacularShortName;
                                        if (String.IsNullOrEmpty(currentShortName) || (currentShortName.Length < 2) || (sn.Length < currentShortName.Length))
                                        {
                                            bookRecord.vernacularShortName = sn;
                                        }
                                    }
                                }
                            }
                            break;

                        case "c":
                            EndVerse();     // In case file lacks <ve /> elements.
                            currentChapter = id;
                            currentVerse   = String.Empty;
                            currentPlace   = currentBook + "_" + currentChapter;
                            SkipElement();     // Doesn't skip chapter, just the published chapter number, if present.
                            break;

                        case "v":
                            EndVerse();     // In case file lacks <ve /> elements.
                            inVerse      = true;
                            currentVerse = id;
                            currentPlace = currentBook + "_" + currentChapter + "_" + currentVerse;
                            SkipElement();      // Just in case there is a published verse number present.
                            break;

                        case "ve":
                            EndVerse();
                            break;

                        case "b":       // blank line
                        case "optionalLineBreak":
                        case "qs":
                        case "th":
                        case "thr":
                        case "tc":
                        case "tcr":
                            // These act as word separators in the flattened verse text.
                            if (inVerse)
                            {
                                verseText.Append(' ');
                            }
                            break;

                        case "d":       // Make canonical psalm titles searchable
                            inPsalmTitle = true;
                            break;

                        case "add":
                            verseText.Append("[");
                            break;

                        case "nd":
                            //verseText.Append("{");
                            break;

                        case "languageCode":
                        case "f":       //  footnote
                        case "fe":      // End note. Rarely used, fortunately, but in the standards.
                        case "x":       // Cross references
                        case "glo":
                        case "ide":
                        case "fig":     // figure
                        case "fdc":
                        case "fm":      // Should not actually be in any field texts. Safe to skip.
                        case "idx":     // Peripherals - Back Matter Index
                        case "ie":      // Introduction end
                        case "iex":     // Introduction explanatory or bridge text
                        case "fp":
                        case "rem":     // Comment; not part of the actual text
                        case "cl":
                        case "ca":
                        case "vp":
                        case "periph":
                        case "milestone":
                        case "rq":
                        case "s":
                            SkipElement();
                            break;

                        case "w":
                            strongs = fileHelper.GetNamedAttribute(usfx, "s");
                            if (!String.IsNullOrEmpty(strongs))
                            {
                                lemmaText.Append(strongs + " ");
                            }
                            break;

                        case "p":
                            if (sfm.StartsWith("i"))
                            {
                                SkipElement();
                            }
                            else
                            {
                                // The markers below that begin with "i" are already handled by
                                // the StartsWith test above; they are kept as an explicit list.
                                switch (sfm)
                                {
                                case "cd":
                                case "intro":
                                case "hr":          // Horizontal rule not supported. Try a line break.
                                case "ib":
                                case "im":
                                case "imq":
                                case "imi":
                                case "ip":
                                case "ipi":
                                case "ipq":
                                case "ipr":
                                case "mt":
                                case "keyword":
                                case "iq":
                                case "imte":
                                case "imt":
                                case "is":
                                case "iot":
                                case "ior":
                                case "io":
                                case "ili":
                                case "r":
                                    SkipElement();
                                    break;
                                }
                            }
                            break;
                        }
                    }
                    else if (usfx.NodeType == XmlNodeType.EndElement)
                    {
                        switch (usfx.Name)
                        {
                        case "book":
                            EndVerse();     // In case file lacks <ve /> elements.
                            currentBook = currentChapter = currentVerse = String.Empty;
                            break;

                        case "d":
                            inPsalmTitle = false;
                            break;

                        case "add":
                            verseText.Append("]");
                            break;

                        case "nd":
                            // verseText.Append("}");
                            break;
                        }
                    }
                    else if (usfx.NodeType == XmlNodeType.Text)
                    {
                        if (inVerse || inPsalmTitle)
                        {
                            verseText.Append(usfx.Value);
                        }
                    }
                    else if ((usfx.NodeType == XmlNodeType.SignificantWhitespace) || (usfx.NodeType == XmlNodeType.Whitespace))
                    {
                        // Any run of white space collapses to one space.
                        if (inVerse || inPsalmTitle)
                        {
                            verseText.Append(" ");
                        }
                    }
                }
                Logit.ShowStatus("writing " + verseFileName);
                verseFile.WriteEndElement();    // verseFile
                lemmaFile.WriteEndElement();    // lemmaFile
                verseFile.Close();
                lemmaFile.Close();
                vplFile.Close();
                usfx.Close();
                result = true;
            }
            catch (Exception ex)
            {
                Logit.WriteError(ex.Message);
            }
            finally
            {
                // On success everything was closed above. On cancel or error, make
                // sure no file handle stays open.
                if (!result)
                {
                    CloseFilterFilesAfterFailure();
                }
            }
            return(result);
        }

        /// <summary>
        /// Best-effort cleanup for Filter: closes each input/output file that was opened,
        /// logging (not throwing) any secondary error so the remaining files still get closed.
        /// </summary>
        private void CloseFilterFilesAfterFailure()
        {
            try
            {
                if (verseFile != null)
                {
                    verseFile.Close();
                }
            }
            catch (Exception ex)
            {
                Logit.WriteError(ex.Message);
            }

            try
            {
                if (lemmaFile != null)
                {
                    lemmaFile.Close();
                }
            }
            catch (Exception ex)
            {
                Logit.WriteError(ex.Message);
            }

            try
            {
                if (vplFile != null)
                {
                    vplFile.Close();
                }
            }
            catch (Exception ex)
            {
                Logit.WriteError(ex.Message);
            }

            try
            {
                if (usfx != null)
                {
                    usfx.Close();
                }
            }
            catch (Exception ex)
            {
                Logit.WriteError(ex.Message);
            }
        }