Exemple #1
0
        }         // fun

        /// <summary>
        /// Loads the XML file at <paramref name="fileName"/> and feeds its nodes to
        /// parseChild, which writes into the file named by getOutputFileName.
        /// </summary>
        /// <param name="fileName">Path of the XML file to read.</param>
        public static void convertXml2text(string fileName)
        {
            // Reset the static state left over from a previous conversion.
            _sequence     = null;
            _bookName     = null;
            _date         = null;
            _hadAnotation = false;

            AuthorData authorData = new AuthorData();

            // Probe pass: determineEncoding assigns _encoding, which is used below
            // to open the file for the real load.
            determineEncoding(fileName, authorData);

            XmlDocument document = new XmlDocument();
            using (StreamReader reader = new StreamReader(fileName, _encoding, true))
            {
                document.Load(reader);
            }

            string outputPath = getOutputFileName(fileName);

            // FileMode.Create replaces any existing output file.
            using (FileStream output = new FileStream(outputPath, FileMode.Create))
            using (BinaryWriter writer = new BinaryWriter(output))
            {
                parseChild(null, document.ChildNodes, 0, writer, authorData);

                // Leaving the using block disposes (and thereby closes) the writer
                // and the underlying stream.
                writer.Flush();
            }
        } // fun
Exemple #2
0
        } // fun

        /// <summary>
        /// Recursively walks <paramref name="child"/>. An element whose only child is
        /// itself childless is handed to processLeafElement; any other node that has
        /// children is descended into with its own name as the new parent name.
        /// </summary>
        /// <param name="parentName">Name of the node that owns <paramref name="child"/>; null at the root.</param>
        /// <param name="child">Nodes to visit; a null list is ignored.</param>
        /// <param name="level">Current nesting depth, incremented on each recursive descent.</param>
        /// <param name="outBuf">
        /// Output writer, or null for the probe pass, in which case the walk stops as
        /// soon as <paramref name="aData"/> reports that it has data.
        /// </param>
        /// <param name="aData">Author/book data passed through to processLeafElement.</param>
        private static void parseChild(string parentName, XmlNodeList child, int level,
                                       BinaryWriter outBuf, AuthorData aData)
        {
            if (child == null)
            {
                return;
            }

            // Enumerate instead of indexing: on a ChildNodes list both Count and
            // Item(i) walk the sibling chain, which made the indexed loop quadratic
            // in the number of siblings.
            foreach (XmlNode n in child)
            {
                // Probe mode (no writer): stop once the author data has been collected.
                if (outBuf == null && aData.hasData())
                {
                    return;
                }

                string k = n.Name;

                // Skip ignored element names and nodes that carry nothing below them.
                if (_ignoreElementsMap.Contains(k) || n.HasChildNodes == false)
                {
                    continue;
                }

                // HasChildNodes is true here, so FirstChild is not null.
                XmlNode first = n.FirstChild;
                bool hasSingleLeafChild = first.NextSibling == null &&
                                          first.HasChildNodes == false;

                if (hasSingleLeafChild)
                {
                    // Only elements are processed; every other node type is skipped.
                    if (n.NodeType == XmlNodeType.Element)
                    {
                        processLeafElement(parentName, n, level, outBuf, aData);
                    }
                }
                else     // more than one child, or the single child has children of its own
                {
                    parseChild(k, n.ChildNodes, level + 1, outBuf, aData);
                }
            }     // loop
        }         // fun
Exemple #3
0
        } // fun

        /// <summary>
        /// Probe pass over the file: stores the reader's encoding in _encoding, parses
        /// the document without an output writer so that <paramref name="aData"/> can be
        /// filled, and switches _encoding to code page 1251 when the collected first or
        /// last name fails isCorrectEncoding.
        /// </summary>
        /// <param name="fileName">Path of the XML file to probe.</param>
        /// <param name="aData">Receives the author data; cleared again when the encoding is switched.</param>
        private static void determineEncoding(string fileName, AuthorData aData)
        {
            using (StreamReader reader = new StreamReader(fileName))
            {
                // NOTE(review): CurrentEncoding is read before anything has been read
                // from the stream; per the StreamReader documentation BOM detection
                // only happens on the first read, so this is presumably always the
                // constructor default - confirm whether that is intended.
                _encoding = reader.CurrentEncoding;

                XmlDocument probe = new XmlDocument();
                probe.Load(reader);

                // A null writer puts parseChild into probe mode.
                parseChild(null, probe.ChildNodes, 0, null, aData);
            }

            // The name sanity check is applied only when the encoding is UTF-8.
            if (_encoding != Encoding.UTF8)
            {
                return;
            }

            string firstName = aData.getName();
            bool nameLooksWrong = firstName != null && isCorrectEncoding(firstName) == false;

            // The last name is consulted only when the first name did not already fail.
            if (nameLooksWrong == false)
            {
                string lastName = aData.getLastName();
                nameLooksWrong = lastName != null && isCorrectEncoding(lastName) == false;
            }

            if (nameLooksWrong)
            {
                // Fall back to code page 1251 and discard what was decoded as UTF-8.
                _encoding = Encoding.GetEncoding(1251);
                aData.clear();
            }
        }         // fun
Exemple #4
0
        }         // fun

        /// <summary>
        /// Handles an element whose single child holds its text value. Header fields
        /// (genre, first/middle/last name, book title) are stored in
        /// <paramref name="aData"/> the first time they appear; every other value is
        /// written to <paramref name="outBuf"/> as one line of text.
        /// </summary>
        /// <remarks>
        /// All text is written as raw bytes produced by _encoding. BinaryWriter.Write(string)
        /// is deliberately not used: it prepends a length prefix and encodes with the
        /// writer's own encoding, which would put stray bytes into the text output.
        /// </remarks>
        /// <param name="parentName">Name of the parent node; "annotation" triggers the annotation header.</param>
        /// <param name="node">The element to process.</param>
        /// <param name="level">Nesting depth of the element (not used here).</param>
        /// <param name="outBuf">Output writer, or null when nothing may be written (probe pass).</param>
        /// <param name="aData">Collector for author and book header fields.</param>
        private static void processLeafElement(string parentName,
                                               XmlNode node, int level,
                                               BinaryWriter outBuf, AuthorData aData)
        {
            string  k       = node.Name;
            XmlNode content = node.FirstChild;
            string  v       = content != null ? content.Value : null;

            // Nothing to do for elements without any visible text.
            if (v == null ||
                v.Trim().Length < 1)
            {
                return;
            }

            // Header fields keep their first value and are never written directly;
            // date/year/sequence are let through to the output only once.
            switch (k)
            {
            case "genre":
                if (aData.getGenre() == null)
                {
                    aData.setGenre(v);
                }

                return;

            case "first-name":
                if (aData.getName() == null)
                {
                    aData.setName(v);
                }

                return;

            case "last-name":
                if (aData.getLastName() == null)
                {
                    aData.setLastName(v);
                }

                return;

            case "middle-name":
                if (aData.getMiddleName() == null)
                {
                    aData.setMiddleName(v);
                }

                return;

            case "book-title":
                if (aData.getBookTitle() == null)
                {
                    aData.setBookTitle(v);
                }

                return;

            case "date":
                if (_date != null)
                {
                    return;
                }

                // The element name is stored; the field is only tested for null here.
                _date = k;
                break;

            case "year":
                if (_date != null)
                {
                    return;
                }
                break;

            case "sequence":
                if (_sequence != null)
                {
                    return;
                }

                // Same flag-style use as _date above.
                _sequence = k;
                break;
            } // end switch

            if (outBuf == null)
            {
                return; // probe pass: nothing may be written
            }

            // Write the collected author data ahead of the first body line.
            if (aData.isWroteData() == false &&
                aData.hasData())
            {
                outBuf.Write(_encoding.GetBytes(aData.ToString()));
                aData.isWroteData(true);
            } // if

            // Decide whether the line is skipped BEFORE emitting any header or label,
            // so a skipped link never leaves a dangling "date - " / "annotation" line.
            v = divideLine(v.Trim());
            if (v.StartsWith("http://", System.StringComparison.Ordinal) ||
                v.StartsWith("www.", System.StringComparison.Ordinal))
            {
                return;
            }

            bool firstAnnotationLine = parentName == "annotation" &&
                                       _hadAnotation == false;

            if (firstAnnotationLine)
            {
                outBuf.Write(_encoding.GetBytes(parentName + "\r\n"));
            }

            if (k == "sequence" ||
                k == "date")
            {
                outBuf.Write(_encoding.GetBytes(k + " - "));
            }

            outBuf.Write(_encoding.GetBytes(v));
            outBuf.Write(_encoding.GetBytes("\r\n"));

            if (firstAnnotationLine)
            {
                // Blank line after the first annotation line; the header is written once.
                outBuf.Write(_encoding.GetBytes("\r\n"));
                _hadAnotation = true;
            } // if
        }     // fun