} // fun

/// <summary>
/// Converts the XML book file <paramref name="fileName"/> into a text file.
/// The output path is obtained from getOutputFileName(fileName); the file is
/// created with FileMode.Create, so an existing file is overwritten.
/// </summary>
/// <param name="fileName">Path of the XML file to convert.</param>
public static void convertXml2text(string fileName)
{
    _date = _bookName = _sequence = null;
    _hadAnotation = false;

    AuthorData aData = new AuthorData();

    // Detection pass: chooses _encoding and may already collect author data into aData.
    determineEncoding(fileName, aData);

    // The detection pass walks the document through parseChild/processLeafElement,
    // which record "date"/"sequence" in these static markers before checking
    // whether a writer is present. Clear them so the real pass does not skip
    // the first date/sequence value it meets.
    _date = null;
    _sequence = null;
    _hadAnotation = false;

    XmlDocument doc = new XmlDocument();
    using (StreamReader sr = new StreamReader(fileName, _encoding, true))
    {
        doc.Load(sr);
    } // end block

    string outFileName = getOutputFileName(fileName);
    using (FileStream stream = new FileStream(outFileName, FileMode.Create))
    {
        using (BinaryWriter writer = new BinaryWriter(stream))
        {
            // Leaving the using blocks flushes and closes the writer and the stream,
            // so no explicit Flush()/Close() is needed.
            parseChild(null, doc.ChildNodes, 0, writer, aData);
        } // end block
    } // end block
} // fun
} // fun

/// <summary>
/// Recursively walks the nodes in <paramref name="child"/>. A node that has exactly
/// one child, which itself has no children, is treated as a leaf and (when it is an
/// element) handed to processLeafElement; any other node with children is descended
/// into with its own name as the new parent name.
/// </summary>
/// <param name="parentName">Name of the node owning <paramref name="child"/>; null at the root.</param>
/// <param name="child">Nodes to visit; null is accepted and ignored.</param>
/// <param name="level">Current nesting depth, increased by one per recursion.</param>
/// <param name="outBuf">Output writer, or null when only collecting author data.</param>
/// <param name="aData">Author data accumulator passed through to processLeafElement.</param>
private static void parseChild(string parentName, XmlNodeList child, int level, BinaryWriter outBuf, AuthorData aData)
{
    if (child == null)
    {
        return;
    }

    foreach (XmlNode current in child)
    {
        // Without a writer we are only probing the file (encoding detection);
        // stop as soon as the author data has been collected.
        if (outBuf == null && aData.hasData())
        {
            return;
        }

        string elementName = current.Name;
        if (_ignoreElementsMap.Contains(elementName) || !current.HasChildNodes)
        {
            continue;
        }

        XmlNodeList nested = current.ChildNodes;
        bool isLeaf = nested.Count == 1 && !nested.Item(0).HasChildNodes;

        if (!isLeaf)
        {
            // More than one child, or the single child has children of its own.
            parseChild(elementName, nested, level + 1, outBuf, aData);
            continue;
        }

        // Only element nodes are processed as leaves; every other node type is skipped.
        if (current.NodeType == XmlNodeType.Element)
        {
            processLeafElement(parentName, current, level, outBuf, aData);
        }
    } // loop
} // fun
} // fun

/// <summary>
/// Chooses the text encoding for <paramref name="fileName"/> and stores it in _encoding.
/// The file is opened with the default StreamReader (byte-order-mark detection),
/// parsed once without a writer so that author data is collected into
/// <paramref name="aData"/>, and, if the result is UTF-8 but the collected first or
/// last name fails isCorrectEncoding, the encoding falls back to code page 1251
/// and the collected author data is cleared.
/// </summary>
/// <param name="fileName">Path of the XML file to inspect.</param>
/// <param name="aData">Receives the author data found during the probing pass.</param>
private static void determineEncoding(string fileName, AuthorData aData)
{
    using (StreamReader sr = new StreamReader(fileName))
    {
        XmlDocument doc = new XmlDocument();
        doc.Load(sr);

        // CurrentEncoding only reflects byte-order-mark detection after the reader
        // has actually read from the stream, so it must be queried after Load.
        _encoding = sr.CurrentEncoding;

        // Probing pass: a null writer makes parseChild stop once aData has data.
        parseChild(null, doc.ChildNodes, 0, null, aData);
    } // end block

    // Validate that name and last name decoded correctly. Compare by code page so
    // that any UTF-8 encoding instance is recognised, not only the shared one.
    if (_encoding.CodePage == Encoding.UTF8.CodePage)
    {
        bool badName = aData.getName() != null && isCorrectEncoding(aData.getName()) == false;
        bool badLastName = aData.getLastName() != null && isCorrectEncoding(aData.getLastName()) == false;

        if (badName || badLastName)
        {
            _encoding = Encoding.GetEncoding(1251);
            aData.clear();
        } // if
    } // if
} // fun
} // fun

/// <summary>
/// Handles a leaf element (an element whose single child carries the value).
/// Metadata elements (genre, first-name, last-name, middle-name, book-title) are
/// stored in <paramref name="aData"/> the first time they appear and are never
/// written directly. Other values are written to <paramref name="outBuf"/> as one
/// line of text encoded with _encoding, preceded by the author data on the first write.
/// </summary>
/// <param name="parentName">Name of the parent node; "annotation" triggers a header line.</param>
/// <param name="node">The leaf element; its first child's Value is the text.</param>
/// <param name="level">Nesting depth (not used by this method).</param>
/// <param name="outBuf">Output writer, or null when nothing may be written.</param>
/// <param name="aData">Author data accumulator.</param>
private static void processLeafElement(string parentName, XmlNode node, int level, BinaryWriter outBuf, AuthorData aData)
{
    string k = node.Name;
    string v = node.ChildNodes.Item(0).Value;
    if (v == null || v.Trim().Length < 1)
    {
        return;
    }

    // Metadata is captured only on first appearance; later duplicates are dropped.
    switch (k)
    {
        case "genre":
            if (aData.getGenre() == null)
            {
                aData.setGenre(v);
            }
            return;
        case "first-name":
            if (aData.getName() == null)
            {
                aData.setName(v);
            }
            return;
        case "last-name":
            if (aData.getLastName() == null)
            {
                aData.setLastName(v);
            }
            return;
        case "middle-name":
            if (aData.getMiddleName() == null)
            {
                aData.setMiddleName(v);
            }
            return;
        case "book-title":
            if (aData.getBookTitle() == null)
            {
                aData.setBookTitle(v);
            }
            return;
        case "date":
            if (_date != null)
            {
                return;
            }
            _date = k; // stores the element name as an "already seen" marker
            break;
        case "year":
            if (_date != null)
            {
                return;
            }
            break;
        case "sequence":
            if (_sequence != null)
            {
                return;
            }
            _sequence = k; // stores the element name as an "already seen" marker
            break;
    } // end switch

    if (outBuf == null)
    {
        return; // we can't write
    }

    // Write the author data ahead of the first written value.
    if (aData.isWroteData() == false && aData.hasData())
    {
        outBuf.Write(_encoding.GetBytes(aData.ToString()));
        aData.isWroteData(true);
    } // if

    // Decide whether the value is skipped BEFORE emitting any header or prefix,
    // otherwise a skipped link would leave a dangling "annotation" header or
    // "date - " prefix in the output.
    v = divideLine(v.Trim());
    if (v.StartsWith("http://") || v.StartsWith("www."))
    {
        return;
    }

    bool firstAnnotationLine = parentName == "annotation" && _hadAnotation == false;

    // All text goes through _encoding.GetBytes: BinaryWriter.Write(string) would
    // prepend a length prefix and ignore _encoding, corrupting the text output.
    if (firstAnnotationLine)
    {
        outBuf.Write(_encoding.GetBytes(parentName + "\r\n"));
    }

    if (k == "sequence" || k == "date")
    {
        outBuf.Write(_encoding.GetBytes(k + " - "));
    }

    outBuf.Write(_encoding.GetBytes(v));
    outBuf.Write(_encoding.GetBytes("\r\n"));

    if (firstAnnotationLine)
    {
        // Blank line after the first annotation paragraph; header is written only once.
        outBuf.Write(_encoding.GetBytes("\r\n"));
        _hadAnotation = true;
    } // if
} // fun