/// <summary>
/// Command-line entry point. Dispatches on the first argument (the command name)
/// and forwards the remaining arguments to the matching processing routine.
/// </summary>
/// <remarks>
/// Argument layouts, as consumed below:
/// <list type="bullet">
/// <item><description><c>otsikot &lt;count&gt; &lt;inputXml&gt; [outputFile]</c></description></item>
/// <item><description><c>kaannokset &lt;count&gt; &lt;language&gt; &lt;inputXml&gt;</c></description></item>
/// <item><description><c>teejson &lt;count&gt; &lt;language&gt; &lt;inputXml&gt; &lt;bannedWordsList&gt; &lt;outputFile&gt;</c></description></item>
/// <item><description><c>mehusta</c> followed by four arguments passed through unchanged to <c>JsonProcess.JuiceJsonFiles</c></description></item>
/// </list>
/// A wrong argument count or a count that is not a valid integer prints the help
/// text of the command and returns. An unrecognised command does nothing.
/// </remarks>
/// <param name="args">Raw command-line arguments; <c>args[0]</c> is the command name.</param>
static void Main(string[] args)
{
    if (args.Length < 1)
    {
        Console.WriteLine("Komento puuttuu!");
        return;
    }

    // Parsed value of args[1] for the commands that take a count.
    int howMany;

    switch (args[0])
    {
        case "otsikot":
            // TryParse instead of Parse: a malformed count shows the help text
            // rather than terminating the program with an unhandled exception.
            if ((args.Length != 3 && args.Length != 4) || !int.TryParse(args[1], out howMany))
            {
                PrintHeadersHelp();
                return;
            }

            if (args.Length == 3)
            {
                BigXMLProcess.PrintNFirst(inputXMLfilePath: args[2], howMany: howMany);
            }
            else
            {
                BigXMLProcess.PrintNFirst(inputXMLfilePath: args[2], howMany: howMany, outputFilePath: args[3]);
            }
            break;

        case "kaannokset":
            if (args.Length != 4 || !int.TryParse(args[1], out howMany))
            {
                PrintTranslationsHelp();
                return;
            }

            BigXMLProcess.PrintNFirstTranslations(
                inputXMLfilePath: args[3],
                howMany: howMany,
                translationLanguage: RemoveQuotesIfNeeded(args[2]));
            break;

        case "teejson":
            if (args.Length != 6 || !int.TryParse(args[1], out howMany))
            {
                PrintCreateJSONHelp();
                return;
            }

            // NOTE(review): unlike "kaannokset", the language argument is passed
            // without RemoveQuotesIfNeeded here - confirm this is intentional.
            BigXMLProcess.CreateNFirstTranslationsJSON(
                inputXMLfilePath: args[3],
                howMany: howMany,
                translationLanguage: args[2],
                bannedWordsListPath: args[4],
                outputFilePath: args[5]);
            break;

        case "mehusta":
            if (args.Length != 5)
            {
                PrintCreateJuicedJSONsHelp();
                return;
            }

            JsonProcess.JuiceJsonFiles(args[1], args[2], args[3], args[4]);
            break;
    }
}
/// <summary>
/// Positions <paramref name="reader"/> on a node whose <c>Name</c> equals
/// <paramref name="name"/> (ordinal comparison).
/// </summary>
/// <remarks>
/// The node the reader is currently on is tested first, so the reader does not move
/// when it already matches. Otherwise the reader is advanced repeatedly with the
/// single-argument <c>ReadToElement</c> overload (defined elsewhere) until a match is
/// found or that overload returns <c>false</c>.
/// </remarks>
/// <param name="reader">Reader to inspect and advance.</param>
/// <param name="name">Node name to look for.</param>
/// <returns><c>true</c> if the reader ends up on a matching node; otherwise <c>false</c>.</returns>
public static bool ReadToElement(XmlReader reader, string name)
{
    bool found = string.CompareOrdinal(reader.Name, name) == 0;

    while (!found && BigXMLProcess.ReadToElement(reader))
    {
        found = string.CompareOrdinal(reader.Name, name) == 0;
    }

    return found;
}
/// <summary>
/// Writes the titles of the first <paramref name="howMany"/> pages found in the XML
/// file to the console, one title per line.
/// </summary>
/// <param name="inputXMLfilePath">Path of the XML file to read.</param>
/// <param name="howMany">Maximum number of titles to print.</param>
public static void PrintNFirst(string inputXMLfilePath, int howMany)
{
    using (XmlReader reader = XmlReader.Create(inputXMLfilePath))
    {
        reader.MoveToContent();

        int count = 0;

        // Test the limit first so the reader is not advanced to yet another page
        // once enough titles have been printed.
        while (count < howMany && BigXMLProcess.ReadToElement(reader, pageString))
        {
            // No title could be reached (presumably the input ended): stop here,
            // because ReadElementContentAsString throws when the reader is not
            // positioned on an element.
            if (!BigXMLProcess.ReadToElement(reader, titleString))
            {
                break;
            }

            string title = reader.ReadElementContentAsString();
            Console.WriteLine(title);
            count++;
        }
    }
}
/// <summary>
/// Prints "<c>title - translation, translation, ...</c>" lines for the first
/// <paramref name="howMany"/> pages that are in the wanted namespace and whose text
/// yields a match for <paramref name="translationLanguage"/>.
/// </summary>
/// <remarks>
/// Only pages that produce a match count towards <paramref name="howMany"/>; pages in
/// other namespaces or without a suitable match are skipped silently.
/// </remarks>
/// <param name="inputXMLfilePath">Path of the XML file to read.</param>
/// <param name="howMany">Maximum number of matching pages to print.</param>
/// <param name="translationLanguage">Language passed to <c>ReturnSuitableMatch</c> and <c>CleanAndSplitSuitable</c>.</param>
public static void PrintNFirstTranslations(string inputXMLfilePath, int howMany, string translationLanguage)
{
    using (XmlReader reader = XmlReader.Create(inputXMLfilePath))
    {
        reader.MoveToContent();

        int count = 0;

        // Test the limit first so the reader is not advanced to yet another page
        // once enough entries have been printed.
        while (count < howMany && BigXMLProcess.ReadToElement(reader, pageString))
        {
            // Each ReadToElement result is checked: if the element cannot be reached
            // (presumably the input ended), stop instead of letting
            // ReadElementContentAsString throw on a non-element position.
            if (!BigXMLProcess.ReadToElement(reader, titleString))
            {
                break;
            }
            string title = reader.ReadElementContentAsString();

            if (!BigXMLProcess.ReadToElement(reader, namespaceString))
            {
                break;
            }
            string nameSpaceNumber = reader.ReadElementContentAsString();

            if (!nameSpaceNumber.Equals(wantedNsNumber))
            {
                continue;
            }

            if (!BigXMLProcess.ReadToElement(reader, textString))
            {
                break;
            }
            string text = reader.ReadElementContentAsString();

            // Item1 tells whether a suitable match was found, Item2 carries the matched text.
            var possibleMatch = ReturnSuitableMatch(text, translationLanguage);
            if (!possibleMatch.Item1)
            {
                continue;
            }

            string[] splitted = CleanAndSplitSuitable(possibleMatch.Item2, translationLanguage);
            Console.WriteLine($"{title} - {string.Join(", ", splitted)}");
            count++;
        }
    }
}
/// <summary>
/// Collects translations for the first <paramref name="howMany"/> matching pages of
/// the XML file and writes them, together with version and license metadata, as a
/// single JSON object to <paramref name="outputFilePath"/>.
/// </summary>
/// <remarks>
/// Nothing is read or written when the blocklist cannot be loaded. Pages whose title
/// is blocked, that are outside the wanted namespace, or that yield no suitable match
/// are skipped and do not count towards <paramref name="howMany"/>.
/// </remarks>
/// <param name="inputXMLfilePath">Path of the XML file to read; its file name is also passed to <c>ParseDateAsVersion</c>.</param>
/// <param name="howMany">Maximum number of translation entries to collect.</param>
/// <param name="translationLanguage">Language passed to <c>ReturnSuitableMatch</c> and <c>CleanAndSplitSuitable</c>.</param>
/// <param name="bannedWordsListPath">Path handed to <c>Blocklist.LoadBlocklist</c>.</param>
/// <param name="outputFilePath">Path of the JSON file to create.</param>
public static void CreateNFirstTranslationsJSON(string inputXMLfilePath, int howMany, string translationLanguage, string bannedWordsListPath, string outputFilePath)
{
    // Try to init blocklist for banned words
    if (!Blocklist.LoadBlocklist(bannedWordsListPath))
    {
        Console.WriteLine($"Cannot load blocklist from {bannedWordsListPath}");
        return;
    }

    Console.WriteLine($"Blocklist loaded from {bannedWordsListPath} with {Blocklist.GetWordCount()} words");

    using (XmlReader reader = XmlReader.Create(inputXMLfilePath))
    {
        reader.MoveToContent();

        int count = 0;

        // Sorted dictionary so that entries are written in sorted order (this hopefully
        // improves compression ratio). The ordinal comparer keeps that order identical
        // on every machine and prevents distinct titles from being treated as the same
        // key, which the default culture-sensitive comparer can do.
        SortedDictionary<string, object> translations = new SortedDictionary<string, object>(StringComparer.Ordinal);

        // Add version number and license info to JSON file in case someone needs this kind of meta info
        translations["_version"] = ParseDateAsVersion(Path.GetFileName(inputXMLfilePath));
        translations["_license"] = licenseName;
        translations["_licenseUrl"] = licenseUrl;

        // Test the limit first so the reader is not advanced to yet another page
        // once enough entries have been collected.
        while (count < howMany && BigXMLProcess.ReadToElement(reader, pageString))
        {
            // Each ReadToElement result is checked: if the element cannot be reached
            // (presumably the input ended), stop instead of letting
            // ReadElementContentAsString throw on a non-element position.
            if (!BigXMLProcess.ReadToElement(reader, titleString))
            {
                break;
            }
            string title = reader.ReadElementContentAsString();

            if (Blocklist.IsWordBlocked(title))
            {
                Console.WriteLine($"Skipping word {title} since it is in blocklist");
                continue;
            }

            if (!BigXMLProcess.ReadToElement(reader, namespaceString))
            {
                break;
            }
            string nameSpaceNumber = reader.ReadElementContentAsString();

            if (!nameSpaceNumber.Equals(wantedNsNumber))
            {
                continue;
            }

            if (!BigXMLProcess.ReadToElement(reader, textString))
            {
                break;
            }
            string text = reader.ReadElementContentAsString();

            // Item1 tells whether a suitable match was found, Item2 carries the matched text.
            var possibleMatch = ReturnSuitableMatch(text, translationLanguage);
            if (!possibleMatch.Item1)
            {
                continue;
            }

            string[] splitted = CleanAndSplitSuitable(possibleMatch.Item2, translationLanguage);
            translations[title] = new Translation() { t = splitted };
            count++;
        }

        Console.WriteLine($"JSON {outputFilePath} contains {count} entries");

        // Output everything to single file
        using (StreamWriter file = File.CreateText(outputFilePath))
        {
            JsonSerializer serializer = new JsonSerializer();
            serializer.Serialize(file, translations);
        }
    }
}