Beispiel #1
0
        /// <summary>
        /// Command-line entry point. The first argument selects the command
        /// ("otsikot", "kaannokset", "teejson" or "mehusta"); the remaining
        /// arguments are that command's positional parameters.
        /// </summary>
        /// <param name="args">Command name followed by the command's arguments.</param>
        static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                Console.WriteLine("Komento puuttuu!");
                return;
            }

            string command = args[0];

            // Parsed entry count shared by the commands that take one as their first parameter.
            int howMany;

            switch (command)
            {
                case "otsikot":
                    // Expects <count> <inputXml> and an optional <outputFile>.
                    // A non-numeric count is treated like a wrong argument count: show the help
                    // instead of crashing with an unhandled FormatException.
                    if ((args.Length != 3 && args.Length != 4) || !int.TryParse(args[1], out howMany))
                    {
                        PrintHeadersHelp();
                        return;
                    }

                    if (args.Length == 3)
                    {
                        BigXMLProcess.PrintNFirst(inputXMLfilePath: args[2], howMany: howMany);
                    }
                    else
                    {
                        BigXMLProcess.PrintNFirst(inputXMLfilePath: args[2], howMany: howMany, outputFilePath: args[3]);
                    }

                    break;

                case "kaannokset":
                    // Expects <count> <language> <inputXml>.
                    if (args.Length != 4 || !int.TryParse(args[1], out howMany))
                    {
                        PrintTranslationsHelp();
                        return;
                    }

                    BigXMLProcess.PrintNFirstTranslations(inputXMLfilePath: args[3], howMany: howMany, translationLanguage: RemoveQuotesIfNeeded(args[2]));
                    break;

                case "teejson":
                    // Expects <count> <language> <inputXml> <bannedWordsList> <outputFile>.
                    if (args.Length != 6 || !int.TryParse(args[1], out howMany))
                    {
                        PrintCreateJSONHelp();
                        return;
                    }

                    BigXMLProcess.CreateNFirstTranslationsJSON(inputXMLfilePath: args[3], howMany: howMany, translationLanguage: args[2], bannedWordsListPath: args[4], outputFilePath: args[5]);
                    break;

                case "mehusta":
                    // Expects four positional arguments that are passed through unchanged.
                    if (args.Length != 5)
                    {
                        PrintCreateJuicedJSONsHelp();
                        return;
                    }

                    JsonProcess.JuiceJsonFiles(args[1], args[2], args[3], args[4]);
                    break;

                default:
                    // Previously an unrecognised command exited silently; tell the user what went wrong.
                    Console.WriteLine("Tuntematon komento: " + command);
                    break;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Positions <paramref name="reader"/> on a node whose name equals <paramref name="name"/>.
        /// The current node is checked first; after that the reader is advanced with the
        /// single-argument <c>ReadToElement</c> overload until a matching name is seen.
        /// </summary>
        /// <param name="reader">Reader to inspect and advance.</param>
        /// <param name="name">Node name to look for (compared ordinally).</param>
        /// <returns>
        /// <c>true</c> when the reader ends up on a node with the requested name;
        /// <c>false</c> when the single-argument overload reports that it could not advance.
        /// </returns>
        public static bool ReadToElement(XmlReader reader, string name)
        {
            // The reader may already be sitting on the wanted node.
            bool positioned = string.CompareOrdinal(reader.Name, name) == 0;

            // Keep advancing until the name matches or there is nothing more to read.
            while (!positioned && BigXMLProcess.ReadToElement(reader))
            {
                positioned = string.CompareOrdinal(reader.Name, name) == 0;
            }

            return positioned;
        }
Beispiel #3
0
        /// <summary>
        /// Writes to the console the title of up to <paramref name="howMany"/> pages read from the given XML file.
        /// Pages and titles are located by the element names held in <c>pageString</c> and <c>titleString</c>.
        /// </summary>
        /// <param name="inputXMLfilePath">Path of the XML file to read.</param>
        /// <param name="howMany">Maximum number of titles to print.</param>
        public static void PrintNFirst(string inputXMLfilePath, int howMany)
        {
            using (XmlReader reader = XmlReader.Create(inputXMLfilePath))
            {
                reader.MoveToContent();

                int count = 0;

                // Check the quota before advancing, so the reader does not scan on to one more
                // page element after enough titles have already been printed.
                while (count < howMany && BigXMLProcess.ReadToElement(reader, pageString))
                {
                    // If no title element follows, the input is exhausted; calling
                    // ReadElementContentAsString here would throw.
                    if (!BigXMLProcess.ReadToElement(reader, titleString))
                    {
                        break;
                    }

                    string title = reader.ReadElementContentAsString();
                    Console.WriteLine(title);
                    count++;
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Writes to the console up to <paramref name="howMany"/> lines of the form
        /// "title - translation, translation, ..." for pages whose namespace value equals
        /// <c>wantedNsNumber</c> and whose text yields a match from <c>ReturnSuitableMatch</c>.
        /// </summary>
        /// <param name="inputXMLfilePath">Path of the XML file to read.</param>
        /// <param name="howMany">Maximum number of matching pages to print.</param>
        /// <param name="translationLanguage">Language passed on to the matching and cleaning helpers.</param>
        public static void PrintNFirstTranslations(string inputXMLfilePath, int howMany, string translationLanguage)
        {
            using (XmlReader reader = XmlReader.Create(inputXMLfilePath))
            {
                reader.MoveToContent();

                int count = 0;

                // Check the quota before advancing, so the reader does not scan on to one more
                // page element after enough entries have already been printed.
                while (count < howMany && BigXMLProcess.ReadToElement(reader, pageString))
                {
                    // Each lookup below can run off the end of the document; in that case stop
                    // instead of letting ReadElementContentAsString throw.
                    if (!BigXMLProcess.ReadToElement(reader, titleString))
                    {
                        break;
                    }

                    string title = reader.ReadElementContentAsString();

                    if (!BigXMLProcess.ReadToElement(reader, namespaceString))
                    {
                        break;
                    }

                    string nameSpaceNumber = reader.ReadElementContentAsString();
                    if (!nameSpaceNumber.Equals(wantedNsNumber))
                    {
                        // Page belongs to a namespace we are not interested in.
                        continue;
                    }

                    if (!BigXMLProcess.ReadToElement(reader, textString))
                    {
                        break;
                    }

                    string text = reader.ReadElementContentAsString();

                    // Item1 tells whether a match was found, Item2 carries the matched content.
                    var possibleMatch = ReturnSuitableMatch(text, translationLanguage);
                    if (possibleMatch.Item1)
                    {
                        string[] splitted = CleanAndSplitSuitable(possibleMatch.Item2, translationLanguage);

                        Console.WriteLine($"{title} - {string.Join(", ", splitted)}");
                        count++;
                    }
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Collects up to <paramref name="howMany"/> title-to-translations entries from the XML file
        /// and serializes them, together with version and license metadata, into a single JSON file.
        /// Titles reported as blocked by <c>Blocklist</c> are skipped.
        /// </summary>
        /// <param name="inputXMLfilePath">Path of the XML file to read; its file name is also used to derive the version entry.</param>
        /// <param name="howMany">Maximum number of translation entries to collect.</param>
        /// <param name="translationLanguage">Language passed on to the matching and cleaning helpers.</param>
        /// <param name="bannedWordsListPath">Path of the blocklist file; nothing is produced if it cannot be loaded.</param>
        /// <param name="outputFilePath">Path of the JSON file to create.</param>
        public static void CreateNFirstTranslationsJSON(string inputXMLfilePath, int howMany, string translationLanguage, string bannedWordsListPath, string outputFilePath)
        {
            // Try to init blocklist for banned words
            if (!Blocklist.LoadBlocklist(bannedWordsListPath))
            {
                Console.WriteLine($"Cannot load blocklist from {bannedWordsListPath}");
                return;
            }

            Console.WriteLine($"Blocklist loaded from {bannedWordsListPath} with {Blocklist.GetWordCount()} words");

            using (XmlReader reader = XmlReader.Create(inputXMLfilePath))
            {
                reader.MoveToContent();

                int count = 0;

                // Use sorted dictionary because order of entries will be sorted when creating JSON (this hopefully improves compression ratio)
                SortedDictionary <string, object> translations = new SortedDictionary <string, object>();

                // Add version number and license info to JSON file in case someone needs this kind of meta info
                translations["_version"]    = ParseDateAsVersion(Path.GetFileName(inputXMLfilePath));
                translations["_license"]    = licenseName;
                translations["_licenseUrl"] = licenseUrl;

                // Check the quota before advancing, so the reader does not scan on to one more
                // page element after enough entries have already been collected.
                while (count < howMany && BigXMLProcess.ReadToElement(reader, pageString))
                {
                    // Each lookup below can run off the end of the document; in that case stop
                    // collecting (and still write what was gathered) instead of letting
                    // ReadElementContentAsString throw and lose all the work.
                    if (!BigXMLProcess.ReadToElement(reader, titleString))
                    {
                        break;
                    }

                    string title = reader.ReadElementContentAsString();

                    if (Blocklist.IsWordBlocked(title))
                    {
                        Console.WriteLine($"Skipping word {title} since it is in blocklist");
                        continue;
                    }

                    if (!BigXMLProcess.ReadToElement(reader, namespaceString))
                    {
                        break;
                    }

                    string nameSpaceNumber = reader.ReadElementContentAsString();
                    if (!nameSpaceNumber.Equals(wantedNsNumber))
                    {
                        // Page belongs to a namespace we are not interested in.
                        continue;
                    }

                    if (!BigXMLProcess.ReadToElement(reader, textString))
                    {
                        break;
                    }

                    string text = reader.ReadElementContentAsString();

                    // Item1 tells whether a match was found, Item2 carries the matched content.
                    var possibleMatch = ReturnSuitableMatch(text, translationLanguage);
                    if (possibleMatch.Item1)
                    {
                        string[] splitted = CleanAndSplitSuitable(possibleMatch.Item2, translationLanguage);

                        translations[title] = new Translation()
                        {
                            t = splitted
                        };
                        count++;
                    }
                }

                Console.WriteLine($"JSON {outputFilePath} contains {count} entries");

                // Output everything to single file
                using (StreamWriter file = File.CreateText(outputFilePath))
                {
                    JsonSerializer serializer = new JsonSerializer();
                    serializer.Serialize(file, translations);
                }
            }
        }