Example #1
0
        /// <summary>
        /// Generates a language profile from a text file and writes it, serialized
        /// as JSON, to a file whose path is the language code itself.
        /// </summary>
        /// <remarks>
        /// <code>
        /// usage: --genprofile-text -l [language code] [text file path]
        /// </code>
        /// Invalid arguments are reported on standard error and the method returns
        /// without doing any work. A <see cref="NotSupportedException"/>,
        /// <see cref="IOException"/> or <c>LangDetectException</c> raised while
        /// generating or writing the profile is reported on standard error and is
        /// not rethrown; any other exception propagates to the caller.
        /// </remarks>
        private void generateProfileFromText()
        {
            if (arglist.Count != 1)
            {
                Console.Error.WriteLine("Need to specify text file path");
                return;
            }

            string file = arglist[0];
            if (!File.Exists(file))
            {
                Console.Error.WriteLine("Need to specify existing text file path");
                return;
            }

            string lang = Get("lang");
            if (lang == null)
            {
                Console.Error.WriteLine("Need to specify language code(-l)");
                return;
            }

            try
            {
                LangProfile profile = GenProfile.LoadFromText(lang, file);
                profile.OmitLessFreq();

                // The output path is just the language code, so a relative path
                // is resolved against the current working directory.
                File.WriteAllText(lang, JsonSerializer.Serialize(profile));
            }
            catch (Exception e) when (e is NotSupportedException || e is IOException || e is LangDetectException)
            {
                // Report on stderr like the argument errors above. Debug.WriteLine
                // is compiled out when DEBUG is not defined, which would hide the
                // failure completely in release builds.
                Console.Error.WriteLine(e);
            }
        }
Example #2
0
        /// <summary>
        /// Verifies that adding the same string to a profile twice raises its
        /// frequency count from 1 to 2.
        /// </summary>
        public void testAdd()
        {
            LangProfile profile = new LangProfile("en");

            // Expected value goes first so that a failure message reports
            // "expected" and "actual" the right way round.
            profile.Add("a");
            Assert.AreEqual(1, (int)profile.Freq["a"]);
            profile.Add("a");
            Assert.AreEqual(2, (int)profile.Freq["a"]);

            // No assertion follows: this only checks that the call completes.
            profile.OmitLessFreq();
        }
Example #3
0
        /// <summary>
        /// Generates a language profile from a Wikipedia abstract database file for
        /// every language named in the argument list and writes each one, serialized
        /// as JSON, to <c>[directory]/profiles/[language]</c>.
        /// </summary>
        /// <remarks>
        /// <code>
        /// usage: --genprofile -d [abstracts directory] [language names]
        /// </code>
        /// A language whose abstract file cannot be found is reported on standard
        /// error and skipped. A <see cref="NotSupportedException"/>,
        /// <see cref="IOException"/> or <c>LangDetectException</c> raised for one
        /// language is reported on standard error and processing continues with the
        /// next language; any other exception propagates to the caller.
        /// </remarks>
        public void GenerateProfile()
        {
            string directory = Get("directory");

            foreach (string lang in arglist)
            {
                string file = SearchFile(directory, lang + "wiki-.*-abstract\\.xml.*");
                if (file == null)
                {
                    Console.Error.WriteLine("Not Found abstract xml : lang = " + lang);
                    continue;
                }

                try
                {
                    LangProfile profile = GenProfile.LoadFromWikipediaAbstract(lang, file);
                    profile.OmitLessFreq();

                    // Reuse the directory read before the loop instead of looking
                    // the option up again for every language.
                    string profilePath = directory + "/profiles/" + lang;
                    File.WriteAllText(profilePath, JsonSerializer.Serialize(profile));
                }
                catch (Exception e) when (e is NotSupportedException || e is IOException || e is LangDetectException)
                {
                    // Report on stderr like the "Not Found" case above.
                    // Debug.WriteLine is compiled out when DEBUG is not defined,
                    // which would hide the failure completely in release builds.
                    Console.Error.WriteLine(e);
                }
            }
        }
Example #4
0
        /// <summary>
        /// Adds thirteen grams five times each and one further gram once, then checks
        /// which entries are still present after <c>OmitLessFreq</c> has run.
        /// </summary>
        public void testOmitLessFreq()
        {
            LangProfile profile = new LangProfile("en");

            string[] grams = "a b c \u3042 \u3044 \u3046 \u3048 \u304a \u304b \u304c \u304d \u304e \u304f".Split(" ");
            for (int i = 0; i < 5; ++i)
            {
                foreach (string g in grams)
                {
                    profile.Add(g);
                }
            }
            profile.Add("\u3050");

            // Expected value goes first so that a failure message reports
            // "expected" and "actual" the right way round.
            Assert.AreEqual(5, (int)profile.Freq["a"]);
            Assert.AreEqual(5, (int)profile.Freq["\u3042"]);
            Assert.AreEqual(1, (int)profile.Freq["\u3050"]);

            profile.OmitLessFreq();

            Assert.IsNull(profile.Freq["a"]);      // omitted
            Assert.AreEqual(5, (int)profile.Freq["\u3042"]);
            Assert.IsNull(profile.Freq["\u3050"]); // omitted
        }
Example #5
0
        /// <summary>
        /// Calls <c>OmitLessFreq</c> on a profile created with the parameterless
        /// constructor, to which nothing has been added. There are no assertions,
        /// so the test passes as long as the call does not throw.
        /// </summary>
        public void testOmitLessFreqIllegally()
        {
            var emptyProfile = new LangProfile();

            // Expected to be ignored rather than fail.
            emptyProfile.OmitLessFreq();
        }