Exemplo n.º 1
0
        // Runs an arabizi sentence through TextFrequency.ReplaceArzByArFromBidict and checks
        // the returned markup: "Hadi" is expected as an Arabic word inside a 'notranslate'
        // span, and both "kokchi" as "kolchi" inside a 'notranslate BUTTRANSLATEPERL' span.
        public void ut_171130_test_ReplaceArzByArFromBidict_with_BUTTRANSLATEPERL()
        {
            // Arrange
            const string input    = "Hadi bant l9ahba kokchi katbakkih bl3ani, j'ai dit kokchi";
            const string expected = "<span class='notranslate'>هذه</span> bant l9ahba <span class='notranslate BUTTRANSLATEPERL'>kolchi</span> katbakkih bl3ani, j'ai dit <span class='notranslate BUTTRANSLATEPERL'>kolchi</span>";
            var textFrequency     = new TextFrequency(@"C:\Users\Yahia Alaoui\Desktop\DEV\17028OADRJNLPARBZ\");

            // Act
            string actual = textFrequency.ReplaceArzByArFromBidict(input);

            // Assert
            Assert.AreEqual(expected, actual);
        }
Exemplo n.º 2
0
        // Checks ReplaceArzByArFromBidict on a phrase holding both a one-word entry and a
        // two-word entry: "maroc" is expected as an Arabic word and "maroc telecom" as
        // "IAM", each wrapped in a 'notranslate' span.
        public void ut_171025_test_ReplaceArzByArFromBidict_on_two_words_entries_in_a_phrase()
        {
            // Arrange
            const string input    = "Le maroc zwyine wa maroc telecom mjahda";
            const string expected = "Le <span class='notranslate'>المغرب</span> zwyine wa <span class='notranslate'>IAM</span> mjahda";
            var textFrequency     = new TextFrequency(@"C:\Users\Yahia Alaoui\Desktop\DEV\17028OADRJNLPARBZ\");

            // Act
            string actual = textFrequency.ReplaceArzByArFromBidict(input);

            // Assert
            Assert.AreEqual(expected, actual);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Hands every entry of the character map to <c>PrintData</c> together with its
        /// share of <paramref name="sum"/> (a percentage rounded to two decimals), then
        /// does the same for the unused characters when there are any.
        /// </summary>
        /// <param name="freq">The character frequency map.</param>
        /// <param name="sum">Sum of all characters; used as the percentage base.</param>
        /// <param name="unused">Sum of unused characters; reported only when positive.</param>
        static void ShowData(TextFrequency freq, int sum, int unused)
        {
            // Promote the total once so every division below is floating-point.
            double total = sum;

            foreach (var entry in freq.CharacterMap)
            {
                var share = Math.Round(entry.Value / total * 100, 2);

                PrintData(entry.Key.ToString(), entry.Value, share);
            }

            // Nothing left over: no "Unused" line.
            if (unused <= 0)
            {
                return;
            }

            var unusedShare = Math.Round(unused / total * 100, 2);

            PrintData("Unused", unused, unusedShare);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Entry point. Feeds every command-line argument into a <c>TextFrequency</c>,
        /// computes the total length of the arguments as if they were joined by single
        /// spaces, and passes the result to <c>ShowData</c>.
        /// </summary>
        /// <param name="args">The arguments from CLI.</param>
        static void Main(string[] args)
        {
            var freq = new TextFrequency();
            var sum  = 0;

            foreach (var arg in args)   // treat all args as 1 string
            {
                freq.AddString(arg);
                sum += arg.Length;
            }

            // Count the spaces between args. There is one separator between each pair of
            // consecutive arguments, so nothing is added for zero or one argument; this
            // keeps the total from dropping to -1 when the program runs without arguments.
            if (args.Length > 1)
            {
                sum += args.Length - 1;
            }

            // Whatever the frequency map did not count is reported as unused.
            ShowData(freq, sum, sum - freq.CharacterCount);
        }
Exemplo n.º 5
0
        // Walks TextFrequency.AddString through a series of inputs and checks the running
        // per-character counts, the number of distinct keys and the overall counted total.
        public void TestAddString()
        {
            var counter = new TextFrequency("a");

            // One 'a' from the constructor plus two more.
            counter.AddString("aa");
            Assert.Equal(3, counter.CharacterMap['a']);

            // A blank-only string is expected to leave the 'a' count untouched.
            counter.AddString("  ");
            Assert.Equal(3, counter.CharacterMap['a']);

            // Surrounding blanks: only the 'b' is expected to be recorded.
            counter.AddString("  b  ");
            Assert.Equal(1, counter.CharacterMap['b']);

            // The expectations below (5 keys, 8 counted characters) correspond to
            // a, b, c, d, e only, i.e. the blanks and operators are expected to be skipped.
            counter.AddString("c * d + e - a");
            Assert.Equal(5, counter.CharacterMap.Count);
            Assert.Equal(8, counter.CharacterCount);
        }
Exemplo n.º 6
0
        // Adds a single-line post to the corpus and checks that the number of lines
        // reported by GetCorpusNumberOfLine grows by exactly one.
        public void ut_170725_test_add_a_post_to_corpus_after_a_newline()
        {
            String post = @"علاش المسلمين في بروكسيل ماخرجوش يستنكرو الارهاب";

            TextFrequency textFrequency = new TextFrequency();

            // line count before the post is added
            var before = textFrequency.GetCorpusNumberOfLine();

            textFrequency.AddPhraseToCorpus(post);

            // line count after the post is added
            var after = textFrequency.GetCorpusNumberOfLine();

            // Expected value goes first, actual second, so that a failure message
            // reports the two numbers under the right labels.
            Assert.AreEqual(before + 1, after);
        }
Exemplo n.º 7
0
        // End-to-end loop for the arabizi keyword "rtbdaw": generate its transcription
        // variants (fewer than 40 expected), ask Twingly for the most popular one, fetch a
        // post containing it, add that post to the corpus if missing, rebuild the
        // dictionary, and check that the keyword now converts to the most popular variant.
        public void ut_170727_test_recompile_corpus_rtbdaw_under_40_variants()
        {
            // Inputs.
            // NOTE(review): the Twingly API key is hard-coded in the test source.
            const string arabiziKeyword  = "rtbdaw";
            const string twinglyApi15Url = "https://data.twingly.net/socialfeed/a/api/v1.5/";
            const string twinglyApiKey   = "2A4CF6A4-4968-46EF-862F-2881EF597A55";

            var textConverter = new TextConverter();

            // All transcription variants of the keyword.
            var variants = textConverter.GetAllTranscriptions(arabiziKeyword);

            Assert.IsTrue(variants.Count < 40);

            // Most popular variant according to Twingly.
            var mostPopularKeyword = OADRJNLPCommon.Business.Business.getMostPopularVariantFromFBViaTwingly(variants, twinglyApi15Url, twinglyApiKey);

            Assert.AreNotEqual(String.Empty, mostPopularKeyword, "most popular");

            // A post containing that variant: first call with the last flag set to true,
            // then a second call with it set to false when the first returns nothing.
            var postText = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(mostPopularKeyword, twinglyApi15Url, twinglyApiKey, true);

            if (postText == String.Empty)
            {
                postText = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(mostPopularKeyword, twinglyApi15Url, twinglyApiKey, false);
            }

            Assert.AreNotEqual(String.Empty, postText, "post");

            // Add the post to the corpus unless it is already there.
            var corpus = new TextFrequency();

            if (!corpus.CorpusContainsSentence(postText))
            {
                corpus.AddPhraseToCorpus(postText);
            }

            // Rebuild the dictionary.
            textConverter.CatCorpusDict();
            textConverter.SrilmLmDict();

            // The keyword is now expected to convert to the most popular variant.
            var convertedKeyword = textConverter.Convert(arabiziKeyword);

            Assert.AreEqual(mostPopularKeyword, convertedKeyword);
        }
Exemplo n.º 8
0
        // Drops a known sentence from the corpus, checks that the corpus no longer
        // reports containing it, then adds the sentence back.
        public void ut_170725_test_delete_sentence_from_corpus()
        {
            // the sentence supposedly returned by twingly for keyword ماكتجيش
            var sentence = @"هاد البومب ماكتجيش عشوائية 😁 ، كتوجد ليها من الليلة ديال لبارح  ✍️ يا بومبييناااي 💪";
            var corpus   = new TextFrequency();

            // Initial lookup; its result is not asserted on.
            var containedBefore = corpus.CorpusContainsSentence(sentence);

            // Remove the sentence from the corpus.
            corpus.DropPhraseFromCorpus(sentence);

            var containedAfter = corpus.CorpusContainsSentence(sentence);

            Assert.AreEqual(false, containedAfter);

            // Put the sentence back into the corpus.
            corpus.AddPhraseToCorpus(sentence);
        }
Exemplo n.º 9
0
        // Checks TextFrequency.CountStringOccurrences against one Arabic sentence with two
        // search terms: a single letter that appears only inside longer words (expected
        // count 0) and a word that appears once on its own (expected count 1).
        public void ut_171114_test_CountStringOccurrences_should_find_full_words_not_within_words()
        {
            const string arabicsource = "خويا أنا راجوى ولكن زكريا ما بقا ش كيلعب بسيفه رسمية عيدان دقيقة الحسن قلب على راسو أكثر إدارة تعليمية ما كاينا ش التي تحمى لعب دقيقة بحال زيارتها الانتقالة خصوصا الغريم مرحاض و بالفعل وقعنا فأخطأ بحال هادوك مثل صبحي";

            // Case "1": letter found only within words.
            const string letterInsideWords   = "س";
            const int    expectedLetterCount = 0;
            var          letterCount         = TextFrequency.CountStringOccurrences(arabicsource, letterInsideWords);

            Assert.AreEqual(expectedLetterCount, letterCount, "1");

            // Case "2": a full word present once in the sentence.
            const string fullWord          = "الحسن";
            const int    expectedWordCount = 1;
            var          wordCount         = TextFrequency.CountStringOccurrences(arabicsource, fullWord);

            Assert.AreEqual(expectedWordCount, wordCount, "2");
        }
Exemplo n.º 10
0
        // Adds a two-line post (which contains the keyword) to the corpus and checks
        // that the number of lines reported by GetCorpusNumberOfLine grows by exactly one.
        public void ut_170721_test_add_a_post_to_corpus()
        {
            String keyword = "ينعل";
            String post    = @"+هدا واحد خينا مربي بزاااااااف كايدق باب التلاجة قبل ما يحلها.
+ هدا واحد سراق الزيت(صرصور) طاح ف مولينيكس ملي خرج قال الله ينعل اللي باقي يمشي ل لافوار";

            // sanity check: the sample post really contains the keyword
            Assert.IsTrue(post.Contains(keyword));

            TextFrequency textFrequency = new TextFrequency();

            // line count before the post is added
            var before = textFrequency.GetCorpusNumberOfLine();

            textFrequency.AddPhraseToCorpus(post);

            // line count after the post is added
            var after = textFrequency.GetCorpusNumberOfLine();

            // Expected value goes first, actual second, so that a failure message
            // reports the two numbers under the right labels.
            Assert.AreEqual(before + 1, after);
        }
Exemplo n.º 11
0
        // Expects BidictContainsWord to report that the word "inwi" is present.
        public void ut_170821_test_bidictContainsWord()
        {
            var textFrequency = new TextFrequency();

            var found = textFrequency.BidictContainsWord("inwi");

            Assert.AreEqual(true, found);
        }
Exemplo n.º 12
0
        // Full "recompile corpus" loop for the arabizi keyword "hazka": drop the expected
        // post from the corpus, convert a phrase and check that the keyword is the first
        // Latin word left in the output, generate its variants (fewer than 35 expected),
        // fetch the most popular variant and a matching post from Twingly, add the post to
        // the corpus, rebuild the dictionary, and check that the keyword now converts to
        // the most popular variant.
        public void ut_170727_test_recompile_corpus_full_loop_hazka_under_35_variants()
        {
            // this one is the sentence supposedly returned by twingly for the keyword (supposedly one line)
            var expectedpostText = @"انا القوة الخارقة لي عندي هي فاش كندوز من حدا شي سعاي و تيقولي شي درهم الله يرحم الواليدين الله ينجحك الله يطول فعمرك .. تنقول امين فنفسي و تنزطم .. بحال الى خديت دعوة فابور .. و متنعطيهش درهم حيت تنكون حازق و يلا كانت عندي 2 دراهم تنصرفها و تنعطي لواحد درهم حتى كيدعي معايا و تنعطي لشي واحد اخر .. ليكونومي";

            // 0 drop the test phrase from the corpus (it is added back in step 8)
            var textFrequency = new TextFrequency();

            textFrequency.DropPhraseFromCorpus(expectedpostText);

            // 1 arabizi
            String arabizi        = "Al houb wa al hazka";
            String arabiziKeyword = "hazka";

            // 2 convert first pass
            var    textConverter   = new TextConverter();
            String twinglyApi15Url = "https://data.twingly.net/socialfeed/a/api/v1.5/";
            String twinglyApiKey   = "2A4CF6A4-4968-46EF-862F-2881EF597A55";
            var    arabic          = textConverter.Convert(arabizi);

            // 3 latin words: the keyword is expected to be the first Latin word in the output
            var matches = TextTools.ExtractLatinWords(arabic);

            Assert.AreEqual(arabiziKeyword, matches[0].Value);

            // 4 get all variants (the keyword is used as-is; no ma/ch stripping in this test)
            var variants = textConverter.GetAllTranscriptions(arabiziKeyword);

            // The limit matches the one announced in the test name, as it does in the
            // sibling "under_40" and "under_50" tests.
            Assert.IsTrue(variants.Count < 35);

            // 5 get most popular keyword
            var mostPopularKeyword = OADRJNLPCommon.Business.Business.getMostPopularVariantFromFBViaTwingly(variants, twinglyApi15Url, twinglyApiKey);

            // 7 get a post containing this keyword
            var postText = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(mostPopularKeyword, twinglyApi15Url, twinglyApiKey, true);

            Assert.AreEqual(expectedpostText, postText);

            // 8 add this post to dict
            textFrequency.AddPhraseToCorpus(postText);

            // 9 recompile the dict
            textConverter.CatCorpusDict();
            textConverter.SrilmLmDict();

            // 10 assert it is now converted
            var arabicKeyword = textConverter.Convert(arabiziKeyword);

            Assert.AreEqual(mostPopularKeyword, arabicKeyword);
        }
Exemplo n.º 13
0
        // Full "recompile corpus" loop for the arabizi keyword "makatjich": drop the
        // expected post from the corpus, convert a phrase and check that the keyword is the
        // first Latin word left in the output, strip the "ma…ch" wrapper, generate the
        // variants of the remaining stem (fewer than 50 expected), fetch the most popular
        // variant and a matching post from Twingly, add the post to the corpus, rebuild the
        // dictionary, and check that the keyword now converts to the complete Arabic word.
        public void ut_170725_test_recompile_corpus_full_loop_makatjich_under_50_variants()
        {
            // this one is the sentence supposedly returned by twingly for keyword ماكتجيش
            var expectedpostText = @"هاد البومب ماكتجيش عشوائية 😁 ، كتوجد ليها من الليلة ديال لبارح  ✍️
يا بومبييناااي 💪";

            // make it one line; the line break inside the verbatim string is whatever this
            // source file is saved with, so handle both CRLF and bare LF
            var onelineexpectedpostText         = expectedpostText.Replace("\r\n", " ").Replace("\n", " ");
            var expectedonelineexpectedpostText = "هاد البومب ماكتجيش عشوائية 😁 ، كتوجد ليها من الليلة ديال لبارح  ✍️ يا بومبييناااي 💪";

            Assert.AreEqual(expectedonelineexpectedpostText, onelineexpectedpostText);

            // 0 drop the test phrase from the corpus (it is added back in step 8)
            var textFrequency = new TextFrequency();

            textFrequency.DropPhraseFromCorpus(onelineexpectedpostText);

            // 1 arabizi
            String arabizi        = "Ya wlad lkhab nta li kadwi makatjich lwa9afat w kadwi ya terikt jradistat";
            String arabiziKeyword = "makatjich";    // > 448 variants !!?

            // 2 convert first pass
            var    textConverter   = new TextConverter();
            String twinglyApi15Url = "https://data.twingly.net/socialfeed/a/api/v1.5/";
            String twinglyApiKey   = "2A4CF6A4-4968-46EF-862F-2881EF597A55";
            var    arabic          = textConverter.Convert(arabizi);

            // 2 latin words: the keyword is expected to be the first Latin word in the output
            var matches = TextTools.ExtractLatinWords(arabic);

            Assert.AreEqual(arabiziKeyword, matches[0].Value);

            // 3 preprocess if ma/ch: keep only what sits between the leading "ma" and the trailing "ch"
            String pattern            = @"\bma(.+)ch\b";
            String miniArabiziKeyword = Regex.Replace(arabiziKeyword, pattern, "$1");

            Assert.AreEqual("katji", miniArabiziKeyword);

            // 4 get all variants
            var variants = textConverter.GetAllTranscriptions(miniArabiziKeyword);

            Assert.IsTrue(variants.Count < 50);

            // 5 get most popular keyword
            var mostPopularKeyword         = OADRJNLPCommon.Business.Business.getMostPopularVariantFromFBViaTwingly(variants, twinglyApi15Url, twinglyApiKey);
            var expectedmostPopularKeyword = "كتجي";

            Assert.AreEqual(expectedmostPopularKeyword, mostPopularKeyword);
            // 6 re-add "ma" & "ch"
            var completeArabicKeyword = "ما" + mostPopularKeyword + "ش";

            Assert.AreEqual("ماكتجيش", completeArabicKeyword);

            // 7 get a post containing this keyword
            var postText = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(completeArabicKeyword, twinglyApi15Url, twinglyApiKey, true);

            Assert.AreEqual(onelineexpectedpostText, postText);

            // 8 add this post to dict
            textFrequency.AddPhraseToCorpus(postText);

            // 9 recompile the dict
            textConverter.CatCorpusDict();
            textConverter.SrilmLmDict();

            // 10 assert it is now converted
            var arabicKeyword = textConverter.Convert(arabiziKeyword);

            Assert.AreEqual(completeArabicKeyword, arabicKeyword);
        }