示例#1
0
        /// <summary>
        /// Runs the whole "learn a new word" loop for the arabizi keyword "makatjich":
        /// checks the keyword is left unchanged by the converter, looks up the most popular
        /// variant and a post containing it through Twingly, adds that post to the corpus,
        /// recompiles the dictionaries and checks the keyword now converts to that variant.
        /// </summary>
        public void ut_170721_test_recompile_corpus_full_loop()
        {
            var converter = new TextConverter();

            // NOTE(review): endpoint and API key are hard-coded in the test source —
            // consider moving them to configuration.
            string apiUrl = "https://data.twingly.net/socialfeed/a/api/v1.5/";
            string apiKey = "2A4CF6A4-4968-46EF-862F-2881EF597A55";

            // Original notes: "netrecheh" yields > 2000 variants, "makatjich" > 448.
            string keyword = "makatjich";

            // Before the corpus is touched the converter must hand the keyword back unchanged.
            Assert.AreEqual(keyword, converter.Convert(keyword));

            // Step 1: every transcription candidate for the keyword.
            var candidates = converter.GetAllTranscriptions(keyword);

            // Step 2: the candidate reported as most popular.
            var winner = OADRJNLPCommon.Business.Business.getMostPopularVariantFromFBViaTwingly(candidates, apiUrl, apiKey);

            // Step 3: a post containing that candidate.
            var post = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(winner, apiUrl, apiKey, true);

            // Step 4: feed the post into the corpus.
            var corpus = new TextFrequency();
            corpus.AddPhraseToCorpus(post);

            // Step 5: rebuild the dictionaries.
            converter.CatCorpusDict();
            converter.SrilmLmDict();

            // The keyword must now convert to the most popular variant.
            Assert.AreEqual(winner, converter.Convert(keyword));
        }
示例#2
0
        /// <summary>
        /// Joins the word's value and the converted counter with a single space between them.
        /// </summary>
        /// <param name="counter">Counter passed to the text converter.</param>
        /// <param name="word">Word whose <c>Value()</c> forms the left part of the result.</param>
        /// <returns>The word value, one space, then the converted counter.</returns>
        public virtual string Concatinate(Counter counter, Word word)
        {
            // The counter is converted first, then the word is read.
            var counterText = _textConverter.Convert(counter);
            var prefix      = word.Value() + " ";

            return prefix + counterText;
        }
        /// <summary>
        /// Converts the posted text, runs sentiment analysis and entity extraction on the
        /// converted text, and returns the three results together as JSON.
        /// </summary>
        /// <param name="text">Text taken from the request body.</param>
        /// <returns>A JSON result wrapping a <c>TextAnalyze</c> instance (GET allowed).</returns>
        public ActionResult ProcessText([FromBody] string text)
        {
            // Converter used for arabizi -> Arabic script. Per the original notes this variant
            // calls a Perl script directly; an ApiTextConverter (PHP API) was the alternative.
            var converter = new TextConverter();

            // Analysis services: sentiment (original notes mention Watson) and
            // entities (original notes mention Rosette).
            var sentimentAnalyzer = new TextSentimentAnalyzer();
            var entityExtractor   = new TextEntityExtraction();

            var converted      = converter.Convert(text);
            var sentimentValue = sentimentAnalyzer.GetSentiment(converted);
            var foundEntities  = entityExtractor.GetEntities(converted);

            // Bundle everything into the response model.
            var payload = new TextAnalyze
            {
                ArabicText = converted,
                Entities   = foundEntities.ToList(),
                Sentiment  = sentimentValue
            };

            return Json(payload, JsonRequestBehavior.AllowGet);
        }
示例#4
0
 /// <summary>
 /// Converts the pending text, appends the converted result to the history and
 /// resets <c>SomeText</c> to an empty string.
 /// Does nothing when the pending text is null, empty or whitespace only.
 /// </summary>
 private void ConvertText()
 {
     // IsNullOrWhiteSpace returns a bool. Comparing that bool with null is always false,
     // which meant the guard never fired and blank input was converted and stored.
     if (string.IsNullOrWhiteSpace(_someText))
     {
         return;
     }

     AddToHistory(_textConverter.Convert(_someText));
     SomeText = string.Empty;
 }
示例#5
0
        /// <summary>
        /// Corpus recompilation loop for the arabizi keyword "rtbdaw": requires fewer than
        /// 40 transcription variants, finds the most popular one and a post containing it
        /// through Twingly, adds the post to the corpus when missing, recompiles the
        /// dictionaries and checks the keyword converts to the most popular variant.
        /// </summary>
        public void ut_170727_test_recompile_corpus_rtbdaw_under_40_variants()
        {
            string keyword = "rtbdaw";

            var converter = new TextConverter();

            // NOTE(review): endpoint and API key are hard-coded in the test source —
            // consider moving them to configuration.
            string apiUrl = "https://data.twingly.net/socialfeed/a/api/v1.5/";
            string apiKey = "2A4CF6A4-4968-46EF-862F-2881EF597A55";

            // All transcription candidates; this keyword is expected to stay below 40.
            var candidates = converter.GetAllTranscriptions(keyword);
            Assert.IsTrue(candidates.Count < 40);

            // Most popular candidate must not be empty.
            var winner = OADRJNLPCommon.Business.Business.getMostPopularVariantFromFBViaTwingly(candidates, apiUrl, apiKey);
            Assert.AreNotEqual(String.Empty, winner, "most popular");

            // Look for a post containing the candidate; when the first lookup (last argument
            // true) yields nothing, retry with the last argument set to false.
            var post = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(winner, apiUrl, apiKey, true);
            if (post == String.Empty)
            {
                post = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(winner, apiUrl, apiKey, false);
            }
            Assert.AreNotEqual(String.Empty, post, "post");

            // Add the post to the corpus only when it is not already in there.
            var corpus = new TextFrequency();
            if (corpus.CorpusContainsSentence(post) == false)
            {
                corpus.AddPhraseToCorpus(post);
            }

            // Rebuild the dictionaries.
            converter.CatCorpusDict();
            converter.SrilmLmDict();

            // The keyword must now convert to the most popular variant.
            var converted = converter.Convert(keyword);
            Assert.AreEqual(winner, converted);
        }
示例#6
0
    /// <summary>
    /// Walks through the converted codes of this object's data, chaining
    /// <c>CreateCodeData</c> from one code to the next, and reports the first code
    /// for which no code data could be created.
    /// </summary>
    /// <param name="fileName">File name passed on to the error log.</param>
    public void SyntaxCheck(string fileName)
    {
        var      codes    = TextConverter.Convert(GetData()).ToList();
        CodeData previous = new EndCode();

        foreach (var code in codes)
        {
            var parsed = previous.CreateCodeData(code, this);
            if (parsed != null)
            {
                // Valid code: it becomes the starting point for the next one.
                previous = parsed;
                continue;
            }

            // Creation failed: log it (the message says the header has an invalid value)
            // and stop checking.
            ThrowErrorLog(null, fileName, "ヘッダーが不正な値です", name, code._head);
            break;
        }
    }
    /// <summary>
    /// Stores the given scriptable as the current one and fills <c>_codeList</c> with the
    /// code data created from its converted contents.
    /// </summary>
    /// <param name="data">Event data to load; a null value is logged and ignored.</param>
    public void SetEventData(EventCodeScriptable data)
    {
        if (data == null)
        {
            Debug.Log("SetEventData: data is null");
            return;
        }

        _nowScriptable = data;

        var      pending = TextConverter.Convert(data.GetData());
        CodeData current = new EndCode();

        while (pending.Count > 0)
        {
            var item    = pending.Dequeue();
            var created = current.CreateCodeData(item, data);

            // NOTE(review): SyntaxCheck treats a null result from CreateCodeData as possible;
            // a null here would throw on Equals — confirm input is validated beforehand.
            if (!created.Equals(current))
            {
                // A different code object was produced: queue it and continue from it.
                current = created;
                _codeList.Enqueue(created);
            }
        }
    }
示例#8
0
        /// <summary>
        /// Corpus recompilation loop for the arabizi keyword "hazka": removes the expected
        /// post from the corpus, checks the keyword is the first Latin word left after a first
        /// conversion, finds the most popular variant and the matching post through Twingly,
        /// adds the post back, recompiles the dictionaries and checks the keyword converts
        /// to the most popular variant.
        /// </summary>
        public void ut_170727_test_recompile_corpus_full_loop_hazka_under_35_variants()
        {
            // Post expected back from Twingly for the keyword (single line).
            var expectedPost = @"انا القوة الخارقة لي عندي هي فاش كندوز من حدا شي سعاي و تيقولي شي درهم الله يرحم الواليدين الله ينجحك الله يطول فعمرك .. تنقول امين فنفسي و تنزطم .. بحال الى خديت دعوة فابور .. و متنعطيهش درهم حيت تنكون حازق و يلا كانت عندي 2 دراهم تنصرفها و تنعطي لواحد درهم حتى كيدعي معايا و تنعطي لشي واحد اخر .. ليكونومي";

            // Start from a corpus that does not contain the expected post.
            var corpus = new TextFrequency();
            corpus.DropPhraseFromCorpus(expectedPost);

            string sentence = "Al houb wa al hazka";
            string keyword  = "hazka";

            var converter = new TextConverter();

            // NOTE(review): endpoint and API key are hard-coded in the test source —
            // consider moving them to configuration.
            string apiUrl = "https://data.twingly.net/socialfeed/a/api/v1.5/";
            string apiKey = "2A4CF6A4-4968-46EF-862F-2881EF597A55";

            // First conversion pass: the keyword should be the first Latin word left over.
            var firstPass  = converter.Convert(sentence);
            var latinWords = TextTools.ExtractLatinWords(firstPass);
            Assert.AreEqual(keyword, latinWords[0].Value);

            // No "ma...ch" preprocessing for this keyword: variants come from the keyword itself.
            // NOTE(review): the method name says "under 35" while the bound checked is 50 —
            // confirm which one is intended.
            var candidates = converter.GetAllTranscriptions(keyword);
            Assert.IsTrue(candidates.Count < 50);

            // Most popular candidate, then a post containing it.
            var winner = OADRJNLPCommon.Business.Business.getMostPopularVariantFromFBViaTwingly(candidates, apiUrl, apiKey);
            var post   = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(winner, apiUrl, apiKey, true);
            Assert.AreEqual(expectedPost, post);

            // Put the post into the corpus and rebuild the dictionaries.
            corpus.AddPhraseToCorpus(post);
            converter.CatCorpusDict();
            converter.SrilmLmDict();

            // The keyword must now convert to the most popular variant.
            var converted = converter.Convert(keyword);
            Assert.AreEqual(winner, converted);
        }
示例#9
0
        /// <summary>
        /// Corpus recompilation loop for the arabizi keyword "makatjich": removes the expected
        /// post from the corpus, strips the "ma...ch" wrapper from the keyword, finds the most
        /// popular variant of the inner word through Twingly, re-wraps it, fetches the matching
        /// post, adds it to the corpus, recompiles the dictionaries and checks the keyword
        /// converts to the re-wrapped variant.
        /// </summary>
        public void ut_170725_test_recompile_corpus_full_loop_makatjich_under_50_variants()
        {
            // this one is the sentence supposedly returned by twingly for keyword ماكتجيش
            var expectedpostText = @"هاد البومب ماكتجيش عشوائية 😁 ، كتوجد ليها من الليلة ديال لبارح  ✍️
يا بومبييناااي 💪";

            // make it one line; the line break inside the verbatim literal is CRLF or LF
            // depending on how the source file was saved, so both forms are flattened
            var onelineexpectedpostText         = expectedpostText.Replace("\r\n", " ").Replace("\n", " ");
            var expectedonelineexpectedpostText = "هاد البومب ماكتجيش عشوائية 😁 ، كتوجد ليها من الليلة ديال لبارح  ✍️ يا بومبييناااي 💪";

            Assert.AreEqual(expectedonelineexpectedpostText, onelineexpectedpostText);

            // 0 drop the test phrase from the dict
            var textFrequency = new TextFrequency();

            textFrequency.DropPhraseFromCorpus(onelineexpectedpostText);

            // 1 arabizi
            String arabizi        = "Ya wlad lkhab nta li kadwi makatjich lwa9afat w kadwi ya terikt jradistat";
            String arabiziKeyword = "makatjich";    // original note: > 448 variants

            // 2 convert first pass
            // NOTE(review): endpoint and API key are hard-coded in the test source —
            // consider moving them to configuration.
            var    textConverter   = new TextConverter();
            String twinglyApi15Url = "https://data.twingly.net/socialfeed/a/api/v1.5/";
            String twinglyApiKey   = "2A4CF6A4-4968-46EF-862F-2881EF597A55";
            var    arabic          = textConverter.Convert(arabizi);

            // 3 the keyword must be the first latin word left after the first pass
            var matches = TextTools.ExtractLatinWords(arabic);

            Assert.AreEqual(arabiziKeyword, matches[0].Value);

            // 4 preprocess: strip the leading "ma" and trailing "ch"
            String pattern            = @"\bma(.+)ch\b";
            String miniArabiziKeyword = Regex.Replace(arabiziKeyword, pattern, "$1");

            Assert.AreEqual("katji", miniArabiziKeyword);

            // 5 get all variants of the inner word
            var variants = textConverter.GetAllTranscriptions(miniArabiziKeyword);

            Assert.IsTrue(variants.Count < 50);

            // 6 get most popular keyword
            var mostPopularKeyword         = OADRJNLPCommon.Business.Business.getMostPopularVariantFromFBViaTwingly(variants, twinglyApi15Url, twinglyApiKey);
            var expectedmostPopularKeyword = "كتجي";

            Assert.AreEqual(expectedmostPopularKeyword, mostPopularKeyword);

            // 7 re-add "ma" & "ch" in arabic script
            var completeArabicKeyword = "ما" + mostPopularKeyword + "ش";

            Assert.AreEqual("ماكتجيش", completeArabicKeyword);

            // 8 get a post containing this keyword
            var postText = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(completeArabicKeyword, twinglyApi15Url, twinglyApiKey, true);

            Assert.AreEqual(onelineexpectedpostText, postText);

            // 9 add this post to dict
            textFrequency.AddPhraseToCorpus(postText);

            // 10 recompile the dict
            textConverter.CatCorpusDict();
            textConverter.SrilmLmDict();

            // 11 assert it is now converted
            var arabicKeyword = textConverter.Convert(arabiziKeyword);

            Assert.AreEqual(completeArabicKeyword, arabicKeyword);
        }
示例#10
0
        /// <summary>
        /// Returns the text converter's rendering of the given counter.
        /// </summary>
        /// <param name="counter">Counter to convert.</param>
        /// <returns>Whatever <c>_textConverter.Convert</c> yields for the counter.</returns>
        public virtual string Print(Counter counter)
        {
            return _textConverter.Convert(counter);
        }
        /// <summary>
        /// Converts an arabizi text, persists the arabizi entry, the converted entry and every
        /// remaining Latin word (with its number of transcription variants), and returns the
        /// three pieces merged into one response object.
        /// </summary>
        /// <param name="token">Token checked with <c>ValidateToken</c> for this action.</param>
        /// <param name="text">Arabizi text to convert.</param>
        /// <returns>
        /// 200 with the merged entries on success; 404 when <paramref name="text"/> is null;
        /// 406 carrying the validation error message when the token is rejected.
        /// </returns>
        public IHttpActionResult GetArabicDarijaEntry(/*[FromBody]*/ string token, String text)
        {
            var errorMessage = string.Empty;

            if (!ValidateToken(token, "GetArabicDarijaEntry", out errorMessage))
            {
                // Send the rejection as an actual 406 response. Passing the HttpResponseMessage
                // to Ok(...) would answer 200 and treat the message object as the content.
                var message = new HttpResponseMessage();
                message.StatusCode = HttpStatusCode.NotAcceptable;
                message.Content    = new StringContent(errorMessage);
                return ResponseMessage(message);
            }

            M_ARABIZIENTRY arabiziEntry = new M_ARABIZIENTRY
            {
                ArabiziText      = text,
                ArabiziEntryDate = DateTime.Now
            };

            // Arabizi to arabic script (original notes: direct call to a Perl script)
            var textConverter = new TextConverter();

            List <M_ARABICDARIJAENTRY_LATINWORD> arabicDarijaEntryLatinWords = new List <M_ARABICDARIJAENTRY_LATINWORD>();

            // Without a text there is nothing to convert, hence no entry to return.
            if (arabiziEntry.ArabiziText == null)
            {
                return NotFound();
            }

            M_ARABICDARIJAENTRY arabicDarijaEntry;

            lock (thisLock)
            {
                // complete arabizi entry
                arabiziEntry.ID_ARABIZIENTRY = Guid.NewGuid();

                // convert and build the darija entry linked to the arabizi entry
                var arabicText = textConverter.Convert(arabiziEntry.ArabiziText);
                arabicDarijaEntry = new M_ARABICDARIJAENTRY
                {
                    ID_ARABICDARIJAENTRY = Guid.NewGuid(),
                    ID_ARABIZIENTRY      = arabiziEntry.ID_ARABIZIENTRY,
                    ArabicDarijaText     = arabicText
                };

                // persist the arabizi entry
                String path = HostingEnvironment.MapPath("~/App_Data/data_M_ARABIZIENTRY.txt");
                new TextPersist().Serialize <M_ARABIZIENTRY>(arabiziEntry, path);

                // persist the darija entry
                path = HostingEnvironment.MapPath("~/App_Data/data_M_ARABICDARIJAENTRY.txt");
                new TextPersist().Serialize <M_ARABICDARIJAENTRY>(arabicDarijaEntry, path);

                // Latin words still present in the converted text
                MatchCollection matches = TextTools.ExtractLatinWords(arabicDarijaEntry.ArabicDarijaText);

                // the target file is the same for every Latin word, so resolve it once
                String latinWordPath = HostingEnvironment.MapPath("~/App_Data/data_M_ARABICDARIJAENTRY_LATINWORD.txt");

                // save every match, computing its number of variants on the fly
                foreach (Match match in matches)
                {
                    // do not consider words in the bidict as latin words
                    if (new TextFrequency().BidictContainsWord(match.Value))
                    {
                        continue;
                    }

                    String arabiziWord   = match.Value;
                    // reuse the converter created above instead of building one per word
                    int    variantsCount = textConverter.GetAllTranscriptions(arabiziWord).Count;

                    var latinWord = new M_ARABICDARIJAENTRY_LATINWORD
                    {
                        ID_ARABICDARIJAENTRY_LATINWORD = Guid.NewGuid(),
                        ID_ARABICDARIJAENTRY           = arabicDarijaEntry.ID_ARABICDARIJAENTRY,
                        LatinWord     = arabiziWord,
                        VariantsCount = variantsCount
                    };

                    arabicDarijaEntryLatinWords.Add(latinWord);

                    // persist the Latin word
                    new TextPersist().Serialize <M_ARABICDARIJAENTRY_LATINWORD>(latinWord, latinWordPath);
                }
            }

            // use expando to merge the json outputs : arabizi + arabic + latin words
            dynamic expando = new ExpandoObject();
            expando.M_ARABIZIENTRY                = arabiziEntry;
            expando.M_ARABICDARIJAENTRY           = arabicDarijaEntry;
            expando.M_ARABICDARIJAENTRY_LATINWORD = arabicDarijaEntryLatinWords;
            return Ok(expando);
        }