/// <summary>
/// Runs the full corpus-recompilation loop for the arabizi keyword "makatjich":
/// checks the keyword is returned unchanged by the converter, looks up its most
/// popular transcription and a post containing it through Twingly, adds that post
/// to the corpus, rebuilds the dictionaries and checks the keyword now converts
/// to the most popular transcription.
/// </summary>
public void ut_170721_test_recompile_corpus_full_loop()
{
    var converter = new TextConverter();
    string apiUrl = "https://data.twingly.net/socialfeed/a/api/v1.5/";
    string apiKey = "2A4CF6A4-4968-46EF-862F-2881EF597A55";

    // "netrecheh" (> 2000 variants) was tried here as well; "makatjich" has > 448 variants.
    string keyword = "makatjich";

    // Before the corpus is touched the converter must hand the keyword back as-is.
    var convertedBefore = converter.Convert(keyword);
    Assert.AreEqual(keyword, convertedBefore);

    // 1. all transcription variants of the keyword
    var candidates = converter.GetAllTranscriptions(keyword);

    // 2. most popular variant
    var popularVariant = OADRJNLPCommon.Business.Business.getMostPopularVariantFromFBViaTwingly(candidates, apiUrl, apiKey);

    // 3. a post containing that variant
    var post = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(popularVariant, apiUrl, apiKey, true);

    // 4. add the post to the corpus
    var frequency = new TextFrequency();
    frequency.AddPhraseToCorpus(post);

    // 5. recompile the dictionaries
    converter.CatCorpusDict();
    converter.SrilmLmDict();

    // After recompilation the keyword must convert to the most popular variant.
    var convertedAfter = converter.Convert(keyword);
    Assert.AreEqual(popularVariant, convertedAfter);
}
/// <summary>
/// Builds "&lt;word value&gt; &lt;converted counter&gt;", separated by a single space.
/// </summary>
/// <param name="counter">Counter handed to the text converter.</param>
/// <param name="word">Word whose <c>Value()</c> forms the left-hand part.</param>
/// <returns>The word value, a space, then the converted counter.</returns>
public virtual string Concatinate(Counter counter, Word word)
{
    // The counter is converted before the word is read, as in the original order.
    var counterText = _textConverter.Convert(counter);
    var wordText = word.Value();

    return wordText + " " + counterText;
}
/// <summary>
/// Converts the posted text with <c>TextConverter</c>, runs sentiment analysis and
/// entity extraction on the converted text, and returns the three results as JSON.
/// </summary>
/// <param name="text">Text bound from the request body; passed to the converter as-is.</param>
/// <returns>A JSON result wrapping a <c>TextAnalyze</c>; GET requests are allowed.</returns>
public ActionResult ProcessText([FromBody] string text)
{
    // Arabizi to arabic script through the direct perl-script converter.
    // The alternative, going through a php api, would be: new ApiTextConverter()
    var converter = new TextConverter();

    // Sentiment analysis and entity extraction helpers.
    var sentimentAnalyzer = new TextSentimentAnalyzer();
    var entityExtractor = new TextEntityExtraction();

    var arabicText = converter.Convert(text);

    // Sentiment analysis from watson (https://gateway.watsonplatform.net/).
    var sentiment = sentimentAnalyzer.GetSentiment(arabicText);

    // Entity extraction from rosette (https://api.rosette.com/rest/v1/).
    var entities = entityExtractor.GetEntities(arabicText);

    var analysis = new TextAnalyze();
    analysis.ArabicText = arabicText;
    analysis.Entities = entities.ToList();
    analysis.Sentiment = sentiment;

    return Json(analysis, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Converts the pending text, appends the converted result to the history and
/// clears the input. Does nothing when the pending text is null, empty or whitespace.
/// </summary>
private void ConvertText()
{
    // The previous guard compared the bool returned by IsNullOrWhiteSpace with null,
    // which is never true, so blank input was still converted and added to history.
    if (string.IsNullOrWhiteSpace(_someText))
    {
        return;
    }

    AddToHistory(_textConverter.Convert(_someText));
    SomeText = string.Empty;
}
/// <summary>
/// Full corpus-recompilation loop for the arabizi keyword "rtbdaw": checks it has
/// fewer than 40 transcription variants, finds the most popular one and a post
/// containing it through Twingly, adds the post to the corpus when it is not there
/// yet, rebuilds the dictionaries and checks the keyword converts to that variant.
/// </summary>
public void ut_170727_test_recompile_corpus_rtbdaw_under_40_variants()
{
    // Keyword under test.
    string keyword = "rtbdaw";

    // Converter and Twingly access settings.
    var converter = new TextConverter();
    string apiUrl = "https://data.twingly.net/socialfeed/a/api/v1.5/";
    string apiKey = "2A4CF6A4-4968-46EF-862F-2881EF597A55";

    // All transcription variants; this keyword must stay below 40 of them.
    var candidates = converter.GetAllTranscriptions(keyword);
    Assert.IsTrue(candidates.Count < 40);

    // Most popular variant; an empty result is a failure.
    var popularVariant = OADRJNLPCommon.Business.Business.getMostPopularVariantFromFBViaTwingly(candidates, apiUrl, apiKey);
    Assert.AreNotEqual(String.Empty, popularVariant, "most popular");

    // A post containing that variant; when nothing comes back, retry with the
    // last flag switched off ("look everywhere").
    var post = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(popularVariant, apiUrl, apiKey, true);
    if (post == String.Empty)
    {
        post = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(popularVariant, apiUrl, apiKey, false);
    }

    Assert.AreNotEqual(String.Empty, post, "post");

    // Add the post to the corpus unless it is already part of it.
    var frequency = new TextFrequency();
    if (!frequency.CorpusContainsSentence(post))
    {
        frequency.AddPhraseToCorpus(post);
    }

    // Recompile the dictionaries.
    converter.CatCorpusDict();
    converter.SrilmLmDict();

    // The keyword must now convert to the most popular variant.
    var converted = converter.Convert(keyword);
    Assert.AreEqual(popularVariant, converted);
}
/// <summary>
/// Walks the codes produced by <c>TextConverter.Convert(GetData())</c>, creating each
/// code data from the previously created one, and logs an error for the first code
/// that yields no code data.
/// </summary>
/// <param name="fileName">File name forwarded to the error log.</param>
public void SyntaxCheck(string fileName)
{
    var codes = TextConverter.Convert(GetData()).ToList();

    // The chain starts from an EndCode; every created code becomes the factory for the next one.
    CodeData previous = new EndCode();
    for (var index = 0; index < codes.Count; index++)
    {
        var code = codes[index];
        var created = previous.CreateCodeData(code, this);

        if (created == null)
        {
            // Message text: "the header has an invalid value". Checking stops at the first failure.
            ThrowErrorLog(null, fileName, "ヘッダーが不正な値です", name, code._head);
            break;
        }

        previous = created;
    }
}
/// <summary>
/// Stores <paramref name="data"/> as the current scriptable and queues the code data
/// created from its converted contents.
/// </summary>
/// <param name="data">Event code source; when null the call only logs and returns.</param>
public void SetEventData(EventCodeScriptable data)
{
    if (data == null)
    {
        Debug.Log("SetEventData: data is null");
        return;
    }

    _nowScriptable = data;
    var dataList = TextConverter.Convert(data.GetData());

    // Each code is created from the previous one, starting from an EndCode.
    CodeData nowCode = new EndCode();
    while (dataList.Count != 0)
    {
        var target = dataList.Dequeue();
        var nextCode = nowCode.CreateCodeData(target, data);

        // CreateCodeData can return null (presumably for an unrecognized header);
        // stop with a clear message instead of failing on the Equals call below.
        if (nextCode == null)
        {
            Debug.LogError("SetEventData: CreateCodeData returned null; remaining codes are skipped");
            break;
        }

        // A result equal to the current code is not queued again.
        if (nextCode.Equals(nowCode))
        {
            continue;
        }

        nowCode = nextCode;
        _codeList.Enqueue(nextCode);
    }
}
/// <summary>
/// Full corpus-recompilation loop for the arabizi keyword "hazka": removes the
/// expected post from the corpus, checks the keyword is left as a latin word by the
/// first conversion, finds its most popular transcription and the expected post
/// through Twingly, adds the post back, rebuilds the dictionaries and checks the
/// keyword now converts to the most popular transcription.
/// </summary>
public void ut_170727_test_recompile_corpus_full_loop_hazka_under_35_variants()
{
    // Sentence expected back from Twingly for the keyword (supposedly a single line).
    var expectedPost = @"انا القوة الخارقة لي عندي هي فاش كندوز من حدا شي سعاي و تيقولي شي درهم الله يرحم الواليدين الله ينجحك الله يطول فعمرك .. تنقول امين فنفسي و تنزطم .. بحال الى خديت دعوة فابور .. و متنعطيهش درهم حيت تنكون حازق و يلا كانت عندي 2 دراهم تنصرفها و تنعطي لواحد درهم حتى كيدعي معايا و تنعطي لشي واحد اخر .. ليكونومي";

    // 0. drop the test phrase from the corpus so the loop starts clean
    var frequency = new TextFrequency();
    frequency.DropPhraseFromCorpus(expectedPost);

    // 1. arabizi sentence and the keyword expected to stay unconverted
    string sentence = "Al houb wa al hazka";
    string keyword = "hazka";

    // 2. first conversion pass
    var converter = new TextConverter();
    string apiUrl = "https://data.twingly.net/socialfeed/a/api/v1.5/";
    string apiKey = "2A4CF6A4-4968-46EF-862F-2881EF597A55";
    var firstPass = converter.Convert(sentence);

    // The first latin word left in the converted text must be the keyword.
    var latinWords = TextTools.ExtractLatinWords(firstPass);
    Assert.AreEqual(keyword, latinWords[0].Value);

    // 3. the "ma...ch" pre-processing is not applied for this keyword (left disabled).

    // 4. all transcription variants
    // NOTE(review): the method name says "under 35" but the bound checked here is 50 - confirm which is intended.
    var candidates = converter.GetAllTranscriptions(keyword);
    Assert.IsTrue(candidates.Count < 50);

    // 5. most popular variant (no fixed expected value is asserted here)
    var popularVariant = OADRJNLPCommon.Business.Business.getMostPopularVariantFromFBViaTwingly(candidates, apiUrl, apiKey);

    // 6. re-adding "ma" and "ch" is not applied for this keyword (left disabled).

    // 7. a post containing the most popular variant
    var post = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(popularVariant, apiUrl, apiKey, true);
    Assert.AreEqual(expectedPost, post);

    // 8. add the post to the corpus
    frequency.AddPhraseToCorpus(post);

    // 9. recompile the dictionaries
    converter.CatCorpusDict();
    converter.SrilmLmDict();

    // 10. the keyword must now convert to the most popular variant
    var converted = converter.Convert(keyword);
    Assert.AreEqual(popularVariant, converted);
}
/// <summary>
/// Full corpus-recompilation loop for the arabizi keyword "makatjich": removes the
/// expected post from the corpus, strips the "ma...ch" wrapper from the keyword,
/// finds the most popular transcription of the remaining stem, re-adds the arabic
/// prefix and suffix, fetches the expected post through Twingly, adds it to the
/// corpus, rebuilds the dictionaries and checks the keyword converts to the
/// re-assembled arabic word.
/// </summary>
public void ut_170725_test_recompile_corpus_full_loop_makatjich_under_50_variants()
{
    // Sentence expected back from Twingly for the keyword ماكتجيش.
    var rawExpectedPost = @"هاد البومب ماكتجيش عشوائية 😁 ، كتوجد ليها من الليلة ديال لبارح ✍️ يا بومبييناااي 💪";

    // Fold any CRLF into a space and check the result against the one-line form.
    var expectedPost = rawExpectedPost.Replace("\r\n", " ");
    var oneLineReference = "هاد البومب ماكتجيش عشوائية 😁 ، كتوجد ليها من الليلة ديال لبارح ✍️ يا بومبييناااي 💪";
    Assert.AreEqual(oneLineReference, expectedPost);

    // 0. drop the test phrase from the corpus so the loop starts clean
    var frequency = new TextFrequency();
    frequency.DropPhraseFromCorpus(expectedPost);

    // 1. arabizi sentence and the keyword expected to stay unconverted (> 448 variants as-is)
    string sentence = "Ya wlad lkhab nta li kadwi makatjich lwa9afat w kadwi ya terikt jradistat";
    string keyword = "makatjich";

    // 2. first conversion pass
    var converter = new TextConverter();
    string apiUrl = "https://data.twingly.net/socialfeed/a/api/v1.5/";
    string apiKey = "2A4CF6A4-4968-46EF-862F-2881EF597A55";
    var firstPass = converter.Convert(sentence);

    // The first latin word left in the converted text must be the keyword.
    var latinWords = TextTools.ExtractLatinWords(firstPass);
    Assert.AreEqual(keyword, latinWords[0].Value);

    // 3. strip the leading "ma" and trailing "ch" to get the stem
    string wrapperPattern = @"\bma(.+)ch\b";
    string stem = Regex.Replace(keyword, wrapperPattern, "$1");
    Assert.AreEqual("katji", stem);

    // 4. all transcription variants of the stem
    var candidates = converter.GetAllTranscriptions(stem);
    Assert.IsTrue(candidates.Count < 50);

    // 5. most popular variant of the stem
    var popularVariant = OADRJNLPCommon.Business.Business.getMostPopularVariantFromFBViaTwingly(candidates, apiUrl, apiKey);
    var expectedPopularVariant = "كتجي";
    Assert.AreEqual(expectedPopularVariant, popularVariant);

    // 6. re-add the arabic "ma" prefix and "ch" suffix
    var fullArabicKeyword = "ما" + popularVariant + "ش";
    Assert.AreEqual("ماكتجيش", fullArabicKeyword);

    // 7. a post containing the re-assembled keyword
    var post = OADRJNLPCommon.Business.Business.getPostBasedOnKeywordFromFBViaTwingly(fullArabicKeyword, apiUrl, apiKey, true);
    Assert.AreEqual(expectedPost, post);

    // 8. add the post to the corpus
    frequency.AddPhraseToCorpus(post);

    // 9. recompile the dictionaries
    converter.CatCorpusDict();
    converter.SrilmLmDict();

    // 10. the keyword must now convert to the re-assembled arabic word
    var converted = converter.Convert(keyword);
    Assert.AreEqual(fullArabicKeyword, converted);
}
/// <summary>
/// Returns the text converter's rendering of <paramref name="counter"/>.
/// </summary>
/// <param name="counter">Counter handed to the text converter.</param>
/// <returns>Whatever <c>_textConverter.Convert</c> produces for the counter.</returns>
public virtual string Print(Counter counter)
{
    return _textConverter.Convert(counter);
}
/// <summary>
/// Converts an arabizi text to arabic darija, persists the arabizi entry, the darija
/// entry and every latin word left in the converted text, and returns the three
/// merged into one payload.
/// </summary>
/// <param name="token">Access token checked by <c>ValidateToken</c> for the "GetArabicDarijaEntry" action.</param>
/// <param name="text">Arabizi text to convert.</param>
/// <returns>
/// 200 with the merged payload; 404 when <paramref name="text"/> is null, since no
/// darija entry is produced; 406 carrying the validation error text when the token
/// is rejected.
/// </returns>
public IHttpActionResult GetArabicDarijaEntry(/*[FromBody]*/ string token, String text)
{
    var errorMessage = string.Empty;
    if (!ValidateToken(token, "GetArabicDarijaEntry", out errorMessage))
    {
        // Send the 406 response itself. Passing it to Ok(...) answered with status 200
        // and used the HttpResponseMessage object as the body, hiding the rejection.
        var message = new HttpResponseMessage();
        message.StatusCode = HttpStatusCode.NotAcceptable;
        message.Content = new StringContent(errorMessage);
        return ResponseMessage(message);
    }

    M_ARABICDARIJAENTRY arabicDarijaEntry = null;
    M_ARABIZIENTRY arabiziEntry = new M_ARABIZIENTRY
    {
        ArabiziText = text,
        ArabiziEntryDate = DateTime.Now
    };

    // Arabizi to arabic script via direct call to perl script.
    var textConverter = new TextConverter();

    // Latin words collected for the response payload.
    List<M_ARABICDARIJAENTRY_LATINWORD> arabicDarijaEntryLatinWords = new List<M_ARABICDARIJAENTRY_LATINWORD>();

    if (arabiziEntry.ArabiziText != null)
    {
        // Conversion and the writes to the data files run under the shared lock.
        lock (thisLock)
        {
            // Complete the arabizi entry.
            arabiziEntry.ID_ARABIZIENTRY = Guid.NewGuid();

            // Prepare the darija entry from the perl script output.
            var arabicText = textConverter.Convert(arabiziEntry.ArabiziText);
            arabicDarijaEntry = new M_ARABICDARIJAENTRY
            {
                ID_ARABICDARIJAENTRY = Guid.NewGuid(),
                ID_ARABIZIENTRY = arabiziEntry.ID_ARABIZIENTRY,
                ArabicDarijaText = arabicText
            };

            // Save the arabizi entry.
            String path = HostingEnvironment.MapPath("~/App_Data/data_M_ARABIZIENTRY.txt");
            new TextPersist().Serialize<M_ARABIZIENTRY>(arabiziEntry, path);

            // Save the darija entry.
            path = HostingEnvironment.MapPath("~/App_Data/data_M_ARABICDARIJAENTRY.txt");
            new TextPersist().Serialize<M_ARABICDARIJAENTRY>(arabicDarijaEntry, path);

            // Latin words still present in the converted text.
            MatchCollection matches = TextTools.ExtractLatinWords(arabicDarijaEntry.ArabicDarijaText);

            // One frequency helper serves the whole loop, and the converter created
            // above is reused, instead of allocating both again for every word.
            var textFrequency = new TextFrequency();

            // Save every match and compute its number of variants on the fly.
            foreach (Match match in matches)
            {
                // Words found in the bidict are not considered latin words.
                if (textFrequency.BidictContainsWord(match.Value))
                {
                    continue;
                }

                String arabiziWord = match.Value;
                int variantsCount = textConverter.GetAllTranscriptions(arabiziWord).Count;
                var latinWord = new M_ARABICDARIJAENTRY_LATINWORD
                {
                    ID_ARABICDARIJAENTRY_LATINWORD = Guid.NewGuid(),
                    ID_ARABICDARIJAENTRY = arabicDarijaEntry.ID_ARABICDARIJAENTRY,
                    LatinWord = arabiziWord,
                    VariantsCount = variantsCount
                };

                arabicDarijaEntryLatinWords.Add(latinWord);

                // Save the latin word.
                path = HostingEnvironment.MapPath("~/App_Data/data_M_ARABICDARIJAENTRY_LATINWORD.txt");
                new TextPersist().Serialize<M_ARABICDARIJAENTRY_LATINWORD>(latinWord, path);
            }
        }
    }

    // No darija entry is produced when the incoming text is null.
    if (arabicDarijaEntry == null)
    {
        return NotFound();
    }

    // Merge the json outputs with an expando: arabizi + arabic + latin words.
    dynamic expando = new ExpandoObject();
    expando.M_ARABIZIENTRY = arabiziEntry;
    expando.M_ARABICDARIJAENTRY = arabicDarijaEntry;
    expando.M_ARABICDARIJAENTRY_LATINWORD = arabicDarijaEntryLatinWords;

    return Ok(expando);
}