Example #1
0
        /// <summary>
        /// Runs <c>Arabizer.train</c> on the supplied text and returns its output with the
        /// latin-word collection cleared.
        /// </summary>
        /// <param name="token">API token, checked through <c>ValidateToken</c> for the "Extract" operation.</param>
        /// <param name="text">Text stored as <c>ArabiziText</c> on the entry handed to <c>Arabizer.train</c>.</param>
        /// <returns>
        /// <c>Ok(...)</c> wrapping the training output; when the token is rejected, <c>Ok(...)</c>
        /// wrapping an object that carries <c>StatusCode</c> (NotAcceptable) and <c>ErrorMessage</c>.
        /// </returns>
        public IHttpActionResult ExtractNers(/*[FromBody]*/ string token, String text)
        {
            // Check API access first; the validator reports the reason through the out parameter.
            string validationError = string.Empty;
            bool   tokenAccepted   = ValidateToken(token, "Extract", out validationError);

            if (tokenAccepted)
            {
                // Wrap the incoming text in a fresh entry stamped with the current local time.
                var entry = new M_ARABIZIENTRY();
                entry.ArabiziText      = text;
                entry.ArabiziEntryDate = DateTime.Now;

                // Delegate the real work; per the original notes the result merges
                // arabizi + arabic + latin words plus the M_ARABICDARIJAENTRY_TEXTENTITYs.
                var     arabizer = new Arabizer();
                dynamic result   = arabizer.train(entry, null, thisLock: thisLock);

                // Callers of this endpoint only need arabizi + arabic + ner, so drop the latin words.
                result.M_ARABICDARIJAENTRY_LATINWORDs = null;

                return Ok(result);
            }

            // Token rejected: report the reason in the payload, stripping surrounding double quotes.
            dynamic failure = new ExpandoObject();
            failure.StatusCode   = HttpStatusCode.NotAcceptable;
            failure.ErrorMessage = validationError.Trim('"');
            return Ok(failure);
        }
Example #2
0
        /// <summary>
        /// Runs <c>Arabizer.train</c> on the supplied text and returns its output with the
        /// latin-word collection cleared and the text entities restricted to the
        /// NEGATIVE / POSITIVE / SUPPORT / SENSITIVE / OPPOSE / EXPLETIVE types.
        /// </summary>
        /// <param name="token">API token, checked through <c>ValidateToken</c> for the "Extract" operation.</param>
        /// <param name="text">Text stored as <c>ArabiziText</c> on the entry handed to <c>Arabizer.train</c>.</param>
        /// <returns>
        /// <c>Ok(...)</c> wrapping the filtered training output; when the token is rejected,
        /// <c>Ok(...)</c> wrapping an object that carries <c>StatusCode</c> (NotAcceptable) and
        /// <c>ErrorMessage</c>.
        /// </returns>
        public IHttpActionResult ExtractSaNers(/*[FromBody]*/ string token, String text)
        {
            // are we allowed to use the API
            var errorMessage = string.Empty;

            if (!ValidateToken(token, "Extract", out errorMessage))
            {
                dynamic errexpando = new ExpandoObject();
                errexpando.StatusCode   = HttpStatusCode.NotAcceptable;
                errexpando.ErrorMessage = errorMessage.Trim(new char[] { '\"' });
                return(Ok(errexpando));
            }

            M_ARABIZIENTRY arabiziEntry = new M_ARABIZIENTRY
            {
                ArabiziText      = text,
                ArabiziEntryDate = DateTime.Now
            };

            // call real work
            // use expando to merge the json outputs : arabizi + arabic + latin words
            // plus also M_ARABICDARIJAENTRY_TEXTENTITYs
            dynamic expando = new Arabizer().train(arabiziEntry, null, thisLock: thisLock);

            // keep only arabizi + arabic + ner
            expando.M_ARABICDARIJAENTRY_LATINWORDs = null;

            // entity types that survive the filter (ordinal, case-sensitive match as before)
            var keptTypes = new HashSet <string>(StringComparer.Ordinal)
            {
                "NEGATIVE", "POSITIVE", "SUPPORT", "SENSITIVE", "OPPOSE", "EXPLETIVE"
            };

            List <M_ARABICDARIJAENTRY_TEXTENTITY> textEntities = expando.M_ARABICDARIJAENTRY_TEXTENTITYs;

            if (textEntities == null)
            {
                // no entity list was produced: answer with an empty list instead of
                // dereferencing null
                textEntities = new List <M_ARABICDARIJAENTRY_TEXTENTITY>();
            }
            else
            {
                // incomplete items (null item, null TextEntity, null Type) cannot match a
                // kept type, so they are removed rather than crashing the request
                textEntities.RemoveAll(m =>
                                       m == null ||
                                       m.TextEntity == null ||
                                       m.TextEntity.Type == null ||
                                       !keptTypes.Contains(m.TextEntity.Type));
            }

            expando.M_ARABICDARIJAENTRY_TEXTENTITYs = textEntities;

            return(Ok(expando));
        }
        /// <summary>
        /// Converts the supplied arabizi text with <c>TextConverter.Convert</c>, persists the
        /// arabizi entry, the converted entry and every extracted latin word through
        /// <c>TextPersist.Serialize</c>, and returns all three in one payload.
        /// </summary>
        /// <param name="token">API token, checked through <c>ValidateToken</c> for the "GetArabicDarijaEntry" operation.</param>
        /// <param name="text">Arabizi text to convert.</param>
        /// <returns>
        /// <c>Ok(...)</c> wrapping an object with <c>M_ARABIZIENTRY</c>, <c>M_ARABICDARIJAENTRY</c> and
        /// <c>M_ARABICDARIJAENTRY_LATINWORD</c>; <c>NotFound()</c> when there is no text to convert;
        /// when the token is rejected, <c>Ok(...)</c> wrapping an object that carries
        /// <c>StatusCode</c> (NotAcceptable) and <c>ErrorMessage</c>.
        /// </returns>
        public IHttpActionResult GetArabicDarijaEntry(/*[FromBody]*/ string token, String text)
        {
            var errorMessage = string.Empty;

            if (!ValidateToken(token, "GetArabicDarijaEntry", out errorMessage))
            {
                // An HttpResponseMessage handed to Ok(...) is treated as body content to
                // serialize, not as the response itself, so the error text was not delivered
                // in a usable form. Report the failure with the same payload shape the other
                // token-protected actions use.
                dynamic errexpando = new ExpandoObject();
                errexpando.StatusCode   = HttpStatusCode.NotAcceptable;
                errexpando.ErrorMessage = (errorMessage ?? string.Empty).Trim(new char[] { '\"' });
                return(Ok(errexpando));
            }

            M_ARABIZIENTRY arabiziEntry = new M_ARABIZIENTRY
            {
                ArabiziText      = text,
                ArabiziEntryDate = DateTime.Now
            };

            // nothing to convert: no darija entry can be produced
            if (arabiziEntry.ArabiziText == null)
            {
                return(NotFound());
            }

            // Arabizi to arabic script via direct call to perl script
            var textConverter = new TextConverter();

            M_ARABICDARIJAENTRY arabicDarijaEntry;
            List <M_ARABICDARIJAENTRY_LATINWORD> arabicDarijaEntryLatinWords = new List <M_ARABICDARIJAENTRY_LATINWORD>();

            // conversion and persistence run under the shared lock
            lock (thisLock)
            {
                // complete arabizi entry
                arabiziEntry.ID_ARABIZIENTRY = Guid.NewGuid();

                // prepare darija from perl script
                var arabicText = textConverter.Convert(arabiziEntry.ArabiziText);
                arabicDarijaEntry = new M_ARABICDARIJAENTRY
                {
                    ID_ARABICDARIJAENTRY = Guid.NewGuid(),
                    ID_ARABIZIENTRY      = arabiziEntry.ID_ARABIZIENTRY,
                    ArabicDarijaText     = arabicText
                };

                // Save arabiziEntry to Serialization
                String path = HostingEnvironment.MapPath("~/App_Data/data_M_ARABIZIENTRY.txt");
                new TextPersist().Serialize <M_ARABIZIENTRY>(arabiziEntry, path);

                // Save arabicDarijaEntry to Serialization
                path = HostingEnvironment.MapPath("~/App_Data/data_M_ARABICDARIJAENTRY.txt");
                new TextPersist().Serialize <M_ARABICDARIJAENTRY>(arabicDarijaEntry, path);

                // latin words
                MatchCollection matches = TextTools.ExtractLatinWords(arabicDarijaEntry.ArabicDarijaText);

                // the target file is the same for every latin word, so resolve it once
                String latinWordsPath = HostingEnvironment.MapPath("~/App_Data/data_M_ARABICDARIJAENTRY_LATINWORD.txt");

                // save every match
                // also calculate on the fly the number of variants
                // NOTE(review): TextFrequency / TextConverter / TextPersist are still created per
                // iteration as before; they could probably be shared - confirm they hold no
                // per-call state before hoisting.
                foreach (Match match in matches)
                {
                    String arabiziWord = match.Value;

                    // do not consider words in the bidict as latin words
                    if (new TextFrequency().BidictContainsWord(arabiziWord))
                    {
                        continue;
                    }

                    int variantsCount = new TextConverter().GetAllTranscriptions(arabiziWord).Count;

                    var latinWord = new M_ARABICDARIJAENTRY_LATINWORD
                    {
                        ID_ARABICDARIJAENTRY_LATINWORD = Guid.NewGuid(),
                        ID_ARABICDARIJAENTRY           = arabicDarijaEntry.ID_ARABICDARIJAENTRY,
                        LatinWord     = arabiziWord,
                        VariantsCount = variantsCount
                    };

                    arabicDarijaEntryLatinWords.Add(latinWord);

                    // Save to Serialization
                    new TextPersist().Serialize <M_ARABICDARIJAENTRY_LATINWORD>(latinWord, latinWordsPath);
                }
            }

            // use expando to merge the json outputs : arabizi + arabic + latin words
            dynamic expando = new ExpandoObject();
            expando.M_ARABIZIENTRY                = arabiziEntry;
            expando.M_ARABICDARIJAENTRY           = arabicDarijaEntry;
            expando.M_ARABICDARIJAENTRY_LATINWORD = arabicDarijaEntryLatinWords;
            return(Ok(expando));
        }