/// <summary>
/// Identifies the language of every string in <paramref name="Input"/> with both
/// <c>NBIdentifier</c> and <c>RLIdentifier</c> and collects the two codes per string.
/// </summary>
/// <param name="Input">Payload whose <c>StringList</c> and <c>SegmentNumber</c> are read index by index.</param>
/// <returns>
/// A new payload carrying the input's <c>FileID</c> and <c>SegmentID</c>, one two-element
/// array per input string (both elements empty when the string is null/empty or when no
/// language was identified) and the copied segment numbers.
/// </returns>
public Payload RunPlugin(Payload Input)
        {
            Payload pData = new Payload();

            pData.FileID    = Input.FileID;
            pData.SegmentID = Input.SegmentID;

            for (int i = 0; i < Input.StringList.Count; i++)
            {
                var text = Input.StringList[i];

                string nbCode = string.Empty;
                string rlCode = string.Empty;

                if (!string.IsNullOrEmpty(text))
                {
                    // FirstOrDefault rather than First: an identifier that returns no candidate
                    // must produce an empty code instead of an InvalidOperationException.
                    nbCode = NBIdentifier.Identify(text).FirstOrDefault()?.Item1?.Iso639_2T?.Replace("simple", "en") ?? string.Empty;
                    rlCode = RLIdentifier.Identify(text).FirstOrDefault()?.Item1?.Iso639_2T?.Replace("simple", "en") ?? string.Empty;
                }

                pData.StringArrayList.Add(new string[2] { nbCode, rlCode });
                pData.SegmentNumber.Add(Input.SegmentNumber[i]);
            }

            return(pData);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Handles an incoming message. A message from a non-bot author that carries the mention
        /// prefix or the "j." prefix is executed as a command; when no command succeeded, the
        /// message is spell-checked against the dictionary of its detected language and the
        /// first unknown word is reported back to the channel.
        /// </summary>
        /// <param name="arg">The received message; ignored unless it is a <c>SocketUserMessage</c> not authored by this client.</param>
        private async Task HandleCommandAsync(SocketMessage arg)
        {
            SocketUserMessage msg = arg as SocketUserMessage;

            if (msg == null || msg.Author.Id == Client.CurrentUser.Id)
            {
                return;
            }
            int pos = 0;

            if (!arg.Author.IsBot && (msg.HasMentionPrefix(Client.CurrentUser, ref pos) || msg.HasStringPrefix("j.", ref pos)))
            {
                SocketCommandContext context = new SocketCommandContext(Client, msg);
                var result = await _commands.ExecuteAsync(context, pos, null);

                if (result.IsSuccess)
                {
                    return;
                }

                // A failed command is logged and then falls through to the spell check below.
                Console.WriteLine(result.Error.ToString() + ": " + result.ErrorReason);
            }

            // Spell check - skipped for messages that look like they address another bot
            // ('.' or '!' within the first three characters).
            bool looksLikeBotCommand =
                msg.Content.StartsWith("!") || msg.Content.StartsWith(".") ||
                (msg.Content.Length > 2 && (msg.Content[1] == '.' || msg.Content[1] == '!')) ||
                (msg.Content.Length > 3 && (msg.Content[2] == '.' || msg.Content[2] == '!'));  // Common used bot prefix
            if (looksLikeBotCommand)
            {
                return;
            }

            string msgText = Regex.Replace(msg.Content, "https?:\\/\\/(www\\.)?[^\\.]+\\.([\\s\\S]+(\\/|\\.)?)+", "");
            if (string.IsNullOrWhiteSpace(msgText))
            {
                // Nothing left to check once URLs are removed.
                return;
            }

            // Identify the language on the same URL-stripped text that is spell-checked, so
            // that URL fragments do not influence the detected language.
            var languages           = _identifier.Identify(msgText);
            var mostCertainLanguage = languages.FirstOrDefault();
            if (mostCertainLanguage == null || !File.Exists("Dictionaries/" + mostCertainLanguage.Item1.Iso639_2T + ".dic"))
            {
                return;
            }

            var checker = _dictionaries[mostCertainLanguage.Item1.Iso639_2T];
            foreach (string s in msgText.Split(_splitChar, StringSplitOptions.RemoveEmptyEntries))
            {
                if (!s.Any(x => char.IsLetter(x)))
                {
                    continue;
                }
                if (checker.Check(s))
                {
                    continue;
                }

                var word        = s.Trim(_splitChar);
                var suggestions = checker.Suggest(word).ToArray();
                if (suggestions.Length == 0)
                {
                    await msg.Channel.SendMessageAsync("\"" + s + "\" doesn't exists");
                }
                else
                {
                    await msg.Channel.SendMessageAsync("\"" + s + "\" doesn't exists, maybe you meant \"" + suggestions[0] + "\"?");
                }

                // Only the first misspelled word is reported per message.
                break;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Tries to detect the language of <paramref name="text"/> with a lazily loaded
        /// NTextCat identifier.
        /// </summary>
        /// <param name="text">Text to classify.</param>
        /// <returns>
        /// The result of <c>ConvertISOLangugueNameToSystemName</c> for the top-ranked language's
        /// ISO 639-3 code, or an empty string when the text is null/empty, nothing was
        /// identified, or identification has failed before.
        /// </returns>
        public string TryDetectLanguague(string text)
        {
            string result = string.Empty;

            if (_LanguageIdentificationFailed)
            {
                return(result);
            }

            // Reject unusable input before the try block: an exception caused by a single bad
            // input would otherwise set the failure flag and disable detection for good.
            if (string.IsNullOrEmpty(text))
            {
                return(result);
            }

            try
            {
                if (_NTextCatFactory == null || _NTextCatIdentifier == null)
                {
                    _NTextCatFactory    = new RankedLanguageIdentifierFactory();
                    _NTextCatIdentifier = _NTextCatFactory.Load(_NTextCatLanguageModelsPath);
                }

                var languages           = _NTextCatIdentifier.Identify(text);
                var mostCertainLanguage = languages.FirstOrDefault();

                if (mostCertainLanguage != null)
                {
                    result = ConvertISOLangugueNameToSystemName(mostCertainLanguage.Item1.Iso639_3);
                }
            }
            catch (Exception e)
            {
                // Any failure here disables further attempts; the exception is logged once.
                _LanguageIdentificationFailed = true;
                _Logger?.WriteLog(e.ToString());
            }

            return(result);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Runs the language identifier over the request text and formats the ranked matches.
        /// </summary>
        /// <param name="model">Request whose <c>TextForLanguageClassification</c> is classified.</param>
        /// <returns>The value produced by <c>FormatResponse</c> for the matches and the request.</returns>
        public DetectedLanguageResponse DetectLanguage(LanguageDetectRequest model)
        {
            IEnumerable<Tuple<LanguageInfo, double>> rankedMatches =
                _identifier.Identify(model.TextForLanguageClassification);

            return FormatResponse(rankedMatches, model);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Returns the ISO 639-2/T code of the top-ranked language for <paramref name="message"/>,
        /// or an explanatory sentence when the identifier yields no candidate.
        /// </summary>
        /// <param name="message">Text to classify.</param>
        private static string Lang(string message)
        {
            var topMatch = Identifier.Identify(message).FirstOrDefault();

            if (topMatch == null)
            {
                return "The language could not be identified with an acceptable degree of certainty";
            }

            return topMatch.Item1.Iso639_2T;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Identifies the language of <paramref name="sequence"/> and saves it via
        /// <c>SaveToFile</c> when the top-ranked language is English ("eng") with a score of at
        /// most <c>CONFIDENCE_THRESHOLD</c>.
        /// </summary>
        /// <param name="identifier">Identifier used for the classification.</param>
        /// <param name="sequence">Text to classify; it is saved in its original casing.</param>
        /// <param name="index">Index forwarded unchanged to <c>SaveToFile</c>.</param>
        public static void DetectLanguage(RankedLanguageIdentifier identifier, string sequence, int index)
        {
            // Lower-case with the invariant culture so the classified text does not depend on
            // the machine's locale (e.g. the Turkish mapping of 'I' to dotless 'ı').
            var languages           = identifier.Identify(sequence.ToLowerInvariant());
            var mostCertainLanguage = languages.FirstOrDefault();

            if (mostCertainLanguage != null && mostCertainLanguage.Item1.Iso639_3 == "eng" && mostCertainLanguage.Item2 <= CONFIDENCE_THRESHOLD)
            {
                SaveToFile(sequence, mostCertainLanguage.Item2, index);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Returns the ISO 639-3 code of the top-ranked language for <paramref name="text"/>,
        /// or <c>null</c> when the identifier yields no candidate.
        /// </summary>
        /// <param name="text">Text to classify.</param>
        public static string GetLanguage(string text)
        {
            // Code list: http://en.wikipedia.org/wiki/List_of_ISO_639-3_codes
            return _identifier.Identify(text).FirstOrDefault()?.Item1.Iso639_3;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Determines a language code for <paramref name="text"/>. The character-based checks
        /// run first ("eng", then "jpn"); otherwise the NTextCat identifier, loaded on first use
        /// from "Core14.profile.xml" next to the executable, is consulted.
        /// </summary>
        /// <param name="text">Text to classify.</param>
        /// <returns>
        /// "eng", "jpn", the ISO 639-3 code of the top-ranked language, or an empty string when
        /// the profile file is missing or nothing was identified.
        /// </returns>
        string getLanguage(string text)
        {
            if (isEnglishCharacters(text))
            {
                return "eng";
            }
            if (isJapaneseCharacters(text))
            {
                return "jpn";
            }

            if (ncIdentifier_ == null)
            {
                // Load the identifier lazily; a missing profile is reported to the user.
                string exeDirectory = Path.GetDirectoryName(Application.ExecutablePath);
                string profilePath  = Path.Combine(exeDirectory, @"Core14.profile.xml");
                if (!File.Exists(profilePath))
                {
                    MessageBox.Show("Profile file of NTextCat not found.");
                    return string.Empty;
                }

                ncIdentifier_ = new RankedLanguageIdentifierFactory().Load(profilePath);
            }

            var topMatch = ncIdentifier_.Identify(text).FirstOrDefault();

            return topMatch == null ? string.Empty : topMatch.Item1.Iso639_3;
        }
        /// <summary>
        /// Detects the language of a notice from at most its first 500 characters.
        /// </summary>
        /// <param name="noticeContent">Notice text; may be shorter than the sample size.</param>
        /// <returns>
        /// The <c>Language</c> member whose name matches (case-insensitively) the ISO 639-2/T
        /// code of the top-ranked language, or <c>Language.Unknown</c> when the content is
        /// null/empty, nothing was identified, or the code does not parse as a <c>Language</c>.
        /// </returns>
        private Language DetectLanguage(string noticeContent)
        {
            const int maxSampleLength = 500;

            if (string.IsNullOrEmpty(noticeContent))
            {
                return(Language.Unknown);
            }

            // Substring(0, 500) throws ArgumentOutOfRangeException on shorter content, so only
            // truncate when the text actually exceeds the sample size.
            var sample = noticeContent.Length > maxSampleLength
                ? noticeContent.Substring(0, maxSampleLength)
                : noticeContent;

            var languages = _rankedLanguageIdentifier.Identify(sample);
            var iso       = languages.FirstOrDefault()?.Item1.Iso639_2T;

            if (iso == null)
            {
                return(Language.Unknown);
            }

            var success = Enum.TryParse(typeof(Language), iso, true, out var language);

            return(success ? (Language)language : Language.Unknown);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Identifies the language of <paramref name="text"/> and wraps its ISO 639-3 code in a
        /// domain <c>Language</c>.
        /// </summary>
        /// <param name="text">Text to classify.</param>
        /// <returns>The domain language, or <c>null</c> when no language info is available.</returns>
        public Domain.Language.Language Identify(string text)
        {
            var detected = _identifier.Identify(text).FirstOrDefault()?.Item1;

            return detected == null
                ? null
                : new Domain.Language.Language(detected.Iso639_3);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Loads the Core14 profile and checks that a Spanish and an English sample sentence
        /// are identified by their ISO 639-3 codes.
        /// </summary>
        public void RetriveLanguageDataFromString()
        {
            var spanishMessage = "como esta por que";
            var englishMessage = "why are we doing this";

            var file = new FileInfo(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"Configuration\Core14.profile.xml"));

            // The profile is only read, so open it read-only (File.Open with FileMode.Open
            // alone requests read/write access).
            using (var readStream = File.OpenRead(file.FullName))
            {
                var sut = new RankedLanguageIdentifierFactory();
                _identifier = sut.Load(readStream);
            }

            var mostCertainSpaLanguage = _identifier.Identify(spanishMessage).FirstOrDefault();
            var mostCertainEngLanguage = _identifier.Identify(englishMessage).FirstOrDefault();

            // Fail with a clear assertion instead of a NullReferenceException.
            mostCertainSpaLanguage.Should().NotBeNull();
            mostCertainEngLanguage.Should().NotBeNull();

            // Iso639_3 holds three-letter ISO 639-3 codes, not culture names such as "es-MX".
            // NOTE(review): "spa"/"eng" assume the Core14 profile ranks these short samples as
            // Spanish/English - confirm against the profile.
            mostCertainSpaLanguage.Item1.Iso639_3.Should().Be("spa");
            mostCertainEngLanguage.Item1.Iso639_3.Should().Be("eng");
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Returns the ISO 639-3 code of the candidate with the lowest score for
        /// <paramref name="contents"/>, falling back to "eng" when no candidate qualifies, the
        /// code is null, or identification throws.
        /// </summary>
        /// <param name="contents">Text to classify.</param>
        public string GetLanguageId(string contents)
        {
            try
            {
                LanguageInfo closest     = null;
                double       lowestScore = double.MaxValue;

                // Keep the first candidate whose score is strictly lower than all earlier ones.
                foreach (var candidate in _languageIdentifier.Identify(contents))
                {
                    if (candidate.Item2 < lowestScore)
                    {
                        lowestScore = candidate.Item2;
                        closest     = candidate.Item1;
                    }
                }

                if (closest == null)
                {
                    return "eng";
                }

                return closest.Iso639_3 ?? "eng";
            }
            catch (Exception)
            {
                // Best effort: any failure falls back to English.
                return "eng";
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Detects the language of <paramref name="text"/> and maps it to the name of the
        /// neutral culture with the same three-letter ISO language name.
        /// </summary>
        /// <param name="text">Text to classify.</param>
        /// <returns>
        /// The culture name, or an empty string when no language was identified or no neutral
        /// culture matches its ISO 639-3 code.
        /// </returns>
        public string Detect(string text)
        {
            IEnumerable <Tuple <LanguageInfo, double> > languages = identifier.Identify(text);
            Tuple <LanguageInfo, double> mostCertainLanguage      = languages.FirstOrDefault();

            CultureInfo culture = null;

            // FirstOrDefault may return null; only look up a culture when a language was found
            // instead of dereferencing the null result inside the predicate.
            if (mostCertainLanguage != null)
            {
                string isoCode = mostCertainLanguage.Item1.Iso639_3;
                culture = CultureInfo.GetCultures(CultureTypes.NeutralCultures).FirstOrDefault(
                    ci => string.Equals(ci.ThreeLetterISOLanguageName, isoCode,
                                        StringComparison.OrdinalIgnoreCase));
            }

            if (culture != null)
            {
                Console.WriteLine("The language of the text is '{0}' (ISO639-3  code)", mostCertainLanguage.Item1.Iso639_3);
                return(culture.Name);
            }
            else
            {
                Console.WriteLine("The language couldn’t be identified with an acceptable degree of certainty");
                return("");
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Identifies the language of a Spanish sentence, builds a culture from the detected
        /// code, applies it to the current thread and asserts the resulting culture name.
        /// The thread's original cultures are restored afterwards.
        /// </summary>
        public void ChangeCurrentCulture()
        {
            var spanishMessage = "No stock disponible en la maquina por favor";

            var file = new FileInfo(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"Configuration\Core14.profile.xml"));

            // The profile is only read, so open it read-only.
            using (var readStream = File.OpenRead(file.FullName))
            {
                var sut = new RankedLanguageIdentifierFactory();
                _identifier = sut.Load(readStream);
            }

            var spanishLanguageIdentifier = _identifier.Identify(spanishMessage);
            var mostCertainSpaLanguage    = spanishLanguageIdentifier.FirstOrDefault();

            // Fail with a clear assertion instead of a NullReferenceException.
            mostCertainSpaLanguage.Should().NotBeNull();

            var theSpaLanguage = mostCertainSpaLanguage.Item1.Iso639_3;

            // NOTE(review): Iso639_3 is a three-letter code; whether CultureInfo accepts it and
            // resolves to "es-MX" depends on the platform's culture data - confirm.
            CultureInfo ci = new CultureInfo(theSpaLanguage);

            var currentThread     = System.Threading.Thread.CurrentThread;
            var originalCulture   = currentThread.CurrentCulture;
            var originalUICulture = currentThread.CurrentUICulture;

            try
            {
                currentThread.CurrentUICulture = ci;
                currentThread.CurrentCulture   = CultureInfo.CreateSpecificCulture(ci.Name);
                currentThread.CurrentCulture.Name.Should().Be("es-MX", ci.Name);
            }
            finally
            {
                // Do not leak the modified culture into tests that run later on this thread.
                currentThread.CurrentCulture   = originalCulture;
                currentThread.CurrentUICulture = originalUICulture;
            }
        }
        /// <summary>
        /// Pipeline step that stores the ISO 639-3 code of the top-ranked language of the
        /// property bag's text under <c>LanguagePropertyName</c>.
        /// </summary>
        /// <param name="crawler">The crawler; validated as non-null.</param>
        /// <param name="propertyBag">The property bag whose <c>Text</c> is classified; validated as non-null.</param>
        /// <returns>A completed task with result <c>true</c> in every case.</returns>
        public Task <bool> Process(ICrawler crawler, PropertyBag propertyBag)
        {
            AspectF.Define
                .NotNull(crawler, "crawler")
                .NotNull(propertyBag, "propertyBag");

            string text = propertyBag.Text;

            if (!text.IsNullOrEmpty())
            {
                IEnumerable<Tuple<LanguageInfo, double>> ranked = _identifier.Identify(text);
                Tuple<LanguageInfo, double> topMatch = ranked.FirstOrDefault();

                // Leave the property untouched when nothing was identified.
                if (topMatch != null)
                {
                    propertyBag[LanguagePropertyName].Value = topMatch.Item1.Iso639_3;
                }
            }

            return Task.FromResult(true);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Stems <paramref name="or"/> with the English or Russian stemmer, chosen by whichever
        /// of the two languages has the lowest identification score.
        /// </summary>
        /// <param name="identifier">Identifier used to rank candidate languages.</param>
        /// <param name="or">The word to stem.</param>
        /// <returns>The stemmed word.</returns>
        /// <exception cref="NotSupportedException">Neither "eng" nor "rus" is among the candidates.</exception>
        private static string PrepareWord(RankedLanguageIdentifier identifier, string or)
        {
            var ranked     = identifier.Identify(or).ToArray();
            var candidates = ranked
                .Where(x => x.Item1.Iso639_3 == "eng" || x.Item1.Iso639_3 == "rus")
                .ToArray();

            if (candidates.Length == 0)
            {
                throw new NotSupportedException();
            }

            // The candidate with the lowest score is selected.
            var lowestScore = candidates.Min(x => x.Item2);
            var best        = candidates.First(x => x.Item2 == lowestScore);

            IStemmer stemmer;
            switch (best.Item1.Iso639_3)
            {
                case "eng":
                    stemmer = new EnglishStemmer();
                    break;
                case "rus":
                    stemmer = new RussianStemmer();
                    break;
                default:
                    throw new Exception();
            }

            return stemmer.Stem(or);
        }
    }
Ejemplo n.º 17
0
        /// <summary>
        /// Maps an <c>InstaMedia</c> item to a <c>ResultMedia</c> record, including the two
        /// top-ranked caption languages, date breakdowns, location, media counts, hashtags,
        /// product tags, media URLs and user tags.
        /// </summary>
        /// <param name="m">The media item to map.</param>
        /// <param name="tag">The tag stored as <c>MainTag</c>.</param>
        /// <returns>The populated <c>ResultMedia</c>; its collection properties are LINQ queries over <paramref name="m"/>.</returns>
        public ResultMedia MapInstaMedia(InstaMedia m, string tag)
        {
            var langResult = languageIdentifier.Identify(m.Caption?.Text ?? "").ToArray();

            // ElementAtOrDefault instead of langResult[1]: the identifier may return fewer than
            // two candidates, in which case indexing throws IndexOutOfRangeException.
            var bestLang   = langResult.ElementAtOrDefault(0);
            var secondLang = langResult.ElementAtOrDefault(1);

            int intCommentsCount = 0;

            int.TryParse(m.CommentsCount, out intCommentsCount);

            int weekOfYear = CultureInfo.InvariantCulture.Calendar.GetWeekOfYear(m.TakenAt, CalendarWeekRule.FirstDay, DayOfWeek.Monday);

            return(new ResultMedia()
            {
                MainTag = tag,
                Pk = m.Pk,
                Code = m.Code,
                InstaIdentifier = m.InstaIdentifier,
                Title = m.Title,
                Caption = m.Caption?.Text,
                LangCode = bestLang?.Item1.Iso639_2T,
                LangCodeScore = bestLang?.Item2,
                SecondLangCode = secondLang?.Item1.Iso639_2T,
                SecondLangCodeScore = secondLang?.Item2,
                Date = m.TakenAt,
                DateYear = m.TakenAt.Year,
                DateMonth = m.TakenAt.Month,
                DateWeek = weekOfYear,
                DateMonthLabel = $"{m.TakenAt.Year}/{m.TakenAt.Month}",
                DateWeekLabel = $"{m.TakenAt.Year}/{weekOfYear}",
                UserName = m.User.UserName,
                UserPK = m.User.Pk,
                LikesCount = m.LikesCount,
                CommentsCount = intCommentsCount,
                CommentsCountText = m.CommentsCount,
                UserFollowersCount = m.User.FollowersCount,
                MutualFollowers = m.User.MutualFollowers,
                LocationShortName = m.Location?.ShortName ?? "",
                LocationName = m.Location?.Name ?? "",
                LocationCity = m.Location?.City ?? "",
                LocationAddress = m.Location?.Address ?? "",
                LocationFacebookPlacesId = m.Location?.FacebookPlacesId ?? -1,
                LocationLat = m.Location?.Lat ?? 0,
                LocationLng = m.Location?.Lng ?? 0,
                VideoDuration = m.VideoDuration,
                ViewCount = m.ViewCount,
                MediaType = (int)m.MediaType,
                HasVideo = m.MediaType == InstaMediaType.Video || (m.Carousel?.Any(i => i.MediaType == InstaMediaType.Video || (i.Videos?.Any() ?? false)) ?? false),
                HasImage = m.MediaType == InstaMediaType.Image || (m.Carousel?.Any(i => i.MediaType == InstaMediaType.Image || (i.Images?.Any() ?? false)) ?? false),
                // Carousel is treated as nullable everywhere else in this method, so count null-safely.
                ImageCount = m.MediaType == InstaMediaType.Image ? 1 : m.MediaType == InstaMediaType.Video ? 0 : (m.Carousel?.Count(c => c.MediaType == InstaMediaType.Image) ?? 0),
                VideoCount = m.MediaType == InstaMediaType.Video ? 1 : m.MediaType == InstaMediaType.Image ? 0 : (m.Carousel?.Count(c => c.MediaType == InstaMediaType.Video) ?? 0),
                IsMultiPost = m.IsMultiPost,
                ProductType = m.ProductType,
                NumberOfQualities = m.NumberOfQualities,
                // Tag types: 1 = caption hashtag, 2 = hashtag in a preview comment by the
                // media's author, 3 = hashtag in a preview comment by another user.
                Tags = GetHashtags(m.Caption?.Text ?? "").Select(t => new ResultMediaTag()
                {
                    MediaCode = m.Code,
                    Tag = t,
                    Type = 1
                }).Concat(m.PreviewComments.Where(c => c.UserId == m.User.Pk).Select(c => c.Text ?? "").SelectMany(t => GetHashtags(t)).Select(t => new ResultMediaTag()
                {
                    MediaCode = m.Code,
                    Tag = t,
                    Type = 2
                })).Concat(m.PreviewComments.Where(c => c.UserId != m.User.Pk).Select(c => c.Text ?? "").SelectMany(t => GetHashtags(t)).Select(t => new ResultMediaTag()
                {
                    MediaCode = m.Code,
                    Tag = t,
                    Type = 3
                })),
                ResultMediaProductTags = m.ProductTags.Select(t => new ResultMediaProductTag()
                {
                    MediaCode = m.Code,
                    Name = t.Product?.Name ?? "",
                    ExternalUrl = t.Product?.ExternalUrl ?? "",
                    MerchantUserName = t.Product?.Merchant?.Username ?? "",
                    MainImageUri = t.Product?.MainImage?.FirstOrDefault()?.Uri ?? "",
                    FullPrice = t.Product?.FullPrice ?? ""
                }),
                // CarouselIndex -1 marks URLs of the media itself; carousel items use their index.
                ResultMediaVideoUrls = (m.Carousel?.Any(i => i.MediaType == InstaMediaType.Video || (i.Videos?.Any() ?? false)) ?? false)
                    ? m.Videos.Select(v => new ResultMediaVideoUrl()
                {
                    MediaCode = m.Code,
                    Url = v.Uri,
                    Length = v.Length,
                    Height = v.Height,
                    Width = v.Width,
                    Type = v.Type,
                    CarouselIndex = -1
                }).Concat(m.Carousel.SelectMany((i, index) => i.Videos?.Select(v => new ResultMediaVideoUrl()
                {
                    MediaCode = m.Code,
                    Url = v.Uri,
                    Length = v.Length,
                    Height = v.Height,
                    Width = v.Width,
                    Type = v.Type,
                    CarouselIndex = index
                }) ?? Enumerable.Empty<ResultMediaVideoUrl>())) // an item may carry no video list
                    : m.Videos.Select(v => new ResultMediaVideoUrl()
                {
                    MediaCode = m.Code,
                    Url = v.Uri,
                    Length = v.Length,
                    Height = v.Height,
                    Width = v.Width,
                    Type = v.Type,
                    CarouselIndex = -1
                }),
                ResultMediaImageUrls = (m.Carousel?.Any(i => i.MediaType == InstaMediaType.Image || (i.Images?.Any() ?? false)) ?? false)
                    ? m.Images.Select(i => new ResultMediaImageUrl()
                {
                    MediaCode = m.Code,
                    Url = i.Uri,
                    Height = i.Height,
                    Width = i.Width,
                    CarouselIndex = -1
                }).Concat(m.Carousel.SelectMany((i, index) => i.Images?.Select(v => new ResultMediaImageUrl()
                {
                    MediaCode = m.Code,
                    Url = v.Uri,
                    Height = v.Height,
                    Width = v.Width,
                    CarouselIndex = index
                }) ?? Enumerable.Empty<ResultMediaImageUrl>())) // an item may carry no image list
                    : m.Images.Select(i => new ResultMediaImageUrl()
                {
                    MediaCode = m.Code,
                    Url = i.Uri,
                    Height = i.Height,
                    Width = i.Width,
                    CarouselIndex = -1
                }),
                ResultMediaUserTags = m.UserTags.Select(u => new ResultMediaUserTag()
                {
                    MediaCode = m.Code,
                    UserName = u.User.UserName
                })
            });
        }