/// <summary>
/// Runs both language identifiers (<c>NBIdentifier</c> and <c>RLIdentifier</c>) over every
/// string of the input payload and returns a new payload holding one two-element code array
/// per input string.
/// </summary>
/// <param name="Input">Payload whose <c>StringList</c> and <c>SegmentNumber</c> are read index by index.</param>
/// <returns>
/// A new payload carrying the input's <c>FileID</c>/<c>SegmentID</c>, one <c>string[2]</c> per input
/// string (two empty strings for an empty input string) and the copied segment numbers.
/// </returns>
public Payload RunPlugin(Payload Input)
{
    var output = new Payload
    {
        FileID = Input.FileID,
        SegmentID = Input.SegmentID
    };

    for (int index = 0; index < Input.StringList.Count; index++)
    {
        var text = Input.StringList[index];

        if (text.Length == 0)
        {
            // Nothing to identify: keep the slot so the result stays aligned with the segment numbers.
            output.StringArrayList.Add(new[] { "", "" });
        }
        else
        {
            var nbRanking = NBIdentifier.Identify(text);
            var rlRanking = RLIdentifier.Identify(text);

            // The top candidate of each ranking is used; the "simple" code fragment is rewritten to "en".
            var nbCode = nbRanking.First().Item1.Iso639_2T.Replace("simple", "en");
            var rlCode = rlRanking.First().Item1.Iso639_2T.Replace("simple", "en");

            output.StringArrayList.Add(new[] { nbCode, rlCode });
        }

        output.SegmentNumber.Add(Input.SegmentNumber[index]);
    }

    return output;
}
/// <summary>
/// Handles an incoming message: executes it as a bot command when it carries the bot's
/// mention prefix or the "j." prefix, otherwise spell-checks it and reports the first
/// word the dictionary rejects.
/// </summary>
/// <param name="arg">The received message; anything that is not a <c>SocketUserMessage</c> is ignored.</param>
private async Task HandleCommandAsync(SocketMessage arg)
{
    var msg = arg as SocketUserMessage;
    if (msg == null || msg.Author.Id == Client.CurrentUser.Id)
    {
        return;
    }

    int pos = 0;
    bool isCommand = !arg.Author.IsBot
        && (msg.HasMentionPrefix(Client.CurrentUser, ref pos) || msg.HasStringPrefix("j.", ref pos));
    if (isCommand)
    {
        var context = new SocketCommandContext(Client, msg);
        var result = await _commands.ExecuteAsync(context, pos, null);
        if (result.IsSuccess)
        {
            return;
        }

        // A failed command is logged and then still falls through to the spell check below.
        Console.WriteLine(result.Error.ToString() + ": " + result.ErrorReason);
    }

    // Spell check
    string content = msg.Content;

    // Commonly used bot prefixes: '.' or '!' as the first, second or third character.
    bool looksLikeBotPrefix =
        content.StartsWith("!")
        || content.StartsWith(".")
        || (content.Length > 2 && (content[1] == '.' || content[1] == '!'))
        || (content.Length > 3 && (content[2] == '.' || content[2] == '!'));
    if (looksLikeBotPrefix)
    {
        return;
    }

    // URLs are removed from the text that gets split into words.
    string msgText = Regex.Replace(content, "https?:\\/\\/(www\\.)?[^\\.]+\\.([\\s\\S]+(\\/|\\.)?)+", "");

    // NOTE(review): the language is identified on the raw content, not on the URL-stripped text - confirm this is intended.
    var topLanguage = _identifier.Identify(content).FirstOrDefault();
    if (topLanguage == null || !File.Exists("Dictionaries/" + topLanguage.Item1.Iso639_2T + ".dic"))
    {
        return;
    }

    var checker = _dictionaries[topLanguage.Item1.Iso639_2T];
    foreach (string s in msgText.Split(_splitChar, StringSplitOptions.RemoveEmptyEntries))
    {
        // Tokens without any letter (numbers, punctuation, ...) are not spell-checked.
        if (!s.Any(x => char.IsLetter(x)))
        {
            continue;
        }

        if (checker.Check(s))
        {
            continue;
        }

        var word = s.Trim(_splitChar);
        var suggestions = checker.Suggest(word).ToArray();
        string reply = suggestions.Length == 0
            ? "\"" + s + "\" doesn't exists"
            : "\"" + s + "\" doesn't exists, maybe you meant \"" + suggestions[0] + "\"?";
        await msg.Channel.SendMessageAsync(reply);

        // Only the first rejected word of a message is reported.
        break;
    }
}
/// <summary>
/// Tries to detect the language of <paramref name="text"/> and convert its ISO 639-3 code
/// through <c>ConvertISOLangugueNameToSystemName</c>.
/// </summary>
/// <param name="text">Text to identify.</param>
/// <returns>
/// The converted language name, or an empty string when no candidate is returned, when
/// identification throws, or when an earlier call has already failed.
/// </returns>
public string TryDetectLanguague(string text)
{
    // After the first failure the identifier is never tried again.
    if (_LanguageIdentificationFailed)
    {
        return string.Empty;
    }

    try
    {
        // The factory and identifier are created on first use.
        if (_NTextCatFactory == null || _NTextCatIdentifier == null)
        {
            _NTextCatFactory = new RankedLanguageIdentifierFactory();
            _NTextCatIdentifier = _NTextCatFactory.Load(_NTextCatLanguageModelsPath);
        }

        var best = _NTextCatIdentifier.Identify(text).FirstOrDefault();
        if (best == null)
        {
            return string.Empty;
        }

        return ConvertISOLangugueNameToSystemName(best.Item1.Iso639_3);
    }
    catch (Exception e)
    {
        _LanguageIdentificationFailed = true;
        _Logger?.WriteLog(e.ToString());
        return string.Empty;
    }
}
/// <summary>
/// Identifies the language of the request's classification text and formats the ranked
/// matches into a response via <c>FormatResponse</c>.
/// </summary>
/// <param name="model">Request whose <c>TextForLanguageClassification</c> is identified.</param>
/// <returns>The response produced by <c>FormatResponse</c> for the matches and the request.</returns>
public DetectedLanguageResponse DetectLanguage(LanguageDetectRequest model)
{
    IEnumerable<Tuple<LanguageInfo, double>> rankedMatches =
        _identifier.Identify(model.TextForLanguageClassification);

    return FormatResponse(rankedMatches, model);
}
/// <summary>
/// Returns the <c>Iso639_2T</c> code of the top-ranked language for <paramref name="message"/>.
/// </summary>
/// <param name="message">Text to identify.</param>
/// <returns>The language code, or an explanatory sentence when no candidate is returned.</returns>
private static string Lang(string message)
{
    var best = Identifier.Identify(message).FirstOrDefault();
    if (best == null)
    {
        return "The language could not be identified with an acceptable degree of certainty";
    }

    return best.Item1.Iso639_2T;
}
/// <summary>
/// Identifies the language of <paramref name="sequence"/> and saves the sequence (via
/// <c>SaveToFile</c>) when the top candidate is English ("eng") with a score at or below
/// <c>CONFIDENCE_THRESHOLD</c>.
/// </summary>
/// <param name="identifier">Language identifier to query.</param>
/// <param name="sequence">Text to classify; it is lower-cased before identification, the original casing is what gets saved.</param>
/// <param name="index">Index forwarded unchanged to <c>SaveToFile</c>.</param>
public static void DetectLanguage(RankedLanguageIdentifier identifier, string sequence, int index)
{
    // Invariant lower-casing: the culture-sensitive ToLower() maps 'I' to a dotless 'ı' under
    // Turkish/Azeri cultures, which would change the text handed to the identifier.
    var languages = identifier.Identify(sequence.ToLowerInvariant());
    var mostCertainLanguage = languages.FirstOrDefault();
    if (mostCertainLanguage == null)
    {
        return;
    }

    bool isEnglish = mostCertainLanguage.Item1.Iso639_3 == "eng";
    if (isEnglish && mostCertainLanguage.Item2 <= CONFIDENCE_THRESHOLD)
    {
        SaveToFile(sequence, mostCertainLanguage.Item2, index);
    }
}
/// <summary>
/// Returns the ISO 639-3 code of the top-ranked language for <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to identify.</param>
/// <returns>The ISO 639-3 code, or <c>null</c> when the identifier returns no candidate.</returns>
public static string GetLanguage(string text)
{
    var best = _identifier.Identify(text).FirstOrDefault();

    // http://en.wikipedia.org/wiki/List_of_ISO_639-3_codes
    return best?.Item1.Iso639_3;
}
/// <summary>
/// Determines the language code of <paramref name="text"/>: "eng" or "jpn" when the character
/// checks match, otherwise the ISO 639-3 code of the identifier's top candidate.
/// </summary>
/// <param name="text">Text to classify.</param>
/// <returns>
/// The language code, or an empty string when the profile file is missing or the identifier
/// returns no candidate.
/// </returns>
string getLanguage(string text)
{
    // Character-based shortcuts are tried before the identifier.
    if (isEnglishCharacters(text))
    {
        return "eng";
    }

    if (isJapaneseCharacters(text))
    {
        return "jpn";
    }

    // The identifier is loaded on first use from the profile next to the executable.
    if (ncIdentifier_ == null)
    {
        string exeDirectory = Path.GetDirectoryName(Application.ExecutablePath);
        string profilePath = Path.Combine(exeDirectory, @"Core14.profile.xml");
        if (!File.Exists(profilePath))
        {
            MessageBox.Show("Profile file of NTextCat not found.");
            return string.Empty;
        }

        var factory = new RankedLanguageIdentifierFactory();
        ncIdentifier_ = factory.Load(profilePath);
    }

    var best = ncIdentifier_.Identify(text).FirstOrDefault();

    // A former fallback through isEnglish_obsolete / isJapanese_obsolete is disabled.
    return best == null ? string.Empty : best.Item1.Iso639_3;
}
/// <summary>
/// Detects the language of a notice from (at most) its first 500 characters and maps the
/// top candidate's <c>Iso639_2T</c> code onto the <c>Language</c> enum, ignoring case.
/// </summary>
/// <param name="noticeContent">Notice text; may be shorter than the sample size, empty or null.</param>
/// <returns>
/// The matching <c>Language</c> member, or <c>Language.Unknown</c> when the text is null/empty,
/// no candidate is returned, or the code does not parse as a <c>Language</c> value.
/// </returns>
private Language DetectLanguage(string noticeContent)
{
    const int MaxSampleLength = 500;

    if (string.IsNullOrEmpty(noticeContent))
    {
        return Language.Unknown;
    }

    // Substring(0, 500) throws ArgumentOutOfRangeException for shorter input, so only cut when needed.
    var sample = noticeContent.Length > MaxSampleLength
        ? noticeContent.Substring(0, MaxSampleLength)
        : noticeContent;

    var languages = _rankedLanguageIdentifier.Identify(sample);
    var iso = languages.FirstOrDefault()?.Item1.Iso639_2T;
    if (iso == null)
    {
        return Language.Unknown;
    }

    var success = Enum.TryParse(typeof(Language), iso, true, out var language);
    return success ? (Language)language : Language.Unknown;
}
/// <summary>
/// Identifies the language of <paramref name="text"/> and wraps the top candidate's
/// ISO 639-3 code in a <c>Domain.Language.Language</c>.
/// </summary>
/// <param name="text">Text to identify.</param>
/// <returns>The domain language, or <c>null</c> when no candidate (or no language info) is available.</returns>
public Domain.Language.Language Identify(string text)
{
    var topInfo = _identifier.Identify(text).FirstOrDefault()?.Item1;

    return topInfo == null
        ? null
        : new Domain.Language.Language(topInfo.Iso639_3);
}
/// <summary>
/// Loads the Core14 profile from the <c>Configuration</c> folder under the application base
/// directory, identifies one Spanish and one English sample sentence, and asserts on the
/// <c>Iso639_3</c> value of each top candidate.
/// </summary>
public void RetriveLanguageDataFromString()
{
    var spanishMessage = "como esta por que";
    var englishMessage = "why are we doing this";

    // Path segments are combined individually so the separator is correct on every platform.
    var profilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Configuration", "Core14.profile.xml");

    // The profile is only read: OpenRead asks for read access and allows shared reading, whereas
    // File.Open(path, FileMode.Open) asks for read/write access and an exclusive lock.
    using (var readStream = File.OpenRead(profilePath))
    {
        var sut = new RankedLanguageIdentifierFactory();
        _identifier = sut.Load(readStream);
    }

    var mostCertainSpaLanguage = _identifier.Identify(spanishMessage).FirstOrDefault();
    var mostCertainEngLanguage = _identifier.Identify(englishMessage).FirstOrDefault();

    // Fail with an assertion message instead of a NullReferenceException when nothing is returned.
    mostCertainSpaLanguage.Should().NotBeNull();
    mostCertainEngLanguage.Should().NotBeNull();

    var theSpaLanguage = mostCertainSpaLanguage.Item1.Iso639_3;
    var theEngLanguage = mostCertainEngLanguage.Item1.Iso639_3;

    // NOTE(review): the expected values look like culture names while the compared property is
    // named Iso639_3 - confirm what the loaded profile actually yields.
    theSpaLanguage.Should().Be("es-MX");
    theEngLanguage.Should().Be("en-US");
}
/// <summary>
/// Returns the ISO 639-3 code of the candidate with the smallest score among the
/// identifier's results for <paramref name="contents"/>.
/// </summary>
/// <param name="contents">Text to identify.</param>
/// <returns>
/// The code of the lowest-scored candidate; "eng" when there is no such candidate, its code
/// is null, or any exception is thrown.
/// </returns>
public string GetLanguageId(string contents)
{
    const string DefaultLanguage = "eng";

    try
    {
        LanguageInfo closest = null;
        double lowestScore = double.MaxValue;

        foreach (var candidate in _languageIdentifier.Identify(contents))
        {
            // Strictly smaller only: the first of several equal scores wins.
            if (candidate.Item2 < lowestScore)
            {
                lowestScore = candidate.Item2;
                closest = candidate.Item1;
            }
        }

        return closest?.Iso639_3 ?? DefaultLanguage;
    }
    catch (Exception)
    {
        // Best effort: every failure falls back to the default language.
        return DefaultLanguage;
    }
}
/// <summary>
/// Identifies the language of <paramref name="text"/> and returns the name of the neutral
/// .NET culture whose three-letter ISO language name equals the top candidate's ISO 639-3
/// code (case-insensitive). The outcome is also written to the console.
/// </summary>
/// <param name="text">Text to identify.</param>
/// <returns>
/// The culture name, or an empty string when the identifier returns no candidate or no
/// neutral culture matches the code.
/// </returns>
public string Detect(string text)
{
    IEnumerable<Tuple<LanguageInfo, double>> languages = identifier.Identify(text);
    Tuple<LanguageInfo, double> mostCertainLanguage = languages.FirstOrDefault();

    CultureInfo culture = null;
    if (mostCertainLanguage != null)
    {
        // Only look up a culture when there is a candidate; dereferencing a null candidate
        // inside the predicate would throw before the "not identified" branch could run.
        string isoCode = mostCertainLanguage.Item1.Iso639_3;
        culture = CultureInfo.GetCultures(CultureTypes.NeutralCultures).FirstOrDefault(
            ci => string.Equals(ci.ThreeLetterISOLanguageName, isoCode, StringComparison.OrdinalIgnoreCase));
    }

    if (culture == null)
    {
        Console.WriteLine("The language couldn’t be identified with an acceptable degree of certainty");
        return "";
    }

    Console.WriteLine("The language of the text is '{0}' (ISO639-3 code)", mostCertainLanguage.Item1.Iso639_3);
    return culture.Name;
}
/// <summary>
/// Loads the Core14 profile, identifies a Spanish sample sentence, switches the current
/// thread's cultures to the culture built from the detected code and asserts on the
/// resulting culture name. The thread's original cultures are restored afterwards.
/// </summary>
public void ChangeCurrentCulture()
{
    var spanishMessage = "No stock disponible en la maquina por favor";

    // Path segments are combined individually so the separator is correct on every platform.
    var profilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Configuration", "Core14.profile.xml");

    // The profile is only read, so it is opened read-only rather than with read/write access.
    using (var readStream = File.OpenRead(profilePath))
    {
        var sut = new RankedLanguageIdentifierFactory();
        _identifier = sut.Load(readStream);
    }

    var mostCertainSpaLanguage = _identifier.Identify(spanishMessage).FirstOrDefault();

    // Fail with an assertion message instead of a NullReferenceException when nothing is returned.
    mostCertainSpaLanguage.Should().NotBeNull();
    var theSpaLanguage = mostCertainSpaLanguage.Item1.Iso639_3;

    var currentThread = System.Threading.Thread.CurrentThread;
    var previousCulture = currentThread.CurrentCulture;
    var previousUICulture = currentThread.CurrentUICulture;
    try
    {
        CultureInfo ci = new CultureInfo(theSpaLanguage);
        currentThread.CurrentUICulture = ci;
        currentThread.CurrentCulture = CultureInfo.CreateSpecificCulture(ci.Name);

        currentThread.CurrentCulture.Name.Should().Be("es-MX", ci.Name);
    }
    finally
    {
        // Put the cultures back so the change does not leak into code that runs later on this thread.
        currentThread.CurrentCulture = previousCulture;
        currentThread.CurrentUICulture = previousUICulture;
    }
}
/// <summary>
/// Identifies the language of the property bag's text and stores the top candidate's
/// ISO 639-3 code under <c>LanguagePropertyName</c>.
/// </summary>
/// <param name="crawler">The crawler; checked with <c>NotNull</c>.</param>
/// <param name="propertyBag">Property bag whose <c>Text</c> is read and whose language property is written; checked with <c>NotNull</c>.</param>
/// <returns>A completed task with result <c>true</c> in every case.</returns>
public Task<bool> Process(ICrawler crawler, PropertyBag propertyBag)
{
    AspectF.Define
        .NotNull(crawler, "crawler")
        .NotNull(propertyBag, "propertyBag");

    string content = propertyBag.Text;
    if (!content.IsNullOrEmpty())
    {
        IEnumerable<Tuple<LanguageInfo, double>> ranked = _identifier.Identify(content);
        Tuple<LanguageInfo, double> best = ranked.FirstOrDefault();
        if (best != null)
        {
            propertyBag[LanguagePropertyName].Value = best.Item1.Iso639_3;
        }
    }

    return Task.FromResult(true);
}
/// <summary>
/// Stems <paramref name="or"/> with the English or Russian stemmer, choosing between the two
/// by whichever of "eng"/"rus" has the smaller score in the identifier's results.
/// </summary>
/// <param name="identifier">Language identifier to query.</param>
/// <param name="or">The word to identify and stem.</param>
/// <returns>The stemmed word.</returns>
/// <exception cref="NotSupportedException">Neither "eng" nor "rus" is among the identified languages.</exception>
private static string PrepareWord(RankedLanguageIdentifier identifier, string or)
{
    var ranked = identifier.Identify(or).ToArray();
    var supported = ranked
        .Where(x => x.Item1.Iso639_3 == "eng" || x.Item1.Iso639_3 == "rus")
        .ToArray();

    if (supported.Length == 0)
    {
        throw new NotSupportedException();
    }

    // The smallest score is taken as the best match; the first candidate with that score wins.
    var bestScore = supported.Min(x => x.Item2);
    var best = supported.First(x => x.Item2 == bestScore);

    IStemmer stemmer;
    switch (best?.Item1.Iso639_3)
    {
        case "eng":
            stemmer = new EnglishStemmer();
            break;
        case "rus":
            stemmer = new RussianStemmer();
            break;
        default:
            throw new Exception();
    }

    return stemmer.Stem(or);
}
}
/// <summary>
/// Maps an <c>InstaMedia</c> item onto a flat <c>ResultMedia</c> record: identification of the
/// caption language (first and second candidate), date breakdown, user and location data,
/// media counts, hashtags, product tags, video/image URLs and user tags.
/// </summary>
/// <param name="m">The media item to map.</param>
/// <param name="tag">The tag stored as <c>MainTag</c> on the result.</param>
/// <returns>The populated <c>ResultMedia</c>; its collection members are lazily evaluated LINQ queries over <paramref name="m"/>.</returns>
public ResultMedia MapInstaMedia(InstaMedia m, string tag)
{
    var langResult = languageIdentifier.Identify(m.Caption?.Text ?? "").ToArray();
    var firstLang = langResult.FirstOrDefault();

    // Indexing langResult[1] throws when fewer than two candidates come back; "?." does not guard the index.
    var secondLang = langResult.ElementAtOrDefault(1);

    int intCommentsCount = 0;
    int.TryParse(m.CommentsCount, out intCommentsCount);

    // Computed once; used for both DateWeek and DateWeekLabel.
    int weekOfYear = CultureInfo.InvariantCulture.Calendar.GetWeekOfYear(m.TakenAt, CalendarWeekRule.FirstDay, DayOfWeek.Monday);

    bool carouselHasVideo = m.Carousel?.Any(i => i.MediaType == InstaMediaType.Video || (i.Videos?.Any() ?? false)) ?? false;
    bool carouselHasImage = m.Carousel?.Any(i => i.MediaType == InstaMediaType.Image || (i.Images?.Any() ?? false)) ?? false;

    // URLs of the media itself carry CarouselIndex -1; carousel entries carry their position.
    IEnumerable<ResultMediaVideoUrl> videoUrls = m.Videos.Select(v => new ResultMediaVideoUrl()
    {
        MediaCode = m.Code,
        Url = v.Uri,
        Length = v.Length,
        Height = v.Height,
        Width = v.Width,
        Type = v.Type,
        CarouselIndex = -1
    });
    if (carouselHasVideo)
    {
        // The checks above treat an item's Videos as possibly null, so a null list yields no URLs.
        videoUrls = videoUrls.Concat(m.Carousel.SelectMany((i, index) =>
            i.Videos?.Select(v => new ResultMediaVideoUrl()
            {
                MediaCode = m.Code,
                Url = v.Uri,
                Length = v.Length,
                Height = v.Height,
                Width = v.Width,
                Type = v.Type,
                CarouselIndex = index
            }) ?? Enumerable.Empty<ResultMediaVideoUrl>()));
    }

    IEnumerable<ResultMediaImageUrl> imageUrls = m.Images.Select(i => new ResultMediaImageUrl()
    {
        MediaCode = m.Code,
        Url = i.Uri,
        Height = i.Height,
        Width = i.Width,
        CarouselIndex = -1
    });
    if (carouselHasImage)
    {
        // Same null tolerance as for videos.
        imageUrls = imageUrls.Concat(m.Carousel.SelectMany((i, index) =>
            i.Images?.Select(v => new ResultMediaImageUrl()
            {
                MediaCode = m.Code,
                Url = v.Uri,
                Height = v.Height,
                Width = v.Width,
                CarouselIndex = index
            }) ?? Enumerable.Empty<ResultMediaImageUrl>()));
    }

    return new ResultMedia()
    {
        MainTag = tag,
        Pk = m.Pk,
        Code = m.Code,
        InstaIdentifier = m.InstaIdentifier,
        Title = m.Title,
        Caption = m.Caption?.Text,
        LangCode = firstLang?.Item1.Iso639_2T,
        LangCodeScore = firstLang?.Item2,
        SecondLangCode = secondLang?.Item1.Iso639_2T,
        SecondLangCodeScore = secondLang?.Item2,
        Date = m.TakenAt,
        DateYear = m.TakenAt.Year,
        DateMonth = m.TakenAt.Month,
        DateWeek = weekOfYear,
        DateMonthLabel = $"{m.TakenAt.Year}/{m.TakenAt.Month}",
        DateWeekLabel = $"{m.TakenAt.Year}/{weekOfYear}",
        UserName = m.User.UserName,
        UserPK = m.User.Pk,
        LikesCount = m.LikesCount,
        CommentsCount = intCommentsCount,
        CommentsCountText = m.CommentsCount,
        UserFollowersCount = m.User.FollowersCount,
        MutualFollowers = m.User.MutualFollowers,
        LocationShortName = m.Location?.ShortName ?? "",
        LocationName = m.Location?.Name ?? "",
        LocationCity = m.Location?.City ?? "",
        LocationAddress = m.Location?.Address ?? "",
        LocationFacebookPlacesId = m.Location?.FacebookPlacesId ?? -1,
        LocationLat = m.Location?.Lat ?? 0,
        LocationLng = m.Location?.Lng ?? 0,
        VideoDuration = m.VideoDuration,
        ViewCount = m.ViewCount,
        MediaType = (int)m.MediaType,
        HasVideo = m.MediaType == InstaMediaType.Video || carouselHasVideo,
        HasImage = m.MediaType == InstaMediaType.Image || carouselHasImage,
        // For media that is neither a single image nor a single video the carousel is counted;
        // a missing carousel counts as zero instead of throwing.
        ImageCount = m.MediaType == InstaMediaType.Image
            ? 1
            : m.MediaType == InstaMediaType.Video
                ? 0
                : (m.Carousel?.Count(c => c.MediaType == InstaMediaType.Image) ?? 0),
        VideoCount = m.MediaType == InstaMediaType.Video
            ? 1
            : m.MediaType == InstaMediaType.Image
                ? 0
                : (m.Carousel?.Count(c => c.MediaType == InstaMediaType.Video) ?? 0),
        IsMultiPost = m.IsMultiPost,
        ProductType = m.ProductType,
        NumberOfQualities = m.NumberOfQualities,
        // Type 1: hashtags of the caption; 2: of preview comments by the media's user; 3: of preview comments by others.
        Tags = GetHashtags(m.Caption?.Text ?? "")
            .Select(t => new ResultMediaTag() { MediaCode = m.Code, Tag = t, Type = 1 })
            .Concat(m.PreviewComments
                .Where(c => c.UserId == m.User.Pk)
                .Select(c => c.Text ?? "")
                .SelectMany(t => GetHashtags(t))
                .Select(t => new ResultMediaTag() { MediaCode = m.Code, Tag = t, Type = 2 }))
            .Concat(m.PreviewComments
                .Where(c => c.UserId != m.User.Pk)
                .Select(c => c.Text ?? "")
                .SelectMany(t => GetHashtags(t))
                .Select(t => new ResultMediaTag() { MediaCode = m.Code, Tag = t, Type = 3 })),
        ResultMediaProductTags = m.ProductTags.Select(t => new ResultMediaProductTag()
        {
            MediaCode = m.Code,
            Name = t.Product?.Name ?? "",
            ExternalUrl = t.Product?.ExternalUrl ?? "",
            MerchantUserName = t.Product?.Merchant?.Username ?? "",
            MainImageUri = t.Product?.MainImage?.FirstOrDefault()?.Uri ?? "",
            FullPrice = t.Product?.FullPrice ?? ""
        }),
        ResultMediaVideoUrls = videoUrls,
        ResultMediaImageUrls = imageUrls,
        ResultMediaUserTags = m.UserTags.Select(u => new ResultMediaUserTag() { MediaCode = m.Code, UserName = u.User.UserName })
    };
}