/// <summary>
/// Sets up the French number-with-unit parser configuration.
/// </summary>
/// <param name="ci">Culture forwarded to the base configuration.</param>
public FrenchNumberWithUnitParserConfiguration(CultureInfo ci)
    : base(ci)
{
    InternalNumberExtractor = NumberExtractor.GetInstance();

    // The numeric part is parsed with the French number parser configuration.
    var numberParserConfig = new FrenchNumberParserConfiguration();
    InternalNumberParser = AgnosticNumberParserFactory.GetParser(
        AgnosticNumberParserType.Number,
        numberParserConfig);

    ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
}
/// <summary>
/// Builds the control <c>NumberModel</c> from an English number parser and a
/// pure-number extractor.
/// </summary>
public TestNumberRecognizerInitialization()
{
    var options = new BaseNumberOptionsConfiguration(EnglishCulture);
    var parser = AgnosticNumberParserFactory.GetParser(
        AgnosticNumberParserType.Number,
        new EnglishNumberParserConfiguration(options));
    var extractor = NumberExtractor.GetInstance(NumberMode.PureNumber);

    controlModel = new NumberModel(parser, extractor);
}
/// <summary>
/// Sets up the Dutch number-with-unit parser configuration.
/// </summary>
/// <param name="ci">Culture forwarded to the base configuration.</param>
public DutchNumberWithUnitParserConfiguration(CultureInfo ci)
    : base(ci)
{
    InternalNumberExtractor = NumberExtractor.GetInstance();

    var numberParserConfig = new DutchNumberParserConfiguration();
    InternalNumberParser = AgnosticNumberParserFactory.GetParser(
        AgnosticNumberParserType.Number,
        numberParserConfig);

    // This configuration uses an empty connector token.
    ConnectorToken = string.Empty;
}
/// <summary>
/// Sets up the Portuguese number-with-unit parser configuration.
/// </summary>
/// <param name="ci">
/// Culture forwarded to the base configuration; its name also selects the number options.
/// </param>
public PortugueseNumberWithUnitParserConfiguration(CultureInfo ci)
    : base(ci)
{
    InternalNumberExtractor = NumberExtractor.GetInstance();

    var numberOptions = new BaseNumberOptionsConfiguration(ci.Name);
    var numberParserConfig = new PortugueseNumberParserConfiguration(numberOptions);
    InternalNumberParser = AgnosticNumberParserFactory.GetParser(
        AgnosticNumberParserType.Number,
        numberParserConfig);

    ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
}
/// <summary>
/// Initializes the Spanish number-with-unit extractor settings.
/// </summary>
/// <param name="ci">Culture stored on the configuration.</param>
protected SpanishNumberWithUnitExtractorConfiguration(CultureInfo ci)
{
    // Kept qualified so the property is clearly distinguished from the CultureInfo type.
    this.CultureInfo = ci;

    UnitNumExtractor = NumberExtractor.GetInstance();

    // Prefix, suffix and connector values are read from NumbersWithUnitDefinitions.
    BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
    BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
    ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
}
/// <summary>
/// Initializes the German number-with-unit extractor settings.
/// </summary>
/// <param name="ci">Culture stored on the configuration.</param>
protected GermanNumberWithUnitExtractorConfiguration(CultureInfo ci)
{
    // Kept qualified so the property is clearly distinguished from the CultureInfo type.
    this.CultureInfo = ci;

    // The number extractor is requested in Unit mode.
    UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit);

    BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
    BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
    ConnectorToken = string.Empty;
}
/// <summary>
/// Sets up the English number-with-unit parser configuration.
/// </summary>
/// <param name="ci">
/// Culture forwarded to the base configuration; its name also selects the number options.
/// </param>
public EnglishNumberWithUnitParserConfiguration(CultureInfo ci)
    : base(ci)
{
    InternalNumberExtractor = NumberExtractor.GetInstance();

    var numberOptions = new BaseNumberOptionsConfiguration(ci.Name);
    var numberParserConfig = new EnglishNumberParserConfiguration(numberOptions);
    InternalNumberParser = AgnosticNumberParserFactory.GetParser(
        AgnosticNumberParserType.Number,
        numberParserConfig);

    ConnectorToken = string.Empty;
}
/// <summary>
/// Initializes the English number-with-unit extractor settings, including the
/// compound-unit connector regex.
/// </summary>
/// <param name="ci">Culture stored on the configuration.</param>
protected EnglishNumberWithUnitExtractorConfiguration(CultureInfo ci)
{
    // Kept qualified so the property is clearly distinguished from the CultureInfo type.
    this.CultureInfo = ci;

    UnitNumExtractor = NumberExtractor.GetInstance();

    BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
    BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
    ConnectorToken = string.Empty;

    // The connector pattern is matched case-insensitively.
    var connectorPattern = NumbersWithUnitDefinitions.CompoundUnitConnectorRegex;
    CompoundUnitConnectorRegex = new Regex(connectorPattern, RegexOptions.IgnoreCase);
}
/// <summary>
/// Sets up the Turkish number-with-unit parser configuration.
/// </summary>
/// <param name="ci">Culture forwarded to the base configuration.</param>
public TurkishNumberWithUnitParserConfiguration(CultureInfo ci)
    : base(ci)
{
    // Number options are fixed to the Turkish culture with no extra options.
    var numberOptions = new BaseNumberOptionsConfiguration(Culture.Turkish, NumberOptions.None);

    InternalNumberExtractor = NumberExtractor.GetInstance();

    var numberParserConfig = new TurkishNumberParserConfiguration(numberOptions);
    InternalNumberParser = AgnosticNumberParserFactory.GetParser(
        AgnosticNumberParserType.Number,
        numberParserConfig);

    ConnectorToken = string.Empty;
}
/// <summary>
/// Sets up the Italian number-with-unit parser configuration.
/// </summary>
/// <param name="ci">Culture forwarded to the base configuration.</param>
public ItalianNumberWithUnitParserConfiguration(CultureInfo ci)
    : base(ci)
{
    // Number options are fixed to the Italian culture with no extra options.
    var numberOptions = new BaseNumberOptionsConfiguration(Culture.Italian, NumberOptions.None);

    InternalNumberExtractor = NumberExtractor.GetInstance();

    var numberParserConfig = new ItalianNumberParserConfiguration(numberOptions);
    InternalNumberParser = AgnosticNumberParserFactory.GetParser(
        AgnosticNumberParserType.Number,
        numberParserConfig);

    ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
}
/// <summary>
/// Initializes the Italian number-with-unit extractor settings, including the
/// compound-unit connector and "pm" non-unit regexes.
/// </summary>
/// <param name="ci">Culture stored on the configuration.</param>
protected ItalianNumberWithUnitExtractorConfiguration(CultureInfo ci)
{
    // Kept qualified so the property is clearly distinguished from the CultureInfo type.
    this.CultureInfo = ci;

    UnitNumExtractor = NumberExtractor.GetInstance();

    BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
    BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
    ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;

    // Both patterns are matched case-insensitively.
    var connectorPattern = NumbersWithUnitDefinitions.CompoundUnitConnectorRegex;
    CompoundUnitConnectorRegex = new Regex(connectorPattern, RegexOptions.IgnoreCase);

    var pmNonUnitPattern = BaseUnits.PmNonUnitRegex;
    PmNonUnitRegex = new Regex(pmNonUnitPattern, RegexOptions.IgnoreCase);
}
/// <summary>
/// Initializes the Hindi number-with-unit extractor settings and loads the
/// ambiguity filters.
/// </summary>
/// <param name="ci">Culture stored on the configuration.</param>
protected HindiNumberWithUnitExtractorConfiguration(CultureInfo ci)
{
    // Kept qualified so the property is clearly distinguished from the CultureInfo type.
    this.CultureInfo = ci;

    // The number extractor is requested in Unit mode.
    UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit);

    BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
    BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
    ConnectorToken = string.Empty;

    var ambiguityDefinitions = NumbersWithUnitDefinitions.AmbiguityFiltersDict;
    AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(ambiguityDefinitions);
}
/// <summary>
/// Sets up the English number-with-unit parser configuration, sharing one number
/// options object between the internal extractor and parser.
/// </summary>
/// <param name="ci">Culture forwarded to the base configuration.</param>
public EnglishNumberWithUnitParserConfiguration(CultureInfo ci)
    : base(ci)
{
    var numberOptions = new BaseNumberOptionsConfiguration(Culture.English, NumberOptions.None);

    InternalNumberExtractor = NumberExtractor.GetInstance(numberOptions);

    var numberParserConfig = new EnglishNumberParserConfiguration(numberOptions);
    InternalNumberParser = AgnosticNumberParserFactory.GetParser(
        AgnosticNumberParserType.Number,
        numberParserConfig);

    ConnectorToken = string.Empty;

    // The type list is taken from the dimension extractor configuration.
    TypeList = DimensionExtractorConfiguration.DimensionTypeList;
}
/// <summary>
/// Sets up the Portuguese number-with-unit parser configuration, sharing one number
/// options object between the internal extractor and parser.
/// </summary>
/// <param name="ci">Culture forwarded to the base configuration.</param>
public PortugueseNumberWithUnitParserConfiguration(CultureInfo ci)
    : base(ci)
{
    var numberOptions = new BaseNumberOptionsConfiguration(Culture.Portuguese, NumberOptions.None);

    InternalNumberExtractor = NumberExtractor.GetInstance(numberOptions);

    var numberParserConfig = new PortugueseNumberParserConfiguration(numberOptions);
    InternalNumberParser = AgnosticNumberParserFactory.GetParser(
        AgnosticNumberParserType.Number,
        numberParserConfig);

    ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;

    // The type list is taken from the dimension extractor configuration.
    TypeList = DimensionExtractorConfiguration.DimensionTypeList;
}
/// <summary>
/// Initializes the Italian number-with-unit extractor settings and loads the
/// general, temperature and dimension ambiguity filters.
/// </summary>
/// <param name="ci">Culture stored on the configuration.</param>
protected ItalianNumberWithUnitExtractorConfiguration(CultureInfo ci)
{
    // Kept qualified so the property is clearly distinguished from the CultureInfo type.
    this.CultureInfo = ci;

    // The number extractor is requested in Unit mode.
    UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit);

    BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
    BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
    ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;

    // Filters are loaded in the same order as before: general, temperature, dimension.
    AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(
        NumbersWithUnitDefinitions.AmbiguityFiltersDict);
    TemperatureAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(
        NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict);
    DimensionAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(
        NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict);
}
/// <summary>
/// Initializes the Spanish number-with-unit extractor settings, using a Unit-mode
/// number extractor configured for the supplied culture.
/// </summary>
/// <param name="ci">Culture stored on the configuration; its name selects the number options.</param>
protected SpanishNumberWithUnitExtractorConfiguration(CultureInfo ci)
{
    // Kept qualified so the property is clearly distinguished from the CultureInfo type.
    this.CultureInfo = ci;

    var unitNumberOptions = new BaseNumberOptionsConfiguration(
        ci.Name,
        NumberOptions.None,
        NumberMode.Unit);
    UnitNumExtractor = NumberExtractor.GetInstance(unitNumberOptions);

    BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
    BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
    ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
}
/// <summary>
/// Initializes the French number-with-unit extractor settings, including the
/// compound-unit connector regex, the "pm" non-unit regex and the ambiguity filters.
/// </summary>
/// <param name="ci">Culture stored on the configuration.</param>
protected FrenchNumberWithUnitExtractorConfiguration(CultureInfo ci)
{
    // Kept qualified so the property is clearly distinguished from the CultureInfo type.
    this.CultureInfo = ci;

    UnitNumExtractor = NumberExtractor.GetInstance();

    BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
    BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
    ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;

    // Both patterns are matched case-insensitively.
    var connectorPattern = NumbersWithUnitDefinitions.CompoundUnitConnectorRegex;
    CompoundUnitConnectorRegex = new Regex(connectorPattern, RegexOptions.IgnoreCase);

    var pmNonUnitPattern = BaseUnits.PmNonUnitRegex;
    PmNonUnitRegex = new Regex(pmNonUnitPattern, RegexOptions.IgnoreCase);

    AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(
        NumbersWithUnitDefinitions.AmbiguityFiltersDict);
}
// Verifies that GetIntegers returns an empty list when given an empty string.
public void GetIntegers_EmptyString_ShouldReturnEmptyList()
{
    // Arrange
    NumberExtractor extractor = new NumberExtractor();
    string numbers = string.Empty;

    // Act
    var result = extractor.GetIntegers(numbers);

    // Assert
    // Assert.Equal takes (expected, actual); passing them in that order keeps
    // failure messages labelled correctly.
    var expected = new List<int>();
    Assert.Equal(expected, result);
}
/// <summary>
/// Initializes the German number-with-unit extractor settings, using a Unit-mode
/// number extractor configured for the supplied culture, and loads the ambiguity filters.
/// </summary>
/// <param name="ci">Culture stored on the configuration; its name selects the number options.</param>
protected GermanNumberWithUnitExtractorConfiguration(CultureInfo ci)
{
    // Kept qualified so the property is clearly distinguished from the CultureInfo type.
    this.CultureInfo = ci;

    var unitNumberOptions = new BaseNumberOptionsConfiguration(
        ci.Name,
        NumberOptions.None,
        NumberMode.Unit);
    UnitNumExtractor = NumberExtractor.GetInstance(unitNumberOptions);

    BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
    BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
    ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;

    AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(
        NumbersWithUnitDefinitions.AmbiguityFiltersDict);
}
/// <summary>
/// Sets up the French number-with-unit parser configuration, sharing one number
/// options object between the internal extractor and parser.
/// </summary>
/// <param name="ci">Culture forwarded to the base configuration.</param>
public FrenchNumberWithUnitParserConfiguration(CultureInfo ci)
    : base(ci)
{
    var numberOptions = new BaseNumberOptionsConfiguration(Culture.French, NumberOptions.None);

    InternalNumberExtractor = NumberExtractor.GetInstance(numberOptions);

    var numberParserConfig = new FrenchNumberParserConfiguration(numberOptions);
    InternalNumberParser = AgnosticNumberParserFactory.GetParser(
        AgnosticNumberParserType.Number,
        numberParserConfig);

    // The connector gets a trailing space so that the start of a unit is not
    // mistaken for the connector (e.g. 'de' in 'degrés').
    var connectorWithSpace = NumbersWithUnitDefinitions.ConnectorToken + " ";
    ConnectorToken = connectorWithSpace;

    TypeList = DimensionExtractorConfiguration.DimensionTypeList;
}
/// <summary>
/// Initializes the Swedish number-with-unit extractor settings, using a Unit-mode
/// number extractor configured for the supplied culture, and loads the general,
/// temperature and dimension ambiguity filters.
/// </summary>
/// <param name="ci">Culture stored on the configuration; its name selects the number options.</param>
protected SwedishNumberWithUnitExtractorConfiguration(CultureInfo ci)
{
    // Kept qualified so the property is clearly distinguished from the CultureInfo type.
    this.CultureInfo = ci;

    var unitNumberOptions = new BaseNumberOptionsConfiguration(
        ci.Name,
        NumberOptions.None,
        NumberMode.Unit);
    UnitNumExtractor = NumberExtractor.GetInstance(unitNumberOptions);

    BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
    BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
    ConnectorToken = string.Empty;

    // Filters are loaded in the same order as before: general, temperature, dimension.
    AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(
        NumbersWithUnitDefinitions.AmbiguityFiltersDict);
    TemperatureAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(
        NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict);
    DimensionAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(
        NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict);
}
// Verifies that GetIntegers returns a single-element list containing 1 for the input "1".
public void GetIntegers_OneAsString_ShouldReturnListWithOne()
{
    // Arrange
    NumberExtractor extractor = new NumberExtractor();
    string numbers = "1";

    // Act
    var result = extractor.GetIntegers(numbers);

    // Assert
    // Assert.Equal takes (expected, actual); passing them in that order keeps
    // failure messages labelled correctly.
    var expected = new List<int> { 1 };
    Assert.Equal(expected, result);
}
/// <summary>
/// Tries to match <paramref name="file"/> to an episode of its most probable show,
/// using the season and episode numbers extracted from the file.
/// </summary>
/// <param name="file">
/// File to identify. A match is added to its <c>EpisodeMatches</c> only when both
/// numbers are known and the matching episode is identified.
/// </param>
public void IdentifyEpisode(File file)
{
    // Nothing to do without a most-probable show.
    if (file.ShowMatches.MostProbable == null)
    {
        return;
    }

    var episodes = this.Database.GetEpisodes(file.ShowMatches.MostProbable.Guess);
    var extraction = new NumberExtractor(file);

    // Both the season and the episode number must have been recognised.
    var unknown = (int)Episode.Number.Unknown;
    if (extraction.SeasonGuess == unknown || extraction.EpisodeGuess == unknown)
    {
        return;
    }

    var candidate = episodes.FirstOrDefault(
        e => e.SeasonNumber == extraction.SeasonGuess
             && e.EpisodeNumber == extraction.EpisodeGuess);

    if (candidate == null || !candidate.IsIdentified)
    {
        return;
    }

    file.EpisodeMatches.Add(new WeightedGuess<Episode>(candidate, 1));
}
/// <summary>
/// Initializes the English number-with-unit extractor settings, using a Unit-mode
/// number extractor with the mixed placeholder, and loads the general, temperature
/// and dimension ambiguity filters.
/// </summary>
/// <param name="ci">Culture stored on the configuration; its name selects the number options.</param>
protected EnglishNumberWithUnitExtractorConfiguration(CultureInfo ci)
{
    // Kept qualified so the property is clearly distinguished from the CultureInfo type.
    this.CultureInfo = ci;

    // PlaceHolderMixed makes it possible to extract numbers from expressions such as
    // 'USD15' or '15USD', where letters and digits are not separated by a space
    // (PlaceHolderDefault does not extract numbers from expressions like 'USD15').
    var unitNumberOptions = new BaseNumberOptionsConfiguration(
        ci.Name,
        NumberOptions.None,
        NumberMode.Unit,
        BaseNumbers.PlaceHolderMixed);
    UnitNumExtractor = NumberExtractor.GetInstance(unitNumberOptions);

    BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
    BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
    ConnectorToken = string.Empty;

    // Filters are loaded in the same order as before: general, temperature, dimension.
    AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(
        NumbersWithUnitDefinitions.AmbiguityFiltersDict);
    TemperatureAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(
        NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict);
    DimensionAmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(
        NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict);
}
// A three-digit merged number ("714") is expected to yield season 7, episode 14.
public void ShouldRecogniseMergedSeasonAndEpisodeNumbersForFirstTenSeasons()
{
    // Arrange
    var file = new File(@"714.tvrip.avi");

    // Act
    var numbers = new NumberExtractor(file);

    // Assert
    Assert.AreEqual(7, numbers.SeasonGuess);
    Assert.AreEqual(14, numbers.EpisodeGuess);
}
// A four-digit merged number ("1205") is expected to yield season 12, episode 5.
public void ShouldRecogniseMergedSeasonAndEpisodeNumbersAfterTenthSeasons()
{
    // Arrange
    var file = new File(@"1205.tvrip.avi");

    // Act
    var numbers = new NumberExtractor(file);

    // Assert
    Assert.AreEqual(12, numbers.SeasonGuess);
    Assert.AreEqual(5, numbers.EpisodeGuess);
}
// A merged number following the show name ("lost.604") is expected to yield season 6, episode 4.
public void ShouldRecogniseEpisodeInfoAfterTheShowsName()
{
    // Arrange
    var file = new File(@"lost.604.hdtv.avi");

    // Act
    var numbers = new NumberExtractor(file);

    // Assert
    Assert.AreEqual(6, numbers.SeasonGuess);
    Assert.AreEqual(4, numbers.EpisodeGuess);
}
// With two candidate numbers in the name, both guesses are expected to stay Unknown.
public void ShouldNotIdentifyTheEpisodeWhenMultipleNumbersExist()
{
    // Arrange
    var file = new File(@"lost.123.or.324.tvrip.avi");
    var unknown = (int)Episode.Number.Unknown;

    // Act
    var numbers = new NumberExtractor(file);

    // Assert
    Assert.AreEqual(unknown, numbers.SeasonGuess);
    Assert.AreEqual(unknown, numbers.EpisodeGuess);
}
// For a double-episode name ("s1e23-e24") the season is expected to be 1 while the
// episode guess stays Unknown.
public void ShouldNotIdentifyTheEpisodeWhenItsTwoEpisodesInOne()
{
    // Arrange
    var file = new File(@"lost.s1e23-e24.tvrip.avi");

    // Act
    var numbers = new NumberExtractor(file);

    // Assert
    Assert.AreEqual(1, numbers.SeasonGuess);
    Assert.AreEqual((int)Episode.Number.Unknown, numbers.EpisodeGuess);
}
// A time such as "630pm" must not compete with the real number; "217" is expected
// to yield season 2, episode 17.
public void ShouldNotConsiderTimesAsAnEpisodeNumber()
{
    // Arrange
    var file = new File(@"217 - At 630pm.avi");

    // Act
    var numbers = new NumberExtractor(file);

    // Assert
    Assert.AreEqual(2, numbers.SeasonGuess);
    Assert.AreEqual(17, numbers.EpisodeGuess);
}
// An ordinal such as "100th" must not compete with the real number; "217" is expected
// to yield season 2, episode 17.
public void ShouldNotConsiderOrdinalNumbersAsAnEpisodeNumber()
{
    // Arrange
    var file = new File(@"217 - The 100th episode.avi");

    // Act
    var numbers = new NumberExtractor(file);

    // Assert
    Assert.AreEqual(2, numbers.SeasonGuess);
    Assert.AreEqual(17, numbers.EpisodeGuess);
}
// Assigns a new NumberExtractor to the shared _numberExtractor field.
public void SetUp() => _numberExtractor = new NumberExtractor();
// A name consisting only of a merged number ("123") is expected to yield season 1, episode 23.
public void ShouldRecogniseMergedSeasonAndEpisodeNumbersOnTheirOwn()
{
    // Arrange
    var file = new File(@"123.avi");

    // Act
    var numbers = new NumberExtractor(file);

    // Assert
    Assert.AreEqual(1, numbers.SeasonGuess);
    Assert.AreEqual(23, numbers.EpisodeGuess);
}
// An upper-case "S1E02" marker is expected to yield season 1, episode 2.
public void ShouldRecogniseSeparateSeasonAndEpisodeNumbersInUpperCase()
{
    // Arrange
    var file = new File(@"S1E02.tvrip.avi");

    // Act
    var numbers = new NumberExtractor(file);

    // Assert
    Assert.AreEqual(1, numbers.SeasonGuess);
    Assert.AreEqual(2, numbers.EpisodeGuess);
}
/// <summary>
/// Extracts numbers from <paramref name="source"/> and merges adjacent integer
/// results into a single result when the first one fully matches the round-number
/// regex and the two are separated only by whitespace or by a connector.
/// Currently, this extractor is only for English number extracting.
/// </summary>
/// <param name="source">Text to extract numbers from.</param>
/// <returns>
/// The extracted results. A merged result has type <c>Constants.SYS_NUM</c> and
/// carries its parts as a <c>List&lt;ExtractResult&gt;</c> in <c>Data</c>; a result
/// that was not merged with anything is returned as originally extracted.
/// </returns>
public List<ExtractResult> Extract(string source)
{
    var result = new List<ExtractResult>();
    var ers = NumberExtractor.Extract(source);

    if (ers.Count == 0)
    {
        return result;
    }

    // groups[i] is the index of the merge group that ers[i] belongs to.
    var groups = new int[ers.Count];
    groups[0] = 0;

    for (var idx = 0; idx < ers.Count - 1; idx++)
    {
        // Only two neighbouring integer results can be merged. The type tag is an
        // identifier, so it is compared ordinally rather than culture-sensitively.
        if (!((string)ers[idx].Data).StartsWith(Constants.INTEGER_PREFIX, System.StringComparison.Ordinal) ||
            !((string)ers[idx + 1].Data).StartsWith(Constants.INTEGER_PREFIX, System.StringComparison.Ordinal))
        {
            groups[idx + 1] = groups[idx] + 1;
            continue;
        }

        // The left-hand result must match the round-number regex in its entirety.
        var match = RoundNumberIntegerRegexWithLocks.Match(ers[idx].Text);

        if (!match.Success || match.Length != ers[idx].Length)
        {
            groups[idx + 1] = groups[idx] + 1;
            continue;
        }

        // Text between the end of the current result and the start of the next one.
        var middleBegin = ers[idx].Start + ers[idx].Length ?? 0;
        var middleEnd = ers[idx + 1].Start ?? 0;
        var middleStr = source.Substring(middleBegin, middleEnd - middleBegin).Trim().ToLowerInvariant();

        // Separated by whitespace
        if (string.IsNullOrEmpty(middleStr))
        {
            groups[idx + 1] = groups[idx];
            continue;
        }

        // Separated by connectors: the connector must cover the whole middle text.
        match = ConnectorRegex.Match(middleStr);
        if (match.Success && match.Index == 0 && match.Length == middleStr.Length)
        {
            groups[idx + 1] = groups[idx];
        }
        else
        {
            groups[idx + 1] = groups[idx] + 1;
        }
    }

    for (var idx = 0; idx < ers.Count; idx++)
    {
        // First element of a group: start a new output entry whose Data holds a
        // copy of the original result as the first part.
        if (idx == 0 || groups[idx] != groups[idx - 1])
        {
            var tmpExtractResult = ers[idx];
            tmpExtractResult.Data = new List<ExtractResult>
            {
                new ExtractResult
                {
                    Data = ers[idx].Data,
                    Length = ers[idx].Length,
                    Start = ers[idx].Start,
                    Text = ers[idx].Text,
                    Type = ers[idx].Type,
                },
            };

            result.Add(tmpExtractResult);
        }

        // Reduce extract results in same group: extend the group's entry so that it
        // spans up to the end of the next result, and record that result as a part.
        if (idx + 1 < ers.Count && groups[idx + 1] == groups[idx])
        {
            // Group numbers grow by one per new group, so they index into result.
            var group = groups[idx];

            var periodBegin = result[group].Start ?? 0;
            var periodEnd = (ers[idx + 1].Start ?? 0) + (ers[idx + 1].Length ?? 0);

            result[group].Length = periodEnd - periodBegin;
            result[group].Text = source.Substring(periodBegin, periodEnd - periodBegin);
            result[group].Type = Constants.SYS_NUM;
            (result[group].Data as List<ExtractResult>)?.Add(ers[idx + 1]);
        }
    }

    // Groups that ended up with a single part are unwrapped to that part.
    for (var idx = 0; idx < result.Count; idx++)
    {
        var innerData = result[idx].Data as List<ExtractResult>;
        if (innerData?.Count == 1)
        {
            result[idx] = innerData[0];
        }
    }

    return result;
}