Example #1
0
 /// <summary>
 /// Sets up the French number-with-unit parser configuration: the internal number
 /// extractor, the internal number parser and the connector token.
 /// </summary>
 /// <param name="ci">Culture passed through to the base configuration.</param>
 public FrenchNumberWithUnitParserConfiguration(CultureInfo ci)
     : base(ci)
 {
     InternalNumberExtractor = NumberExtractor.GetInstance();

     // The number parser is built from the French number parser configuration.
     var parserConfig = new FrenchNumberParserConfiguration();
     InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, parserConfig);

     ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
 }
Example #2
0
 /// <summary>
 /// Builds the control <c>NumberModel</c> from an English number parser and a
 /// <c>NumberMode.PureNumber</c> number extractor.
 /// </summary>
 public TestNumberRecognizerInitialization()
 {
     // Parser first, extractor second: the same order the model's constructor arguments are evaluated in.
     var options      = new BaseNumberOptionsConfiguration(EnglishCulture);
     var parserConfig = new EnglishNumberParserConfiguration(options);
     var parser       = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, parserConfig);
     var extractor    = NumberExtractor.GetInstance(NumberMode.PureNumber);

     controlModel = new NumberModel(parser, extractor);
 }
Example #3
0
 /// <summary>
 /// Sets up the Dutch number-with-unit parser configuration: the internal number
 /// extractor, the internal number parser and an empty connector token.
 /// </summary>
 /// <param name="ci">Culture passed through to the base configuration.</param>
 public DutchNumberWithUnitParserConfiguration(CultureInfo ci)
     : base(ci)
 {
     InternalNumberExtractor = NumberExtractor.GetInstance();

     // The number parser is built from the Dutch number parser configuration.
     var parserConfig = new DutchNumberParserConfiguration();
     InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, parserConfig);

     ConnectorToken = string.Empty;
 }
 /// <summary>
 /// Sets up the Portuguese number-with-unit parser configuration: the internal number
 /// extractor, the internal number parser and the connector token.
 /// </summary>
 /// <param name="ci">
 /// Culture passed through to the base configuration; its name is also used to build
 /// the number parser options.
 /// </param>
 public PortugueseNumberWithUnitParserConfiguration(CultureInfo ci)
     : base(ci)
 {
     InternalNumberExtractor = NumberExtractor.GetInstance();

     // Number parser options are keyed on the culture name.
     var options      = new BaseNumberOptionsConfiguration(ci.Name);
     var parserConfig = new PortugueseNumberParserConfiguration(options);
     InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, parserConfig);

     ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
 }
Example #5
0
 /// <summary>
 /// Sets up the Spanish number-with-unit extractor configuration: culture, unit number
 /// extractor, prefix/suffix building blocks and connector token.
 /// </summary>
 /// <param name="ci">Culture stored on this configuration.</param>
 protected SpanishNumberWithUnitExtractorConfiguration(CultureInfo ci)
 {
     this.CultureInfo = ci;

     UnitNumExtractor = NumberExtractor.GetInstance();

     // Text fragments taken from the language definitions.
     BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
     BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
     ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
 }
 /// <summary>
 /// Sets up the German number-with-unit extractor configuration: culture, a
 /// <c>NumberMode.Unit</c> number extractor, prefix/suffix building blocks and an
 /// empty connector token.
 /// </summary>
 /// <param name="ci">Culture stored on this configuration.</param>
 protected GermanNumberWithUnitExtractorConfiguration(CultureInfo ci)
 {
     this.CultureInfo = ci;

     UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit);

     // Text fragments taken from the language definitions.
     BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
     BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;

     ConnectorToken = string.Empty;
 }
Example #7
0
 /// <summary>
 /// Sets up the English number-with-unit parser configuration: the internal number
 /// extractor, the internal number parser and an empty connector token.
 /// </summary>
 /// <param name="ci">
 /// Culture passed through to the base configuration; its name is also used to build
 /// the number parser options.
 /// </param>
 public EnglishNumberWithUnitParserConfiguration(CultureInfo ci)
     : base(ci)
 {
     InternalNumberExtractor = NumberExtractor.GetInstance();

     // Number parser options are keyed on the culture name.
     var options      = new BaseNumberOptionsConfiguration(ci.Name);
     var parserConfig = new EnglishNumberParserConfiguration(options);
     InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, parserConfig);

     ConnectorToken = string.Empty;
 }
Example #8
0
 /// <summary>
 /// Sets up the English number-with-unit extractor configuration: culture, unit number
 /// extractor, prefix/suffix building blocks, an empty connector token and the
 /// compound-unit connector regex.
 /// </summary>
 /// <param name="ci">Culture stored on this configuration.</param>
 protected EnglishNumberWithUnitExtractorConfiguration(CultureInfo ci)
 {
     this.CultureInfo = ci;

     UnitNumExtractor = NumberExtractor.GetInstance();

     // Text fragments taken from the language definitions.
     BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
     BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;

     ConnectorToken = string.Empty;

     // The connector pattern is matched case-insensitively.
     var connectorPattern = NumbersWithUnitDefinitions.CompoundUnitConnectorRegex;
     CompoundUnitConnectorRegex = new Regex(connectorPattern, RegexOptions.IgnoreCase);
 }
        /// <summary>
        /// Sets up the Turkish number-with-unit parser configuration: the internal number
        /// extractor, the internal number parser and an empty connector token.
        /// </summary>
        /// <param name="ci">Culture passed through to the base configuration.</param>
        public TurkishNumberWithUnitParserConfiguration(CultureInfo ci)
            : base(ci)
        {
            // Number options are fixed to Culture.Turkish with no extra options.
            var numberOptions = new BaseNumberOptionsConfiguration(Culture.Turkish, NumberOptions.None);

            InternalNumberExtractor = NumberExtractor.GetInstance();

            var parserConfig = new TurkishNumberParserConfiguration(numberOptions);
            InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, parserConfig);

            ConnectorToken = string.Empty;
        }
        /// <summary>
        /// Sets up the Italian number-with-unit parser configuration: the internal number
        /// extractor, the internal number parser and the connector token.
        /// </summary>
        /// <param name="ci">Culture passed through to the base configuration.</param>
        public ItalianNumberWithUnitParserConfiguration(CultureInfo ci)
            : base(ci)
        {
            // Number options are fixed to Culture.Italian with no extra options.
            var numberOptions = new BaseNumberOptionsConfiguration(Culture.Italian, NumberOptions.None);

            InternalNumberExtractor = NumberExtractor.GetInstance();

            var parserConfig = new ItalianNumberParserConfiguration(numberOptions);
            InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, parserConfig);

            ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
        }
Example #11
0
 /// <summary>
 /// Sets up the Italian number-with-unit extractor configuration: culture, unit number
 /// extractor, prefix/suffix building blocks, connector token and the two
 /// case-insensitive regexes (compound-unit connector and PM non-unit).
 /// </summary>
 /// <param name="ci">Culture stored on this configuration.</param>
 protected ItalianNumberWithUnitExtractorConfiguration(CultureInfo ci)
 {
     this.CultureInfo = ci;

     UnitNumExtractor = NumberExtractor.GetInstance();

     // Text fragments taken from the language definitions.
     BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
     BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
     ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;

     // Both patterns are matched case-insensitively.
     var connectorPattern = NumbersWithUnitDefinitions.CompoundUnitConnectorRegex;
     CompoundUnitConnectorRegex = new Regex(connectorPattern, RegexOptions.IgnoreCase);

     var pmNonUnitPattern = BaseUnits.PmNonUnitRegex;
     PmNonUnitRegex = new Regex(pmNonUnitPattern, RegexOptions.IgnoreCase);
 }
        /// <summary>
        /// Sets up the Hindi number-with-unit extractor configuration: culture, a
        /// <c>NumberMode.Unit</c> number extractor, prefix/suffix building blocks, an empty
        /// connector token and the ambiguity filters.
        /// </summary>
        /// <param name="ci">Culture stored on this configuration.</param>
        protected HindiNumberWithUnitExtractorConfiguration(CultureInfo ci)
        {
            this.CultureInfo = ci;

            UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit);

            // Text fragments taken from the language definitions.
            BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
            BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;

            ConnectorToken = string.Empty;

            // Ambiguity filters are loaded from the language definitions.
            var filterDefinitions = NumbersWithUnitDefinitions.AmbiguityFiltersDict;
            AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(filterDefinitions);
        }
        /// <summary>
        /// Sets up the English number-with-unit parser configuration: the internal number
        /// extractor and parser (both built from the same number options), an empty
        /// connector token and the dimension type list.
        /// </summary>
        /// <param name="ci">Culture passed through to the base configuration.</param>
        public EnglishNumberWithUnitParserConfiguration(CultureInfo ci)
            : base(ci)
        {
            // One options object is shared by the extractor and the parser configuration.
            var sharedOptions = new BaseNumberOptionsConfiguration(Culture.English, NumberOptions.None);

            InternalNumberExtractor = NumberExtractor.GetInstance(sharedOptions);

            var parserConfig = new EnglishNumberParserConfiguration(sharedOptions);
            InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, parserConfig);

            ConnectorToken = string.Empty;

            TypeList = DimensionExtractorConfiguration.DimensionTypeList;
        }
Example #14
0
        /// <summary>
        /// Sets up the Portuguese number-with-unit parser configuration: the internal number
        /// extractor and parser (both built from the same number options), the connector
        /// token and the dimension type list.
        /// </summary>
        /// <param name="ci">Culture passed through to the base configuration.</param>
        public PortugueseNumberWithUnitParserConfiguration(CultureInfo ci)
            : base(ci)
        {
            // One options object is shared by the extractor and the parser configuration.
            var sharedOptions = new BaseNumberOptionsConfiguration(Culture.Portuguese, NumberOptions.None);

            InternalNumberExtractor = NumberExtractor.GetInstance(sharedOptions);

            var parserConfig = new PortugueseNumberParserConfiguration(sharedOptions);
            InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, parserConfig);

            ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;

            TypeList = DimensionExtractorConfiguration.DimensionTypeList;
        }
        /// <summary>
        /// Sets up the Italian number-with-unit extractor configuration: culture, a
        /// <c>NumberMode.Unit</c> number extractor, prefix/suffix building blocks, connector
        /// token and the general, temperature and dimension ambiguity filters.
        /// </summary>
        /// <param name="ci">Culture stored on this configuration.</param>
        protected ItalianNumberWithUnitExtractorConfiguration(CultureInfo ci)
        {
            this.CultureInfo = ci;

            UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit);

            // Text fragments taken from the language definitions.
            BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
            BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
            ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;

            // Each filter dictionary is loaded from its matching definition.
            this.AmbiguityFiltersDict =
                DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict);
            this.TemperatureAmbiguityFiltersDict =
                DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict);
            this.DimensionAmbiguityFiltersDict =
                DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict);
        }
        /// <summary>
        /// Sets up the Spanish number-with-unit extractor configuration: culture, a unit-mode
        /// number extractor, prefix/suffix building blocks and connector token.
        /// </summary>
        /// <param name="ci">
        /// Culture stored on this configuration; its name is also used to build the number
        /// extractor options.
        /// </param>
        protected SpanishNumberWithUnitExtractorConfiguration(CultureInfo ci)
        {
            this.CultureInfo = ci;

            // The unit number extractor is configured for this culture in NumberMode.Unit.
            UnitNumExtractor = NumberExtractor.GetInstance(
                new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit));

            // Text fragments taken from the language definitions.
            BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
            BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
            ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;
        }
Example #17
0
        /// <summary>
        /// Sets up the French number-with-unit extractor configuration: culture, unit number
        /// extractor, prefix/suffix building blocks, connector token, the two
        /// case-insensitive regexes and the ambiguity filters.
        /// </summary>
        /// <param name="ci">Culture stored on this configuration.</param>
        protected FrenchNumberWithUnitExtractorConfiguration(CultureInfo ci)
        {
            this.CultureInfo = ci;

            UnitNumExtractor = NumberExtractor.GetInstance();

            // Text fragments taken from the language definitions.
            BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
            BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
            ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;

            // Both patterns are matched case-insensitively.
            var connectorPattern = NumbersWithUnitDefinitions.CompoundUnitConnectorRegex;
            CompoundUnitConnectorRegex = new Regex(connectorPattern, RegexOptions.IgnoreCase);

            var pmNonUnitPattern = BaseUnits.PmNonUnitRegex;
            PmNonUnitRegex = new Regex(pmNonUnitPattern, RegexOptions.IgnoreCase);

            // Ambiguity filters are loaded from the language definitions.
            this.AmbiguityFiltersDict =
                DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict);
        }
        /// <summary>
        /// Verifies that <c>GetIntegers</c> returns an empty list when given an empty string.
        /// </summary>
        public void GetIntegers_EmptyString_ShouldReturnEmptyList()
        {
            //Arrange
            NumberExtractor extractor = new NumberExtractor();
            string          numbers   = string.Empty;

            //Act
            var result = extractor.GetIntegers(numbers);

            //Assert
            var expected = new List<int>();

            // Assert.Equal takes (expected, actual); passing them the other way round
            // swaps the "Expected"/"Actual" labels in the failure message.
            Assert.Equal(expected, result);
        }
        /// <summary>
        /// Sets up the German number-with-unit extractor configuration: culture, a unit-mode
        /// number extractor, prefix/suffix building blocks, connector token and the
        /// ambiguity filters.
        /// </summary>
        /// <param name="ci">
        /// Culture stored on this configuration; its name is also used to build the number
        /// extractor options.
        /// </param>
        protected GermanNumberWithUnitExtractorConfiguration(CultureInfo ci)
        {
            this.CultureInfo = ci;

            // The unit number extractor is configured for this culture in NumberMode.Unit.
            UnitNumExtractor = NumberExtractor.GetInstance(
                new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit));

            // Text fragments taken from the language definitions.
            BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
            BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;
            ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken;

            // Ambiguity filters are loaded from the language definitions.
            this.AmbiguityFiltersDict =
                DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict);
        }
Example #20
0
        /// <summary>
        /// Sets up the French number-with-unit parser configuration: the internal number
        /// extractor and parser (both built from the same number options), the connector
        /// token and the dimension type list.
        /// </summary>
        /// <param name="ci">Culture passed through to the base configuration.</param>
        public FrenchNumberWithUnitParserConfiguration(CultureInfo ci)
            : base(ci)
        {
            // One options object is shared by the extractor and the parser configuration.
            var sharedOptions = new BaseNumberOptionsConfiguration(Culture.French, NumberOptions.None);

            InternalNumberExtractor = NumberExtractor.GetInstance(sharedOptions);

            var parserConfig = new FrenchNumberParserConfiguration(sharedOptions);
            InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, parserConfig);

            // The trailing space keeps the connector from matching the start of a unit
            // (e.g. the 'de' in 'degrés').
            ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken + " ";

            TypeList = DimensionExtractorConfiguration.DimensionTypeList;
        }
Example #21
0
        /// <summary>
        /// Sets up the Swedish number-with-unit extractor configuration: culture, a unit-mode
        /// number extractor, prefix/suffix building blocks, an empty connector token and the
        /// general, temperature and dimension ambiguity filters.
        /// </summary>
        /// <param name="ci">
        /// Culture stored on this configuration; its name is also used to build the number
        /// extractor options.
        /// </param>
        protected SwedishNumberWithUnitExtractorConfiguration(CultureInfo ci)
        {
            this.CultureInfo = ci;

            // The unit number extractor is configured for this culture in NumberMode.Unit.
            UnitNumExtractor = NumberExtractor.GetInstance(
                new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit));

            // Text fragments taken from the language definitions.
            BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
            BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;

            ConnectorToken = string.Empty;

            // Each filter dictionary is loaded from its matching definition.
            this.AmbiguityFiltersDict =
                DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict);
            this.TemperatureAmbiguityFiltersDict =
                DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict);
            this.DimensionAmbiguityFiltersDict =
                DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict);
        }
        /// <summary>
        /// Verifies that <c>GetIntegers</c> returns a list containing only 1 when given "1".
        /// </summary>
        public void GetIntegers_OneAsString_ShouldReturnListWithOne()
        {
            //Arrange
            NumberExtractor extractor = new NumberExtractor();
            string          numbers   = "1";

            //Act
            var result = extractor.GetIntegers(numbers);

            //Assert
            var expected = new List<int> {
                1
            };

            // Assert.Equal takes (expected, actual); passing them the other way round
            // swaps the "Expected"/"Actual" labels in the failure message.
            Assert.Equal(expected, result);
        }
Example #23
0
        /// <summary>
        /// Tries to identify the episode of <paramref name="file"/> from the season and
        /// episode numbers extracted from it, and records a match with weight 1 when an
        /// identified episode with those numbers exists for the most probable show.
        /// </summary>
        /// <param name="file">File whose episode matches are to be extended.</param>
        public void IdentifyEpisode(File file)
        {
            // Without a most-probable show there is nothing to look episodes up against.
            if (file.ShowMatches.MostProbable == null)
            {
                return;
            }

            var knownEpisodes = this.Database.GetEpisodes(file.ShowMatches.MostProbable.Guess);
            var numbers       = new NumberExtractor(file);

            // Both the season and the episode number must have been extracted.
            if (numbers.SeasonGuess == (int)Episode.Number.Unknown || numbers.EpisodeGuess == (int)Episode.Number.Unknown)
            {
                return;
            }

            var candidate = knownEpisodes.FirstOrDefault(
                episode => episode.SeasonNumber == numbers.SeasonGuess && episode.EpisodeNumber == numbers.EpisodeGuess);

            // Only identified episodes are recorded as matches.
            if (candidate == null || !candidate.IsIdentified)
            {
                return;
            }

            file.EpisodeMatches.Add(new WeightedGuess<Episode>(candidate, 1));
        }
        /// <summary>
        /// Sets up the English number-with-unit extractor configuration: culture, a unit-mode
        /// number extractor using the mixed placeholder, prefix/suffix building blocks, an
        /// empty connector token and the general, temperature and dimension ambiguity filters.
        /// </summary>
        /// <param name="ci">
        /// Culture stored on this configuration; its name is also used to build the number
        /// extractor options.
        /// </param>
        protected EnglishNumberWithUnitExtractorConfiguration(CultureInfo ci)
        {
            this.CultureInfo = ci;

            // PlaceHolderMixed lets numbers be extracted from expressions such as 'USD15' or '15USD',
            // where no space separates the alphabetic and numeric characters
            // (PlaceHolderDefault does not extract numbers from expressions like 'USD15').
            UnitNumExtractor = NumberExtractor.GetInstance(
                new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit, BaseNumbers.PlaceHolderMixed));

            // Text fragments taken from the language definitions.
            BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix;
            BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix;

            ConnectorToken = string.Empty;

            // Each filter dictionary is loaded from its matching definition.
            this.AmbiguityFiltersDict =
                DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.AmbiguityFiltersDict);
            this.TemperatureAmbiguityFiltersDict =
                DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.TemperatureAmbiguityFiltersDict);
            this.DimensionAmbiguityFiltersDict =
                DefinitionLoader.LoadAmbiguityFilters(NumbersWithUnitDefinitions.DimensionAmbiguityFiltersDict);
        }
 /// <summary>
 /// Checks that "714.tvrip.avi" yields season 7, episode 14.
 /// </summary>
 public void ShouldRecogniseMergedSeasonAndEpisodeNumbersForFirstTenSeasons()
 {
     var input   = new File(@"714.tvrip.avi");
     var numbers = new NumberExtractor(input);

     Assert.AreEqual(7, numbers.SeasonGuess);
     Assert.AreEqual(14, numbers.EpisodeGuess);
 }
 /// <summary>
 /// Checks that "1205.tvrip.avi" yields season 12, episode 5.
 /// </summary>
 public void ShouldRecogniseMergedSeasonAndEpisodeNumbersAfterTenthSeasons()
 {
     var input   = new File(@"1205.tvrip.avi");
     var numbers = new NumberExtractor(input);

     Assert.AreEqual(12, numbers.SeasonGuess);
     Assert.AreEqual(5, numbers.EpisodeGuess);
 }
 /// <summary>
 /// Checks that "lost.604.hdtv.avi" yields season 6, episode 4.
 /// </summary>
 public void ShouldRecogniseEpisodeInfoAfterTheShowsName()
 {
     var input   = new File(@"lost.604.hdtv.avi");
     var numbers = new NumberExtractor(input);

     Assert.AreEqual(6, numbers.SeasonGuess);
     Assert.AreEqual(4, numbers.EpisodeGuess);
 }
 /// <summary>
 /// Checks that "lost.123.or.324.tvrip.avi" leaves both the season and the episode
 /// guess at <c>Episode.Number.Unknown</c>.
 /// </summary>
 public void ShouldNotIdentifyTheEpisodeWhenMultipleNumbersExist()
 {
     var input   = new File(@"lost.123.or.324.tvrip.avi");
     var numbers = new NumberExtractor(input);

     Assert.AreEqual((int)Episode.Number.Unknown, numbers.SeasonGuess);
     Assert.AreEqual((int)Episode.Number.Unknown, numbers.EpisodeGuess);
 }
 /// <summary>
 /// Checks that "lost.s1e23-e24.tvrip.avi" yields season 1 and leaves the episode
 /// guess at <c>Episode.Number.Unknown</c>.
 /// </summary>
 public void ShouldNotIdentifyTheEpisodeWhenItsTwoEpisodesInOne()
 {
     var input   = new File(@"lost.s1e23-e24.tvrip.avi");
     var numbers = new NumberExtractor(input);

     Assert.AreEqual(1, numbers.SeasonGuess);
     Assert.AreEqual((int)Episode.Number.Unknown, numbers.EpisodeGuess);
 }
 /// <summary>
 /// Checks that "217 - At 630pm.avi" yields season 2, episode 17 — i.e. the "630pm"
 /// part does not affect the guess.
 /// </summary>
 public void ShouldNotConsiderTimesAsAnEpisodeNumber()
 {
     var input   = new File(@"217 - At 630pm.avi");
     var numbers = new NumberExtractor(input);

     Assert.AreEqual(2, numbers.SeasonGuess);
     Assert.AreEqual(17, numbers.EpisodeGuess);
 }
 /// <summary>
 /// Checks that "217 - The 100th episode.avi" yields season 2, episode 17 — i.e. the
 /// "100th" part does not affect the guess.
 /// </summary>
 public void ShouldNotConsiderOrdinalNumbersAsAnEpisodeNumber()
 {
     var input   = new File(@"217 - The 100th episode.avi");
     var numbers = new NumberExtractor(input);

     Assert.AreEqual(2, numbers.SeasonGuess);
     Assert.AreEqual(17, numbers.EpisodeGuess);
 }
 /// <summary>
 /// Replaces the extractor under test with a newly constructed <c>NumberExtractor</c>.
 /// </summary>
 public void SetUp()
 {
     var freshExtractor = new NumberExtractor();

     _numberExtractor = freshExtractor;
 }
 /// <summary>
 /// Checks that "123.avi" yields season 1, episode 23.
 /// </summary>
 public void ShouldRecogniseMergedSeasonAndEpisodeNumbersOnTheirOwn()
 {
     var input   = new File(@"123.avi");
     var numbers = new NumberExtractor(input);

     Assert.AreEqual(1, numbers.SeasonGuess);
     Assert.AreEqual(23, numbers.EpisodeGuess);
 }
 /// <summary>
 /// Checks that "S1E02.tvrip.avi" yields season 1, episode 2.
 /// </summary>
 public void ShouldRecogniseSeparateSeasonAndEpisodeNumbersInUpperCase()
 {
     var input   = new File(@"S1E02.tvrip.avi");
     var numbers = new NumberExtractor(input);

     Assert.AreEqual(1, numbers.SeasonGuess);
     Assert.AreEqual(2, numbers.EpisodeGuess);
 }
        /// <summary>
        /// Extracts numbers from <paramref name="source"/> and merges neighbouring integer
        /// results into one result when the first is entirely matched by the round-number
        /// regex and the two are separated only by whitespace or by a connector.
        /// </summary>
        /// <remarks>Currently, this extractor is only for English number extracting.</remarks>
        /// <param name="source">Text to extract numbers from.</param>
        /// <returns>
        /// The extraction results in source order. A merged result has type
        /// <c>Constants.SYS_NUM</c>, spans all of its parts, and holds the parts as a
        /// <c>List&lt;ExtractResult&gt;</c> in <c>Data</c>. An unmerged result is a copy of the
        /// underlying result's <c>Data</c>, <c>Length</c>, <c>Start</c>, <c>Text</c> and <c>Type</c>.
        /// </returns>
        public List<ExtractResult> Extract(string source)
        {
            var result = new List<ExtractResult>();

            var ers = NumberExtractor.Extract(source);

            if (ers.Count == 0)
            {
                return result;
            }

            // groups[i] is the merge-group id of ers[i]; ids start at 0 and grow by one
            // each time a new group begins, so a group id is also an index into result.
            var groups = new int[ers.Count];

            groups[0] = 0;

            for (var idx = 0; idx < ers.Count - 1; idx++)
            {
                // Only two neighbouring integer results are candidates for merging.
                if (!IsIntegerResult(ers[idx]) || !IsIntegerResult(ers[idx + 1]))
                {
                    groups[idx + 1] = groups[idx] + 1;
                    continue;
                }

                var match = RoundNumberIntegerRegexWithLocks.Match(ers[idx].Text);

                // The round-number regex must cover the left result completely.
                if (!match.Success || match.Length != ers[idx].Length)
                {
                    groups[idx + 1] = groups[idx] + 1;
                    continue;
                }

                // '+' binds tighter than '??', so this is (Start + Length) ?? 0.
                var middleBegin = ers[idx].Start + ers[idx].Length ?? 0;
                var middleEnd   = ers[idx + 1].Start ?? 0;
                var middleStr   = source.Substring(middleBegin, middleEnd - middleBegin).Trim().ToLowerInvariant();

                // Separated by whitespace
                if (string.IsNullOrEmpty(middleStr))
                {
                    groups[idx + 1] = groups[idx];
                    continue;
                }

                // Separated by connectors
                match = ConnectorRegex.Match(middleStr);
                if (match.Success && match.Index == 0 && match.Length == middleStr.Length)
                {
                    groups[idx + 1] = groups[idx];
                }
                else
                {
                    groups[idx + 1] = groups[idx] + 1;
                }
            }

            for (var idx = 0; idx < ers.Count; idx++)
            {
                // First member of a group: start a new result whose Data is a list
                // holding a copy of that member.
                if (idx == 0 || groups[idx] != groups[idx - 1])
                {
                    var tmpExtractResult = ers[idx];
                    tmpExtractResult.Data = new List<ExtractResult>
                    {
                        new ExtractResult
                        {
                            Data   = ers[idx].Data,
                            Length = ers[idx].Length,
                            Start  = ers[idx].Start,
                            Text   = ers[idx].Text,
                            Type   = ers[idx].Type
                        }
                    };
                    result.Add(tmpExtractResult);
                }

                // Reduce extract results in same group
                if (idx + 1 < ers.Count && groups[idx + 1] == groups[idx])
                {
                    var group = groups[idx];

                    var periodBegin = result[group].Start ?? 0;
                    var periodEnd   = (ers[idx + 1].Start ?? 0) + (ers[idx + 1].Length ?? 0);

                    result[group].Length = periodEnd - periodBegin;
                    result[group].Text   = source.Substring(periodBegin, periodEnd - periodBegin);
                    result[group].Type   = Constants.SYS_NUM;
                    (result[group].Data as List<ExtractResult>)?.Add(ers[idx + 1]);
                }
            }

            // A group with a single member was never merged: unwrap it back to the copy
            // of the underlying result.
            for (var idx = 0; idx < result.Count; idx++)
            {
                var innerData = result[idx].Data as List<ExtractResult>;
                if (innerData?.Count == 1)
                {
                    result[idx] = innerData[0];
                }
            }

            return result;
        }

        // Returns true when the result's Data is a string starting with the integer prefix.
        // The comparison is ordinal because the prefix is an internal tag, not user-facing
        // text; null or non-string Data is treated as "not an integer" instead of throwing.
        private static bool IsIntegerResult(ExtractResult er)
        {
            var data = er.Data as string;
            return data != null && data.StartsWith(Constants.INTEGER_PREFIX, System.StringComparison.Ordinal);
        }