Пример #1
0
        /// <summary>
        /// Builds the output template from <c>Example</c>: loads the input records, extracts and
        /// purifies tokens, derives the output separators and token wrappers, and finally
        /// computes <c>Header</c> (the text of <c>Example</c> before the first token occurrence)
        /// and <c>Footer</c> (the text of <c>Example</c> after the last token occurrence).
        /// </summary>
        /// <param name="inputSetSize">
        /// Forwarded to <c>GetRecords</c>; presumably the number of input records to sample
        /// (confirm against <c>GetRecords</c>).
        /// </param>
        /// <exception cref="FormatikException">
        /// Thrown when <c>GetTokens</c> returns null or no tokens.
        /// </exception>
        protected void Evaluate(int inputSetSize)
        {
            var inputRecords = GetRecords(inputSetSize);

            InputRecords = inputRecords.Count();

            var rawTokens = GetTokens(inputRecords);

            if (rawTokens == null || !rawTokens.Any())
            {
                throw new FormatikException("Could not find any similarities between the input document and the example");
            }

            this.Separators = GetOutputSeparators(rawTokens);

            Tokens = PurifyTokens(rawTokens, Separators);

            // construct output template
            SetTokenWrappers(Tokens, Separators);

            var firstToken = Tokens.First();
            var lastToken  = Tokens.Last();

            // Resolve the separators once instead of re-enumerating Separators for every token value.
            // The header probe uses the second separator when there is one, otherwise none;
            // the footer probe uses the second separator when there is one, otherwise the first.
            var hasSecondSeparator = Separators.Count() > 1;
            var headerSeparator    = hasSecondSeparator ? Separators.Skip(1).First() : string.Empty;
            var footerSeparator    = hasSecondSeparator ? Separators.Skip(1).First() : Separators.First();

            // Ordinal search: the default culture-sensitive overload can miss matches that involve
            // newline characters on ICU-based runtimes. Values that do not occur in Example (-1)
            // are ignored so a single miss cannot wipe out the header.
            var headerSize = firstToken.Values
                             .Select(value => Example.IndexOf(
                                 $"{firstToken.Prefix}{value.Value}{firstToken.Suffix}{headerSeparator}",
                                 System.StringComparison.Ordinal))
                             .Where(index => index >= 0)
                             .DefaultIfEmpty(0)
                             .Min();

            this.Header = headerSize > 0 ? Example.Substring(0, headerSize) : string.Empty;

            // For every value of the last token, find where its last occurrence ends.
            // A miss is reported as -1 rather than (-1 + length), which would otherwise be
            // mistaken for a valid footer offset.
            var footerStartAt = lastToken.Values
                                .Select(value =>
            {
                var lastTokenString = $"{footerSeparator}{lastToken.Prefix}{value.Value}{lastToken.Suffix}";
                var matchIndex      = Example.LastIndexOf(lastTokenString, System.StringComparison.Ordinal);
                return matchIndex < 0 ? -1 : matchIndex + lastTokenString.Length;
            })
                                .DefaultIfEmpty(-1)
                                .Max();

            this.Footer = footerStartAt > 0 ? Example.Substring(footerStartAt) : string.Empty;
        }