Пример #1
        /// <summary>
        /// Validates an evaluation request, then looks up or builds a <c>Format</c> for the user from
        /// <c>data.Input</c> / <c>data.InputCacheId</c> and <c>data.Example</c>.
        /// </summary>
        /// <param name="userId">User id as a string; it must parse as an <c>ObjectId</c>.</param>
        /// <param name="data">
        /// Request body. <c>Name</c> and <c>Example</c> are required, as is at least one of
        /// <c>Input</c> / <c>InputCacheId</c>.
        /// </param>
        /// <returns>
        /// <c>Format.GetError(...)</c> for every failure handled below. The success-path return statements are
        /// not visible in this excerpt.
        /// </returns>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost its braces and several lines - e.g. the <c>try</c>
        /// before the <c>catch</c>, the second arm of the conditional expression, and the calls that consume
        /// the filter/update builders. Restore it from the original file before compiling.
        /// </remarks>
        public Format Evaluate(string userId, [FromBody] EvaluateData data)
            // A malformed id is reported with the same error as an unknown or inactive user.
            if (!ObjectId.TryParse(userId, out var _userId))
                logger.LogInformation($"User not found or not active - {userId}");
                return(Format.GetError(ErrorCode.UserNotFound, "User not found or not active"));

            // "name" is mandatory.
            if (string.IsNullOrEmpty(data.Name))
                logger.LogInformation($"Expected \"name\" parameter");
                return(Format.GetError(ErrorCode.MissingParameters, "Expected \"name\" parameter"));

            // Either the raw input or the id of a previously cached input must be supplied.
            if (string.IsNullOrEmpty(data.Input) && string.IsNullOrEmpty(data.InputCacheId))
                logger.LogInformation("Expected at either \"input\" or \"inputCacheId\" parameters, or both.");
                return(Format.GetError(ErrorCode.MissingParameters, "Expected at either \"input\" or \"inputCacheId\" parameters, or both."));

            var inputCacheId = data.InputCacheId;

            // "example" is mandatory.
            if (string.IsNullOrEmpty(data.Example))
                logger.LogInformation($"Expected \"example\" parameter");
                return(Format.GetError(ErrorCode.MissingParameters, $"Expected \"example\" parameter"));

            var db = Common.GetDB(configuration.DbConnection);

            API.User user;
            Format   existingFormat;
            Task     addToCacheTask = null;

            // The token from this source is handed to the async queries below.
            // NOTE(review): no Cancel() call is visible in this excerpt.
            using (var asyncQueriesCancelationTokenSource = new CancellationTokenSource())
                var userQuery = GetUserAsync(_userId, asyncQueriesCancelationTokenSource.Token);

                // No cache id supplied: derive one from the input text.
                if (string.IsNullOrEmpty(inputCacheId))
                    inputCacheId = Formatik.GetRepeatableBase64HashCode(data.Input);

                // Getting the cached input is executed asynchronously. We don't need the result from it
                // until we try to run the evaluation.
                // NOTE(review): the second arm of this conditional expression is missing from the excerpt;
                // the null checks on getCachedInputQuery further down suggest it was a null task - confirm.
                var getCachedInputQuery = string.IsNullOrEmpty(data.Input) ?
                                          GetCachedInputAsync(_userId, inputCacheId, asyncQueriesCancelationTokenSource.Token) :

                // Filter on user id and name, then keep the first result whose example equals data.Example.
                // NOTE(review): the query call that receives this filter is missing from the excerpt.
                var existingFormatQuery = db
                                          .GetCollection <Format>("Formats")
                    Builders <Format> .Filter.And(
                        Builders <Format> .Filter.Eq(f => f.UserId, _userId),
                        Builders <Format> .Filter.Eq(f => f.Name, data.Name)),
                                          .ContinueWith(cursor => cursor.Result
                                                        .FirstOrDefault(f => f.Formatik.Example == data.Example));

                // Blocks the calling thread until the user query completes.
                // NOTE(review): .Result is used throughout instead of await - confirm this is intentional.
                user = userQuery.Result;
                if (user == null)
                    logger.LogInformation($"User not found or not active - {userId}");
                    return(Format.GetError(ErrorCode.UserNotFound, "User not found or not active"));

                // Only read the cache lookup when one was started.
                var cachedInput = getCachedInputQuery != null ? getCachedInputQuery.Result : null;
                if (cachedInput != null)
                    // we found our cached input - great
                    data.Input = getCachedInputQuery.Result;
                else if (getCachedInputQuery != null)
                    // we could not find the cached input - return error
                    logger.LogInformation($"Input \"{data.InputCacheId}\" is no longer cached. Please resubmit input.");
                    return(Format.GetError(ErrorCode.InputCacheNotFound, $"Input \"{data.InputCacheId}\" is no longer cached. Please resubmit input."));
                    // new input - cache it asynchronously, don't wait for result
                    // NOTE(review): as printed this follows a return; an "else" line appears to be missing.
                    addToCacheTask = CacheInputAsync(_userId, inputCacheId, data.Input);

                existingFormat = existingFormatQuery.Result;

            // A format with this user, name and example already exists.
            if (existingFormat != null)
                var inputHash   = Formatik.GetRepeatableHashCode(data.Input);
                var exampleHash = Formatik.GetRepeatableHashCode(data.Example);

                // NOTE(review): the comment below describes the update as the "input and example don't match"
                // case, yet this condition uses "||". With the braces lost the branch structure cannot be
                // confirmed from here - verify whether "&&" was intended.
                if ((existingFormat.Formatik.InputHash == inputHash) ||
                    existingFormat.Formatik.ExampleHash == exampleHash)
                    existingFormat.InputCacheId = inputCacheId;
                    // we found a format with this name, however the input and example don't match
                    // the current parameters - update the existing doc
                    // Async operation - don't wait for completion
                    // NOTE(review): the update call that receives this filter/update pair is missing from the
                    // excerpt. LastUpdated is set from DateTime.Now (local time) - confirm UTC is not expected.
                    db.GetCollection <Format>("Formats")
                        Builders <Format> .Filter.And(
                            Builders <Format> .Filter.Eq(f => f.UserId, _userId),
                            Builders <Format> .Filter.Eq(f => f.Name, data.Name)),
                        Builders <Format> .Update
                        .Set(f => f.Formatik.InputHash, inputHash)
                        .Set(f => f.Formatik.Example, data.Example)
                        .Set(f => f.Formatik.ExampleHash, exampleHash)
                        .Set(f => f.LastUpdated, DateTime.Now));

            BsonFormatik format;

                // Build the format from input and example; the record limit falls back to 1000 when
                // user.MaxRecordCount is null.
                format = new BsonFormatik(data.Input, data.Example, user.MaxRecordCount ?? 1000);
            // A FormatikException is returned to the caller as an evaluation error carrying its message.
            catch (FormatikException e)
                return(Format.GetError(ErrorCode.EvaluationError, e.Message));

            // search by format hash
            // Among the user's formats with the same hash, take the first that compares equal to the new one.
            existingFormat = db
                             .GetCollection <Format>("Formats")
                             .Find(Builders <Format> .Filter.And(
                                       Builders <Format> .Filter.Eq(f => f.UserId, user._id),
                                       Builders <Format> .Filter.Eq(f => f.Formatik.Hash, format.Hash)))
                             .FirstOrDefault(f => format == f.Formatik);

            if (existingFormat != null)
                // We found a Format with same hash, however if we got to this point it means it has a different name
                // Update the name of the existing document
                // Async operation - don't wait for completion
                // NOTE(review): the update call that receives this filter/update pair is missing from the excerpt.
                if (existingFormat.Name != data.Name)
                    db.GetCollection <Format>("Formats")
                        Builders <Format> .Filter.And(
                            Builders <Format> .Filter.Eq(f => f.UserId, user._id),
                            Builders <Format> .Filter.Eq(f => f.Formatik.Hash, format.Hash)),
                        Builders <Format> .Update
                        .Set(f => f.Name, data.Name)
                        .Set(f => f.LastUpdated, DateTime.Now));

                // Refresh the per-request fields on the in-memory copy of the existing format.
                existingFormat.InputCacheId = inputCacheId;
                existingFormat.InputSize    = data.Input.Length;
                existingFormat.InputRecords = format.InputRecords;

            // NOTE(review): the body of this check is missing from the excerpt.
            if (addToCacheTask != null)

            // If we've reached this point there is no entry in the DB matching this format - insert one
            var now = DateTime.Now;

            // Temporary holds the creation timestamp when data.Temporary is set, otherwise null.
            var newFormat = new Format()
                _id          = ObjectId.GenerateNewId(),
                UserId       = user._id,
                Name         = data.Name,
                Created      = now,
                Formatik     = format,
                InputCacheId = inputCacheId,
                InputSize    = data.Input.Length,
                InputRecords = format.InputRecords,
                Temporary    = data.Temporary ? (DateTime?)now : null

            // no need to wait for save, assume it does
            // NOTE(review): the insert call and the final return are missing from the excerpt.
            db.GetCollection <Format>("Formats")

Пример #2
        /// <summary>
        /// Validates a processing request, loads the user's stored format and runs it over
        /// <c>data.Input</c> (or the cached input referenced by <c>data.InputCacheId</c>).
        /// </summary>
        /// <param name="userId">User id as a string; it must parse as an <c>ObjectId</c>.</param>
        /// <param name="formatId">Format id as a string; it must parse as an <c>ObjectId</c>.</param>
        /// <param name="data">Request body; at least one of <c>Input</c> / <c>InputCacheId</c> is required.</param>
        /// <returns>
        /// <c>API.Process.GetError(...)</c> for every failure handled below; otherwise a <c>Process</c> carrying
        /// the output text, input size, processed record count, truncation flag and input cache id.
        /// </returns>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost its braces and several lines - e.g. the second arm
        /// of the conditional expression, the query call and the body of its continuation. Restore it from the
        /// original file before compiling.
        /// </remarks>
        public API.Process Process(string userId, string formatId, [FromBody] ProcessData data)
            // A malformed user id is reported with the same error as an unknown or inactive user.
            if (!ObjectId.TryParse(userId, out var _userId))
                logger.LogInformation($"User not found or not active - {userId}");
                return(API.Process.GetError(ErrorCode.UserNotFound, "User not found or not active"));

            if (!ObjectId.TryParse(formatId, out var _formatId))
                logger.LogInformation($"Invalid formatId - {formatId}");
                return(API.Process.GetError(ErrorCode.InvalidFormatId, "Invalid formatId"));

            // Either the raw input or the id of a previously cached input must be supplied.
            if (string.IsNullOrEmpty(data.Input) && string.IsNullOrEmpty(data.InputCacheId))
                logger.LogInformation("Expected at either \"input\" or \"inputCacheId\" parameters, or both.");
                return(API.Process.GetError(ErrorCode.MissingParameters, "Expected at either \"input\" or \"inputCacheId\" parameters, or both."));

            var inputCacheId = data.InputCacheId;

            User   user;
            Format format;
            Task   addToCacheTask = null;

            // The token from this source is handed to the async queries below.
            // NOTE(review): no Cancel() call is visible in this excerpt.
            using (var asyncQueriesCancelationTokenSource = new CancellationTokenSource())
                var userQuery = GetUserAsync(_userId, asyncQueriesCancelationTokenSource.Token);

                // No cache id supplied: derive one from the input text.
                if (string.IsNullOrEmpty(inputCacheId))
                    inputCacheId = Formatik.GetRepeatableBase64HashCode(data.Input);

                // Getting the cached input is executed asynchronously. We don't need the result from it
                // until we try to run the evaluation.
                // NOTE(review): the second arm of this conditional expression is missing from the excerpt;
                // the null checks on getCachedInputQuery further down suggest it was a null task - confirm.
                var getCachedInputQuery = string.IsNullOrEmpty(data.Input) ?
                                          GetCachedInputAsync(_userId, inputCacheId, asyncQueriesCancelationTokenSource.Token) :

                var db = Common.GetDB(configuration.DbConnection);

                // Filter on user id and format id.
                // NOTE(review): the query call that receives this filter and the body of the continuation
                // lambda are missing from the excerpt.
                var formatQuery = db
                                  .GetCollection <Format>("Formats")
                    Builders <Format> .Filter.And(
                        Builders <Format> .Filter.Eq(f => f.UserId, _userId),
                        Builders <Format> .Filter.Eq(f => f._id, _formatId)),
                                  .ContinueWith((cursorTask) =>

                // Blocks the calling thread until the user query completes.
                // NOTE(review): .Result is used throughout instead of await - confirm this is intentional.
                user = userQuery.Result;
                if (user == null)
                    logger.LogInformation($"User not found or not active - {userId}");
                    return(API.Process.GetError(ErrorCode.UserNotFound, "User not found or not active"));

                // Only read the cache lookup when one was started.
                var cachedInput = getCachedInputQuery != null ? getCachedInputQuery.Result : null;
                if (cachedInput != null)
                    // we found our cached input - great
                    data.Input = getCachedInputQuery.Result;
                else if (getCachedInputQuery != null)
                    // we could not find the cached input - return error
                    logger.LogInformation($"Input \"{data.InputCacheId}\" is no longer cached. Please resubmit input.");
                    return(API.Process.GetError(ErrorCode.InputCacheNotFound, $"Input \"{data.InputCacheId}\" is no longer cached. Please resubmit input."));
                    // new input - cache it asynchronously, don't wait for result
                    // NOTE(review): as printed this follows a return; an "else" line appears to be missing.
                    addToCacheTask = CacheInputAsync(_userId, inputCacheId, data.Input);

                format = formatQuery.Result;

            // No format with this id for this user.
            if (format == null)
                logger.LogInformation($"Unable to find format {formatId}");
                return(API.Process.GetError(ErrorCode.InvalidFormatId, $"Unable to find format {formatId}"));

            using (var outputStream = new MemoryStream())
                int processed;
                int inputSize;
                // The record limit falls back to 1000 when user.MaxRecordCount is null.
                int maxRecordCount = user.MaxRecordCount ?? 1000;

                // The input text is encoded with Encoding.Unicode, so inputSize is the byte length of that
                // encoding rather than the character count.
                using (var inputStream = new MemoryStream(Encoding.Unicode.GetBytes(data.Input)))
                    inputSize = (int)inputStream.Length;
                    processed = format.Formatik.Formatik.Process(inputStream, outputStream, Encoding.Unicode, maxRecordCount);

                // Rewind so the output can be read back from the start.
                outputStream.Seek(0, SeekOrigin.Begin);

                // NOTE(review): the body of this check is missing from the excerpt.
                if (addToCacheTask != null)

                // NOTE(review): the reader is created without an explicit encoding while the output was
                // produced with Encoding.Unicode - confirm Process writes a byte-order mark so the reader
                // can detect the encoding.
                using (var reader = new StreamReader(outputStream))
                    // Trunkated is set when the processed record count reaches the limit.
                    return(new Process()
                        FormatId = _formatId.ToString(),
                        Name = format.Name,
                        Result = reader.ReadToEnd(),
                        InputSize = inputSize,
                        ProcessedRecords = processed,
                        Trunkated = processed >= maxRecordCount,
                        InputCacheId = inputCacheId
Пример #3
        /// <summary>
        /// Accepts an uploaded file, tries to read it as an Excel workbook and convert it to CSV, otherwise
        /// reads it as text; caches the resulting text and reports its detected input format.
        /// </summary>
        /// <param name="userId">User id as a string; it must parse as an <c>ObjectId</c>.</param>
        /// <param name="file">The uploaded file; its content is copied into memory.</param>
        /// <returns>
        /// <c>InputUpload.GetError(...)</c> for every failure handled below; otherwise an <c>InputUpload</c>
        /// carrying the cache id, the (possibly shortened) text, the detected format, size and record count.
        /// </returns>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost its braces and several lines - e.g. both
        /// <c>try</c> keywords, the <c>do</c> that pairs with the trailing <c>while</c>, and the writer calls
        /// inside the loop. Restore it from the original file before compiling.
        /// </remarks>
        public InputUpload UploadInput(string userId, IFormFile file)
            // A malformed id is reported with the same error as an unknown or inactive user.
            if (!ObjectId.TryParse(userId, out var _userId))
                logger.LogInformation($"User not found or not active - {userId}");
                return(InputUpload.GetError(ErrorCode.UserNotFound, "User not found or not active"));

            // The token from this source is handed to the user query and the file copy.
            // NOTE(review): no Cancel() call is visible in this excerpt.
            using (var asyncQueriesCancelationTokenSource = new CancellationTokenSource())
                var userQuery = GetUserAsync(_userId, asyncQueriesCancelationTokenSource.Token);

                // Buffer the whole upload in memory; Wait() blocks until the copy has finished.
                var input = new MemoryStream();
                    file.CopyToAsync(input, asyncQueriesCancelationTokenSource.Token).Wait();

                    // Blocks the calling thread until the user query completes.
                    var user = userQuery.Result;
                    if (user == null)
                        logger.LogInformation($"User not found or not active - {userId}");
                        return(InputUpload.GetError(ErrorCode.UserNotFound, "User not found or not active"));

                    // Rewind so the Excel reader starts at the beginning of the upload.
                    input.Seek(0, SeekOrigin.Begin);

                    // try to read as XLS and convert to CSV
                        using (var reader = ExcelReaderFactory.CreateReader(input))
                            var csvInput = new MemoryStream();
                            // The last argument (leaveOpen: true) keeps csvInput usable after the writer is disposed.
                            using (var writer = new StreamWriter(csvInput, Encoding.Unicode, 8192, true))
                                    // NOTE(review): the body of this header check is missing from the excerpt.
                                    if (reader.HeaderFooter != null && reader.HeaderFooter.FirstHeader != null)

                                    // One comma-joined line per row; null cells become empty strings and values
                                    // containing a comma are wrapped in double quotes.
                                    // NOTE(review): the visible quoting does not escape double quotes inside a
                                    // value, and the rest of this expression is missing from the excerpt.
                                    while (reader.Read())
                                            string.Join(",", Enumerable
                                                        .Range(0, reader.FieldCount)
                                                        .Select(i => reader.IsDBNull(i) ? "" : reader.GetValue(i).ToString())
                                                        .Select(strVal => strVal.Contains(",") ?
                                                                $"\"{strVal}\"" :
                                }while (reader.NextResult());

                            // Conversion succeeded: continue with the CSV text instead of the raw upload.
                            csvInput.Seek(0, SeekOrigin.Begin);
                            input = csvInput;
                    // Any failure while reading as Excel: rewind and fall through to reading the upload as text.
                    // NOTE(review): this catches every exception type and never uses "e" - confirm that
                    // swallowing unrelated failures here is intended.
                    catch (Exception e)
                        input.Seek(0, SeekOrigin.Begin);

                    using (var reader = new StreamReader(input))
                        string data         = reader.ReadToEnd();
                        // The cache id is derived from the text itself.
                        var    inputCacheId = Formatik.GetRepeatableBase64HashCode(data);
                        // Caching is started here; no wait on cacheTask is visible in this excerpt.
                        var    cacheTask    = CacheInputAsync(_userId, inputCacheId, data);

                        // The record limit falls back to 1000 when user.MaxRecordCount is null.
                        (InputFormat InputFormat, int Records)inputDetails;
                            inputDetails = Formatik.GetInputFormat(data, user.MaxRecordCount ?? 1000);
                        catch (UnsupportedFormatException)
                            return(InputUpload.GetError(ErrorCode.UnsupportedFormat, "Unsupported Format"));


                        // Text longer than configuration.FileUploadMaxResultSize is cut to that length and
                        // suffixed with "...". Size is the byte length of the stream that was read.
                        // NOTE(review): the "?? 64000" suggests FileUploadMaxResultSize is nullable; if so the
                        // ">" comparison is false when it is null, the fallback is never reached, and the full
                        // text is returned - confirm the intended default.
                        return(new InputUpload()
                            InputCacheId = inputCacheId,
                            Input = data.Length > configuration.FileUploadMaxResultSize ? data.Substring(0, configuration.FileUploadMaxResultSize ?? 64000) + "..." : data,
                            Truncated = data.Length > configuration.FileUploadMaxResultSize,
                            InputFormat = inputDetails.InputFormat.ToString(),
                            Size = (int)input.Length,
                            Records = inputDetails.Records