示例#1
0
 /// <summary>
 /// Initializes <c>formatik</c> with a new <c>Formatik</c> built from the given input and example,
 /// after setting its <c>MaxInputRecords</c> to <paramref name="maxInputRecords"/>.
 /// </summary>
 /// <param name="input">Input text handed to the <c>Formatik</c> constructor.</param>
 /// <param name="example">Example text handed to the <c>Formatik</c> constructor.</param>
 /// <param name="maxInputRecords">Value assigned to <c>MaxInputRecords</c> on the new instance.</param>
 public BsonFormatik(string input, string example, int maxInputRecords)
 {
     // Build and configure the instance first, then publish it to the member in one step.
     var instance = new Formatik(input, example);
     instance.MaxInputRecords = maxInputRecords;

     formatik = instance;
 }
示例#2
0
        /// <summary>
        /// Constructs a <c>Formatik</c>, reads its <c>Version</c>, and then unconditionally throws.
        /// </summary>
        /// <param name="id">Not used by this method.</param>
        /// <exception cref="Exception">Thrown after the version check, with the message "Test Exception".</exception>
        public void TestException(string id)
        {
            var formatik = new Formatik();

            // The outcome of this comparison is not used; it is evaluated and then ignored.
            var versionIsBlank = formatik.Version == "";

            throw new Exception("Test Exception");
        }
示例#3
0
        /// <summary>
        /// Runs one of a user's stored formats against the supplied input, or against input that was
        /// cached earlier under <c>InputCacheId</c>.
        /// </summary>
        /// <remarks>
        /// The user lookup, the optional cached-input lookup and the format lookup are all started before
        /// any of them is waited on; their results are then consumed by blocking on each task in turn.
        /// </remarks>
        /// <param name="userId">String form of the user's <c>ObjectId</c>.</param>
        /// <param name="formatId">String form of the format's <c>ObjectId</c>.</param>
        /// <param name="data">Request body; must carry <c>Input</c>, <c>InputCacheId</c>, or both.</param>
        /// <returns>
        /// The processing result, or an object built by <c>API.Process.GetError</c> when an id cannot be
        /// parsed, the body has neither input nor cache id, the user or format is not found, or the
        /// referenced cached input is not found.
        /// </returns>
        public API.Process Process(string userId, string formatId, [FromBody] ProcessData data)
        {
            if (!ObjectId.TryParse(userId, out var _userId))
            {
                logger.LogInformation($"User not found or not active - {userId}");
                return(API.Process.GetError(ErrorCode.UserNotFound, "User not found or not active"));
            }

            if (!ObjectId.TryParse(formatId, out var _formatId))
            {
                logger.LogInformation($"Invalid formatId - {formatId}");
                return(API.Process.GetError(ErrorCode.InvalidFormatId, "Invalid formatId"));
            }

            // Null-safe on purpose: a null body object is reported as missing parameters
            // instead of failing with a NullReferenceException.
            if (string.IsNullOrEmpty(data?.Input) && string.IsNullOrEmpty(data?.InputCacheId))
            {
                logger.LogInformation("Expected at either \"input\" or \"inputCacheId\" parameters, or both.");
                return(API.Process.GetError(ErrorCode.MissingParameters, "Expected at either \"input\" or \"inputCacheId\" parameters, or both."));
            }

            var inputCacheId = data.InputCacheId;

            User   user;
            Format format;
            Task   addToCacheTask = null;

            using (var asyncQueriesCancelationTokenSource = new CancellationTokenSource())
            {
                var userQuery = GetUserAsync(_userId, asyncQueriesCancelationTokenSource.Token);

                // No cache id supplied - derive one from the input itself
                if (string.IsNullOrEmpty(inputCacheId))
                {
                    inputCacheId = Formatik.GetRepeatableBase64HashCode(data.Input);
                }

                // Getting the cached input is executed asynchronously. We don't need the result from it
                // until we try to run the processing. The lookup is only needed when no input was posted.
                var getCachedInputQuery = string.IsNullOrEmpty(data.Input) ?
                                          GetCachedInputAsync(_userId, inputCacheId, asyncQueriesCancelationTokenSource.Token) :
                                          null;

                var db = Common.GetDB(configuration.DbConnection);

                // Look the format up by owner and id; the continuation takes the first match (or null)
                var formatQuery = db
                                  .GetCollection<Format>("Formats")
                                  .FindAsync(
                                      Builders<Format>.Filter.And(
                                          Builders<Format>.Filter.Eq(f => f.UserId, _userId),
                                          Builders<Format>.Filter.Eq(f => f._id, _formatId)),
                                      null,
                                      asyncQueriesCancelationTokenSource.Token)
                                  .ContinueWith((cursorTask) =>
                                  {
                                      return(cursorTask.Result.FirstOrDefaultAsync(asyncQueriesCancelationTokenSource.Token).Result);
                                  });

                user = userQuery.Result;
                if (user == null)
                {
                    // Signal the still-running queries to stop before bailing out
                    asyncQueriesCancelationTokenSource.Cancel();
                    logger.LogInformation($"User not found or not active - {userId}");
                    return(API.Process.GetError(ErrorCode.UserNotFound, "User not found or not active"));
                }

                var cachedInput = getCachedInputQuery?.Result;
                if (cachedInput != null)
                {
                    // we found our cached input - great
                    data.Input = cachedInput;
                }
                else if (getCachedInputQuery != null)
                {
                    // we could not find the cached input - return error
                    asyncQueriesCancelationTokenSource.Cancel();
                    logger.LogInformation($"Input \"{data.InputCacheId}\" is no longer cached. Please resubmit input.");
                    return(API.Process.GetError(ErrorCode.InputCacheNotFound, $"Input \"{data.InputCacheId}\" is no longer cached. Please resubmit input."));
                }
                else
                {
                    // new input - cache it asynchronously; the task is only waited on after processing
                    addToCacheTask = CacheInputAsync(_userId, inputCacheId, data.Input);
                }

                format = formatQuery.Result;
            }

            if (format == null)
            {
                logger.LogInformation($"Unable to find format {formatId}");
                return(API.Process.GetError(ErrorCode.InvalidFormatId, $"Unable to find format {formatId}"));
            }

            using (var outputStream = new MemoryStream())
            {
                int processed;
                int inputSize;
                int maxRecordCount = user.MaxRecordCount ?? 1000;

                using (var inputStream = new MemoryStream(Encoding.Unicode.GetBytes(data.Input)))
                {
                    // Size is the byte length of the input encoded with Encoding.Unicode
                    inputSize = (int)inputStream.Length;
                    processed = format.Formatik.Formatik.Process(inputStream, outputStream, Encoding.Unicode, maxRecordCount);
                }

                // Rewind so the reader below sees everything that was written
                outputStream.Seek(0, SeekOrigin.Begin);

                // Make sure the input has been cached before the cache id is handed back
                addToCacheTask?.Wait();

                using (var reader = new StreamReader(outputStream))
                {
                    return(new Process()
                    {
                        FormatId = _formatId.ToString(),
                        Name = format.Name,
                        Result = reader.ReadToEnd(),
                        InputSize = inputSize,
                        ProcessedRecords = processed,
                        Trunkated = processed >= maxRecordCount,
                        InputCacheId = inputCacheId
                    });
                }
            }
        }
示例#4
0
        /// <summary>
        /// Evaluates a format from an input and an example and returns the matching stored
        /// <c>Format</c>, inserting a new document when no stored one matches.
        /// </summary>
        /// <remarks>
        /// The user lookup, the optional cached-input lookup and the lookup of an existing format by name
        /// are all started before any of them is waited on. Database updates and the final insert are
        /// started without waiting for them to complete.
        /// </remarks>
        /// <param name="userId">String form of the user's <c>ObjectId</c>.</param>
        /// <param name="data">
        /// Request body; must carry <c>Name</c>, <c>Example</c>, and <c>Input</c> and/or <c>InputCacheId</c>.
        /// </param>
        /// <returns>
        /// An existing or newly created <c>Format</c>, or an object built by <c>Format.GetError</c> when
        /// the user id cannot be parsed, a required parameter is missing, the user or the referenced
        /// cached input is not found, or a <c>FormatikException</c> is thrown during evaluation.
        /// </returns>
        public Format Evaluate(string userId, [FromBody] EvaluateData data)
        {
            if (!ObjectId.TryParse(userId, out var _userId))
            {
                logger.LogInformation($"User not found or not active - {userId}");
                return(Format.GetError(ErrorCode.UserNotFound, "User not found or not active"));
            }

            // Null-safe on purpose: a null body object is reported as a missing "name"
            // instead of failing with a NullReferenceException.
            if (string.IsNullOrEmpty(data?.Name))
            {
                logger.LogInformation($"Expected \"name\" parameter");
                return(Format.GetError(ErrorCode.MissingParameters, "Expected \"name\" parameter"));
            }

            if (string.IsNullOrEmpty(data.Input) && string.IsNullOrEmpty(data.InputCacheId))
            {
                logger.LogInformation("Expected at either \"input\" or \"inputCacheId\" parameters, or both.");
                return(Format.GetError(ErrorCode.MissingParameters, "Expected at either \"input\" or \"inputCacheId\" parameters, or both."));
            }

            var inputCacheId = data.InputCacheId;

            if (string.IsNullOrEmpty(data.Example))
            {
                logger.LogInformation($"Expected \"example\" parameter");
                return(Format.GetError(ErrorCode.MissingParameters, $"Expected \"example\" parameter"));
            }

            var db = Common.GetDB(configuration.DbConnection);

            API.User user;
            Format   existingFormat;
            Task     addToCacheTask = null;

            using (var asyncQueriesCancelationTokenSource = new CancellationTokenSource())
            {
                var userQuery = GetUserAsync(_userId, asyncQueriesCancelationTokenSource.Token);

                // No cache id supplied - derive one from the input itself
                if (string.IsNullOrEmpty(inputCacheId))
                {
                    inputCacheId = Formatik.GetRepeatableBase64HashCode(data.Input);
                }

                // Getting the cached input is executed asynchronously. We don't need the result from it
                // until we try to run the evaluation. The lookup is only needed when no input was posted.
                var getCachedInputQuery = string.IsNullOrEmpty(data.Input) ?
                                          GetCachedInputAsync(_userId, inputCacheId, asyncQueriesCancelationTokenSource.Token) :
                                          null;

                // Formats of this user with the requested name; the example is compared after the fetch
                var existingFormatQuery = db
                                          .GetCollection<Format>("Formats")
                                          .FindAsync(
                                              Builders<Format>.Filter.And(
                                                  Builders<Format>.Filter.Eq(f => f.UserId, _userId),
                                                  Builders<Format>.Filter.Eq(f => f.Name, data.Name)),
                                              null,
                                              asyncQueriesCancelationTokenSource.Token)
                                          .ContinueWith(cursor => cursor.Result
                                                        .ToEnumerable(asyncQueriesCancelationTokenSource.Token)
                                                        .FirstOrDefault(f => f.Formatik.Example == data.Example));

                user = userQuery.Result;
                if (user == null)
                {
                    // Signal the still-running queries to stop before bailing out
                    asyncQueriesCancelationTokenSource.Cancel();
                    logger.LogInformation($"User not found or not active - {userId}");
                    return(Format.GetError(ErrorCode.UserNotFound, "User not found or not active"));
                }

                var cachedInput = getCachedInputQuery?.Result;
                if (cachedInput != null)
                {
                    // we found our cached input - great
                    data.Input = cachedInput;
                }
                else if (getCachedInputQuery != null)
                {
                    // we could not find the cached input - return error
                    asyncQueriesCancelationTokenSource.Cancel();
                    logger.LogInformation($"Input \"{data.InputCacheId}\" is no longer cached. Please resubmit input.");
                    return(Format.GetError(ErrorCode.InputCacheNotFound, $"Input \"{data.InputCacheId}\" is no longer cached. Please resubmit input."));
                }
                else
                {
                    // new input - cache it asynchronously; the task is only waited on before the insert below
                    addToCacheTask = CacheInputAsync(_userId, inputCacheId, data.Input);
                }

                existingFormat = existingFormatQuery.Result;
            }

            if (existingFormat != null)
            {
                var inputHash   = Formatik.GetRepeatableHashCode(data.Input);
                var exampleHash = Formatik.GetRepeatableHashCode(data.Example);

                // NOTE(review): existingFormat was already selected by an equal Example above, so the
                // ExampleHash half of this "||" looks like it would normally be true, which would make the
                // else branch rarely reachable - confirm whether "&&" was intended.
                if ((existingFormat.Formatik.InputHash == inputHash) ||
                    existingFormat.Formatik.ExampleHash == exampleHash)
                {
                    existingFormat.InputCacheId = inputCacheId;
                    return(existingFormat);
                }
                else
                {
                    // we found a format with this name, however the input and example don't match
                    // the current parameters - update the existing doc
                    // Async operation - don't wait for completion
                    db.GetCollection<Format>("Formats")
                    .UpdateOneAsync(
                        Builders<Format>.Filter.And(
                            Builders<Format>.Filter.Eq(f => f.UserId, _userId),
                            Builders<Format>.Filter.Eq(f => f.Name, data.Name)),
                        Builders<Format>.Update
                        .Set(f => f.Formatik.InputHash, inputHash)
                        .Set(f => f.Formatik.Example, data.Example)
                        .Set(f => f.Formatik.ExampleHash, exampleHash)
                        .Set(f => f.LastUpdated, DateTime.Now));
                }
            }

            BsonFormatik format;

            try
            {
                format = new BsonFormatik(data.Input, data.Example, user.MaxRecordCount ?? 1000);
            }
            catch (FormatikException e)
            {
                // Evaluation failures are reported to the caller with the exception's message
                logger.LogInformation(e.Message);
                return(Format.GetError(ErrorCode.EvaluationError, e.Message));
            }

            // search by format hash, then confirm the match with the == comparison
            existingFormat = db
                             .GetCollection<Format>("Formats")
                             .Find(Builders<Format>.Filter.And(
                                       Builders<Format>.Filter.Eq(f => f.UserId, user._id),
                                       Builders<Format>.Filter.Eq(f => f.Formatik.Hash, format.Hash)))
                             .ToEnumerable()
                             .FirstOrDefault(f => format == f.Formatik);

            if (existingFormat != null)
            {
                // We found a Format with same hash, however if we got to this point it means it has a different name
                // Update the name of the existing document
                // Async operation - don't wait for completion
                if (existingFormat.Name != data.Name)
                {
                    db.GetCollection<Format>("Formats")
                    .UpdateOneAsync(
                        Builders<Format>.Filter.And(
                            Builders<Format>.Filter.Eq(f => f.UserId, user._id),
                            Builders<Format>.Filter.Eq(f => f.Formatik.Hash, format.Hash)),
                        Builders<Format>.Update
                        .Set(f => f.Name, data.Name)
                        .Set(f => f.LastUpdated, DateTime.Now));
                }

                existingFormat.InputCacheId = inputCacheId;
                existingFormat.InputSize    = data.Input.Length;
                existingFormat.InputRecords = format.InputRecords;
                return(existingFormat);
            }

            // Make sure the input has been cached before a format referring to it is created
            addToCacheTask?.Wait();

            // If we've reached this point there is no entry in the DB matching this format - insert one
            var now = DateTime.Now;

            var newFormat = new Format()
            {
                _id          = ObjectId.GenerateNewId(),
                UserId       = user._id,
                Name         = data.Name,
                Created      = now,
                Formatik     = format,
                InputCacheId = inputCacheId,
                InputSize    = data.Input.Length,
                InputRecords = format.InputRecords,
                Temporary    = data.Temporary ? (DateTime?)now : null
            };

            // no need to wait for save, assume it does
            db.GetCollection<Format>("Formats")
            .InsertOneAsync(newFormat);

            return(newFormat);
        }
示例#5
0
        /// <summary>
        /// Accepts an uploaded file, converts it to comma-separated text when it can be read as a
        /// spreadsheet, caches the resulting text for the user and reports its detected format.
        /// </summary>
        /// <param name="userId">String form of the user's <c>ObjectId</c>.</param>
        /// <param name="file">The uploaded file.</param>
        /// <returns>
        /// Details of the cached input (cache id, possibly truncated text, format, size and record count),
        /// or an object built by <c>InputUpload.GetError</c> when the user id cannot be parsed, no file
        /// was supplied, the user is not found, or the input format is not supported.
        /// </returns>
        public InputUpload UploadInput(string userId, IFormFile file)
        {
            if (!ObjectId.TryParse(userId, out var _userId))
            {
                logger.LogInformation($"User not found or not active - {userId}");
                return(InputUpload.GetError(ErrorCode.UserNotFound, "User not found or not active"));
            }

            // Report a missing upload as an error instead of failing with a NullReferenceException
            // after the user query has already been started.
            if (file == null)
            {
                logger.LogInformation("Expected \"file\" parameter");
                return(InputUpload.GetError(ErrorCode.MissingParameters, "Expected \"file\" parameter"));
            }

            using (var asyncQueriesCancelationTokenSource = new CancellationTokenSource())
            {
                // Started before the upload is copied; its result is only read once the copy is done
                var userQuery = GetUserAsync(_userId, asyncQueriesCancelationTokenSource.Token);

                var input = new MemoryStream();
                try
                {
                    file.CopyToAsync(input, asyncQueriesCancelationTokenSource.Token).Wait();

                    var user = userQuery.Result;
                    if (user == null)
                    {
                        asyncQueriesCancelationTokenSource.Cancel();
                        logger.LogInformation($"User not found or not active - {userId}");
                        return(InputUpload.GetError(ErrorCode.UserNotFound, "User not found or not active"));
                    }

                    input.Seek(0, SeekOrigin.Begin);

                    // try to read as XLS and convert to CSV
                    try
                    {
                        using (var reader = ExcelReaderFactory.CreateReader(input))
                        {
                            var csvInput = new MemoryStream();

                            // leaveOpen = true: csvInput must outlive the writer so it can replace "input"
                            using (var writer = new StreamWriter(csvInput, Encoding.Unicode, 8192, true))
                            {
                                // One pass per result set exposed by the reader
                                do
                                {
                                    if (reader.HeaderFooter != null && reader.HeaderFooter.FirstHeader != null)
                                    {
                                        writer.WriteLine(reader.HeaderFooter.FirstHeader);
                                    }

                                    while (reader.Read())
                                    {
                                        // Null cells become empty strings; values containing a comma are
                                        // wrapped in double quotes
                                        writer.WriteLine(
                                            string.Join(",", Enumerable
                                                        .Range(0, reader.FieldCount)
                                                        .Select(i => reader.IsDBNull(i) ? "" : reader.GetValue(i).ToString())
                                                        .Select(strVal => strVal.Contains(",") ?
                                                                $"\"{strVal}\"" :
                                                                strVal
                                                                )
                                                        )
                                            );
                                    }
                                }while (reader.NextResult());
                            }

                            // The converted text replaces the raw upload from here on
                            csvInput.Seek(0, SeekOrigin.Begin);
                            input.Dispose();
                            input = csvInput;
                        }
                    }
                    catch (Exception)
                    {
                        // Deliberate best-effort: the spreadsheet conversion failed, so rewind and
                        // carry on with the current stream as plain text.
                        input.Seek(0, SeekOrigin.Begin);
                    }

                    using (var reader = new StreamReader(input))
                    {
                        string data         = reader.ReadToEnd();
                        var    inputCacheId = Formatik.GetRepeatableBase64HashCode(data);

                        // Caching runs while the input format is being detected
                        var    cacheTask    = CacheInputAsync(_userId, inputCacheId, data);

                        (InputFormat InputFormat, int Records)inputDetails;
                        try
                        {
                            inputDetails = Formatik.GetInputFormat(data, user.MaxRecordCount ?? 1000);
                        }
                        catch (UnsupportedFormatException)
                        {
                            return(InputUpload.GetError(ErrorCode.UnsupportedFormat, "Unsupported Format"));
                        }

                        cacheTask.Wait();

                        // Resolve the limit once so the 64000 default also applies when the setting is
                        // unset; comparing directly against a null setting is always false, which
                        // previously disabled truncation altogether.
                        var maxResultSize = configuration.FileUploadMaxResultSize ?? 64000;
                        var truncated     = data.Length > maxResultSize;

                        return(new InputUpload()
                        {
                            InputCacheId = inputCacheId,
                            Input = truncated ? data.Substring(0, maxResultSize) + "..." : data,
                            Truncated = truncated,
                            InputFormat = inputDetails.InputFormat.ToString(),
                            Size = (int)input.Length,
                            Records = inputDetails.Records
                        });
                    }
                }
                finally
                {
                    input.Dispose();
                }
            }
        }
示例#6
0
 /// <summary>
 /// Initializes <c>formatik</c> with a <c>Formatik</c> created by its parameterless constructor.
 /// </summary>
 public BsonFormatik() => formatik = new Formatik();