Exemplo n.º 1
        /// <summary>
        /// Prepares CSV output for the orders and, when a customer list is supplied, for the customers.
        /// Each target file name is the given path with ".csv" appended.
        /// </summary>
        /// <param name="ordersToOutput">Orders whose cart items are flattened into one row per cart item.</param>
        /// <param name="customersToOutput">Customers to output; the customers file is only opened when this is not null.</param>
        /// <param name="ordersFilePathToWriteTo">Orders file path without extension.</param>
        /// <param name="customersFilePathToWriteTo">Customers file path without extension.</param>
        // NOTE(review): this excerpt shows no braces and no call that writes records to either CsvWriter;
        // those lines appear to be missing from this view - confirm against the full file.
        // ReSharper disable once InconsistentNaming
        private static void OutputRandomCsv(
            ManyRandomOrders ordersToOutput,
            List <Customer> customersToOutput,
            string ordersFilePathToWriteTo,
            string customersFilePathToWriteTo)
            // Write the random orders to a CSV file
            // Both writers are 'using' declarations, so they are disposed when their enclosing scope ends
            using StreamWriter ordersStreamWriter = new StreamWriter($"{ordersFilePathToWriteTo}.csv");
            using CsvWriter ordersCsvWriter       = new CsvWriter(ordersStreamWriter, CultureInfo.InvariantCulture);
                // Flatten orders: one anonymous row per (order, cart item) pair, repeating the
                // order-level CustomerId and OrderDate on every row
                var flattenedManyRandomOrders =
                    from order in ordersToOutput.Orders
                    from cartItem in order.CartItems
                    select new
                    CustomerId = order.CustomerId,
                    Sku        = cartItem.Sku,
                    Quantity   = cartItem.Quantity,
                    OrderDate  = order.OrderDate


            // Customers are optional - callers may pass null when there is no customer list
            if (customersToOutput != null)
                // Write the customers to a CSV file
                using StreamWriter customersStreamWriter = new StreamWriter($"{customersFilePathToWriteTo}.csv");
                using CsvWriter customersCsvWriter       = new CsvWriter(customersStreamWriter, CultureInfo.InvariantCulture);
Exemplo n.º 2
        /// <summary>
        /// Generation complete - ask the user for an output format and write CSV, JSON or both.
        /// </summary>
        /// <param name="ordersToOutput">The generated orders, passed through to the writers.</param>
        /// <param name="customersToOutput">The generated customers, passed through to the writers unchanged (may be null).</param>
        /// <remarks>
        /// Only the first character of the response is inspected, case-insensitively:
        /// 'c' = CSV, 'j' = JSON, 'b' = both. Anything else - including an empty line or
        /// end of input - prints a warning and falls back to CSV.
        /// </remarks>
        // ReSharper disable once InconsistentNaming
        private static void OutputData(ManyRandomOrders ordersToOutput, List<Customer> customersToOutput)
        {
            Console.WriteLine("Output CSV or JSON or both? [cC]|[jJ]|[bB]");
            string jsonOrCsvResponse = Console.ReadLine();

            // Default to CSV output
            bool outputCsv = true;

            bool outputBoth = false;

            const char useJson = 'j';
            const char useCsv  = 'c';
            const char useBoth = 'b';

            // ReadLine() yields null at end of input and "" when the user just presses Enter.
            // Indexing [0] on an empty string throws, so treat both as "no choice made".
            char? choice = string.IsNullOrEmpty(jsonOrCsvResponse)
                ? (char?)null
                : char.ToLowerInvariant(jsonOrCsvResponse[0]);

            switch (choice)
            {
                case useJson:
                    outputCsv = false;
                    break;

                case useCsv:
                    break;

                case useBoth:
                    outputBoth = true;
                    break;

                default:
                    Console.WriteLine("Invalid response. Defaulting to CSV");
                    break;
            }

            // Read the clock once so the orders and customers files always share the same suffix
            string timestamp = $"{DateTime.Now:yyyy-MM-dd-HH-mm-ss}";

            string ordersFilePathToWriteTo    = $"RandomOrders-{timestamp}";
            string customersFilePathToWriteTo = $"RandomCustomers-{timestamp}";

            if (outputBoth)
            {
                OutputRandomCsv(ordersToOutput, customersToOutput, ordersFilePathToWriteTo, customersFilePathToWriteTo);
                OutputRandomJson(ordersToOutput, customersToOutput, ordersFilePathToWriteTo, customersFilePathToWriteTo);
            }
            else
            {
                if (outputCsv)
                {
                    OutputRandomCsv(ordersToOutput, customersToOutput, ordersFilePathToWriteTo, customersFilePathToWriteTo);
                }
                else
                {
                    OutputRandomJson(ordersToOutput, customersToOutput, ordersFilePathToWriteTo, customersFilePathToWriteTo);
                }
            }
        }
Exemplo n.º 3
        /// <summary>
        /// Serializes the orders and, when a customer list is supplied, the customers to JSON files.
        /// Each target file name is the given path with ".json" appended.
        /// </summary>
        /// <param name="ordersToOutput">Orders object to serialize as a whole.</param>
        /// <param name="customersToOutput">Customers to serialize; no customers file is written when this is null.</param>
        /// <param name="ordersFilePathToWriteTo">Orders file path without extension.</param>
        /// <param name="customersFilePathToWriteTo">Customers file path without extension.</param>
        // ReSharper disable once InconsistentNaming
        private static void OutputRandomJson(
            ManyRandomOrders ordersToOutput,
            List<Customer> customersToOutput,
            string ordersFilePathToWriteTo,
            string customersFilePathToWriteTo)
        {
            // Serialize the random orders and write to a JSON file
            string ordersJson = JsonConvert.SerializeObject(ordersToOutput, Formatting.None); // None for file size

            // Plain interpolation is enough here: the literal part has no escapes, so no verbatim prefix is needed
            File.WriteAllText($"{ordersFilePathToWriteTo}.json", ordersJson);

            // Customers are optional - nothing more to write without them
            if (customersToOutput == null)
            {
                return;
            }

            string customersJson = JsonConvert.SerializeObject(customersToOutput, Formatting.None); // None for file size
            File.WriteAllText($"{customersFilePathToWriteTo}.json", customersJson);
        }
Exemplo n.º 4
        /// <summary>
        /// Generate random orders and customers. Include some telecom fields on the customer to mimic
        /// a common Kaggle dataset (telecom customers) - but associate customer IDs to the random orders
        /// </summary>
        /// <param name="args">true to use retail.dat (from http://fimi.uantwerpen.be/data/) for the orders</param>
        /// <remarks>
        /// Two generation paths are visible: one builds orders from the lines of retail.dat and outputs
        /// them without a customer list, the other builds fully random orders plus a customer per
        /// distinct customer ID. Both finish by printing the generation time and calling OutputData.
        /// NOTE(review): this excerpt shows no braces and several statements look truncated (for example
        /// query sources and list materialisation) - confirm details against the full file.
        /// </remarks>
        static void Main(string[] args)
            // Used for outputting generation time
            // NOTE(review): no call that starts or stops this stopwatch is visible in this excerpt -
            // confirm it is running before ElapsedMilliseconds is read below
            Stopwatch sw = new Stopwatch();


            // The retail.dat path is only considered when exactly one argument is given and it parses as a bool
            if (args.Length == 1 && bool.TryParse(args[0], out bool useRetailDat))
                #region Use retail.dat file to generate orders with not-so-random customer IDs, emails and order dates
                if (useRetailDat)
                    ManyRandomOrders notSoRandomOrders = new ManyRandomOrders
                        Orders = new List <CartOrder>()

                    string line;
                    // Read the retail.dat file
                    // NOTE(review): no using/Dispose for this reader is visible in this excerpt - confirm it gets closed
                    StreamReader file = new StreamReader(@"retail.dat");
                    while ((line = file.ReadLine()) != null)
                        // Each line is parsed as space-separated integer SKUs; int.Parse throws on any other token
                        int[] skusInTransaction = line.Trim().Split(" ").Select(int.Parse).ToArray();
                        int[] distinctSkus      = skusInTransaction.Distinct().ToArray();

                        // One cart item per distinct SKU; its quantity is how often that SKU occurs on the line
                        List <CartItem> cartItems = new List <CartItem>();
                        foreach (int sku in distinctSkus)
                            cartItems.Add(new CartItem
                                Sku      = sku.ToString(),
                                Quantity = skusInTransaction.Count(i => i == sku)

                        // Customer ID, email and order date are generated, not read from the file
                        CartOrder cartOrder = new CartOrder
                            CustomerId = _random.Next(StartCustomersAt, StartCustomersAt + MaximumNumberOfCustomers),
                            Email      = RandomEmail(),
                            OrderDate  = RandomDateTime(),
                            CartItems  = cartItems


                    // Don't include serialization in generation time metric

                    Console.WriteLine($"Generation time: {sw.ElapsedMilliseconds} milliseconds.");

                    // No customer list is produced on this path, so null is passed for the customers
                    OutputData(notSoRandomOrders, null);
                // Don't use retail.dat (random SKUs & quantities)

                // List of possible SKUs up to MaximumLength (e.g.: 0001, 0002, 0003, ...9999)
                List <string> allSkuCombinations =
                    Enumerable.Range(1, MaximumLengthOfSku)
                    .SelectMany(count =>
                                // Cartesian product
                                Enumerable.Repeat(CharactersToUse, count).CartesianProduct())
                    .Select(combination =>
                            new string(combination.ToArray()))

                // Duplicate each SKU up to MaximumSkuQuantityVariance times
                // NOTE(review): if _random is a System.Random, the upper bound of Next(min, max) is exclusive,
                // so the repeat count here tops out at MaximumSkuQuantityVariance - 1 - confirm that is intended
                List <string> allSkuCombinationsWithVariableDuplicates =
                    .SelectMany(sku =>
                                Enumerable.Repeat(sku, _random.Next(1, MaximumSkuQuantityVariance)))

                // Random cart items
                CartItem[] manyRandomCartItems =
                    .Select(sku =>
                            new CartItem
                    Sku      = sku,
                    Quantity = _random.Next(1, MaximumSkuQuantity)

                // Random list of indices in order to pick random cart items from manyRandomCartItems
                int[] randomIndicesToChooseFrom =
                    // Fisher-Yates shuffle
                    FisherYatesShuffle.RandomIndices(_random, manyRandomCartItems.Length);

                // Many random orders (to output)
                ManyRandomOrders manyRandomOrders = new ManyRandomOrders
                    Orders = new List <CartOrder>()

                for (int orderIndex = 0; orderIndex < OrdersToGenerate; orderIndex++)
                    CartOrder cartOrder = new CartOrder
                        CustomerId = _random.Next(StartCustomersAt, StartCustomersAt + MaximumNumberOfCustomers),
                        OrderDate  = RandomDateTime()

                    // Randomize the number of cart items purchased by this customer
                    int numberOfItemsInThisOrder = _random.Next(1, MaximumCartItemQuantity);

                    List <CartItem> cartItemsToAddToThisOrder = new List <CartItem>();
                    for (int cartItemIndex = 0; cartItemIndex < numberOfItemsInThisOrder; cartItemIndex++)
                        // The index of the 'random indices' array to select the cart item to use when constructing each order
                        // NOTE(review): if _random is a System.Random, the upper bound is exclusive, so "Length - 1"
                        // means the last entry of randomIndicesToChooseFrom is never picked - likely an off-by-one
                        int indexOfRandomIndexToChoose = _random.Next(0, randomIndicesToChooseFrom.Length - 1);

                        // Choose a random index from the 'random indices' array
                        int indexOfCartItemToUse = randomIndicesToChooseFrom[indexOfRandomIndexToChoose];

                        // Use this random index to get a random cart item

                    cartOrder.CartItems = cartItemsToAddToThisOrder;


                // Group orders by customer
                // AccountLength is the number of days from the customer's earliest order to today;
                // DaysSinceLastPurchase is the number of days from their most recent order to today
                List <Customer> manyRandomCustomers =
                    (from order in manyRandomOrders.Orders
                     group order by order.CustomerId
                     into grouped
                     select new Customer
                    CustomerId = grouped.Key,
                    AccountLength = (float)(DateTime.Today - grouped.Min(i => i.OrderDate)).TotalDays,
                    DaysSinceLastPurchase = (float)(DateTime.Today - grouped.Max(i => i.OrderDate)).TotalDays,
                    Email = RandomEmail()

                // Make some telecom-esque customers with the same customer IDs present in our random orders
                foreach (Customer customer in manyRandomCustomers)
                    (string areaCode, string phoneNumber) = RandomAreaCodeAndPhoneNumber();
                    customer.AreaCode    = areaCode;
                    customer.PhoneNumber = phoneNumber;

                    // Assume churned if they haven't purchased in 6 months
                    customer.Churned = customer.DaysSinceLastPurchase > 182.5;

                    customer.Voice = RandomBoolean();

                    // Judging by indentation, the call statistics below are only filled in for voice customers,
                    // and the international figures only when a random international-plan flag comes up true
                    if (customer.Voice)
                        bool internationalPlan = RandomBoolean();
                        if (internationalPlan)
                            #region International calls
                            // Calls max 2/3 of account length
                            customer.TotalInternationalCalls = RandomFloatTwoThirdsProportional(customer.AccountLength);
                            // Minutes max 2/1 of number of calls
                            customer.TotalInternationalMinutes = RandomFloatTwiceProportional(customer.TotalInternationalCalls);
                            // Charges max 1/4 of number of minutes
                            customer.TotalInternationalCharges = RandomFloatOneQuarterProportional(customer.TotalInternationalMinutes);

                        #region Daytime calls
                        // Calls max 2/3 of account length
                        customer.TotalDaytimeCalls = RandomFloatTwoThirdsProportional(customer.AccountLength);
                        // Minutes max 2/1 of number of calls
                        customer.TotalDaytimeMinutes = RandomFloatTwiceProportional(customer.TotalDaytimeCalls);
                        // Charges max 1/4 of number of minutes
                        customer.TotalDaytimeCharges = RandomFloatOneQuarterProportional(customer.TotalDaytimeMinutes);

                        #region Evening calls
                        // Calls max 2/3 of account length
                        customer.TotalEveningCalls = RandomFloatTwoThirdsProportional(customer.AccountLength);
                        // Minutes max 2/1 of number of calls
                        customer.TotalEveningMinutes = RandomFloatTwiceProportional(customer.TotalEveningCalls);
                        // Charges max 1/4 of number of minutes
                        customer.TotalEveningCharges = RandomFloatOneQuarterProportional(customer.TotalEveningMinutes);

                        #region Night calls
                        // Calls max 2/3 of account length
                        customer.TotalNightCalls = RandomFloatTwoThirdsProportional(customer.AccountLength);
                        // Minutes max 2/1 of number of calls
                        customer.TotalNightMinutes = RandomFloatTwiceProportional(customer.TotalNightCalls);
                        // Charges max 1/4 of number of minutes
                        customer.TotalNightCharges = RandomFloatOneQuarterProportional(customer.TotalNightMinutes);

                    // Service-call and message counts are derived from account length in the same way as call counts
                    customer.NumberOfCustomerServiceCalls = RandomFloatTwoThirdsProportional(customer.AccountLength);
                    customer.NumberOfMessages             = RandomFloatTwoThirdsProportional(customer.AccountLength);

                // Don't include serialization in generation time metric

                Console.WriteLine($"Generation time: {sw.ElapsedMilliseconds} milliseconds.");

                OutputData(manyRandomOrders, manyRandomCustomers);