コード例 #1
        /// <summary>
        /// Runs an epsilon-greedy, tabular Q-learning loop for <c>Episodes</c> episodes of at most
        /// <c>MaxStepPerEpisode</c> steps each, and stores the summed per-step reward of every episode
        /// in a <c>double[]</c> of length <c>Episodes</c>.
        /// </summary>
        /// <param name="pr">
        /// Progress sink; receives <c>ep * 100.0 / Episodes</c> whenever <c>ep % 50 == 0</c>.
        /// </param>
        /// <returns>
        /// Presumably the <c>EpisodeRewards</c> array; no <c>return</c> statement is visible in this listing.
        /// </returns>
        /// <remarks>
        /// NOTE(review): this listing has lost its braces and apparently some statements as well
        /// (no <c>return</c>, no increment of <c>ep</c>, no <c>MoveNext()</c>). The review notes below
        /// describe only what is visible and must be confirmed against the complete file.
        /// </remarks>
        public static double[] Start1(IProgress <double> pr)
            // One accumulated reward per episode.
            double[] EpisodeRewards = new double[Episodes];

            // Lookup arrays handed to the Q-table; each is filled below so that element i holds the value i.
            var OrderQuantities    = new int[OrderQuantitiesCount1 + 1];
            var InventoryPositions = new int[InventoryPositionCount_S1 + 1];
            var RemainingLives     = new int[RemainingLivesCount_S1 + 1];

            // NOTE(review): the three fills are started as separate tasks and nothing visible here waits
            // for them before the arrays are passed to QTable1 — confirm the tasks are awaited, otherwise
            // the table may be built from partially filled arrays.
                Task.Factory.StartNew(() =>
                for (int i = 0; i <= OrderQuantitiesCount1; i++)
                    OrderQuantities[i] = i;
                Task.Factory.StartNew(() =>
                for (int i = 0; i <= InventoryPositionCount_S1; i++)
                    InventoryPositions[i] = i;
                Task.Factory.StartNew(() =>
                for (int i = 0; i <= RemainingLivesCount_S1; i++)
                    RemainingLives[i] = i;

            var Table1 = new QTable1(InventoryPositions, RemainingLives, OrderQuantities);

            // NOTE(review): the seed is DateTime.UtcNow.Millisecond (0-999), so there are at most 1000
            // distinct random sequences; the demand generator below is seeded the same way and will
            // usually receive the same seed as this instance.
            var randm = new Random(DateTime.UtcNow.Millisecond);

            // Exploration probability. Starting at 1 means the explore branch is always taken in the
            // first episode (NextDouble() is always < 1); it is multiplied by EpsilonDecay per episode.
            double Epsilon = 1;
            int    ep      = 0;
            // Enumerator over the demand values produced by MathFunctions.GetDemand; a and b are defined
            // outside this method (presumably distribution parameters — TODO confirm).
            var    itr     = MathFunctions.GetDemand(a, b, new Random(DateTime.UtcNow.Millisecond)).GetEnumerator();

            while (ep < Episodes)
                double ep_reward      = 0;
                // Every episode starts with an empty inventory.
                var    ProductsOnHand = new List <Product>(InventoryPositionCount_S1);

                for (int step = 0; step < MaxStepPerEpisode; step++)
                    // NOTE(review): no MoveNext() call is visible, so Current would never advance — confirm.
                    var actual_demand = itr.Current;

                    // Total remaining life over all products on hand; second component of the state.
                    int life_rem = ProductsOnHand.Sum(p => Product.LIFE_SPAN - p.LifeSpent);

                    // determine order quantity
                    int oq;
                    int total_product_count = ProductsOnHand.Count;
                    // Free capacity: the order may not push the inventory above InventoryPositionCount_S1.
                    int max_oq  = InventoryPositionCount_S1 - total_product_count;
                    var dec_rnd = randm.NextDouble();

                    if (dec_rnd < Epsilon) // explore
                        // NOTE(review): Random.Next(max) has an exclusive upper bound, so max_oq itself is
                        // never picked here — confirm whether that is intended.
                        oq = OrderQuantities[randm.Next(max_oq)];
                    else // exploit
                        // Ask the table for its best key for the current state and take that key's action.
                        var key = Table1.GetMaxOrderQuantityForState(total_product_count, life_rem, max_oq);
                        oq = key.Action;

                    // receive the arrived products that were ordered previously
                    // (as written, the quantity chosen in this step is added immediately as new Product(0) items)
                    for (int i = oq - 1; i >= 0; i--)
                        ProductsOnHand.Add(new Product(0));

                    // calculate shortage
                    // (demand that exceeds the stock on hand, floored at zero)
                    int Ts = Math.Max(0, actual_demand - ProductsOnHand.Count);

                    // remove the products consumed by customers
                    // NOTE(review): as shown, RemoveRange runs only when actual_demand >= Count, and
                    // List<T>.RemoveRange throws ArgumentException when the count exceeds the list length.
                    // A Clear()/else branch may have been dropped from this listing — confirm.
                    if (ProductsOnHand.Count > 0)
                        if (actual_demand >= ProductsOnHand.Count)
                            // removing the oldest products
                            ProductsOnHand.RemoveRange(0, actual_demand);

                    // discard outdated product and calculate outage amount
                    // NOTE(review): in the visible code To is never incremented and nothing is removed;
                    // new_life_rem only accumulates for products with LifeSpent > LIFE_SPAN, so every
                    // contribution is negative. Lines are probably missing here — confirm.
                    int To           = 0;
                    var new_life_rem = 0;
                    for (int i = ProductsOnHand.Count - 1; i >= 0; i--)
                        if (ProductsOnHand[i].LifeSpent > Product.LIFE_SPAN)
                            new_life_rem += Product.LIFE_SPAN - ProductsOnHand[i].LifeSpent;

                    // Step reward: shortage and outage amounts weighted by their respective cost constants.
                    double reward = Ts * ShortageCost + To * OutageCost;

                    ep_reward += reward;

                    // calculate max q(s',a')
                    // quantity in next round = on hand inventory + orders in transit - next day demand
                    // NOTE(review): the action bound here is OrderQuantities.Last() - count, whereas the
                    // exploit branch above uses InventoryPositionCount_S1 - count — confirm both are intended.
                    var new_total_product_count = ProductsOnHand.Count;
                    var next_maxq_key           = Table1.GetMaxOrderQuantityForState(new_total_product_count, new_life_rem, OrderQuantities.Last() - new_total_product_count);
                    var max_q_future            = Table1[next_maxq_key];

                    // update q table
                    // The key is the state observed at the start of the step plus the chosen order quantity;
                    // the old estimate is blended with (reward + FutureDiscount * max_q_future) by LearningRate.
                    var state   = new QuantityLifeState(total_product_count, life_rem);
                    var sa_pair = new QTableKey1(state, oq);
                    Table1[sa_pair] = (1 - LearningRate) * Table1[sa_pair] + LearningRate * (reward + FutureDiscount * max_q_future);

                // End of episode: record its total reward and decay the exploration probability.
                // NOTE(review): no increment of ep is visible in this listing — confirm the loop terminates.
                EpisodeRewards[ep] = ep_reward;
                Epsilon           *= EpsilonDecay;
                //LearningRate *= EpsilonDecay;
                // Report progress as a percentage on every 50th episode.
                if (ep % 50 == 0)
                    pr.Report(ep * 100.0 / Episodes);

コード例 #2
 /// <summary>
 /// Creates a Q-table key from a state and an action, stored in <c>State</c> and <c>Action</c>.
 /// </summary>
 /// <param name="state">state part of the key</param>
 /// <param name="actn">order quantity</param>
 public QTableKey1(QuantityLifeState state, int actn)
     State  = state;
     Action = actn;