예제 #1
0
 /// <param name="actn">order quantity</param>
 public QTableKey1(QuantityLifeState state, int actn)
 {
     State  = state;
     Action = actn;
 }
예제 #2
0
        public static void Start()
        {
            var randm = new Random(DateTime.UtcNow.Millisecond);

            int life_rem = 0;

            for (int i = 0; i < 20; i++)
            {
                var p = new Product();
                ProductsOnHand.Add(p);
                life_rem += p.LifeRemaining;
            }

            int ep            = 0;
            var itr           = MathFunctions.GetDemand(a, b, new Random(DateTime.UtcNow.Millisecond)).GetEnumerator();
            var actual_demand = itr.Current;

            while (ep < Episodes)
            {
                double ep_reward = 0;
                double reward    = 0.0;

                itr.MoveNext();
                var next_demand = itr.Current;

                for (int step = 0; step < MaxStepPerEpisode; step++)
                {
                    // recieve the arrived products that was ordered previously
                    for (int i = OrdersNotArrived.Count - 1; i >= 0; i--)
                    {
                        if (OrdersNotArrived[i].RemainingDaysToArrive == 0)
                        {
                            var n_new_product = OrdersNotArrived[i].Quantity;
                            for (int j = 0; j < n_new_product; j++)
                            {
                                var prdct = new Product();
                                prdct.LifeRemaining -= Order.LeadTime;
                                ProductsOnHand.Add(prdct);
                            }
                            OrdersNotArrived.RemoveAt(i);
                        }
                    }

                    // calculate shortage
                    int Ts = Math.Max(0, actual_demand - ProductsOnHand.Count);
                    reward += Ts * ShortageCost;

                    // remove the products consumed by customers
                    if (ProductsOnHand.Count > 0)
                    {
                        for (int i = 0; i < actual_demand; i++)
                        {
                            ProductsOnHand.RemoveAt(0);
                            reward += SalePrice;
                            if (ProductsOnHand.Count == 0)
                            {
                                break;
                            }
                        }
                    }

                    // discard outdated product and calculate outage amount
                    int To = 0;
                    life_rem = 0;
                    for (int i = ProductsOnHand.Count - 1; i >= 0; i--)
                    {
                        ProductsOnHand[i].LifeRemaining--;
                        if (ProductsOnHand[i].LifeRemaining <= 0)
                        {
                            To++;
                            ProductsOnHand.RemoveAt(i);
                        }
                        else
                        {
                            life_rem += ProductsOnHand[i].LifeRemaining;
                        }
                    }
                    reward += To * OutageCost;

                    ep_reward += reward;
                    if (ep_reward > 800)
                    {
                        break;
                    }

                    // determine order quantity
                    int oq, next_oq;
                    int total_product_count = ProductsOnHand.Count + OrdersNotArrived.Sum(o => o.Quantity);
                    if (randm.NextDouble() < Epsilon) // explore
                    {
                        oq = OrderQuantities[randm.Next(OrderQuantities.Length)];
                    }
                    else // exploit
                    {
                        oq = Table1.GetMaxOrderQuantityForState(total_product_count, life_rem, OrderQuantities.First(), OrderQuantities.Last());
                    }

                    // mustn't exceed inventory capacity
                    if (total_product_count + oq >= InventoryPositionCount_S1)
                    {
                        oq = InventoryPositionCount_S1 - 1 - total_product_count;
                    }

                    OrdersNotArrived.Add(new Order(oq));
                    reward = oq * OrderingCost;

                    for (int i = 0; i < OrdersNotArrived.Count; i++)
                    {
                        life_rem += L - (Order.LeadTime - OrdersNotArrived[i].RemainingDaysToArrive);
                        OrdersNotArrived[i].RemainingDaysToArrive--;
                    }

                    //double reward = -To * co - Ts * cs;
                    //Console.Write(reward.ToString("N2") + ",  ");
                    //if (ep % 20 == 0)
                    //    Console.WriteLine();

                    // calculate max q(s',a')
                    // quantity in next round = on hand inventory + orders in transit - next day demand
                    total_product_count = ProductsOnHand.Count + OrdersNotArrived.Sum(o => o.Quantity);
                    int qntity_next_round = total_product_count - next_demand;
                    next_oq = Table1.GetMaxOrderQuantityForState(total_product_count, life_rem, OrderQuantities.First(), OrderQuantities.Last());

                    // update q table
                    var state   = new QuantityLifeState(total_product_count, life_rem);
                    var sa_pair = new QTableKey1(state, oq);
                    Table1[sa_pair] = (1 - LearningRate) * Table1[sa_pair] + LearningRate * (reward + FutureDiscount * next_oq);

                    actual_demand = next_demand;
                }

                EpisodeRewards[ep] = ep_reward;
                Console.WriteLine(ep_reward);

                ep_reward = 0;
                Epsilon  *= EpsilonDecay;
                ep++;
            }
        }