/// <summary>
        /// Builds the field-level set of state machines (text, phone numbers, timestamps, Samsung SMS marker,
        /// Nokia record end, plus any user-defined timestamp/text machines) and hands it to
        /// <c>TestStateMachines</c> to be aggregated into the final HMM for Viterbi inference.
        /// </summary>
        /// <param name="machines">Stores the list of state machines which are aggregated.</param>
        /// <param name="states">List of all the states present in the aggregated HMM.</param>
        /// <param name="startState">The starting state of the HMM.</param>
        /// <param name="userStates">Optional user-defined machines; may be null, in which case none are added here.</param>
        public static void GeneralParse(ref List<StateMachine> machines, ref List<State> states, ref State startState,
            List<UserState> userStates = null)
        {
            // Built-in machines, each with its relative weight.
            var fieldMachines = new List<StateMachine>();
            fieldMachines.Add(GetText(10));
            // Per the original note, GetPhoneNumber_All() takes care of user-defined phone states itself.
            fieldMachines.Add(GetPhoneNumber_All(8, userStates));
            fieldMachines.Add(GetTimeStamp_All(4));
            fieldMachines.Add(GetSamsungSMSMarker(3));
            fieldMachines.Add(GetNokiaRecordEnd(1));

            // User-defined timestamp and text machines are appended after the built-in ones.
            if (userStates != null)
            {
                foreach (UserState custom in userStates)
                {
                    switch (custom.MachineType)
                    {
                        case MachineList.TimeStamp_User:
                            fieldMachines.Add(GetTimestamp_UserDefined(custom, 4));
                            break;
                        case MachineList.Text_User:
                            fieldMachines.Add(GetText_UserDefined(custom, 8));
                            break;
                    }
                }
            }

            TestStateMachines(fieldMachines, ref machines, ref states, ref startState);
        }
Exemple #2
0
        /// <summary>
        /// Creates a Viterbi instance and calls the static <c>StateMachine</c> builder that matches the run type,
        /// which fills in the machines, the states and the start state used for inference.
        /// </summary>
        /// <param name="type">The run type (field level, record level, phone numbers only, etc.) selecting which builder is called.</param>
        /// <param name="isAnchor">Stored in <c>_isAnchor</c>; not otherwise used by this constructor.</param>
        /// <param name="userStates">Any user-defined state machines; null is replaced by an empty list.</param>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="type"/> matches none of the handled run types.</exception>
        public Viterbi(RunType type, bool isAnchor, List<UserState> userStates=null)
        {
            #if !PRINT_FIELD
            Console.WriteLine("Not Printing fields!");
            #endif

            // Configuration.
            _runType = type;
            _isAnchor = isAnchor;
            _userStates = userStates != null ? userStates : new List<UserState>();

            // Result buffers.
            _textList = new List<string>();
            _fieldList = new List<ViterbiField>();

            // Model containers, populated by the builder selected below.
            _machines = new List<StateMachine>();
            _states = new List<State>();
            _startState = new State();

            switch (type)
            {
                case RunType.BinaryOnly:
                    StateMachine.TestBinaryOnly(ref _machines, ref _states, ref _startState);
                    break;
                case RunType.GeneralParse:
                    StateMachine.GeneralParse(ref _machines, ref _states, ref _startState, _userStates);
                    break;
                case RunType.PhoneNumberOnly:
                    StateMachine.TestPhoneOnly(ref _machines, ref _states, ref _startState);
                    break;
                case RunType.PhoneNumberAndText:
                    StateMachine.TestPhoneNumberAndTextMachine(ref _machines, ref _states, ref _startState);
                    break;
                case RunType.TextOnly:
                    StateMachine.TestTextOnly(ref _machines, ref _states, ref _startState);
                    break;
                case RunType.PhoneNumberTextAndTimeStamp:
                    StateMachine.TestPhoneNumberTextAndTimeStamp(ref _machines, ref _states, ref _startState);
                    break;
                case RunType.Meta:
                    StateMachine.TestMeta(ref _machines, ref _states, ref _startState);
                    break;
                case RunType.AnchorPoints:
                    StateMachine.TestAnchorFieldsOnly(ref _machines, ref _states, ref _startState, _userStates);
                    break;
                case RunType.Moto:
                    StateMachine.TestMoto(ref _machines, ref _states, ref _startState);
                    break;
                default:
                    throw new ArgumentOutOfRangeException("type");
            }

            string runTypeName = Convert.ToString(type);
            Console.WriteLine("Viterbi set to {0}", runTypeName);
        }
        /// <summary>
        /// Builds an HMM whose only non-default machine is the text machine (weight 1) and aggregates it
        /// through <c>TestStateMachines</c>.
        /// </summary>
        /// <param name="machines">Stores the list of state machines which are aggregated.</param>
        /// <param name="states">List of all the states present in the aggregated HMM.</param>
        /// <param name="startState">The starting state of the HMM.</param>
        public static void TestTextOnly(ref List<StateMachine> machines, ref List<State> states, ref State startState)
        {
            List<StateMachine> textOnly = new List<StateMachine>();
            textOnly.Add(GetText(1));

            TestStateMachines(textOnly, ref machines, ref states, ref startState);
        }
        /// <summary>
        /// Gets the state machine for a 4-byte UNIX timestamp read most-significant byte first.
        /// The first state only accepts leading bytes that occur for timestamps between
        /// <c>TimeConstants.START_YEAR</c> and <c>TimeConstants.END_YEAR</c>; the remaining three states
        /// accept any byte value with equal probability.
        /// </summary>
        /// <param name="weight">Weight of TimeStamp_Unix state machine, among the set of state machines to be aggregated, governing a prior probability that the inferred sequence of states corresponds to TimeStamp_Unix state machine.</param>
        /// <returns>The four-state timestamp machine (UnixTime1 to UnixTime4).</returns>
        public static StateMachine GetTimestamp_Unix(int weight)
        {
            var timestamp = new StateMachine { Name = MachineList.TimeStamp_Unix, _weight = weight };

            State unixTime1 = new State { Name = "UnixTime1", ParentStateMachine = timestamp };
            State unixTime2 = new State { Name = "UnixTime2", ParentStateMachine = timestamp };
            State unixTime3 = new State { Name = "UnixTime3", ParentStateMachine = timestamp };
            // NOTE(review): the last byte uses the specialised UnixTimeState type; its extra behavior is not visible here.
            UnixTimeState unixTime4 = new UnixTimeState { Name = "UnixTime4", ParentStateMachine = timestamp };

            timestamp.AddState(unixTime1);
            timestamp.AddState(unixTime2);
            timestamp.AddState(unixTime3);
            timestamp.AddState(unixTime4);

            timestamp.StartingStates.Add(unixTime1);
            timestamp.EndingStates.Add(unixTime4);

            AddTransition(unixTime1, unixTime2, 1d);
            AddTransition(unixTime2, unixTime3, 1d);
            AddTransition(unixTime3, unixTime4, 1d);

            // BL
            DateTime epochTime = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
            DateTime startTime = new DateTime(TimeConstants.START_YEAR, 1, 1, 0, 0, 0, DateTimeKind.Utc);
            DateTime endTime = new DateTime(TimeConstants.END_YEAR, 12, 31, 23, 59, 59, DateTimeKind.Utc);
            uint startEpoch = (uint)(startTime - epochTime).TotalSeconds;
            uint endEpoch = (uint)(endTime - epochTime).TotalSeconds;

            // The data is big-endian, so the first state sees the most significant byte of the epoch value.
            // Shifting extracts that byte regardless of host byte order (BitConverter.GetBytes()[3] is only
            // the most significant byte on little-endian hosts).
            int firstLeadingByte = (int)(startEpoch >> 24);
            int lastLeadingByte = (int)(endEpoch >> 24);

            // An int counter is required: a byte counter wraps from 255 to 0 and the loop would never
            // terminate if the last leading byte were 0xFF.
            for (int value = firstLeadingByte; value <= lastLeadingByte; value++)
            {
                unixTime1.PossibleValueProbabilities[value] = 1d;
            }
            unixTime1.NormalizeProbabilities();

            // The three trailing bytes are unconstrained.
            for (int i = 0; i < 256; i++)
            {
                unixTime2.PossibleValueProbabilities[i] = 1 / 256d;
                unixTime3.PossibleValueProbabilities[i] = 1 / 256d;
                unixTime4.PossibleValueProbabilities[i] = 1 / 256d;
            }
            return timestamp;
        }
        /// <summary>
        /// Aggregates the supplied state machines into one HMM: the binary machine gets a fixed probability of 0.1,
        /// the remaining 0.9 is shared among <paramref name="machinesToTest"/> in proportion to their weights, and
        /// transitions are added from the start machine to every machine and between every pair of machines
        /// (except binary to binary).
        /// </summary>
        /// <param name="machinesToTest">List of state machines to be aggregated.</param>
        /// <param name="machines">Receives the binary machine followed by the machines from <paramref name="machinesToTest"/>.</param>
        /// <param name="states">List of all states present in the aggregated HMM (filled by <c>AddStatesToMainList</c>).</param>
        /// <param name="startState">Receives the starting state of the final, aggregated HMM.</param>
        public static void TestStateMachines(List<StateMachine> machinesToTest, ref List<StateMachine> machines, ref List<State> states, ref State startState)
        {
            StateMachine startMachine = GetStart();
            StateMachine binaryMachine = GetBinary();

            binaryMachine.Probability = 0.1d;
            double sharedProbability = 1d - binaryMachine.Probability;

            startState = startMachine.StartingState;

            // The binary machine always comes first in the aggregated list.
            machines.Add(binaryMachine);

            int totalWeight = machinesToTest.Sum(m => m._weight);

            foreach (StateMachine candidate in machinesToTest)
            {
                Console.WriteLine("Testing machine {0}", candidate.Name);

                // Each machine's prior is its share of the total weight, scaled to the non-binary mass.
                candidate.Probability = (sharedProbability * candidate._weight) / totalWeight;

                machines.Add(candidate);
            }

            AddStatesToMainList(ref machines, ref states);

            foreach (StateMachine source in machines)
            {
                AddTransitionToStateMachine(startMachine, source, source.Probability);

                foreach (StateMachine target in machines)
                {
                    // The one excluded pair: binary never transitions to itself at machine level.
                    bool binaryToBinary = source == binaryMachine && target == binaryMachine;
                    if (!binaryToBinary)
                    {
                        AddTransitionToStateMachine(source, target, target.Probability);
                    }
                }

                foreach (State ending in source.EndingStates)
                {
                    ending.IsEndingState = true;
                }
            }
        }
        /// <summary>
        /// Gets the meta state machine for an address book entry. An entry starts with a text field
        /// (optionally preceded by another text field and a binary separator), may continue through binary
        /// and an SMS-prepend field, and ends on a phone number; after a phone number the machine can loop
        /// back through binary to a further number.
        /// </summary>
        /// <param name="weight">Weight of AddressBook state machine, among the set of state machines to be aggregated, governing a prior probability that the inferred sequence of states corresponds to AddressBook state machine.</param>
        /// <returns>The address book meta machine.</returns>
        public static StateMachine GetMeta_AddressBook(int weight)
        {
            StateMachine metaAddressBook = new StateMachine { Name = MachineList.Meta_AddressBook, _weight = weight };

            State text0 = new State { Name = "Text0", ParentStateMachine = metaAddressBook };
            State text = new State { Name = "Text", ParentStateMachine = metaAddressBook };
            State binary = new State { Name = "Binary", ParentStateMachine = metaAddressBook, IsBinary = true };
            State binary0 = new State { Name = "Binary0", ParentStateMachine = metaAddressBook, IsBinary = true };
            State prepend = new State { Name = "Prepend", ParentStateMachine = metaAddressBook };
            State phoneNumber = new State { Name = "PhoneNumber", ParentStateMachine = metaAddressBook };

            metaAddressBook.AddState(text);
            metaAddressBook.AddState(binary);
            metaAddressBook.AddState(binary0);
            metaAddressBook.AddState(phoneNumber);
            metaAddressBook.AddState(text0);
            metaAddressBook.AddState(prepend);

            metaAddressBook.StartingStates.Add(text);
            metaAddressBook.StartingStates.Add(text0);
            metaAddressBook.EndingStates.Add(phoneNumber);

            // Double literals are used throughout: float literals such as 0.49f widen to inexact doubles,
            // so the outgoing probabilities of a state would not sum to exactly 1.
            AddTransition(text0, binary0, 1d);
            AddTransition(binary0, text, 1d); //Allow a separation of binary between the two text fields
            AddTransition(text, binary, 0.5d);
            AddTransition(text, phoneNumber, 0.5d);
            AddTransition(binary, prepend, 0.5d);
            AddTransition(binary, phoneNumber, 0.49d);
            AddTransition(binary, binary, 0.01d);
            AddTransition(prepend, phoneNumber, 1d);
            AddTransition(phoneNumber, binary, 0.5d);

            // NOTE(review): presumably the probability mass left for exiting the machine - confirm against State.
            phoneNumber.RemainingProbability = 0.5d;

            // Every state emits exactly one MetaMachine symbol.
            text0.PossibleValueProbabilities[(byte)MetaMachine.Text] = 1d;
            text.PossibleValueProbabilities[(byte)MetaMachine.Text] = 1d;

            // Indexed with MetaMachine like every other emission here (previously MachineList.Binary,
            // which is a different enumeration).
            binary0.PossibleValueProbabilities[(byte)MetaMachine.Binary] = 1d;
            binary.PossibleValueProbabilities[(byte)MetaMachine.Binary] = 1d;

            prepend.PossibleValueProbabilities[(byte)MetaMachine.SmsPrepend] = 1d;

            phoneNumber.PossibleValueProbabilities[(byte)MetaMachine.PhoneNumber] = 1d;

            return metaAddressBook;
        }
        /// <summary>
        /// Gets the meta state machine for an address book entry of a Nokia phone: a text field, binary,
        /// a number index, binary again, then a phone number. After a phone number the machine can return
        /// to the first binary state with probability 0.5 to read a further index/number pair.
        /// </summary>
        /// <param name="weight">Weight of AddressBook_Nokia state machine, among the set of state machines to be aggregated, governing a prior probability that the inferred sequence of states corresponds to AddressBook_Nokia state machine.</param>
        /// <returns>The Nokia address book meta machine.</returns>
        public static StateMachine GetMeta_AddressBook_Nokia(int weight)
        {
            var nokiaBook = new StateMachine { Name = MachineList.Meta_AddressBookNokia, _weight = weight };

            var entryText = new State { Name = "Text", ParentStateMachine = nokiaBook };
            var gapBeforeIndex = new State { Name = "Binary1", ParentStateMachine = nokiaBook, IsBinary = true };
            var gapBeforeNumber = new State { Name = "Binary2", ParentStateMachine = nokiaBook, IsBinary = true };
            var numberIndex = new State { Name = "NumberIndex", ParentStateMachine = nokiaBook };
            var number = new State { Name = "PhoneNumber", ParentStateMachine = nokiaBook };

            // Registration order is kept as-is: text, both binary states, phone number, index.
            nokiaBook.AddState(entryText);
            nokiaBook.AddState(gapBeforeIndex);
            nokiaBook.AddState(gapBeforeNumber);
            nokiaBook.AddState(number);
            nokiaBook.AddState(numberIndex);

            nokiaBook.StartingStates.Add(entryText);
            nokiaBook.EndingStates.Add(number);

            // Linear chain: text -> binary -> index -> binary -> number.
            AddTransition(entryText, gapBeforeIndex, 1d);
            AddTransition(gapBeforeIndex, numberIndex, 1d);
            AddTransition(numberIndex, gapBeforeNumber, 1d);
            AddTransition(gapBeforeNumber, number, 1d);
            // Loop back for another index/number pair.
            AddTransition(number, gapBeforeIndex, 0.5d);

            number.RemainingProbability = 0.5d;

            // Each state emits a single MetaMachine symbol with certainty.
            entryText.PossibleValueProbabilities[(byte)MetaMachine.Text] = 1d;

            gapBeforeIndex.PossibleValueProbabilities[(byte)MetaMachine.Binary] = 1d;
            gapBeforeNumber.PossibleValueProbabilities[(byte)MetaMachine.Binary] = 1d;
            numberIndex.PossibleValueProbabilities[(byte)MetaMachine.CallLogNumberIndex] = 1d;
            number.PossibleValueProbabilities[(byte)MetaMachine.PhoneNumber] = 1d;

            return nokiaBook;
        }
        /// <summary>
        /// Not really used anymore. The CallLog state machines have been shifted into the
        /// meta state machines.
        /// Builds a six-state machine: four 0xFF bytes followed by a 0x00 byte and a type byte in 0x00-0x05.
        /// </summary>
        /// <param name="weight">Weight assigned to the returned combined machine; the two inner machines use weight 1.</param>
        /// <returns>The combined machine (prepend followed by type).</returns>
        public static StateMachine GetCallLogType_Moto(int weight)
        {
            // Part 1: the two-byte call type (0x00 followed by a value 0..5).
            var typeMachine = new StateMachine { Name = MachineList.CallLogType_Moto, _weight = 1 };

            var typeHigh = new State { Name = "type1", ParentStateMachine = typeMachine };
            var typeLow = new State { Name = "type2", ParentStateMachine = typeMachine };

            typeMachine.AddState(typeHigh);
            typeMachine.AddState(typeLow);

            typeMachine.StartingStates.Add(typeHigh);
            typeMachine.EndingStates.Add(typeLow);

            AddTransition(typeHigh, typeLow, 1d);

            typeHigh.PossibleValueProbabilities[0x00] = 1d;

            for (int code = 0; code <= 5; code++)
            {
                typeLow.PossibleValueProbabilities[code] = 1 / 6d;
            }

            // Part 2: the four-byte 0xFF prepend.
            var prependMachine = new StateMachine { Name = MachineList.CallLogTypePrepend_Moto, _weight = 1 };

            var ff1 = new State { Name = "FF1", ParentStateMachine = prependMachine };
            var ff2 = new State { Name = "FF2", ParentStateMachine = prependMachine };
            var ff3 = new State { Name = "FF3", ParentStateMachine = prependMachine };
            var ff4 = new State { Name = "FF4", ParentStateMachine = prependMachine };
            State[] ffChain = { ff1, ff2, ff3, ff4 };

            foreach (State ff in ffChain)
            {
                prependMachine.AddState(ff);
            }

            prependMachine.StartingStates.Add(ff1);
            prependMachine.EndingStates.Add(ff4);

            for (int k = 0; k + 1 < ffChain.Length; k++)
            {
                AddTransition(ffChain[k], ffChain[k + 1], 1d);
            }

            foreach (State ff in ffChain)
            {
                ff.PossibleValueProbabilities[0xff] = 1d;
            }

            // Part 3: the combined machine starts where the prepend starts and ends where the type ends.
            // NOTE(review): the states keep their original ParentStateMachine (type/prepend), not the combined machine.
            var combined = new StateMachine { Name = MachineList.CallLogTypeWithPrepend_Moto, _weight = weight };

            combined.StartingStates.AddRange(prependMachine.StartingStates);
            combined.EndingStates.AddRange(typeMachine.EndingStates);
            combined.AddState(typeMachine._states);
            combined.AddState(prependMachine._states);

            AddTransitionToStateMachine(prependMachine, typeMachine, 1d);

            return combined;
        }
        /// <summary>
        /// Call log type 0-2, inclusive as a little-endian integer: the first byte is 0x00, 0x01 or 0x02
        /// (equally likely) and the following three bytes are 0x00.
        /// </summary>
        /// <param name="weight">Weight of CallLogType_SimpleLE state machine, among the set of state machines to be aggregated, governing a prior probability that the inferred sequence of states corresponds to CallLogType_SimpleLE state machine.</param>
        /// <returns>The four-state call log type machine.</returns>
        public static StateMachine GetCallLogType_SimpleLE(int weight)
        {
            // Honor the caller's weight; it was previously hard-coded to 1, silently ignoring the parameter.
            var status = new StateMachine { Name = MachineList.CallLogType_SimpleLE, _weight = weight };

            State type1 = new State { Name = "type1", ParentStateMachine = status };
            State type2 = new State { Name = "type2", ParentStateMachine = status };
            State type3 = new State { Name = "type3", ParentStateMachine = status };
            State type4 = new State { Name = "type4", ParentStateMachine = status };

            status.AddState(type1);
            status.AddState(type2);
            status.AddState(type3);
            status.AddState(type4);

            status.StartingStates.Add(type1);
            status.EndingStates.Add(type4);

            AddTransition(type1, type2, 1d);
            AddTransition(type2, type3, 1d);
            AddTransition(type3, type4, 1d);

            // Least significant byte first: it carries the actual type value 0..2.
            for (byte i = 0x00; i <= 0x02; i++)
            {
                type1.PossibleValueProbabilities[i] = 1 / 3d;
            }
            // The upper three bytes of the integer are always zero.
            type2.PossibleValueProbabilities[0x00] = 1d;
            type3.PossibleValueProbabilities[0x00] = 1d;
            type4.PossibleValueProbabilities[0x00] = 1d;

            return status;
        }
        /// <summary>
        /// Builds the record-level (meta) set of state machines - address book, call log and SMS variants,
        /// each with weight 1 - and hands it to <c>TestMetaStateMachines</c> to form the final HMM.
        /// </summary>
        /// <param name="machines">Stores the list of state machines which are aggregated.</param>
        /// <param name="states">List of all the states present in the aggregated HMM.</param>
        /// <param name="startState">The starting state of the HMM.</param>
        public static void TestMeta(ref List<StateMachine> machines, ref List<State> states, ref State startState)
        {
            var recordMachines = new List<StateMachine>();

            // Address book records.
            recordMachines.Add(GetMeta_AddressBook_Multi(1));
            recordMachines.Add(GetMeta_AddressBookAll(1));

            // Call log records.
            recordMachines.Add(GetMeta_CallLogAll(1));

            // SMS records.
            recordMachines.Add(GetMeta_SmsGeneric(1));
            recordMachines.Add(GetMeta_SmsGeneric1(1));
            recordMachines.Add(GetMeta_SmsSamsung(1));
            recordMachines.Add(GetMeta_SmsMotorola(1));
            recordMachines.Add(GetMeta_SmsMotorola1(1));

            TestMetaStateMachines(recordMachines, ref machines, ref states, ref startState);
        }
        /// <summary>
        /// Aggregates the record-level machines into one HMM: the meta binary machine gets a fixed probability
        /// of 0.1, the remaining 0.9 is shared among <paramref name="machinesToTest"/> in proportion to their
        /// weights, and transitions are added from the start machine to every machine and between every pair
        /// of machines (including each machine to itself).
        /// </summary>
        /// <param name="machinesToTest">List of state machines to be aggregated.</param>
        /// <param name="machines">Receives the meta binary machine followed by the machines from <paramref name="machinesToTest"/>.</param>
        /// <param name="states">List of all states present in the aggregated HMM (filled by <c>AddStatesToMainList</c>).</param>
        /// <param name="startState">Receives the starting state of the final, aggregated HMM.</param>
        public static void TestMetaStateMachines(List<StateMachine> machinesToTest, ref List<StateMachine> machines, ref List<State> states, ref State startState)
        {
            StateMachine startMachine = GetStart();
            StateMachine binaryMachine = GetMeta_Binary(1);

            // TODO: Check this value
            binaryMachine.Probability = 0.1d;

            double sharedProbability = 1d - binaryMachine.Probability;

            startState = startMachine.StartingState;

            // The binary machine always comes first in the aggregated list.
            machines.Add(binaryMachine);

            int totalWeight = machinesToTest.Sum(m => m._weight);

            foreach (StateMachine candidate in machinesToTest)
            {
                Console.WriteLine("Testing machine {0}", candidate.Name);

                // Each machine's prior is its share of the total weight, scaled to the non-binary mass.
                candidate.Probability = (sharedProbability * candidate._weight) / totalWeight;

                machines.Add(candidate);
            }

            AddStatesToMainList(ref machines, ref states);

            // Fully connect the machines; unlike the field-level variant, no pair is excluded.
            foreach (StateMachine source in machines)
            {
                AddTransitionToStateMachine(startMachine, source, source.Probability);

                foreach (StateMachine target in machines)
                {
                    AddTransitionToStateMachine(source, target, target.Probability);
                }

                foreach (State ending in source.EndingStates)
                {
                    ending.IsEndingState = true;
                }
            }
        }
        /// <summary>
        /// Builds an HMM whose only non-default machine is the combined phone number machine (weight 8)
        /// and aggregates it through <c>TestStateMachines</c>.
        /// </summary>
        /// <param name="machines">Stores the list of state machines which are aggregated.</param>
        /// <param name="states">List of all the states present in the aggregated HMM.</param>
        /// <param name="startState">The starting state of the HMM.</param>
        /// <param name="userStates">User-defined states, passed through to <c>GetPhoneNumber_All</c>.</param>
        public static void TestAnchorFieldsOnly(ref List<StateMachine> machines, ref List<State> states, ref State startState,
            List<UserState> userStates)
        {
            List<StateMachine> anchorMachines = new List<StateMachine>();
            anchorMachines.Add(GetPhoneNumber_All(8, userStates));

            TestStateMachines(anchorMachines, ref machines, ref states, ref startState);
        }
        /// <summary>
        /// Builds an HMM with no additional machines: <c>TestStateMachines</c> is called with an empty list,
        /// so only the machines it adds itself are present.
        /// </summary>
        /// <param name="machines">Stores the list of state machines which are aggregated.</param>
        /// <param name="states">List of all the states present in the aggregated HMM.</param>
        /// <param name="startState">The starting state of the HMM.</param>
        public static void TestBinaryOnly(ref List<StateMachine> machines, ref List<State> states, ref State startState)
        {
            List<StateMachine> noExtraMachines = new List<StateMachine>();

            TestStateMachines(noExtraMachines, ref machines, ref states, ref startState);
        }
        /// <summary>
        /// Builds an HMM whose only non-default machine is the SQLite record machine (weight 100)
        /// and aggregates it through <c>TestStateMachines</c>.
        /// </summary>
        /// <param name="machines">Stores the list of state machines which are aggregated.</param>
        /// <param name="states">List of all the states present in the aggregated HMM.</param>
        /// <param name="startState">The starting state of the HMM.</param>
        /// <param name="userStates">Not used.</param>
        public static void SqliteLiteOnly(ref List<StateMachine> machines, ref List<State> states, ref State startState,
            List<UserState> userStates = null)
        {
            List<StateMachine> sqliteMachines = new List<StateMachine>();
            sqliteMachines.Add(GetSqliteRecord(100));

            TestStateMachines(sqliteMachines, ref machines, ref states, ref startState);
        }
        /// <summary>
        /// Gets the state machine corresponding to the endian of a Unicode string: a sequence of
        /// (character byte, 0x00 byte) pairs. The string starts with a letter, needs at least two pairs,
        /// and may continue with further letter or punctuation pairs.
        /// </summary>
        /// <returns>The state machine, starting at UniChar0 and ending at UniNull1 or UniNull2.</returns>
        public static StateMachine GetUnicodeStringEndian()
        {
            StateMachine unicodeString = new StateMachine { Name = MachineList.Text_UnicodeEndian };

            State uniNull0 = new State { Name = "UniNull0", ParentStateMachine = unicodeString };
            State uniChar0 = new State { Name = "UniChar0", ParentStateMachine = unicodeString };

            State uniNull1 = new State { Name = "UniNull1", ParentStateMachine = unicodeString };
            State uniChar1 = new State { Name = "UniChar1", ParentStateMachine = unicodeString };

            State uniNull2 = new State { Name = "UniNull2", ParentStateMachine = unicodeString };
            State uniChar2 = new State { Name = "UniChar2", ParentStateMachine = unicodeString };
            State uniChar2Punct = new State { Name = "UniChar2Punct", ParentStateMachine = unicodeString };

            unicodeString.AddState(uniNull0);
            unicodeString.AddState(uniChar0);
            unicodeString.AddState(uniNull1);
            unicodeString.AddState(uniChar1);
            unicodeString.AddState(uniNull2);
            unicodeString.AddState(uniChar2);
            unicodeString.AddState(uniChar2Punct);

            unicodeString.StartingStates.Add(uniChar0);
            unicodeString.EndingStates.Add(uniNull2);
            unicodeString.EndingStates.Add(uniNull1);

            // Double literals are used throughout: float literals such as 0.69f widen to inexact doubles.
            AddTransition(uniChar0, uniNull0, 1d);
            AddTransition(uniNull0, uniChar1, 1d);
            AddTransition(uniChar1, uniNull1, 1d);
            AddTransition(uniNull1, uniChar2, 0.69d);
            AddTransition(uniNull1, uniChar2Punct, 0.30d);
            // 0.69 + 0.30 leaves 0.01, matching uniNull2 below (this was 0.1, which summed to 1.09).
            uniNull1.RemainingProbability = 0.01d;

            AddTransition(uniChar2, uniNull2, 1d);
            AddTransition(uniChar2Punct, uniNull2, 1d);

            AddTransition(uniNull2, uniChar2, 0.69d);
            AddTransition(uniNull2, uniChar2Punct, 0.30d);
            uniNull2.RemainingProbability = 0.01d;

            uniNull0.PossibleValueProbabilities[0x00] = 1d;
            uniNull1.PossibleValueProbabilities[0x00] = 1d;
            uniNull2.PossibleValueProbabilities[0x00] = 1d;

            // uniChar1/uniChar2 accept 54 values: space, hyphen, 26 upper-case and 26 lower-case letters.
            uniChar1.PossibleValueProbabilities[0x20] = 1 / 54d;
            uniChar1.PossibleValueProbabilities[0x2D] = 1 / 54d;

            uniChar2.PossibleValueProbabilities[0x20] = 1 / 54d;
            uniChar2.PossibleValueProbabilities[0x2D] = 1 / 54d;

            //Upper case letters (twice as likely as lower case for the first character, before normalization)
            for (byte i = 0x41; i <= 0x5a; i++)
            {
                uniChar0.PossibleValueProbabilities[i] = 2d;
                uniChar1.PossibleValueProbabilities[i] = 1 / 54d;
                uniChar2.PossibleValueProbabilities[i] = 1 / 54d;
            }

            //Lower case letters
            for (byte i = 0x61; i <= 0x7a; i++)
            {
                uniChar0.PossibleValueProbabilities[i] = 1d;
                uniChar1.PossibleValueProbabilities[i] = 1 / 54d;
                uniChar2.PossibleValueProbabilities[i] = 1 / 54d;
            }

            //printable (non-alpha): 0x21-0x40
            for (int i = 33; i <= 64; i++)
            {
                uniChar2Punct.PossibleValueProbabilities[i] = 1d;
            }

            uniChar2Punct.PossibleValueProbabilities[0x20] = 1d;

            //printable (non-alpha): 0x5B-0x60
            for (int i = 91; i <= 96; i++)
            {
                uniChar2Punct.PossibleValueProbabilities[i] = 1d;
            }

            //printable (non-alpha): 0x7B-0x7F
            for (int i = 123; i <= 127; i++)
            {
                uniChar2Punct.PossibleValueProbabilities[i] = 1d;
            }

            // The raw weights set above for these two states are turned into probabilities here.
            uniChar2Punct.NormalizeProbabilities();
            uniChar0.NormalizeProbabilities();

            return unicodeString;
        }
 /// <summary>
 /// Gets the state machine for a user-defined timestamp: one state per user-defined byte, chained in
 /// order, where the last byte is represented by a <c>UserDefinedTimestampState</c>.
 /// </summary>
 /// <param name="userState">Object representing the user-defined state machine; must define at least 2 bytes.</param>
 /// <param name="weight">Weight of the returned machine among the machines to be aggregated.</param>
 /// <returns>The state machine.</returns>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="userState"/> is null.</exception>
 /// <exception cref="ArgumentException">Thrown when <paramref name="userState"/> defines fewer than 2 bytes.</exception>
 public static StateMachine GetTimestamp_UserDefined(UserState userState, int weight)
 {
     if (userState == null)
     {
         throw new ArgumentNullException("userState");
     }
     // With fewer than 2 bytes there is no leading state to start from and nothing to chain the
     // end state to; fail with a clear message instead of a null dereference or a bad index.
     if (userState.Bytes == null || userState.Bytes.Count < 2)
     {
         throw new ArgumentException("A user-defined timestamp must define at least 2 bytes.", "userState");
     }

     StateMachine timestamp = new StateMachine { Name = userState.MachineType, _weight = weight };
     int lastIndex = userState.Bytes.Count - 1;
     State prevState = null;

     // All bytes except the last become plain states linked one after another.
     for (int n = 0; n < lastIndex; n++)
     {
         string nm = String.Format("UserTimestampByte{0}", n);
         State state = new State { Name = nm, ParentStateMachine = timestamp };
         timestamp.AddState(state);
         if (n == 0)
         {
             timestamp.StartingStates.Add(state);
         }
         else
         {
             AddTransition(prevState, state, 1d);
         }
         prevState = state;
         UserDefinedByteProbabilities(state, userState.Bytes[n]);
     }

     // The final byte uses the specialised end state, which is the machine's only ending state.
     UserDefinedTimestampState endState = new UserDefinedTimestampState(userState) { Name = "EndUserTimestampByte", ParentStateMachine = timestamp };
     timestamp.AddState(endState);
     UserDefinedByteProbabilities(endState, userState.Bytes[lastIndex]);
     AddTransition(prevState, endState, 1d);
     timestamp.EndingStates.Add(endState);
     return timestamp;
 }
 /// <summary>
 /// Sets the value probabilities of a state from a user-defined byte: either the state is flagged as
 /// binary (when the byte accepts all values), or each listed value gets equal weight and the
 /// probabilities are normalized.
 /// </summary>
 /// <param name="state">The state whose probabilities are defined.</param>
 /// <param name="userByte">UserByte object representing the byte.</param>
 public static void UserDefinedByteProbabilities(State state, UserByte userByte)
 {
     // A byte that accepts everything is simply marked binary; no per-value weights are set.
     if (userByte.All)
     {
         state.IsBinary = true;
         return;
     }

     foreach (byte allowed in userByte.Values)
     {
         state.PossibleValueProbabilities[allowed] = 1d;
     }

     state.NormalizeProbabilities();
 }
        /// <summary>
        /// Builds the Motorola-oriented set of state machines (text, Motorola and international phone number
        /// formats, call log type/time, SMS timestamp) and aggregates it through <c>TestStateMachines</c>.
        /// </summary>
        /// <param name="machines">Stores the list of state machines which are aggregated.</param>
        /// <param name="states">List of all the states present in the aggregated HMM.</param>
        /// <param name="startState">The starting state of the HMM.</param>
        public static void TestMoto(ref List<StateMachine> machines, ref List<State> states, ref State startState)
        {
            var motoMachines = new List<StateMachine>();

            motoMachines.Add(GetText(10));

            // Motorola phone number formats.
            motoMachines.Add(GetPhoneNumber_MotoSevenUnicode());
            motoMachines.Add(GetPhoneNumber_MotoTenUnicode());
            motoMachines.Add(GetPhoneNumber_MotoElevenUnicode());
            motoMachines.Add(GetPhoneNumber_MotoElevenDigit());
            motoMachines.Add(GetPhoneNumber_MotoSevenDigit());
            motoMachines.Add(GetPhoneNumber_MotoTenDigit());

            // Call log and SMS timestamp fields.
            motoMachines.Add(GetCallLog_MotoTypeAndTime(1));
            motoMachines.Add(GetTimestamp_Sms(1));

            // International phone number formats.
            motoMachines.Add(GetPhoneNumber_InternationalFormatSevenDigit());
            motoMachines.Add(GetPhoneNumber_InternationalFormatTenDigit());
            motoMachines.Add(GetPhoneNumber_InternationalFormatElevenDigit());

            TestStateMachines(motoMachines, ref machines, ref states, ref startState);
        }
        /// <summary>
        /// Registers a single transition on both of its endpoint states.
        /// </summary>
        /// <param name="fromState">The state from which the transition occurs.</param>
        /// <param name="toState">The state to which the transition occurs.</param>
        /// <param name="probability">The probability of the transition.</param>
        private static void AddTransition(State fromState, State toState, double probability)
        {
            // Record the transition on the source state.
            fromState.AddTransition(fromState, toState, probability);

            // Record the same transition on the destination state as well.
            // The guard below is commented out, so for a self-loop (toState == fromState)
            // the transition is registered twice on the same state.
            // NOTE(review): confirm that State.AddTransition tolerates this duplicate registration.
            //if (toState != fromState)

            toState.AddTransition(fromState, toState, probability);
        }
        /// <summary>
        /// Assembles a test set containing only the Motorola digit phone number machines
        /// (seven, ten and eleven digits) and passes it to <c>TestStateMachines</c>.
        /// </summary>
        /// <param name="machines">Forwarded by reference to <c>TestStateMachines</c>.</param>
        /// <param name="states">Forwarded by reference to <c>TestStateMachines</c>.</param>
        /// <param name="startState">Forwarded by reference to <c>TestStateMachines</c>.</param>
        public static void TestMotoPhoneOnly(ref List<StateMachine> machines, ref List<State> states, ref State startState)
        {
            List<StateMachine> phoneMachines = new List<StateMachine>();

            phoneMachines.Add(GetPhoneNumber_MotoSevenDigit());
            phoneMachines.Add(GetPhoneNumber_MotoTenDigit());
            phoneMachines.Add(GetPhoneNumber_MotoElevenDigit());

            TestStateMachines(phoneMachines, ref machines, ref states, ref startState);
        }
 /// <summary>
 /// Appends a state to the <c>_states</c> collection, rejecting a state that is already present.
 /// </summary>
 /// <param name="newState">State to add.</param>
 /// <exception cref="ArgumentException">Thrown when <paramref name="newState"/> is already in <c>_states</c>.</exception>
 private void AddState(State newState)
 {
     // Reject duplicates up front so the happy path stays unindented.
     if (_states.Contains(newState))
     {
         throw new ArgumentException("This state has already been added to the state machine");
     }

     _states.Add(newState);
 }
        /// <summary>
        /// Assembles a test set of the text, combined phone number and combined timestamp machines
        /// (weights 2, 3 and 1 respectively) and passes it to <c>TestStateMachines</c>.
        /// </summary>
        /// <param name="machines">Forwarded by reference to <c>TestStateMachines</c>.</param>
        /// <param name="states">Forwarded by reference to <c>TestStateMachines</c>.</param>
        /// <param name="startState">Forwarded by reference to <c>TestStateMachines</c>.</param>
        public static void TestPhoneNumberTextAndTimeStamp(ref List<StateMachine> machines, ref List<State> states, ref State startState)
        {
            List<StateMachine> fieldMachines = new List<StateMachine>();

            fieldMachines.Add(GetText(2));
            fieldMachines.Add(GetPhoneNumber_All(3));
            fieldMachines.Add(GetTimeStamp_All(1));

            TestStateMachines(fieldMachines, ref machines, ref states, ref startState);
        }
        /// <summary>
        /// Builds the Nokia call log machine made of a number index machine, a four-byte binary filler
        /// machine and a Nokia phone number machine, linked in that order.
        /// </summary>
        /// <param name="weight">Weight assigned to the combined machine.</param>
        /// <returns>The combined machine, starting where the index machine starts and ending where the number machine ends.</returns>
        public static StateMachine GetCallLog_NokiaNumberIndexAndNumber(int weight)
        {
            StateMachine combined = new StateMachine { Name = MachineList.CallLogNumberIndexAndNumber_Nokia, _weight = weight };
            StateMachine filler = new StateMachine { Name = MachineList.Binary, _weight = 1 };

            // Four consecutive bytes that may take any value.
            State[] fillerBytes =
            {
                new State { Name = "Binary1", ParentStateMachine = filler, AllValuesPossible = true, IsBinary = true },
                new State { Name = "Binary2", ParentStateMachine = filler, AllValuesPossible = true, IsBinary = true },
                new State { Name = "Binary3", ParentStateMachine = filler, AllValuesPossible = true, IsBinary = true },
                new State { Name = "Binary4", ParentStateMachine = filler, AllValuesPossible = true, IsBinary = true }
            };

            foreach (State fillerByte in fillerBytes)
            {
                filler.AddState(fillerByte);
            }

            // Chain the filler bytes with a deliberately low transition probability so this machine
            // does not dominate others (e.g. the international format).
            for (int next = 1; next < fillerBytes.Length; next++)
            {
                AddTransition(fillerBytes[next - 1], fillerBytes[next], 0.1f);
            }

            filler.StartingStates.Add(fillerBytes[0]);
            filler.EndingStates.Add(fillerBytes[fillerBytes.Length - 1]);

            var indexMachine = GetPhoneNumber_NokiaNumberIndex(1);
            var numberMachine = GetPhoneNumber_NokiaAll(1);

            // index -> filler -> number
            AddTransitionToStateMachine(indexMachine, filler, 1d);
            AddTransitionToStateMachine(filler, numberMachine, 1d);

            // The combined machine owns the states of all three parts.
            combined.AddState(indexMachine._states);
            combined.AddState(filler._states);
            combined.AddState(numberMachine._states);

            combined.StartingStates.AddRange(indexMachine.StartingStates);
            combined.EndingStates.AddRange(numberMachine.EndingStates);

            return combined;
        }
        /// <summary>
        /// Builds the meta state machine for a single binary byte.
        /// </summary>
        /// <param name="weight">Weight of the Binary state machine among the set of state machines to be aggregated.</param>
        /// <returns>A machine with one state that is both its starting and its ending state.</returns>
        public static StateMachine GetMeta_Binary(int weight)
        {
            var machine = new StateMachine { Name = MachineList.Meta_Binary, _weight = weight };
            var anyByte = new State { Name = "BinaryByte", ParentStateMachine = machine, AllValuesPossible = true, IsBinary = true };

            // The single state is the whole machine: entry and exit at once.
            machine.AddState(anyByte);
            machine.StartingStates.Add(anyByte);
            machine.EndingStates.Add(anyByte);

            return machine;
        }
        /// <summary>
        /// Builds the meta state machine for an address book entry: a phone number, optionally followed by
        /// binary data, followed by text.
        /// </summary>
        /// <param name="weight">Weight of the AddressBook state machine among the set of state machines to be aggregated.</param>
        /// <returns>The address book meta machine, starting at the phone number state and ending at the text state.</returns>
        public static StateMachine GetMeta_AddressBook1(int weight)
        {
            var addressBook = new StateMachine { Name = MachineList.Meta_AddressBook, _weight = weight };

            var textField = new State { Name = "Text", ParentStateMachine = addressBook };
            var binaryField = new State { Name = "Binary", ParentStateMachine = addressBook, IsBinary = true };
            var numberField = new State { Name = "PhoneNumber", ParentStateMachine = addressBook };

            addressBook.AddState(textField);
            addressBook.AddState(binaryField);
            addressBook.AddState(numberField);

            addressBook.StartingStates.Add(numberField);
            addressBook.EndingStates.Add(textField);

            // From the phone number, go to binary or straight to text with equal probability;
            // binary always continues to text.
            AddTransition(numberField, binaryField, 0.5f);
            AddTransition(numberField, textField, 0.5f);
            AddTransition(binaryField, textField, 1.0f);

            // Each state only accepts the meta value of its own field type.
            textField.PossibleValueProbabilities[(byte)MetaMachine.Text] = 1f;
            binaryField.PossibleValueProbabilities[(byte)MetaMachine.Binary] = 1d;
            numberField.PossibleValueProbabilities[(byte)MetaMachine.PhoneNumber] = 1d;

            return addressBook;
        }
        /// <summary>
        /// Builds the state machine for a seven-byte GSM SMS timestamp: year, month, day, hour, minute,
        /// second and time zone, each stored as a nibble-swapped BCD byte.
        /// </summary>
        /// <param name="weight">Weight of the TimeStamp_SmsGsm state machine among the set of state machines to be aggregated.</param>
        /// <returns>The timestamp machine, starting at the year state and ending at the time zone state.</returns>
        public static StateMachine GetTimestamp_SmsGsm(int weight)
        {
            var machine = new StateMachine { Name = MachineList.TimeStamp_SmsGsm, _weight = weight };

            var year = new State { Name = "Year", ParentStateMachine = machine };
            var month = new State { Name = "Month", ParentStateMachine = machine };
            var day = new State { Name = "Day", ParentStateMachine = machine };
            var hour = new State { Name = "Hour", ParentStateMachine = machine };
            var minute = new State { Name = "Minute", ParentStateMachine = machine };
            var second = new State { Name = "Second", ParentStateMachine = machine };
            var timezone = new SmsGsmTimeState { Name = "Timezone", ParentStateMachine = machine };

            // The fields in the order in which they appear in the timestamp.
            State[] fields = { year, month, day, hour, minute, second, timezone };

            foreach (State field in fields)
            {
                machine.AddState(field);
            }

            machine.StartingStates.Add(year);
            machine.EndingStates.Add(timezone);

            // Strictly linear chain: each field is always followed by the next one.
            for (int next = 1; next < fields.Length; next++)
            {
                AddTransition(fields[next - 1], fields[next], 1d);
            }

            // Encodes a decimal number as BCD (its decimal digits re-read as hex digits) and swaps the nibbles.
            Func<int, byte> swappedBcd =
                number => Printer.SwapNibbles(byte.Parse(Convert.ToString(number), NumberStyles.HexNumber));

            // Two-digit years; each later year gets one more unit of raw weight than the previous one.
            int firstYear = TimeConstants.START_YEAR;
            int rank = 0;
            for (int yy = firstYear - 2000; yy <= (TimeConstants.END_YEAR - 2000); yy++)
            {
                byte encodedYear = swappedBcd(yy);
                year.PossibleValueProbabilities[encodedYear] = 1f + rank;
                rank++;
            }

            year.NormalizeProbabilities();

            // Months 1-12, uniformly likely.
            for (int m = 1; m <= 12; m++)
            {
                month.PossibleValueProbabilities[swappedBcd(m)] = 1 / 12d;
            }

            // Days 1-31, uniformly likely.
            for (int d = 1; d <= 31; d++)
            {
                day.PossibleValueProbabilities[swappedBcd(d)] = 1 / 31d;
            }

            // Hours 0-23, uniformly likely.
            for (int h = 0; h <= 23; h++)
            {
                hour.PossibleValueProbabilities[swappedBcd(h)] = 1 / 24d;
            }

            // Minutes and seconds share the same 0-59 range and distribution.
            for (int sixtieth = 0; sixtieth <= 59; sixtieth++)
            {
                byte encoded = swappedBcd(sixtieth);
                minute.PossibleValueProbabilities[encoded] = 1 / 60d;
                second.PossibleValueProbabilities[encoded] = 1 / 60d;
            }

            // The time zone is a GMT offset counted in 15 minute intervals. If the most significant
            // bit (before swapping) is set, the offset is negative.
            // NOTE(review): assuming ByteFromNibbles takes (high, low), counts 80-94 already have that
            // bit set, so they alias negative offsets and 80 yields 0x08 ("negative zero"), which the
            // zero check below otherwise avoids - confirm the upper bound of 96 is intended.
            for (int quarters = 0; quarters < 96; quarters++)
            {
                // Only whole and half hour offsets are accepted, plus the single 15 minute
                // offset kept for Nepal (23 quarters).
                if (((quarters % 2) == 0) || (quarters == 23))
                {
                    // Swapped BCD.
                    byte positive = Printer.SwapNibbles(Printer.ByteFromNibbles(quarters / 10, quarters % 10));
                    timezone.PossibleValueProbabilities[positive] = 1d;

                    if ((quarters != 0) && (quarters != 23))
                    {
                        // Mark negative; the nibbles are already swapped, so the sign bit is 0x08.
                        byte negative = (byte)(positive | 0x08);
                        timezone.PossibleValueProbabilities[negative] = 1d;
                    }
                }
            }

            timezone.NormalizeProbabilities();

            return machine;
        }
        /// <summary>
        /// Assembles a test set containing only the combined phone number machine (weight 1)
        /// and passes it to <c>TestStateMachines</c>.
        /// </summary>
        /// <param name="machines">Forwarded by reference to <c>TestStateMachines</c>.</param>
        /// <param name="states">Forwarded by reference to <c>TestStateMachines</c>.</param>
        /// <param name="startState">Forwarded by reference to <c>TestStateMachines</c>.</param>
        public static void TestPhoneOnly(ref List<StateMachine> machines, ref List<State> states, ref State startState)
        {
            List<StateMachine> phoneOnly = new List<StateMachine>();
            phoneOnly.Add(GetPhoneNumber_All(1));

            TestStateMachines(phoneOnly, ref machines, ref states, ref startState);
        }
        /// <summary>
        /// Builds the state machine for a run of 0xFF bytes: four mandatory 0xFF bytes, after which the
        /// fourth and fifth states alternate for as long as the run continues.
        /// </summary>
        /// <returns>The BinaryFF machine, starting at the first byte and ending at either the fourth or the fifth state.</returns>
        public static StateMachine GetBinaryFF()
        {
            var ffRun = new StateMachine { Name = MachineList.BinaryFF };

            State[] ffBytes =
            {
                new State { Name = "BinaryFF_1", ParentStateMachine = ffRun, IsBinary = true },
                new State { Name = "BinaryFF_2", ParentStateMachine = ffRun, IsBinary = true },
                new State { Name = "BinaryFF_3", ParentStateMachine = ffRun, IsBinary = true },
                new State { Name = "BinaryFF_4", ParentStateMachine = ffRun, IsBinary = true },
                new State { Name = "BinaryFF_5", ParentStateMachine = ffRun, IsBinary = true }
            };

            foreach (State ffByte in ffBytes)
            {
                ffRun.AddState(ffByte);
            }

            // Every state accepts the value 0xFF only.
            foreach (State ffByte in ffBytes)
            {
                ffByte.PossibleValueProbabilities[0xff] = 1d;
            }

            // The first four bytes follow each other with certainty.
            for (int next = 1; next <= 3; next++)
            {
                AddTransition(ffBytes[next - 1], ffBytes[next], 1d);
            }

            // The fourth and fifth states bounce between each other; each keeps 0.01 as remaining probability.
            State fourth = ffBytes[3];
            State fifth = ffBytes[4];

            AddTransition(fourth, fifth, 0.99d);
            fourth.RemainingProbability = 0.01d;
            AddTransition(fifth, fourth, 0.99d);
            fifth.RemainingProbability = 0.01d;

            ffRun.StartingStates.Add(ffBytes[0]);
            ffRun.EndingStates.Add(fourth);
            ffRun.EndingStates.Add(fifth);

            return ffRun;
        }
        /// <summary>
        /// Builds the meta state machine for a generic call log record. A record starts with text followed
        /// by a phone number, with a phone number followed by text, or with a phone number alone; every path
        /// then runs through binary data into one or more timestamps.
        /// </summary>
        /// <param name="weight">Weight of the CallLogGeneric state machine among the set of state machines to be aggregated.</param>
        /// <returns>The call log meta machine, ending at the timestamp state.</returns>
        public static StateMachine GetMeta_CallLogGeneric(int weight)
        {
            var callLog = new StateMachine { Name = MachineList.Meta_CallLogGeneric, _weight = weight };

            var leadingText = new State { Name = "Text", ParentStateMachine = callLog };
            var trailingText = new State { Name = "Text", ParentStateMachine = callLog };
            var gapAfterText = new State { Name = "Binary", ParentStateMachine = callLog, IsBinary = true };
            var gapAfterNumber = new State { Name = "Binary", ParentStateMachine = callLog, IsBinary = true };
            var trailingNumber = new State { Name = "PhoneNumber", ParentStateMachine = callLog };
            var leadingNumberWithText = new State { Name = "PhoneNumber", ParentStateMachine = callLog };
            var leadingNumberOnly = new State { Name = "PhoneNumber", ParentStateMachine = callLog };
            var gapBeforeTime = new State { Name = "Binary2", ParentStateMachine = callLog, IsBinary = true };
            var time = new State { Name = "TimeStamp", ParentStateMachine = callLog };

            State[] registrationOrder =
            {
                trailingText, gapAfterText, gapAfterNumber, trailingNumber, gapBeforeTime,
                time, leadingText, leadingNumberWithText, leadingNumberOnly
            };
            foreach (State member in registrationOrder)
            {
                callLog.AddState(member);
            }

            callLog.StartingStates.Add(leadingText);
            callLog.StartingStates.Add(leadingNumberOnly);
            callLog.StartingStates.Add(leadingNumberWithText);
            callLog.EndingStates.Add(time);

            // Path 1: text -> binary -> phone number.
            AddTransition(leadingText, gapAfterText, 1f);
            AddTransition(gapAfterText, trailingNumber, 0.99f);
            AddTransition(gapAfterText, gapAfterText, 0.01f);
            AddTransition(trailingNumber, gapBeforeTime, 1f);

            // Path 2: phone number -> binary -> text.
            AddTransition(leadingNumberWithText, gapAfterNumber, 1f);
            AddTransition(gapAfterNumber, trailingText, 0.99f);
            AddTransition(gapAfterNumber, gapAfterNumber, 0.01f);
            AddTransition(trailingText, gapBeforeTime, 1f);

            // Path 3: phone number with no text at all.
            AddTransition(leadingNumberOnly, gapBeforeTime, 1f);

            // Common tail: binary -> timestamp, where a timestamp may loop back through binary.
            AddTransition(gapBeforeTime, time, 0.99f);
            AddTransition(gapBeforeTime, gapBeforeTime, 0.01f);
            AddTransition(time, gapBeforeTime, 0.9f);
            time.RemainingProbability = 0.1f;

            // The binary states accept every value except the meta values of the other field types.
            State[] binaryStates = { gapAfterText, gapAfterNumber, gapBeforeTime };
            byte[] excludedValues =
            {
                (byte)MetaMachine.PhoneNumber,
                (byte)MetaMachine.Text,
                (byte)MetaMachine.TimeStamp,
                (byte)MetaMachine.BinaryLarge
            };

            foreach (State binaryState in binaryStates)
            {
                for (int value = 0; value < 256; value++)
                {
                    binaryState.PossibleValueProbabilities[value] = 1f;
                }

                foreach (byte excluded in excludedValues)
                {
                    binaryState.PossibleValueProbabilities[excluded] = 0f;
                }
            }

            // Normalize only after all raw weights have been assigned.
            foreach (State binaryState in binaryStates)
            {
                binaryState.NormalizeProbabilities();
            }

            // Every other state accepts exactly the meta value of its own field type.
            trailingText.PossibleValueProbabilities[(byte)MetaMachine.Text] = 1f;
            leadingText.PossibleValueProbabilities[(byte)MetaMachine.Text] = 1f;
            trailingNumber.PossibleValueProbabilities[(byte)MetaMachine.PhoneNumber] = 1f;
            leadingNumberOnly.PossibleValueProbabilities[(byte)MetaMachine.PhoneNumber] = 1f;
            leadingNumberWithText.PossibleValueProbabilities[(byte)MetaMachine.PhoneNumber] = 1f;
            time.PossibleValueProbabilities[(byte)MetaMachine.TimeStamp] = 1f;

            return callLog;
        }
 /// <summary>
 /// Single-machine convenience wrapper: puts one state machine in a list and passes it to <c>TestStateMachines</c>.
 /// </summary>
 /// <param name="machineToTest">The only state machine in the test set.</param>
 /// <param name="machines">Forwarded by reference to <c>TestStateMachines</c>.</param>
 /// <param name="states">Forwarded by reference to <c>TestStateMachines</c>.</param>
 /// <param name="startState">Forwarded by reference to <c>TestStateMachines</c>.</param>
 public static void TestStateMachine(StateMachine machineToTest, ref List<StateMachine> machines, ref List<State> states, ref State startState)
 {
     List<StateMachine> singleMachine = new List<StateMachine>();
     singleMachine.Add(machineToTest);

     TestStateMachines(singleMachine, ref machines, ref states, ref startState);
 }