/// <summary>
/// Stores the inputs for a threaded Viterbi run and constructs one Viterbi instance
/// so that the shared machine, state, start-state and user-state fields are populated
/// once here (they are passed to it by reference).
/// </summary>
/// <param name="UnfilteredBlocks">Blocks to process. For a Meta run only the first block is read.</param>
/// <param name="runType">The kind of run to perform.</param>
/// <param name="userStates">User-defined states handed to the Viterbi constructor.</param>
/// <param name="file_path">Path stored in _filePath.</param>
/// <param name="fileSha1">Value stored in _fileSha1 and used as the MemoryId of the result object.</param>
public ThreadedViterbi(List<Block> UnfilteredBlocks, RunType runType, List<UserState> userStates, string file_path, string fileSha1)
        {
            // Caller-supplied inputs.
            _runType = runType;
            _filePath = file_path;
            _fileSha1 = fileSha1;
            _userStates = userStates;
            _unfilteredBlocks = UnfilteredBlocks;

            // Empty containers that the Viterbi constructor receives by reference.
            _machines = new List<StateMachine>();
            _states = new List<State>();

            // Result holder, tagged with the hash of the input file.
            _viterbiResults = new ViterbiResult();
            _viterbiResults.Fields = new List<ViterbiField>();
            _viterbiResults.MemoryId = _fileSha1;

            // TODO: (RJW) It is not certain that this loads the machines as originally intended.
            // The instance itself is discarded; it is created only for its effect on the
            // by-reference fields, so the same loading is not repeated for every block.
            // The second argument is true for GeneralParse and false for every other run type.
            bool isGeneralParse = _runType == RunType.GeneralParse;
            new Viterbi(_runType, isGeneralParse, ref _machines, ref _states, ref _startState, ref _userStates);

            // A Meta run replaces the block list with the pieces produced from the first block's bytes.
            if (_runType == RunType.Meta)
            {
                _unfilteredBlocks = Split_On_Binary_Large_Fields(_unfilteredBlocks[0].Bytes);
            }
        }
        /// <summary>
        /// Records the inputs of a threaded Viterbi run and builds a single Viterbi
        /// instance whose constructor receives the machine, state, start-state and
        /// user-state fields by reference, so they are filled once for all blocks.
        /// </summary>
        /// <param name="UnfilteredBlocks">Blocks to process. For a Meta run only the first block is read.</param>
        /// <param name="runType">The kind of run to perform.</param>
        /// <param name="userStates">User-defined states handed to the Viterbi constructor.</param>
        /// <param name="file_path">Path stored in _filePath.</param>
        /// <param name="fileSha1">Value stored in _fileSha1 and used as the MemoryId of the result object.</param>
        public ThreadedViterbi(List <Block> UnfilteredBlocks, RunType runType, List <UserState> userStates, string file_path, string fileSha1)
        {
            _unfilteredBlocks = UnfilteredBlocks;
            _userStates       = userStates;
            _filePath         = file_path;
            _fileSha1         = fileSha1;
            _runType          = runType;

            _machines = new List <StateMachine>();
            _states   = new List <State>();

            _viterbiResults = new ViterbiResult
            {
                Fields   = new List <ViterbiField>(),
                MemoryId = _fileSha1
            };

            // TODO: (RJW) It is not certain that this loads the machines as originally intended.
            // Only the constructor's effect on the by-reference fields is wanted, so the
            // instance is not kept. GeneralParse passes true as the second argument; every
            // other run type passes false.
            bool generalParse = (_runType == RunType.GeneralParse);
            new Viterbi(_runType, generalParse, ref _machines, ref _states, ref _startState, ref _userStates);

            if (_runType != RunType.Meta)
            {
                return;
            }

            // Meta runs work on the pieces derived from the first block's bytes.
            _unfilteredBlocks = Split_On_Binary_Large_Fields(_unfilteredBlocks[0].Bytes);
        }
        /// <summary>
        /// Runs Viterbi over one set of blocks and records the outcome in the shared result list.
        /// </summary>
        /// <param name="BlockSet">The blocks this call processes.</param>
        /// <param name="ResultsOnBlocks">Shared list that receives this call's result (added while holding <paramref name="mutex"/>).</param>
        /// <param name="mutex">Object locked while the shared list and counter are updated.</param>
        /// <param name="job_count">Shared counter, decremented once by this call.</param>
        /// <param name="manual">Event that is set when the counter reaches zero.</param>
        private void RunBlockThread(List <Block> BlockSet, ref List <ViterbiResult> ResultsOnBlocks, ref object mutex, ref int job_count, ref ManualResetEvent manual)
        {
            ViterbiResult blockResult = null;

            if (_runType == RunType.Meta)
            {
                var metaViterbi = new Viterbi(RunType.Meta, false, ref _machines, ref _states, ref _startState, ref _userStates);
                blockResult = metaViterbi.Run(BlockSet, _filePath);
            }
            else
            {
#if !_GENERAL_PARSE
                // Default build: anchor-based run, given its own copy of the user-state list.
                var anchored = new AnchorViterbi(RunType.GeneralParse, _filePath, new List <UserState>(_userStates));
                blockResult = anchored.RunThreaded(BlockSet, ref _machines, ref _states, ref _startState, ref _userStates);
#else
                var plainViterbi = new Viterbi(_runType, false, ref _machines, ref _states, ref _startState, ref _userStates);
                blockResult = plainViterbi.Run(BlockSet, _filePath);
#endif
            }

            lock (mutex)
            {
                ResultsOnBlocks.Add(blockResult);

                // The call that brings the counter to zero signals that all jobs are done.
                if (--job_count == 0)
                {
                    manual.Set();
                }
            }
        }
Exemple #4
0
        /// <summary>
        /// Runs an anchor-point Viterbi pass over the given blocks and reads padded
        /// regions of the file around the anchor points that were found.
        /// </summary>
        /// <param name="unfilteredBlocks">Blocks searched for anchor points.</param>
        /// <returns>
        /// The blocks read from the file; an empty list when no anchor points are found.
        /// The anchor pass result is also stored in _anchorResults.
        /// </returns>
        public List <Block> GetAnchorPointBlocks(List <Block> unfilteredBlocks)
        {
            var anchor = new Viterbi(RunType.AnchorPoints, true, _userStates);

            var blocks = new List <Block>();

            //The list of Fields in the result are the anchor points
            _anchorResults = anchor.Run(unfilteredBlocks, _filePath);

            if (_anchorResults.Fields.Count == 0)
            {
                //throw new ApplicationException("No anchor points found!");
                return(new List <Block>());
            }

            //The next index after the end of the anchor point
            long nextIndex = _anchorResults.Fields[0].OffsetFile + _anchorResults.Fields[0].Length;

            //Initialize the start of the first block to the first anchor point - X bytes. If this goes past the start of the file,
            // we will just use the start of the file.
            long blockStart = Math.Max(0, _anchorResults.Fields[0].OffsetFile - BLOCK_PADDING_BYTES);

            //Initialize the end of the first block to be X bytes past the first anchor points. If this
            // goes past the end of the file, we will just use the end of the file.
            long blockEnd = Math.Min(_fileLength, _anchorResults.Fields[0].OffsetFile + _anchorResults.Fields[0].Length + BLOCK_PADDING_BYTES);

            // The using statement guarantees the file handle is released even if GetBlock throws.
            using (FileStream stream = new FileStream(_filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                // NOTE(review): the block flushed on the final iteration ends at the previous
                // anchor (blockEnd is only updated after the flush), and the block started for
                // the final anchor is never added. With a single anchor the loop does not run and
                // no block is produced. Confirm whether that is intended before changing it.

                //Get blocks on data based on the anchor points.
                for (int i = 1; i < _anchorResults.Fields.Count; i++)
                {
                    //The size of the gap in bytes between the two anchor points
                    long gap = _anchorResults.Fields[i].OffsetFile - nextIndex;

                    //If the gap is too long (or this is the last anchor), emit the current block and start a new one
                    if (gap > LONG_GAP_BYTES || i == _anchorResults.Fields.Count - 1)
                    {
                        blocks.Add(GetBlock(stream, blockStart, blockEnd));

                        blockStart = Math.Max(0, _anchorResults.Fields[i].OffsetFile - BLOCK_PADDING_BYTES);
                    }

                    blockEnd = Math.Min(_fileLength, _anchorResults.Fields[i].OffsetFile + _anchorResults.Fields[i].Length + BLOCK_PADDING_BYTES);

                    nextIndex = _anchorResults.Fields[i].OffsetFile + _anchorResults.Fields[i].Length;
                }
            }

            return(blocks);
        }
Exemple #5
0
        /// <summary>
        /// Performs field-level inference on the blocks obtained from GetAnchorPointBlocks.
        /// </summary>
        /// <param name="unfilteredBlocks">Blocks handed to the anchor-point pass.</param>
        /// <returns>
        /// The field-level result, with the anchor pass duration added to its own
        /// duration and its MemoryId set. The same object is kept in _fieldResults.
        /// </returns>
        public ViterbiResult Run(List <Block> unfilteredBlocks)
        {
            List <Block> blocksNearAnchors = GetAnchorPointBlocks(unfilteredBlocks);
            Console.WriteLine("{0} Anchor block bytes.", Block.GetByteTotal(blocksNearAnchors));

            var fieldViterbi = new Viterbi(_runType, false, _userStates);
            _fieldResults = fieldViterbi.Run(blocksNearAnchors, _filePath);

            // Report the total time of both passes.
            var combinedDuration = _fieldResults.Duration.Add(_anchorResults.Duration);
            _fieldResults.Duration = combinedDuration;
            _fieldResults.MemoryId = MemoryId;

            return _fieldResults;
        }
        /// <summary>
        /// Reads a serialized ViterbiResult from the file named _filePath + ".vtf" and
        /// adopts it as _viterbiResults when its MemoryId matches the current one.
        /// </summary>
        /// <remarks>
        /// The file is opened without an existence check, so a missing file surfaces as
        /// an exception from File.OpenRead.
        /// NOTE(review): BinaryFormatter is unsafe on untrusted input; confirm the .vtf
        /// file can only come from this application.
        /// </remarks>
        private void Load__Intermediate_Field_Results()
        {
            string cachePath = _filePath + ".vtf";

            using (Stream cacheStream = File.OpenRead(cachePath))
            {
                var formatter = new BinaryFormatter();
                ViterbiResult cached = (ViterbiResult)formatter.Deserialize(cacheStream);

                // Only accept results that were produced from the same input file.
                bool sameSource = _viterbiResults.MemoryId == cached.MemoryId;
                if (sameSource)
                {
                    _viterbiResults = cached;
                }
            }
        }
Exemple #7
0
        /// <summary>
        /// Attempts to restore field results from the file named _filePath + ".vtf".
        /// The deserialized result replaces _fieldResults only when its MemoryId equals MemoryId.
        /// </summary>
        /// <returns>
        /// True when _fieldResults is non-null after the attempt (this includes a value
        /// that was already present before the call); otherwise false.
        /// </returns>
        private bool LoadResults()
        {
            string cachePath = _filePath + ".vtf";

            // Nothing to read: report whatever is already held.
            if (!File.Exists(cachePath))
            {
                return _fieldResults != null;
            }

            using (Stream cacheStream = File.OpenRead(cachePath))
            {
                var formatter = new BinaryFormatter();
                ViterbiResult cached = (ViterbiResult)formatter.Deserialize(cacheStream);

                // Only accept results that were produced from the same input file.
                if (cached.MemoryId == MemoryId)
                {
                    _fieldResults = cached;
                }
            }

            return _fieldResults != null;
        }
        /// <summary>
        /// Loads the user-defined states and runs field-level Viterbi inference over
        /// the unfiltered blocks of <paramref name="filterResult"/>.
        /// </summary>
        /// <param name="filterResult">
        /// Supplies the UnfilteredBlocks list used as input. That list is cleared once
        /// the run section finishes without throwing.
        /// </param>
        /// <param name="fileSha1">
        /// Not read inside this method; the instance member this.fileSha1 is what is
        /// passed to ThreadedViterbi. NOTE(review): confirm the two always hold the same value.
        /// </param>
        /// <returns>
        /// The field-level result. Null when loading the user states fails, when an
        /// exception is caught during the run, or (in the default build) when there
        /// are no unfiltered blocks.
        /// </returns>
        private ViterbiResult RunViterbi(FilterResult filterResult, string fileSha1)
        {
            // Load any user-defined state machines.
            List<UserState> userStates;
            try
            {
                userStates = Loader.LoadUserStates(MainForm.Program.UserStatesEnabled,
                                                   MainForm.Program.UserStatesPath);
            }
            catch (Exception ex)
            {
                // Report the failure and give up; no inference is attempted.
                DisplayExceptionMessages(ex, "User-Defined States");
                return null;
            }
            ViterbiResult viterbiResultFields = null;

            try
            {
                write("Running Viterbi on fields");
                // Start time, used for the elapsed-time message in the default build.
                DateTime dt = DateTime.Now;
            #if _GENERALPARSE
                // Build variant: single Viterbi run directly over all unfiltered blocks.
                Viterbi viterbi = new Viterbi(RunType.GeneralParse, false);
                viterbiResultFields = viterbi.Run(filterResult.UnfilteredBlocks, this.filePath);
            #else
            #if SKIPREALWORK
                    // Build variant: skip inference and return a result with an empty field list.
                    viterbiResultFields = new ViterbiResult();
                    viterbiResultFields.Fields = new List<ViterbiField>();
            #else
                // Default build: run through ThreadedViterbi, but only when there is input.
                if (filterResult.UnfilteredBlocks.Count > 0)
                {
                    ThreadedViterbi tv = new ThreadedViterbi(filterResult.UnfilteredBlocks, RunType.GeneralParse, userStates, this.filePath, this.fileSha1);
                    viterbiResultFields = tv.RunThreadedViterbi();
                    TimeSpan ts = DateTime.Now.Subtract(dt);
                    write("Time elapsed for Viterbi fields: {0}", ts.ToString("c"));
                    write("Field count: {0}", viterbiResultFields.Fields.Count);
                }
            #endif
            #endif
                filterResult.UnfilteredBlocks.Clear(); // Allow gc to clean things up
            }
            catch (ThreadAbortException)
            {
                // Thread is being aborted: return without showing an error message.
                return null;
            }
            catch (Exception ex)
            {
                DisplayExceptionMessages(ex, "Viterbi Fields");
                return null;
            }
            return viterbiResultFields;
        }
 /// <summary>
 /// Runs record-level (Meta) Viterbi inference on top of field-level results and
 /// passes the outcome to InterpretResults together with the three output lists.
 /// </summary>
 /// <param name="viterbiResultFields">Field-level result; when null nothing is run.</param>
 /// <param name="addressBookEntries">List handed to InterpretResults for address book entries.</param>
 /// <param name="callLogs">List handed to InterpretResults for call log entries.</param>
 /// <param name="sms">List handed to InterpretResults for SMS entries.</param>
 /// <returns>
 /// The record-level result; null when <paramref name="viterbiResultFields"/> is null
 /// or when an exception is caught.
 /// </returns>
 private ViterbiResult RunMetaViterbi(ViterbiResult viterbiResultFields, List<MetaField> addressBookEntries, List<MetaField> callLogs, List<MetaField> sms)
 {
     ViterbiResult viterbiResultRecord = null;
     try
     {
         if (viterbiResultFields != null)
         {
             write("Running Viterbi on records");
             // Start time for the elapsed-time message below.
             DateTime dt = DateTime.Now;
             // metaResults is not declared in this method; it is assigned here and reused below.
             metaResults = CreateMetaInfo(viterbiResultFields.Fields);
             // Build one block whose bytes are the Name of each meta result cast to byte.
             Block block = new Block()
             {
                 Bytes = metaResults.Select(r => (byte)r.Name).ToArray(),
                 OffsetFile = 0
             };
             var blockList = new List<Block> { block };
             // The Meta run is given an empty user-state list.
             List<UserState> user_state = new List<UserState>();
             ThreadedViterbi tv = new ThreadedViterbi(blockList, RunType.Meta, user_state, this.filePath, this.fileSha1);
             viterbiResultRecord = tv.RunThreadedViterbi();
     #if false
             // Disabled debugging aid: dumps the record-level fields to a file under C:\temp.
             TextWriter tw = null;
             try {
                 if (viterbiResultRecord == null) throw new Exception("No results");
                 tw = new StreamWriter(Path.Combine(@"C:\temp", String.Format("Records_{0}.csv", DateTime.Now.ToString("yyyyMMdd_HHmm"))));
                 foreach (ViterbiField f in viterbiResultRecord.Fields) {
                     tw.WriteLine("{0}\t{1}\t{2}", f.OffsetFile, f.FieldString, f.MachineName.ToString());
                 }
             } catch (Exception ex) {
             } finally {
                 if (tw != null) tw.Close();
             }
     #endif
             TimeSpan ts = DateTime.Now.Subtract(dt);
             write("Time elapsed for Viterbi records: {0}", ts.ToString("c"));
             InterpretResults(metaResults, viterbiResultRecord, addressBookEntries, callLogs, sms);
             write("Entries: call log = {0}, address book = {1}, sms = {2}", callLogs.Count,
                   addressBookEntries.Count,
                   sms.Count);
         }
     }
     catch (ThreadAbortException)
     {
         // Thread is being aborted: return without showing an error message.
         return null;
     }
     catch (Exception ex)
     {
         DisplayExceptionMessages(ex, "Viterbi Records");
         return null;
     }
     return viterbiResultRecord;
 }
        // NOTE: Most of the code below was taken from the original
        // dec0de console application.
        /// <summary>
        /// Walks the fields of a record-level Viterbi result and, based on each field's
        /// MachineName, converts it into address book, call log or SMS entries that are
        /// appended to the corresponding output list.
        /// </summary>
        /// <param name="metaResults">Meta results passed through to the GetMeta* helpers.</param>
        /// <param name="viterbiResult">Record-level result whose Fields are interpreted.</param>
        /// <param name="addressBookEntries">Receives entries for address book machines.</param>
        /// <param name="callLogs">Receives entries for call log machines.</param>
        /// <param name="sms">Receives entries for the SMS machine.</param>
        /// <remarks>
        /// Interpretation is best-effort per field: a failure on one field is reported to
        /// the debug output and the remaining fields are still processed. Fields whose
        /// machine is not one of the handled record machines are skipped.
        /// </remarks>
        private void InterpretResults(List<MetaResult> metaResults, ViterbiResult viterbiResult,
            List<MetaField> addressBookEntries, List<MetaField> callLogs, List<MetaField> sms)
        {
            for (int i = 0; i < viterbiResult.Fields.Count; i++)
            {
                try
                {
                    var field = viterbiResult.Fields[i];

                    switch (field.MachineName)
                    {
                        case MachineList.Meta_AddressBookNokia:
                        case MachineList.Meta_AddressBook:
                            addressBookEntries.AddRange(GetMetaAddressBookEntry(field, metaResults));
                            break;

                        case MachineList.Meta_CallLogNokiaMulti_v2:
                        case MachineList.Meta_CallLogNokiaMulti:
                            callLogs.AddRange(GetMetaCallLogNokia(field, metaResults));
                            break;

                        case MachineList.Meta_CallLogAll:
                        case MachineList.Meta_CallLogGeneric:
                        case MachineList.Meta_CallLogGeneric2:
                        case MachineList.Meta_CallLogGeneric3:
                        case MachineList.Meta_CallLogGeneric4:
                        case MachineList.Meta_CallLogNokiaSingle:
                        case MachineList.Meta_CallLogMoto:
                        case MachineList.Meta_CallLogSamsung:
                            callLogs.AddRange(GetMetaCallLog(field, metaResults));
                            break;

                        case MachineList.Meta_Sms:
                            sms.Add(GetMetaSms(field, metaResults));
                            break;

                        default:
                            // Not a record machine handled here. Previously this threw an
                            // exception that was immediately swallowed; skipping has the
                            // same outcome without the cost of throwing.
                            break;
                    }
                }
                catch (Exception ex)
                {
                    // Keep going with the next field, but leave a trace of the failure
                    // instead of discarding it silently.
                    System.Diagnostics.Debug.WriteLine("InterpretResults: skipping field " + i + ": " + ex);
                }
            }
        }
        /// <summary>
        /// Deserializes a ViterbiResult from the file named _filePath + ".vtf" and keeps
        /// it as _viterbiResults when its MemoryId equals that of the current results.
        /// </summary>
        /// <remarks>
        /// There is no existence check before the file is opened, so a missing file
        /// surfaces as an exception from File.OpenRead.
        /// NOTE(review): BinaryFormatter is unsafe on untrusted input; confirm the .vtf
        /// file can only come from this application.
        /// </remarks>
        private void Load__Intermediate_Field_Results()
        {
            string savedResultsPath = _filePath + ".vtf";

            using (Stream input = File.OpenRead(savedResultsPath))
            {
                var formatter = new BinaryFormatter();
                ViterbiResult saved = (ViterbiResult)formatter.Deserialize(input);

                // Ignore saved results that belong to a different input file.
                bool matchesCurrentFile = _viterbiResults.MemoryId == saved.MemoryId;
                if (matchesCurrentFile)
                {
                    _viterbiResults = saved;
                }
            }
        }
        /// <summary>
        /// Runs an anchor-point Viterbi pass over the given blocks and reads padded
        /// regions of the file around the anchor points that were found.
        /// </summary>
        /// <param name="unfilteredBlocks">Blocks searched for anchor points.</param>
        /// <returns>
        /// The blocks read from the file; an empty list when no anchor points are found.
        /// The anchor pass result is also stored in _anchorResults.
        /// </returns>
        public List<Block> GetAnchorPointBlocks(List<Block> unfilteredBlocks)
        {
            var anchor = new Viterbi(RunType.AnchorPoints, true, _userStates);

            var blocks = new List<Block>();
            //The list of Fields in the result are the anchor points
            _anchorResults = anchor.Run(unfilteredBlocks, _filePath);

            if (_anchorResults.Fields.Count == 0)
            {
                //throw new ApplicationException("No anchor points found!");
                return new List<Block>();
            }

            //The next index after the end of the anchor point
            long nextIndex = _anchorResults.Fields[0].OffsetFile + _anchorResults.Fields[0].Length;

            //Initialize the start of the first block to the first anchor point - X bytes. If this goes past the start of the file,
            // we will just use the start of the file.
            long blockStart = Math.Max(0, _anchorResults.Fields[0].OffsetFile - BLOCK_PADDING_BYTES);

            //Initialize the end of the first block to be X bytes past the first anchor points. If this
            // goes past the end of the file, we will just use the end of the file.
            long blockEnd = Math.Min(_fileLength, _anchorResults.Fields[0].OffsetFile + _anchorResults.Fields[0].Length + BLOCK_PADDING_BYTES);

            // The using statement guarantees the file handle is released even if GetBlock throws.
            using (FileStream stream = new FileStream(_filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                // NOTE(review): the block flushed on the final iteration ends at the previous
                // anchor (blockEnd is only updated after the flush), and the block started for
                // the final anchor is never added. With a single anchor the loop does not run and
                // no block is produced. Confirm whether that is intended before changing it.

                //Get blocks on data based on the anchor points.
                for (int i = 1; i < _anchorResults.Fields.Count; i++)
                {
                    //The size of the gap in bytes between the two anchor points
                    long gap = _anchorResults.Fields[i].OffsetFile - nextIndex;

                    //If the gap is too long (or this is the last anchor), emit the current block and start a new one
                    if (gap > LONG_GAP_BYTES || i == _anchorResults.Fields.Count - 1)
                    {
                        blocks.Add(GetBlock(stream, blockStart, blockEnd));

                        blockStart = Math.Max(0, _anchorResults.Fields[i].OffsetFile - BLOCK_PADDING_BYTES);
                    }

                    blockEnd = Math.Min(_fileLength, _anchorResults.Fields[i].OffsetFile + _anchorResults.Fields[i].Length + BLOCK_PADDING_BYTES);

                    nextIndex = _anchorResults.Fields[i].OffsetFile + _anchorResults.Fields[i].Length;
                }
            }

            return blocks;
        }
        /// <summary>
        /// Tries to reload field results from the file named _filePath + ".vtf".
        /// A deserialized result is stored in _fieldResults only when its MemoryId equals MemoryId.
        /// </summary>
        /// <returns>
        /// True when _fieldResults is non-null once the method finishes (a value that was
        /// already set before the call also counts); otherwise false.
        /// </returns>
        private bool LoadResults()
        {
            string savedResultsPath = _filePath + ".vtf";

            // Without a saved file there is nothing to load; report the current state.
            if (!File.Exists(savedResultsPath))
            {
                return _fieldResults != null;
            }

            using (Stream input = File.OpenRead(savedResultsPath))
            {
                var formatter = new BinaryFormatter();
                ViterbiResult saved = (ViterbiResult)formatter.Deserialize(input);

                // Ignore saved results that belong to a different input file.
                if (saved.MemoryId == MemoryId)
                {
                    _fieldResults = saved;
                }
            }

            return _fieldResults != null;
        }
        /// <summary>
        /// Runs field-level inference over the blocks returned by GetAnchorPointBlocks.
        /// </summary>
        /// <param name="unfilteredBlocks">Blocks handed to the anchor-point pass.</param>
        /// <returns>
        /// The field-level result, whose duration has the anchor pass duration added
        /// and whose MemoryId is set. The same object is stored in _fieldResults.
        /// </returns>
        public ViterbiResult Run(List<Block> unfilteredBlocks)
        {
            List<Block> blocksAroundAnchors = GetAnchorPointBlocks(unfilteredBlocks);
            Console.WriteLine("{0} Anchor block bytes.", Block.GetByteTotal(blocksAroundAnchors));

            var fieldPass = new Viterbi(_runType, false, _userStates);
            _fieldResults = fieldPass.Run(blocksAroundAnchors, _filePath);

            // Fold the anchor pass time into the reported duration.
            var totalDuration = _fieldResults.Duration.Add(_anchorResults.Duration);
            _fieldResults.Duration = totalDuration;
            _fieldResults.MemoryId = MemoryId;

            return _fieldResults;
        }