/// <summary>
        /// Computes, for every input token, the merge operation to apply when detokenizing.
        /// </summary>
        /// <param name="tokens">The tokens to inspect.</param>
        /// <returns>An array holding one operation per token, in token order.</returns>
        /// <exception cref="ArgumentException">
        /// The token dictionary holds an operation this method does not handle
        /// (thrown as InvalidEnumArgumentException when DNF is defined).
        /// </exception>
        public override DetokenizationOperation[] GetDetokenizationOperations(string[] tokens)
        {
            var operations = new DetokenizationOperation[tokens.Length];

            // RIGHT_LEFT_MATCHING tokens that have been seen an odd number of times so far.
            var matchingTokens = new HashSet<string>();

            for (int i = 0; i < tokens.Length; i++)
            {
                DetokenizationOperation dictOperation;

                // One lookup instead of ContainsKey followed by the indexer.
                if (!_tokenToDetokenizationOperation.TryGetValue(tokens[i], out dictOperation))
                {
                    operations[i] = DetokenizationOperation.NO_OPERATION;
                    continue;
                }

                switch (dictOperation)
                {
                    case DetokenizationOperation.MERGE_TO_LEFT:
                    case DetokenizationOperation.MERGE_TO_RIGHT:
                    case DetokenizationOperation.MERGE_BOTH:
                        // Unconditional operations are passed through unchanged.
                        operations[i] = dictOperation;
                        break;

                    case DetokenizationOperation.MERGE_BOTH_IF_SURROUNDED_BY_WORDS:
                        // Merge only when the token has a neighbour on each side and
                        // both neighbours match WordRegex.
                        bool surroundedByWords =
                            0 < i &&
                            i < tokens.Length - 1 &&
                            WordRegex.IsMatch(tokens[i - 1]) &&
                            WordRegex.IsMatch(tokens[i + 1]);

                        operations[i] = surroundedByWords
                            ? DetokenizationOperation.MERGE_BOTH
                            : DetokenizationOperation.NO_OPERATION;
                        break;

                    case DetokenizationOperation.RIGHT_LEFT_MATCHING:
                        if (matchingTokens.Remove(tokens[i]))
                        {
                            // The token already occurred once: move it to the left.
                            // Remove has just cleared the occurrence flag.
                            operations[i] = DetokenizationOperation.MERGE_TO_LEFT;
                        }
                        else
                        {
                            // First time this token is seen: move it to the right and remember it.
                            operations[i] = DetokenizationOperation.MERGE_TO_RIGHT;
                            matchingTokens.Add(tokens[i]);
                        }
                        break;

                    default:
#if DNF
                        throw new InvalidEnumArgumentException("Unknown operation: " + dictOperation);
#else
                        throw new ArgumentException("Unknown operation: " + dictOperation);
#endif
                }
            }

            return operations;
        }
        /// <summary>
        /// Computes, for every input token, the merge operation to apply when detokenizing.
        /// </summary>
        /// <param name="tokens">The tokens to inspect.</param>
        /// <returns>An array holding one operation per token, in token order.</returns>
        /// <exception cref="InvalidEnumArgumentException">
        /// The token dictionary holds an operation this method does not handle.
        /// </exception>
        public override DetokenizationOperation[] GetDetokenizationOperations(string[] tokens)
        {
            var operations = new DetokenizationOperation[tokens.Length];

            // RIGHT_LEFT_MATCHING tokens that have been seen an odd number of times so far.
            var matchingTokens = new HashSet<string>();

            for (int i = 0; i < tokens.Length; i++)
            {
                string token = tokens[i];
                DetokenizationOperation dictOperation;

                // One lookup instead of ContainsKey followed by the indexer.
                if (!_tokenToDetokenizationOperation.TryGetValue(token, out dictOperation))
                {
                    operations[i] = DetokenizationOperation.NO_OPERATION;
                    continue;
                }

                if (dictOperation == DetokenizationOperation.MERGE_TO_LEFT
                    || dictOperation == DetokenizationOperation.MERGE_TO_RIGHT
                    || dictOperation == DetokenizationOperation.MERGE_BOTH)
                {
                    // Unconditional operations are passed through unchanged.
                    operations[i] = dictOperation;
                }
                else if (dictOperation == DetokenizationOperation.MERGE_BOTH_IF_SURROUNDED_BY_WORDS)
                {
                    // Merge only when the token has a neighbour on each side and
                    // both neighbours match WordRegex.
                    bool hasBothNeighbours = 0 < i && i < tokens.Length - 1;

                    if (hasBothNeighbours
                        && WordRegex.IsMatch(tokens[i - 1])
                        && WordRegex.IsMatch(tokens[i + 1]))
                    {
                        operations[i] = DetokenizationOperation.MERGE_BOTH;
                    }
                    else
                    {
                        operations[i] = DetokenizationOperation.NO_OPERATION;
                    }
                }
                else if (dictOperation == DetokenizationOperation.RIGHT_LEFT_MATCHING)
                {
                    if (matchingTokens.Contains(token))
                    {
                        // The token already occurred once: move it to the left
                        // and clear the occurrence flag.
                        operations[i] = DetokenizationOperation.MERGE_TO_LEFT;
                        matchingTokens.Remove(token);
                    }
                    else
                    {
                        // First time this token is seen: move it to the right and remember it.
                        operations[i] = DetokenizationOperation.MERGE_TO_RIGHT;
                        matchingTokens.Add(token);
                    }
                }
                else
                {
                    throw new InvalidEnumArgumentException("Unknown operation: " + dictOperation);
                }
            }

            return operations;
        }
Example #3
0
        /// <summary>Detokenizes the specified tokens.</summary>
        /// <param name="tokens">The tokens to detokenize.</param>
        /// <returns>
        /// The merge operations to detokenize the input tokens, one per token and in token order.
        /// Tokens for which the dictionary lookup yields <c>null</c> get
        /// <see cref="DetokenizationOperation.NoOperation"/>.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The dictionary returned an operation this method does not handle.
        /// </exception>
        public DetokenizationOperation[] Detokenize(string[] tokens)
        {
            var operations = new DetokenizationOperation[tokens.Length];

            // RightLeftMatching tokens that have been seen an odd number of times so far.
            var matchingTokens = new HashSet<string>();

            for (var i = 0; i < tokens.Length; i++)
            {
                var dictOperation = dictionary[tokens[i]];

                switch (dictOperation)
                {
                case DetokenizationDictionary.Operation.MoveRight:
                    operations[i] = DetokenizationOperation.MergeToRight;
                    break;

                case DetokenizationDictionary.Operation.MoveLeft:
                    operations[i] = DetokenizationOperation.MergeToLeft;
                    break;

                case DetokenizationDictionary.Operation.MoveBoth:
                    operations[i] = DetokenizationOperation.MergeBoth;
                    break;

                case DetokenizationDictionary.Operation.RightLeftMatching:

                    if (matchingTokens.Contains(tokens[i]))
                    {
                        // The token already occurred once, move it to the left
                        // and clear the occurrence flag
                        operations[i] = DetokenizationOperation.MergeToLeft;
                        matchingTokens.Remove(tokens[i]);
                    }
                    else
                    {
                        // First time this token is seen, move it to the right
                        // and remember it
                        operations[i] = DetokenizationOperation.MergeToRight;
                        matchingTokens.Add(tokens[i]);
                    }

                    break;

                case null:
                    // No dictionary entry for this token: leave it untouched.
                    operations[i] = DetokenizationOperation.NoOperation;
                    break;

                default:
                    // The single-string constructor interprets its argument as the parameter
                    // name, so the description must go through an overload that takes a message.
                    // The already fetched value is reused instead of a second dictionary lookup.
                    throw new ArgumentOutOfRangeException(
                        nameof(tokens),
                        dictOperation,
                        "Unknown operation: " + dictOperation);
                }
            }
            return operations;
        }
        /// <summary>Detokenizes the specified tokens.</summary>
        /// <param name="tokens">The tokens to detokenize.</param>
        /// <returns>
        /// The merge operations to detokenize the input tokens, one per token and in token order.
        /// Tokens for which the dictionary lookup yields <c>null</c> get
        /// <see cref="DetokenizationOperation.NoOperation"/>.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The dictionary returned an operation this method does not handle.
        /// </exception>
        public DetokenizationOperation[] Detokenize(string[] tokens) {
            var operations = new DetokenizationOperation[tokens.Length];

            // RightLeftMatching tokens that have been seen an odd number of times so far.
            var matchingTokens = new HashSet<string>();

            for (var i = 0; i < tokens.Length; i++) {
                var token = tokens[i];
                var dictOperation = dictionary[token];

                switch (dictOperation) {
                    case DetokenizationDictionary.Operation.MoveRight:
                        operations[i] = DetokenizationOperation.MergeToRight;
                        break;
                    case DetokenizationDictionary.Operation.MoveLeft:
                        operations[i] = DetokenizationOperation.MergeToLeft;
                        break;
                    case DetokenizationDictionary.Operation.MoveBoth:
                        operations[i] = DetokenizationOperation.MergeBoth;
                        break;
                    case DetokenizationDictionary.Operation.RightLeftMatching:

                        if (matchingTokens.Contains(token)) {
                            // The token already occurred once, move it to the left
                            // and clear the occurrence flag
                            operations[i] = DetokenizationOperation.MergeToLeft;
                            matchingTokens.Remove(token);
                        } else {
                            // First time this token is seen, move it to the right
                            // and remember it
                            operations[i] = DetokenizationOperation.MergeToRight;
                            matchingTokens.Add(token);
                        }

                        break;
                    case null:
                        // No dictionary entry for this token: leave it untouched.
                        operations[i] = DetokenizationOperation.NoOperation;
                        break;
                    default:
                        // A lone string argument would be taken as the parameter name, not the
                        // message; use the (paramName, actualValue, message) overload and reuse
                        // the value fetched above rather than querying the dictionary again.
                        throw new ArgumentOutOfRangeException(
                            nameof(tokens),
                            dictOperation,
                            "Unknown operation: " + dictOperation);
                }
            }
            return operations;
        }
Example #5
0
 /// <summary>
 /// Tells whether the given operation merges a token with its left neighbour.
 /// </summary>
 /// <param name="operation">The operation to test.</param>
 /// <returns>
 /// <c>true</c> for <c>MergeToLeft</c> and <c>MergeBoth</c>; otherwise <c>false</c>.
 /// </returns>
 internal static bool IsMergeToLeft(DetokenizationOperation operation)
 {
     if (operation == DetokenizationOperation.MergeToLeft)
     {
         return true;
     }

     // MergeBoth attaches to both sides, so it also counts as a left merge.
     return operation == DetokenizationOperation.MergeBoth;
 }
Example #6
0
 /// <summary>
 /// Tells whether the given operation merges a token with its left neighbour.
 /// </summary>
 /// <param name="operation">The operation to test.</param>
 /// <returns>
 /// <c>true</c> for <c>MergeToLeft</c> and <c>MergeBoth</c>; otherwise <c>false</c>.
 /// </returns>
 internal static bool IsMergeToLeft(DetokenizationOperation operation) {
     switch (operation) {
         case DetokenizationOperation.MergeToLeft:
         case DetokenizationOperation.MergeBoth:
             return true;
         default:
             return false;
     }
 }