/// <summary>
/// Computes, for every chunk, the dechunk operation to apply to it.
/// </summary>
/// <param name="chunks">The chunks to inspect; the result is index-aligned with this array.</param>
/// <returns>An array with exactly one <see cref="DechunkOperation"/> per input chunk.</returns>
/// <exception cref="InvalidEnumArgumentException">
/// Thrown when the operation configured for a matching regex is not one of the operations handled here.
/// </exception>
public override DechunkOperation[] GetDechunkerOperations(string[] chunks)
{
    var result = new DechunkOperation[chunks.Length];

    // Tokens of RIGHT_LEFT_MATCHING kind that have been seen an odd number of times so far.
    var openTokens = new HashSet<string>();

    for (int index = 0; index < chunks.Length; index++)
    {
        var chunk = chunks[index];

        // Evaluate every configured regex against the chunk (the list is fully materialized).
        var candidates = _regexToDechunkOperation
            .Where(entry => entry.Key.IsMatch(chunk))
            .ToList();

        // No regex matched: the chunk is left untouched.
        if (candidates.Count == 0)
        {
            result[index] = DechunkOperation.NO_OPERATION;
            continue;
        }

        // TODO: log issue when candidates.Count > 1, should not happen

        // Only the first matching entry decides the operation.
        var configured = candidates[0].Value;

        if (configured == DechunkOperation.MERGE_TO_LEFT
            || configured == DechunkOperation.MERGE_TO_RIGHT
            || configured == DechunkOperation.MERGE_BOTH)
        {
            // Unconditional merges are passed through as-is.
            result[index] = configured;
            continue;
        }

        if (configured == DechunkOperation.MERGE_BOTH_IF_SURROUNDED_BY_WORDS)
        {
            // Merge on both sides only when the chunk has a neighbour on each side
            // and both neighbours satisfy the word regexes.
            bool hasBothNeighbours = 0 < index && index < chunks.Length - 1;
            if (hasBothNeighbours
                && EndByWordRegex.IsMatch(chunks[index - 1])
                && StartByWordRegex.IsMatch(chunks[index + 1]))
            {
                result[index] = DechunkOperation.MERGE_BOTH;
            }
            else
            {
                result[index] = DechunkOperation.NO_OPERATION;
            }

            continue;
        }

        if (configured == DechunkOperation.RIGHT_LEFT_MATCHING)
        {
            // Remove returns true only when the token was already recorded, i.e. this is
            // the second occurrence of the pair: merge it to the left and forget it.
            if (openTokens.Remove(chunk))
            {
                result[index] = DechunkOperation.MERGE_TO_LEFT;
            }
            else
            {
                // First occurrence of the pair: merge it to the right and remember it.
                result[index] = DechunkOperation.MERGE_TO_RIGHT;
                openTokens.Add(chunk);
            }

            continue;
        }

        throw new InvalidEnumArgumentException("Unknown operation: " + configured);
    }

    return result;
}
/// <summary>
/// Maps each chunk to the dechunk operation that should be applied to it.
/// </summary>
/// <param name="chunks">The chunks to analyse, in order.</param>
/// <returns>
/// An array of the same length as <paramref name="chunks"/>, holding the resolved
/// <see cref="DechunkOperation"/> for the chunk at the same index.
/// </returns>
/// <exception cref="InvalidEnumArgumentException">
/// Thrown when a matching regex is associated with an operation this method does not handle.
/// </exception>
public override DechunkOperation[] GetDechunkerOperations(string[] chunks)
{
    int total = chunks.Length;
    var ops = new DechunkOperation[total];

    // Paired tokens (RIGHT_LEFT_MATCHING) whose first occurrence is still waiting for its partner.
    var pendingTokens = new HashSet<string>();

    for (int pos = 0; pos < total; pos++)
    {
        string token = chunks[pos];
        var hits = _regexToDechunkOperation
            .Where(pair => pair.Key.IsMatch(token))
            .ToList();

        DechunkOperation resolved;

        if (hits.Count == 0)
        {
            // Nothing is configured for this chunk.
            resolved = DechunkOperation.NO_OPERATION;
        }
        else
        {
            if (hits.Count > 1)
            {
                // TODO: log issue, should not happen
            }

            // The first matching entry wins.
            var rule = hits.First().Value;

            if (rule == DechunkOperation.MERGE_TO_LEFT ||
                rule == DechunkOperation.MERGE_TO_RIGHT ||
                rule == DechunkOperation.MERGE_BOTH)
            {
                // Plain merge operations need no further resolution.
                resolved = rule;
            }
            else if (rule == DechunkOperation.MERGE_BOTH_IF_SURROUNDED_BY_WORDS)
            {
                // Requires a chunk on each side, the left one matching EndByWordRegex
                // and the right one matching StartByWordRegex.
                bool surroundedByWords =
                    pos > 0 &&
                    pos < total - 1 &&
                    EndByWordRegex.IsMatch(chunks[pos - 1]) &&
                    StartByWordRegex.IsMatch(chunks[pos + 1]);

                resolved = surroundedByWords
                    ? DechunkOperation.MERGE_BOTH
                    : DechunkOperation.NO_OPERATION;
            }
            else if (rule == DechunkOperation.RIGHT_LEFT_MATCHING)
            {
                // Alternate per token: the first sighting merges right and is remembered,
                // the next sighting merges left and clears the memory.
                bool seenBefore = pendingTokens.Contains(token);

                resolved = seenBefore
                    ? DechunkOperation.MERGE_TO_LEFT
                    : DechunkOperation.MERGE_TO_RIGHT;

                if (seenBefore)
                {
                    pendingTokens.Remove(token);
                }
                else
                {
                    pendingTokens.Add(token);
                }
            }
            else
            {
                throw new InvalidEnumArgumentException("Unknown operation: " + rule);
            }
        }

        ops[pos] = resolved;
    }

    return ops;
}