/// <summary>
/// Type initializer: registers one greedy and one randomized chunk assigner in
/// <c>StringSplitsChunkAssigners</c>. Both share a single <c>StringSplitEngine</c>
/// built from <c>WordDelimiterAfterChars</c>.
/// </summary>
static SentenceMergingTranslationProcessor()
{
    var sharedSplitEngine = new StringSplitEngine(WordDelimiterAfterChars);

    // Registration order is kept: greedy first, randomized second.
    var greedyAssigner = new GreedyStringSplitsChunkAssigner(sharedSplitEngine);
    StringSplitsChunkAssigners.Add(greedyAssigner);

    var randomizedAssigner = new RandomizedStringSplitsChunkAssigner(sharedSplitEngine);
    StringSplitsChunkAssigners.Add(randomizedAssigner);
}
/// <summary>
/// Divides the full translated sentence into chunks and assigns them, in order, to the
/// source paragraphs. The break positions are obtained from <c>EvaluateBreakPositions</c>,
/// which receives the text length of every source paragraph so that the split can follow
/// the proportions of the source.
/// </summary>
/// <param name="targetText">The translated text of the whole sentence.</param>
public void SetTranslation(string targetText)
{
    // Text length of each source paragraph, in paragraph order.
    List<int> paragraphLengths = SentenceParagraphs.ConvertAll(paragraph => paragraph.Text.Length);

    var cutPositions = EvaluateBreakPositions(targetText, paragraphLengths);
    string[] translatedParts = StringSplitEngine.SplitAt(targetText, cutPositions);

    // The n-th chunk becomes the translation of the n-th paragraph.
    int paragraphIndex = 0;
    foreach (string part in translatedParts)
    {
        SentenceParagraphs[paragraphIndex].Translation = part;
        paragraphIndex++;
    }
}
/// <summary>
/// Generates <c>_iterations</c> candidate sets of break positions via
/// <c>GetRandomBreakPositions</c>, rates each one with <c>CalculateRateResult</c>, and
/// returns the break positions of the candidate with the smallest <c>Fitness</c>.
/// </summary>
/// <param name="sourceChunksTextLength">Text length of each source chunk, in order.</param>
/// <param name="targetText">The text for which split positions are computed.</param>
/// <returns>The break positions of the lowest-fitness candidate.</returns>
public override int[] GetSplitPositions(List<int> sourceChunksTextLength, string targetText)
{
    // Store the inputs and the derived splits in the instance fields.
    _sourceChunksTextLength = sourceChunksTextLength;
    _targetSplits = StringSplitEngine.Split(targetText);
    _targetSplitsLength = _targetSplits.ConvertAll(split => split.Length);

    var candidates = new List<RateResult>();
    for (int round = 0; round < _iterations; round++)
    {
        int[] candidatePositions = GetRandomBreakPositions(targetText, _sourceChunksTextLength);
        candidates.Add(CalculateRateResult(_sourceChunksTextLength, targetText.Length, candidatePositions));
    }

    // OrderBy is a stable sort, so on equal fitness the earliest candidate wins.
    var best = candidates.OrderBy(candidate => candidate.Fitness).First();
    return best.BreakPositions;
}
/// <summary>
/// Greedy assignment of split positions: walks <paramref name="targetText"/> from left to
/// right and, at each splittable position, places a split as soon as the relative position
/// in the target text exceeds the relative end position of the current source chunk.
/// </summary>
/// <param name="sourceChunksTextLength">Text length of each source chunk, in order.</param>
/// <param name="targetText">The text for which split positions are computed.</param>
/// <returns>
/// An array with one entry fewer than <paramref name="sourceChunksTextLength"/> has elements,
/// holding character indexes into <paramref name="targetText"/>. Slots for which no split
/// was found repeat the previous split position (or 0 if there is none). An empty array
/// is returned when there are no source chunks.
/// </returns>
public override int[] GetSplitPositions(List<int> sourceChunksTextLength, string targetText)
{
    // Without source chunks there is nothing to split; the original code would crash
    // here on sourceChunksTextLength[0] and on the negative array size.
    if (sourceChunksTextLength.Count == 0)
    {
        return new int[0];
    }

    int overallSourceLength = sourceChunksTextLength.Sum();
    int currentSentenceParagraph = 0;
    int currentSourceChunkEndPosition = sourceChunksTextLength[0];
    int[] splitPositions = new int[sourceChunksTextLength.Count - 1];
    int splitPositionCount = 0;

    // Stop as soon as every slot is filled: no further split can be recorded, and the
    // bound also keeps the writes below inside the array.
    for (int i = 0; i < targetText.Length && splitPositionCount < splitPositions.Length; i++)
    {
        if (!StringSplitEngine.IsSplittable(targetText, i))
        {
            continue;
        }

        double currentTargetPositionPercentage = (double)i / targetText.Length;
        double currentSourceChunkEndPositionPercentage = (double)currentSourceChunkEndPosition / overallSourceLength;

        if (currentTargetPositionPercentage > currentSourceChunkEndPositionPercentage)
        {
            // Advance to the next source chunk and extend the cumulative end position.
            currentSentenceParagraph++;
            currentSourceChunkEndPosition += sourceChunksTextLength[currentSentenceParagraph];
            splitPositions[splitPositionCount] = i;
            splitPositionCount++;
        }
    }

    // Ensure that there is always the correct number of resulting split positions.
    while (splitPositionCount < splitPositions.Length)
    {
        splitPositions[splitPositionCount] = splitPositionCount > 0 ? splitPositions[splitPositionCount - 1] : 0;
        splitPositionCount++;
    }

    return splitPositions;
}
/// <summary>
/// Creates a greedy chunk assigner; the given engine is forwarded unchanged to the base
/// class constructor.
/// </summary>
/// <param name="stringSplitEngine">The split engine passed on to the base class.</param>
public GreedyStringSplitsChunkAssigner(StringSplitEngine stringSplitEngine)
    : base(stringSplitEngine)
{
}
/// <summary>
/// Initializes the assigner by storing the given engine in the
/// <c>StringSplitEngine</c> member.
/// </summary>
/// <param name="stringSplitEngine">The split engine this assigner will use.</param>
public AbstractStringSplitsChunkAssigner(StringSplitEngine stringSplitEngine)
{
    StringSplitEngine = stringSplitEngine;
}
/// <summary>
/// Creates a randomized chunk assigner. The engine is forwarded to the base class
/// constructor and the iteration count is stored in <c>_iterations</c>.
/// </summary>
/// <param name="stringSplitEngine">The split engine passed on to the base class.</param>
/// <param name="iterations">
/// Iteration count to store; defaults to <c>DefaultIterations</c> when omitted.
/// </param>
public RandomizedStringSplitsChunkAssigner(
    StringSplitEngine stringSplitEngine,
    int iterations = DefaultIterations)
    : base(stringSplitEngine)
{
    _iterations = iterations;
}