/// <summary>
/// Builds a managed <see cref="WordAlignmentMatrix"/> from a native matrix laid out as an array of
/// row pointers, where each row is read as a sequence of 32-bit integers.
/// </summary>
/// <param name="nativeMatrix">Pointer to the array of row pointers.</param>
/// <param name="iLen">Number of rows to read.</param>
/// <param name="jLen">Number of cells to read from each row.</param>
/// <returns>
/// A matrix in which positive native values map to <see cref="AlignmentType.Aligned"/>, zero maps to
/// <see cref="AlignmentType.NotAligned"/> and negative values map to <see cref="AlignmentType.Unknown"/>.
/// </returns>
public static WordAlignmentMatrix ConvertNativeMatrixToWordAlignmentMatrix(IntPtr nativeMatrix, uint iLen, uint jLen)
{
    int pointerSize = Marshal.SizeOf<IntPtr>();
    int cellSize = Marshal.SizeOf<int>();
    var result = new WordAlignmentMatrix((int)iLen, (int)jLen);

    for (int row = 0; row < result.RowCount; row++)
    {
        // Each entry of the outer array is a pointer to one row of ints.
        IntPtr rowPtr = Marshal.ReadIntPtr(nativeMatrix, row * pointerSize);
        for (int col = 0; col < result.ColumnCount; col++)
        {
            int cell = Marshal.ReadInt32(rowPtr, col * cellSize);
            result[row, col] = cell > 0
                ? AlignmentType.Aligned
                : (cell == 0 ? AlignmentType.NotAligned : AlignmentType.Unknown);
        }
    }

    return result;
}
/// <summary>
/// Marshals a source/target segment pair (and an optional alignment matrix) to native memory and
/// passes them to <c>decoder_trainSentencePair</c>.
/// </summary>
/// <param name="decoderHandle">Handle passed through to the native call.</param>
/// <param name="sourceSegment">Source-side tokens.</param>
/// <param name="targetSegment">Target-side tokens.</param>
/// <param name="matrix">
/// Optional alignment matrix; when <c>null</c> a null pointer and zero dimensions are passed.
/// </param>
public static void TrainSegmentPair(IntPtr decoderHandle, IEnumerable<string> sourceSegment,
    IEnumerable<string> targetSegment, WordAlignmentMatrix matrix)
{
    IntPtr nativeSourceSegment = IntPtr.Zero;
    IntPtr nativeTargetSegment = IntPtr.Zero;
    IntPtr nativeMatrix = IntPtr.Zero;
    uint iLen = 0, jLen = 0;
    try
    {
        // All native allocations happen inside the try so that a failure in a later
        // conversion still releases the buffers that were already allocated.
        nativeSourceSegment = ConvertStringsToNativeUtf8(sourceSegment);
        nativeTargetSegment = ConvertStringsToNativeUtf8(targetSegment);
        if (matrix != null)
        {
            nativeMatrix = ConvertWordAlignmentMatrixToNativeMatrix(matrix);
            // Dimensions are only set once the matrix exists, so the cleanup below never
            // receives a non-zero row count together with a null pointer.
            iLen = (uint)matrix.RowCount;
            jLen = (uint)matrix.ColumnCount;
        }

        decoder_trainSentencePair(decoderHandle, nativeSourceSegment, nativeTargetSegment, nativeMatrix, iLen,
            jLen);
    }
    finally
    {
        // Pointers that were never assigned are still IntPtr.Zero; FreeHGlobal ignores a zero
        // pointer, and FreeNativeMatrix already received (IntPtr.Zero, 0) whenever no matrix was given.
        FreeNativeMatrix(nativeMatrix, iLen);
        Marshal.FreeHGlobal(nativeTargetSegment);
        Marshal.FreeHGlobal(nativeSourceSegment);
    }
}
/// <summary>
/// Marshals a source/target segment pair (and an optional hint alignment matrix) to native memory
/// and passes them to <c>Thot.swAlignModel_addSentencePair</c>.
/// </summary>
/// <param name="sourceSegment">Source-side tokens.</param>
/// <param name="targetSegment">Target-side tokens.</param>
/// <param name="hintMatrix">
/// Optional alignment matrix; when <c>null</c> a null pointer and zero dimensions are passed.
/// </param>
public void AddSegmentPair(IReadOnlyList<string> sourceSegment, IReadOnlyList<string> targetSegment,
    WordAlignmentMatrix hintMatrix = null)
{
    CheckDisposed();

    IntPtr nativeSourceSegment = IntPtr.Zero;
    IntPtr nativeTargetSegment = IntPtr.Zero;
    IntPtr nativeMatrix = IntPtr.Zero;
    uint iLen = 0, jLen = 0;
    try
    {
        // All native allocations happen inside the try so that a failure in a later
        // conversion still releases the buffers that were already allocated.
        nativeSourceSegment = Thot.ConvertStringsToNativeUtf8(sourceSegment);
        nativeTargetSegment = Thot.ConvertStringsToNativeUtf8(targetSegment);
        if (hintMatrix != null)
        {
            nativeMatrix = Thot.ConvertWordAlignmentMatrixToNativeMatrix(hintMatrix);
            // Dimensions are only set once the matrix exists, so the cleanup below never
            // receives a non-zero row count together with a null pointer.
            iLen = (uint)hintMatrix.RowCount;
            jLen = (uint)hintMatrix.ColumnCount;
        }

        Thot.swAlignModel_addSentencePair(Handle, nativeSourceSegment, nativeTargetSegment, nativeMatrix, iLen,
            jLen);
    }
    finally
    {
        // Pointers that were never assigned are still IntPtr.Zero; FreeHGlobal ignores a zero
        // pointer, and FreeNativeMatrix already received (IntPtr.Zero, 0) whenever no hint was given.
        Thot.FreeNativeMatrix(nativeMatrix, iLen);
        Marshal.FreeHGlobal(nativeTargetSegment);
        Marshal.FreeHGlobal(nativeSourceSegment);
    }
}
/// <summary>
/// Creates an <paramref name="i"/> by <paramref name="j"/> alignment matrix and marks every
/// (source, target) index pair listed in <paramref name="dto"/>.
/// </summary>
/// <param name="dto">The aligned word pairs to mark.</param>
/// <param name="i">Row count passed to the matrix constructor.</param>
/// <param name="j">Column count passed to the matrix constructor.</param>
/// <returns>The populated matrix.</returns>
private static WordAlignmentMatrix CreateModel(AlignedWordPairDto[] dto, int i, int j)
{
    var matrix = new WordAlignmentMatrix(i, j);
    for (int index = 0; index < dto.Length; index++)
    {
        AlignedWordPairDto pair = dto[index];
        matrix[pair.SourceIndex, pair.TargetIndex] = true;
    }

    return matrix;
}
/// <summary>
/// Replaces every <see cref="AlignmentType.Unknown"/> cell in row <paramref name="i"/> with
/// <see cref="AlignmentType.NotAligned"/>; other cells are left untouched.
/// </summary>
/// <param name="matrix">The matrix to update in place.</param>
/// <param name="i">The row index to process.</param>
private static void SetSourceNotAligned(WordAlignmentMatrix matrix, int i)
{
    for (int col = 0; col < matrix.ColumnCount; col++)
    {
        bool isUnknown = matrix[i, col] == AlignmentType.Unknown;
        if (isUnknown)
        {
            matrix[i, col] = AlignmentType.NotAligned;
        }
    }
}
/// <summary>
/// Loads the word alignment model at <c>ModelPath</c>, aligns a Spanish/English sentence pair and
/// checks the GIZA-format rendering of the resulting matrix.
/// </summary>
public void GetBestAlignment_ReturnsCorrectAlignment()
{
    const string expectedGiza = "could we see another room , please ?\n"
        + "NULL ({ }) por ({ 6 }) favor ({ 7 }) , ({ }) ¿ ({ 8 }) podríamos ({ 1 2 }) ver ({ 3 }) otra ({ 4 }) habitación ({ 5 }) ? ({ })\n";

    using (var swAlignModel = new ThotWordAlignmentModel(ModelPath))
    {
        string[] sourceSegment = "por favor , ¿ podríamos ver otra habitación ?".Split(' ');
        string[] targetSegment = "could we see another room , please ?".Split(' ');

        WordAlignmentMatrix waMatrix = swAlignModel.GetBestAlignment(sourceSegment, targetSegment);
        var actualGiza = waMatrix.ToGizaFormat(sourceSegment, targetSegment);

        Assert.That(actualGiza, Is.EqualTo(expectedGiza));
    }
}
/// <summary>
/// Converts a <see cref="WordGraphDto"/> into a <see cref="WordGraph"/>, building one
/// <see cref="WordGraphArc"/> per arc DTO.
/// </summary>
/// <param name="dto">The word graph DTO to convert.</param>
/// <returns>A word graph with the converted arcs, final states and initial state score.</returns>
private static WordGraph CreateModel(WordGraphDto dto)
{
    var arcs = new List<WordGraphArc>();
    foreach (WordGraphArcDto arcDto in dto.Arcs)
    {
        // The alignment matrix has one row per source position covered by the arc's range
        // and one column per word on the arc.
        var sourceLength = arcDto.SourceSegmentRange.End - arcDto.SourceSegmentRange.Start;
        WordAlignmentMatrix alignment = CreateModel(arcDto.Alignment, sourceLength, arcDto.Words.Length);

        // NOTE(review): Enumerable.Cast<double>() unboxes each element rather than converting it,
        // so this only succeeds when the elements of Confidences are already double — confirm
        // against the DTO definition.
        var arc = new WordGraphArc(arcDto.PrevState, arcDto.NextState, arcDto.Score, arcDto.Words, alignment,
            CreateModel(arcDto.SourceSegmentRange), arcDto.IsUnknown, arcDto.Confidences.Cast<double>());
        arcs.Add(arc);
    }

    return new WordGraph(arcs, dto.FinalStates, dto.InitialStateScore);
}
/// <summary>
/// Copies a <see cref="WordAlignmentMatrix"/> into unmanaged memory as an array of row pointers,
/// each pointing at a row holding one byte per column.
/// </summary>
/// <param name="matrix">The managed matrix to copy.</param>
/// <returns>
/// Pointer to the array of row pointers. Both the outer array and every row are allocated with
/// <see cref="Marshal.AllocHGlobal(int)"/>.
/// </returns>
public static IntPtr ConvertWordAlignmentMatrixToNativeMatrix(WordAlignmentMatrix matrix)
{
    int pointerSize = Marshal.SizeOf<IntPtr>();
    IntPtr rowTable = Marshal.AllocHGlobal(matrix.RowCount * pointerSize);

    for (int row = 0; row < matrix.RowCount; row++)
    {
        // One byte per cell.
        IntPtr rowPtr = Marshal.AllocHGlobal(matrix.ColumnCount);
        for (int col = 0; col < matrix.ColumnCount; col++)
        {
            byte cell = Convert.ToByte(matrix[row, col]);
            Marshal.WriteByte(rowPtr, col, cell);
        }

        Marshal.WriteIntPtr(rowTable, row * pointerSize, rowPtr);
    }

    return rowTable;
}
/// <summary>
/// Builds a managed <see cref="WordAlignmentMatrix"/> from a native matrix laid out as an array of
/// row pointers, where each row is read one byte per cell.
/// </summary>
/// <param name="nativeMatrix">Pointer to the array of row pointers.</param>
/// <param name="iLen">Number of rows to read.</param>
/// <param name="jLen">Number of bytes to read from each row.</param>
/// <returns>A matrix whose cells are set from the boolean value of each native byte.</returns>
public static WordAlignmentMatrix ConvertNativeMatrixToWordAlignmentMatrix(IntPtr nativeMatrix, uint iLen, uint jLen)
{
    int pointerSize = Marshal.SizeOf<IntPtr>();
    var result = new WordAlignmentMatrix((int)iLen, (int)jLen);

    for (int row = 0; row < result.RowCount; row++)
    {
        // Each entry of the outer array is a pointer to one row of bytes.
        IntPtr rowPtr = Marshal.ReadIntPtr(nativeMatrix, row * pointerSize);
        for (int col = 0; col < result.ColumnCount; col++)
        {
            byte cell = Marshal.ReadByte(rowPtr, col);
            result[row, col] = Convert.ToBoolean(cell);
        }
    }

    return result;
}
/// <summary>
/// Collects every cell of <paramref name="matrix"/> that equals <see cref="AlignmentType.Aligned"/>
/// into an array of <see cref="AlignedWordPairDto"/>, in row-major order.
/// </summary>
/// <param name="matrix">The alignment matrix to scan.</param>
/// <returns>One DTO per aligned cell, with the row as source index and the column as target index.</returns>
private static AlignedWordPairDto[] CreateDto(WordAlignmentMatrix matrix)
{
    var alignedPairs = new List<AlignedWordPairDto>();
    for (int row = 0; row < matrix.RowCount; row++)
    {
        for (int col = 0; col < matrix.ColumnCount; col++)
        {
            bool isAligned = matrix[row, col] == AlignmentType.Aligned;
            if (isAligned)
            {
                var pair = new AlignedWordPairDto { SourceIndex = row, TargetIndex = col };
                alignedPairs.Add(pair);
            }
        }
    }

    return alignedPairs.ToArray();
}
/// <summary>
/// Collects every cell of <paramref name="matrix"/> that equals <see cref="AlignmentType.Aligned"/>
/// into a list of <see cref="AlignedWordPairDto"/>, in row-major order.
/// </summary>
/// <param name="matrix">The alignment matrix to scan.</param>
/// <returns>One DTO per aligned cell, with the row as source index and the column as target index.</returns>
public static IReadOnlyList<AlignedWordPairDto> CreateDto(this WordAlignmentMatrix matrix)
{
    var alignedPairs = new List<AlignedWordPairDto>();
    for (int row = 0; row < matrix.RowCount; row++)
    {
        for (int col = 0; col < matrix.ColumnCount; col++)
        {
            bool isAligned = matrix[row, col] == AlignmentType.Aligned;
            if (isAligned)
            {
                var pair = new AlignedWordPairDto { SourceIndex = row, TargetIndex = col };
                alignedPairs.Add(pair);
            }
        }
    }

    return alignedPairs;
}
/// <summary>
/// Translates a sentence with the toy-corpus model, trains the engine on one segment pair with a
/// partially specified alignment matrix, and checks the translation of a second sentence afterwards.
/// </summary>
public void TrainSegment_AlignmentSpecified_TranslationCorrect()
{
    const string trainingSource = "maria no dio una bofetada a la bruja verde .";
    const string trainingTarget = "mary didn't slap the green witch .";

    using (var smtModel = new ThotSmtModel(TestHelpers.ToyCorpusConfigFileName))
    using (ISmtEngine engine = smtModel.CreateEngine())
    {
        // Translation before training.
        TranslationResult result = engine.Translate(trainingSource.Split());
        Assert.That(result.TargetSegment, Is.EqualTo("maria no dio a bofetada to bruja verde .".Split()));

        // 10 source tokens by 7 target tokens; cells start as Unknown and are filled in selectively.
        var matrix = new WordAlignmentMatrix(10, 7, AlignmentType.Unknown);
        SetAligned(matrix, 1, 1);
        SetAligned(matrix, 2, 2);
        SetAligned(matrix, 3, 2);
        SetAligned(matrix, 4, 2);
        SetSourceNotAligned(matrix, 5);
        SetAligned(matrix, 8, 4);

        engine.TrainSegment(trainingSource.Split(), trainingTarget.Split(), matrix);

        // Translation after training.
        result = engine.Translate("maria es una bruja .".Split());
        Assert.That(result.TargetSegment, Is.EqualTo("mary is a witch .".Split()));
    }
}
/// <summary>
/// Marks cell (<paramref name="i"/>, <paramref name="j"/>) as <see cref="AlignmentType.Aligned"/> and
/// then turns every <see cref="AlignmentType.Unknown"/> cell in the same column and the same row
/// into <see cref="AlignmentType.NotAligned"/>.
/// </summary>
/// <param name="matrix">The matrix to update in place.</param>
/// <param name="i">Row index of the aligned cell.</param>
/// <param name="j">Column index of the aligned cell.</param>
private static void SetAligned(WordAlignmentMatrix matrix, int i, int j)
{
    matrix[i, j] = AlignmentType.Aligned;

    // Column pass: resolve remaining Unknown cells in column j.
    for (int row = 0; row < matrix.RowCount; row++)
    {
        bool isUnknown = matrix[row, j] == AlignmentType.Unknown;
        if (isUnknown)
        {
            matrix[row, j] = AlignmentType.NotAligned;
        }
    }

    // Row pass: resolve remaining Unknown cells in row i.
    for (int col = 0; col < matrix.ColumnCount; col++)
    {
        bool isUnknown = matrix[i, col] == AlignmentType.Unknown;
        if (isUnknown)
        {
            matrix[i, col] = AlignmentType.NotAligned;
        }
    }
}
/// <summary>
/// Marshals a source/target segment pair to native memory, calls
/// <c>Thot.swAlignModel_getBestAlignment</c>, and converts the native matrix back into a
/// <see cref="WordAlignmentMatrix"/>.
/// </summary>
/// <param name="sourceSegment">Source-side tokens.</param>
/// <param name="targetSegment">Target-side tokens.</param>
/// <param name="hintMatrix">
/// Optional matrix copied into the native buffer before the call; when <c>null</c> a fresh native
/// matrix is allocated from the segment lengths.
/// </param>
/// <returns>The matrix read back from native memory, using the dimensions the native call reports.</returns>
public WordAlignmentMatrix GetBestAlignment(IReadOnlyList<string> sourceSegment,
    IReadOnlyList<string> targetSegment, WordAlignmentMatrix hintMatrix = null)
{
    CheckDisposed();

    IntPtr nativeSourceSegment = IntPtr.Zero;
    IntPtr nativeTargetSegment = IntPtr.Zero;
    IntPtr nativeMatrix = IntPtr.Zero;
    // Row count the native matrix was allocated with. It is tracked separately from iLen because
    // iLen is passed by ref and may be overwritten by the native call before cleanup runs.
    uint allocatedRows = 0;
    try
    {
        // All native allocations happen inside the try so that a failure in a later
        // conversion still releases the buffers that were already allocated.
        nativeSourceSegment = Thot.ConvertStringsToNativeUtf8(sourceSegment);
        nativeTargetSegment = Thot.ConvertStringsToNativeUtf8(targetSegment);
        if (hintMatrix == null)
        {
            // NOTE(review): assumes AllocNativeMatrix allocates one row per source token
            // (its first argument) — confirm against its definition.
            nativeMatrix = Thot.AllocNativeMatrix(sourceSegment.Count, targetSegment.Count);
            allocatedRows = (uint)sourceSegment.Count;
        }
        else
        {
            nativeMatrix = Thot.ConvertWordAlignmentMatrixToNativeMatrix(hintMatrix);
            allocatedRows = (uint)hintMatrix.RowCount;
        }

        uint iLen = (uint)sourceSegment.Count;
        uint jLen = (uint)targetSegment.Count;
        Thot.swAlignModel_getBestAlignment(Handle, nativeSourceSegment, nativeTargetSegment, nativeMatrix,
            ref iLen, ref jLen);
        return Thot.ConvertNativeMatrixToWordAlignmentMatrix(nativeMatrix, iLen, jLen);
    }
    finally
    {
        // Pointers that were never assigned are still IntPtr.Zero; FreeHGlobal ignores a zero pointer.
        // NOTE(review): FreeNativeMatrix is assumed to tolerate (IntPtr.Zero, 0), as the sibling
        // callers that pass no matrix rely on — confirm.
        Thot.FreeNativeMatrix(nativeMatrix, allocatedRows);
        Marshal.FreeHGlobal(nativeTargetSegment);
        Marshal.FreeHGlobal(nativeSourceSegment);
    }
}