/// <summary>
/// Computes the span of a paired tag, i.e. the number of token positions that lie
/// strictly between the tag's start and end positions.
/// </summary>
/// <param name="pt">The paired tag whose span is measured.</param>
/// <returns>The value <c>pt.End - pt.Start - 1</c>.</returns>
private static int GetTagSpan(PairedTag pt)
{
    int span = pt.End - pt.Start - 1;

    // The end position is expected to come after the start position,
    // so the span should never be negative.
    System.Diagnostics.Debug.Assert(span >= 0);

    return span;
}
/// <summary>
/// Initializes a new association between a source tag and a target tag, together
/// with the edit operation that relates them.
/// </summary>
/// <param name="sourceTag">The tag on the source side; stored as-is without validation.</param>
/// <param name="targetTag">The tag on the target side; stored as-is without validation.</param>
/// <param name="op">The edit operation assigned to this association.</param>
public TagAssociation(PairedTag sourceTag,
    PairedTag targetTag,
    Core.EditDistance.EditOperation op)
{
    this.SourceTag = sourceTag;
    this.TargetTag = targetTag;
    this.Operation = op;
}
/// <summary>
/// Determines whether the specified object has exactly the same runtime type as this
/// instance and the same <c>Start</c>, <c>End</c> and <c>Anchor</c> values.
/// </summary>
/// <param name="obj">The object to compare with the current object.</param>
/// <returns>true if the specified object is equal to the current object;
/// otherwise, false.
/// </returns>
public override bool Equals(object obj)
{
    // A null reference or an object of a different runtime type is never equal.
    if (obj == null || obj.GetType() != this.GetType())
    {
        return false;
    }

    // Direct cast instead of "as": the exact-type check above means the cast is expected
    // to succeed, and should that invariant ever break, an InvalidCastException pinpoints
    // the problem instead of a NullReferenceException on the first member access.
    PairedTag other = (PairedTag)obj;

    return other.Start == this.Start
        && other.End == this.End
        && other.Anchor == this.Anchor;
}
/// <summary>
/// Appends a new tag association and registers its index under the start and end
/// positions of every tag that is present.
/// </summary>
/// <param name="srcTag">The source tag, or null if the association has no source side.</param>
/// <param name="trgTag">The target tag, or null if the association has no target side.</param>
/// <param name="op">
/// The edit operation of the association. When <c>Undefined</c> is passed, the operation
/// is derived from the tags: <c>Insert</c> if there is no source tag, <c>Delete</c> if
/// there is no target tag, and <c>Change</c> otherwise.
/// </param>
public void Add(PairedTag srcTag, PairedTag trgTag, Core.EditDistance.EditOperation op)
{
    // At least one side of the association is expected to be present.
    System.Diagnostics.Debug.Assert(srcTag != null || trgTag != null);

    if (op == Core.EditDistance.EditOperation.Undefined)
    {
        op = srcTag == null
            ? Core.EditDistance.EditOperation.Insert
            : (trgTag == null
                ? Core.EditDistance.EditOperation.Delete
                : Core.EditDistance.EditOperation.Change);
    }

    // Capture the index the new association will occupy before it is appended.
    int idx = _Associations.Count;
    _Associations.Add(new TagAssociation(srcTag, trgTag, op));

    if (srcTag != null)
    {
        System.Diagnostics.Debug.Assert(srcTag.Start < srcTag.End);

        // Both boundary positions of the source tag refer to the same association.
        _SrcPositionIdx.Add(srcTag.Start, idx);
        _SrcPositionIdx.Add(srcTag.End, idx);
    }

    if (trgTag != null)
    {
        System.Diagnostics.Debug.Assert(trgTag.Start < trgTag.End);

        // Both boundary positions of the target tag refer to the same association.
        _TrgPositionIdx.Add(trgTag.Start, idx);
        _TrgPositionIdx.Add(trgTag.End, idx);
    }
}
/// <summary>
/// Appends a new tag association without an explicit edit operation. The call is
/// forwarded to the three-argument overload with <c>Undefined</c> as the operation.
/// </summary>
/// <param name="srcTag">The source tag, or null if the association has no source side.</param>
/// <param name="trgTag">The target tag, or null if the association has no target side.</param>
public void Add(PairedTag srcTag, PairedTag trgTag)
{
    this.Add(srcTag, trgTag, Core.EditDistance.EditOperation.Undefined);
}
/// <summary>
/// Builds a score matrix that rates every (source tag, target tag) pair by aligning the
/// token prefixes leading up to the two tags.
/// </summary>
/// <param name="similarityMatrix">Supplies the source and target tokens and is handed to the score provider.</param>
/// <param name="srcPairedTags">The paired tags of the source side.</param>
/// <param name="trgPairedTags">The paired tags of the target side.</param>
/// <param name="useEndPositions">
/// If true, prefixes are taken up to each tag's end position and no span penalty is applied;
/// otherwise prefixes are taken up to each tag's start position and half of the absolute
/// difference between the two tag spans is subtracted from the score.
/// </param>
/// <returns>
/// A matrix indexed by [source tag index, target tag index]. Entries for which the aligner
/// yields no result keep their default value of 0.
/// </returns>
private static int[,] ComputeTagAssociationScores(SimilarityMatrix similarityMatrix,
    TagPairs srcPairedTags,
    TagPairs trgPairedTags,
    bool useEndPositions)
{
    // this should pretty much result in first-come first-serve alignment, but we hopefully
    // get better associations for nested tags

    // foreach src tag, compute LCS to each target tag
    int[,] scores = new int[srcPairedTags.Count, trgPairedTags.Count];

    // The aligner operates on token positions, so both sequences are simply 0..Count-1.
    List<int> srcPositions = new List<int>();
    List<int> trgPositions = new List<int>();

    TokenIndexLCSScoreProvider scoreProvider
        = new TokenIndexLCSScoreProvider(similarityMatrix, 0.75, true);

    for (int i = 0; i < similarityMatrix.SourceTokens.Count; ++i)
    {
        srcPositions.Add(i);
    }

    for (int i = 0; i < similarityMatrix.TargetTokens.Count; ++i)
    {
        trgPositions.Add(i);
    }

    SequenceAlignmentComputer<int> aligner
        = new SequenceAlignmentComputer<int>(srcPositions, trgPositions, scoreProvider, null, 1, 1);

    for (int srcTag = srcPairedTags.Count - 1; srcTag >= 0; --srcTag)
    {
        PairedTag sourcePair = srcPairedTags[srcTag];
        int uptoSource = useEndPositions ? sourcePair.End : sourcePair.Start;

        for (int trgTag = trgPairedTags.Count - 1; trgTag >= 0; --trgTag)
        {
            PairedTag targetPair = trgPairedTags[trgTag];
            int uptoTarget = useEndPositions ? targetPair.End : targetPair.Start;

            List<AlignedSubstring> result = aligner.Compute(uptoSource, uptoTarget);
            if (result == null || result.Count <= 0)
            {
                // No alignment for this pair: leave the matrix entry at its default.
                continue;
            }

            System.Diagnostics.Debug.Assert(result.Count == 1);

            // the result is the common subsequence length minus items which were deleted or inserted
            int common = result[0].Score;
            int score = common - (uptoSource - common) - (uptoTarget - common);

            // penalize large differences in the spanned width, but not if
            // we include the end positions in the LCS
            int malus = useEndPositions
                ? 0
                : Math.Abs(GetTagSpan(sourcePair) - GetTagSpan(targetPair)) / 2;

            scores[srcTag, trgTag] = score - malus;
        }
    }

    return scores;
}
/// <summary>
/// Initializes a new association between a source tag and a target tag whose edit
/// operation is left as <c>Undefined</c>.
/// </summary>
/// <param name="sourceTag">The tag on the source side.</param>
/// <param name="targetTag">The tag on the target side.</param>
public TagAssociation(PairedTag sourceTag, PairedTag targetTag)
    : this(sourceTag, targetTag, Core.EditDistance.EditOperation.Undefined)
{
}