Esempio n. 1
0
        /// <summary>
        /// Computes the number of positions lying strictly between the start and
        /// end positions of a paired tag.
        /// </summary>
        /// <param name="pt">The paired tag whose start and end positions are read.</param>
        /// <returns>
        /// <c>pt.End - pt.Start - 1</c>. Debug builds assert that this value is not negative.
        /// </returns>
        private static int GetTagSpan(PairedTag pt)
        {
            int endPosition = pt.End;
            int startPosition = pt.Start;

            // Both boundary positions are excluded from the span, hence the extra "- 1".
            int span = endPosition - startPosition - 1;

            System.Diagnostics.Debug.Assert(span >= 0);

            return span;
        }
Esempio n. 2
0
 /// <summary>
 /// Creates an association between a source tag and a target tag, together
 /// with the edit operation that relates them.
 /// </summary>
 /// <param name="sourceTag">The paired tag stored in <c>SourceTag</c>.</param>
 /// <param name="targetTag">The paired tag stored in <c>TargetTag</c>.</param>
 /// <param name="op">The edit operation stored in <c>Operation</c>.</param>
 public TagAssociation(
     PairedTag sourceTag,
     PairedTag targetTag,
     Core.EditDistance.EditOperation op)
 {
     this.Operation = op;
     this.SourceTag = sourceTag;
     this.TargetTag = targetTag;
 }
Esempio n. 3
0
        /// <summary>
        /// Determines whether the given object is a paired tag of exactly the same
        /// runtime type with the same <c>Start</c>, <c>End</c> and <c>Anchor</c> values.
        /// See also <see cref="M:System.Object.Equals(object)"/>.
        /// </summary>
        /// <param name="obj">The object to compare with the current object.</param>
        /// <returns>
        /// true if <paramref name="obj"/> is non-null, has the same runtime type as this
        /// instance, and agrees on start, end and anchor; otherwise, false.
        /// </returns>
        public override bool Equals(object obj)
        {
            // Reject null and any object whose runtime type differs from ours.
            if (obj == null || obj.GetType() != this.GetType())
            {
                return false;
            }

            // The runtime-type check above guarantees that this cast succeeds.
            PairedTag candidate = (PairedTag)obj;

            return candidate.Start == this.Start
                   && candidate.End == this.End
                   && candidate.Anchor == this.Anchor;
        }
Esempio n. 4
0
        /// <summary>
        /// Appends a new tag association and registers the start and end positions
        /// of each non-null tag in the corresponding position index.
        /// </summary>
        /// <param name="srcTag">The source tag, or null if there is none.</param>
        /// <param name="trgTag">The target tag, or null if there is none.</param>
        /// <param name="op">
        /// The edit operation to record. When it is <c>Undefined</c>, the operation is
        /// derived from the arguments: <c>Insert</c> if <paramref name="srcTag"/> is null,
        /// <c>Delete</c> if <paramref name="trgTag"/> is null, and <c>Change</c> otherwise.
        /// </param>
        /// <remarks>
        /// Debug builds assert that at least one tag is non-null and that every supplied
        /// tag starts before it ends.
        /// </remarks>
        public void Add(PairedTag srcTag, PairedTag trgTag,
                        Core.EditDistance.EditOperation op)
        {
            bool hasSource = srcTag != null;
            bool hasTarget = trgTag != null;

            System.Diagnostics.Debug.Assert(hasSource || hasTarget);

            if (op == Core.EditDistance.EditOperation.Undefined)
            {
                // No explicit operation given: infer it from which side is missing.
                op = !hasSource
                    ? Core.EditDistance.EditOperation.Insert
                    : (!hasTarget
                        ? Core.EditDistance.EditOperation.Delete
                        : Core.EditDistance.EditOperation.Change);
            }

            // The new association is appended, so its index is the current count.
            int associationIndex = _Associations.Count;
            TagAssociation association = new TagAssociation(srcTag, trgTag, op);

            _Associations.Add(association);

            if (hasSource)
            {
                System.Diagnostics.Debug.Assert(srcTag.Start < srcTag.End);

                // Both boundary positions of the source tag map to the same association.
                _SrcPositionIdx.Add(srcTag.Start, associationIndex);
                _SrcPositionIdx.Add(srcTag.End, associationIndex);
            }

            if (hasTarget)
            {
                System.Diagnostics.Debug.Assert(trgTag.Start < trgTag.End);

                // Both boundary positions of the target tag map to the same association.
                _TrgPositionIdx.Add(trgTag.Start, associationIndex);
                _TrgPositionIdx.Add(trgTag.End, associationIndex);
            }
        }
Esempio n. 5
0
 /// <summary>
 /// Adds an association between the two tags without naming an edit operation;
 /// forwards to the three-argument overload with <c>EditOperation.Undefined</c>.
 /// </summary>
 /// <param name="srcTag">The source tag passed through to the three-argument overload.</param>
 /// <param name="trgTag">The target tag passed through to the three-argument overload.</param>
 public void Add(PairedTag srcTag, PairedTag trgTag)
 {
     Core.EditDistance.EditOperation unspecified
         = Core.EditDistance.EditOperation.Undefined;

     Add(srcTag, trgTag, unspecified);
 }
Esempio n. 6
0
        /// <summary>
        /// Builds a score table with one row per source paired tag and one column per
        /// target paired tag. Each score is derived from an alignment of the token
        /// positions up to the tags' start (or end) positions.
        /// </summary>
        /// <param name="similarityMatrix">
        /// Supplies the source and target token lists and is handed to the score provider.
        /// </param>
        /// <param name="srcPairedTags">The source paired tags (rows of the result).</param>
        /// <param name="trgPairedTags">The target paired tags (columns of the result).</param>
        /// <param name="useEndPositions">
        /// If true, the tags' end positions bound the alignment and no span penalty is
        /// applied; if false, the start positions are used and half the absolute
        /// difference of the two tag spans is subtracted from the score.
        /// </param>
        /// <returns>
        /// A <c>[source tag, target tag]</c> matrix of scores. Cells for which the aligner
        /// returned no result keep their default value of 0.
        /// </returns>
        private static int[,] ComputeTagAssociationScores(SimilarityMatrix similarityMatrix,
                                                          TagPairs srcPairedTags,
                                                          TagPairs trgPairedTags,
                                                          bool useEndPositions)
        {
            // Original author's intent: this should mostly give a first-come first-served
            //  alignment, but hopefully yields better associations for nested tags.
            // For every source tag, an LCS-based score to every target tag is computed.

            int[,] scores = new int[srcPairedTags.Count, trgPairedTags.Count];

            // NOTE(review): the meaning of the 0.75 threshold and the 'true' flag is
            //  defined by TokenIndexLCSScoreProvider, which is not visible here.
            TokenIndexLCSScoreProvider scoreProvider
                = new TokenIndexLCSScoreProvider(similarityMatrix, 0.75, true);

            // The aligner works on token indices, so both sequences are simply 0..Count-1.
            int sourceTokenCount = similarityMatrix.SourceTokens.Count;
            List <int> sourceIndices = new List <int>(sourceTokenCount);
            for (int i = 0; i < sourceTokenCount; ++i)
            {
                sourceIndices.Add(i);
            }

            int targetTokenCount = similarityMatrix.TargetTokens.Count;
            List <int> targetIndices = new List <int>(targetTokenCount);
            for (int i = 0; i < targetTokenCount; ++i)
            {
                targetIndices.Add(i);
            }

            SequenceAlignmentComputer <int> aligner
                = new SequenceAlignmentComputer <int>(sourceIndices,
                                                      targetIndices, scoreProvider, null, 1, 1);

            // Both tag collections are walked from the last tag down to the first.
            for (int s = srcPairedTags.Count - 1; s >= 0; --s)
            {
                PairedTag sourcePair = srcPairedTags[s];
                int sourceLimit = useEndPositions ? sourcePair.End : sourcePair.Start;

                for (int t = trgPairedTags.Count - 1; t >= 0; --t)
                {
                    PairedTag targetPair = trgPairedTags[t];
                    int targetLimit = useEndPositions ? targetPair.End : targetPair.Start;

                    List <AlignedSubstring> alignment
                        = aligner.Compute(sourceLimit, targetLimit);

                    if (alignment == null || alignment.Count <= 0)
                    {
                        // Nothing aligned: leave the cell at its default of 0.
                        continue;
                    }

                    System.Diagnostics.Debug.Assert(alignment.Count == 1);

                    // The score is the common subsequence length minus the items that
                    //  were deleted (source side) or inserted (target side).
                    int common = alignment[0].Score;
                    int score = common
                                - (sourceLimit - common)
                                - (targetLimit - common);

                    // Penalize large differences in the spanned width, but not when the
                    //  end positions are already included in the LCS.
                    int malus = useEndPositions
                        ? 0
                        : Math.Abs(GetTagSpan(sourcePair) - GetTagSpan(targetPair)) / 2;

                    scores[s, t] = score - malus;
                }
            }

            return scores;
        }
Esempio n. 7
0
 /// <summary>
 /// Creates an association between a source tag and a target tag without
 /// specifying an edit operation.
 /// </summary>
 /// <param name="sourceTag">The source tag forwarded to the three-argument constructor.</param>
 /// <param name="targetTag">The target tag forwarded to the three-argument constructor.</param>
 public TagAssociation(PairedTag sourceTag, PairedTag targetTag)
     // Chains to the three-argument constructor with the operation left as Undefined.
     : this(sourceTag, targetTag, Core.EditDistance.EditOperation.Undefined)
 {
 }