Exemplo n.º 1
0
        /// <summary>Builds the initial parser state from a sentence whose words already carry tags.</summary>
        /// <remarks>
        /// Every input word is turned into a two-node subtree: a tag node whose only child
        /// is the word node.  A <c>CoreLabel</c> input is reused as the word label (and has
        /// its index and head annotations overwritten); any other input is copied into a
        /// fresh <c>CoreLabel</c> and must implement <c>IHasTag</c>.
        /// </remarks>
        /// <typeparam name="_T0">Element type of the sentence; must implement <c>IHasWord</c>.</typeparam>
        /// <param name="words">The tagged sentence, in order.</param>
        /// <returns>A <c>State</c> constructed from the list of tag-over-word subtrees.</returns>
        /// <exception cref="ArgumentException">
        /// A non-<c>CoreLabel</c> word does not implement <c>IHasTag</c>, or a word's tag is null.
        /// </exception>
        public static State InitialStateFromTaggedSentence <_T0>(IList <_T0> words)
            where _T0 : IHasWord
        {
            IList <Tree> preterminals = Generics.NewArrayList();

            // "position" is the 1-based index that is written onto the labels.
            for (int position = 1; position <= words.Count; position++)
            {
                IHasWord  token     = words[position - 1];
                CoreLabel wordLabel = token as CoreLabel;
                string    tag;

                if (wordLabel != null)
                {
                    // Already a CoreLabel: reuse it and read its stored tag.
                    tag = wordLabel.Tag();
                }
                else
                {
                    // Copy the surface form into a new label, then require a tag source.
                    wordLabel = new CoreLabel();
                    wordLabel.SetValue(token.Word());
                    wordLabel.SetWord(token.Word());

                    IHasTag tagged = token as IHasTag;
                    if (tagged == null)
                    {
                        throw new ArgumentException("Expected tagged words");
                    }
                    tag = tagged.Tag();
                    wordLabel.SetTag(tag);
                }

                if (tag == null)
                {
                    throw new ArgumentException("Input word not tagged");
                }

                CoreLabel tagLabel = new CoreLabel();
                tagLabel.SetValue(tag);

                // Index from 1: downstream tools expect it, and the parser itself relies on
                // the index, so any index already present on the label is overwritten.
                wordLabel.SetIndex(position);
                tagLabel.SetIndex(position);

                LabeledScoredTreeNode wordNode = new LabeledScoredTreeNode(wordLabel);
                LabeledScoredTreeNode tagNode  = new LabeledScoredTreeNode(tagLabel);
                tagNode.AddChild(wordNode);

                // Both labels point at the same head word / head tag labels.
                // TODO: can we get away with not setting these on the wordLabel?
                wordLabel.Set(typeof(TreeCoreAnnotations.HeadWordLabelAnnotation), wordLabel);
                wordLabel.Set(typeof(TreeCoreAnnotations.HeadTagLabelAnnotation), tagLabel);
                tagLabel.Set(typeof(TreeCoreAnnotations.HeadWordLabelAnnotation), wordLabel);
                tagLabel.Set(typeof(TreeCoreAnnotations.HeadTagLabelAnnotation), tagLabel);

                preterminals.Add(tagNode);
            }

            return new State(preterminals);
        }
 /// <summary>Returns the text of a token supplied as a word object, a plain string, or a core map.</summary>
 /// <param name="o">
 /// The token.  Checked in this order: <c>IHasWord</c> (its <c>Word()</c> is returned),
 /// <c>string</c> (returned as is), <c>ICoreMap</c> (its <c>TextAnnotation</c> value is returned).
 /// </param>
 /// <returns>The token text obtained from the first matching case.</returns>
 /// <exception cref="Exception">The argument is none of the three supported kinds (including null).</exception>
 private static string GetString(object o)
 {
     IHasWord wordHolder = o as IHasWord;
     if (wordHolder != null)
     {
         return wordHolder.Word();
     }

     string plainText = o as string;
     if (plainText != null)
     {
         return plainText;
     }

     ICoreMap map = o as ICoreMap;
     if (map != null)
     {
         return map.Get(typeof(CoreAnnotations.TextAnnotation));
     }

     throw new Exception("Expected token to be either Word or String.");
 }
        /// <summary>Adds a sentence final punctuation mark to sentences that lack one.</summary>
        /// <remarks>
        /// The last three tokens (fewer for shorter sentences) are inspected, which
        /// allows for trailing close parentheses and the like.  For a token with a
        /// non-empty tag only the tag is consulted, via
        /// <c>IsSentenceFinalPunctuationTag</c>; otherwise the word is consulted, via
        /// <c>IsPunctuationWord</c>.  If no token matches, the first sentence final
        /// punctuation word of the treebank language pack (when it has any) is
        /// appended to <paramref name="sentence"/>.
        /// </remarks>
        /// <param name="sentence">The sentence to check; may be appended to.</param>
        /// <param name="length">The length of the sentence (just to avoid recomputation)</param>
        /// <returns>
        /// <c>false</c> if one of the inspected tokens matched and the sentence was left
        /// alone; <c>true</c> otherwise.
        /// </returns>
        private bool AddSentenceFinalPunctIfNeeded(IList <IHasWord> sentence, int length)
        {
            // Lowest position that is still inspected, clamped to the sentence start.
            int windowStart = (length - 3 < 0) ? 0 : length - 3;
            ITreebankLanguagePack tlp = op.tlpParams.TreebankLanguagePack();

            for (int pos = length - 1; pos >= windowStart; pos--)
            {
                IHasWord token = sentence[pos];

                // An object (e.g., CoreLabel) can implement HasTag without storing a tag,
                // so the tag only counts when it is actually present.  When it is, it is
                // the sole criterion, since word tokens can be ambiguous.
                IHasTag tagged = token as IHasTag;
                string  tag    = (tagged != null) ? tagged.Tag() : null;
                bool    hasTag = tag != null && !tag.IsEmpty();

                bool alreadyPunctuated = hasTag
                    ? tlp.IsSentenceFinalPunctuationTag(tag)
                    : tlp.IsPunctuationWord(token.Word());
                if (alreadyPunctuated)
                {
                    return false;
                }
            }

            // Nothing matched in the window, so append a mark.
            if (op.testOptions.verbose)
            {
                log.Info("Adding missing final punctuation to sentence.");
            }
            string[] finalMarks = tlp.SentenceFinalPunctuationWords();
            if (finalMarks.Length > 0)
            {
                sentence.Add(new Word(finalMarks[0]));
            }
            return true;
        }
Exemplo n.º 4
0
 /// <summary>Writes a table with one row per document position and one column per sample.</summary>
 /// <remarks>
 /// Each row starts with the word at that position (the literal text "null" when the
 /// document entry is null) passed through <c>StringUtils.PadOrTrim(…, 10)</c>, followed
 /// for every sample by a space and that sample's value at the position passed through
 /// <c>StringUtils.PadLeft(…, 2)</c>.
 /// </remarks>
 /// <param name="samples">The samples; each element is cast to <c>int[]</c>.</param>
 /// <param name="out">The writer that receives the table.</param>
 public virtual void PrintSamples(IList samples, TextWriter @out)
 {
     int row = 0;
     while (row < document.Count)
     {
         IHasWord token  = (IHasWord)document[row];
         string   header = (token == null) ? "null" : token.Word();
         @out.Write(StringUtils.PadOrTrim(header, 10));

         foreach (object sample in samples)
         {
             int[] labels = (int[])sample;
             @out.Write(" " + StringUtils.PadLeft(labels[row], 2));
         }

         @out.WriteLine();
         row++;
     }
 }
Exemplo n.º 5
0
        /// <summary>Splits the Word w on the last occurrence of the character splitChar.</summary>
        /// <param name="w">The word whose text may have the form "word" + splitChar + "tag".</param>
        /// <returns>
        /// A <c>TaggedWord</c> built from the text before and after the last splitChar, or
        /// <paramref name="w"/> itself when splitChar is 0, when the text contains no
        /// splitChar, or when the only splitChar is the first character.
        /// </returns>
        private IHasWord SplitTag(IHasWord w)
        {
            if (splitChar != 0)
            {
                string text      = w.Word();
                int    separator = text.LastIndexOf(splitChar);

                // A separator at position 0 would leave an empty word, which is not allowed.
                if (separator > 0)
                {
                    return new TaggedWord(
                        Sharpen.Runtime.Substring(text, 0, separator),
                        Sharpen.Runtime.Substring(text, separator + 1, text.Length));
                }
            }

            return w;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Rewrites plain double-quote tokens (<c>"</c>) as alternating opening (<c>``</c>)
        /// and closing (<c>''</c>) quote tokens.
        /// </summary>
        /// <remarks>
        /// If the last token is a double quote, quotes are paired up walking backwards from
        /// the end, so that the final quote becomes a closing quote.  Otherwise they are
        /// paired up walking forwards, so that the first quote becomes an opening quote.
        /// The tokens of <paramref name="input"/> are modified in place through
        /// <c>SetWord</c>; the returned list is a new list holding those same tokens in
        /// their original order.
        /// </remarks>
        /// <param name="input">The tokens of one sentence.</param>
        /// <returns>A new list with the (possibly rewritten) tokens in input order.</returns>
        private static IList <IHasWord> FixQuotes(IList <IHasWord> input)
        {
            int inputSize = input.Count;

            if (inputSize == 0)
            {
                return new List <IHasWord>();
            }

            // A trailing quote means the pairing is anchored at the end of the sentence.
            bool fromEnd = input[inputSize - 1].Word().Equals("\"");

            // Walking backwards the first quote met is a closing one;
            // walking forwards the first quote met is an opening one.
            bool begin = !fromEnd;
            int  step  = fromEnd ? -1 : 1;

            for (int i = fromEnd ? inputSize - 1 : 0; i >= 0 && i < inputSize; i += step)
            {
                IHasWord hw = input[i];
                if (hw.Word().Equals("\""))
                {
                    hw.SetWord(begin ? "``" : "\'\'");
                    begin = !begin;
                }
                // otherwise leave it alone
            }

            // The result is built as a List because the declared return type is IList<T>,
            // which System.Collections.Generic.LinkedList<T> does not implement.  Both
            // walking directions produce the tokens in input order, so a plain copy is
            // equivalent.
            return new List <IHasWord>(input);
        }
        /// <summary>
        /// Builds the initial items for a sentence: one item for every tagging that the
        /// lexicon's rule iterator yields for each word position.
        /// </summary>
        /// <remarks>
        /// Besides building the returned list, this method replaces the <c>words</c>,
        /// <c>taggedWordList</c> and <c>originalLabels</c> members with fresh arrays sized
        /// to the sentence, and repeatedly overwrites the members of <c>tempEdge</c>.
        /// </remarks>
        /// <typeparam name="_T0">Element type of the sentence; must implement <c>IHasWord</c>.</typeparam>
        /// <param name="wordList">The words of the sentence, in order.</param>
        /// <returns>The list of items created through <c>MakeInitialItem</c>.</returns>
        protected internal virtual IList <Item> MakeInitialItems <_T0>(IList <_T0> wordList)
            where _T0 : IHasWord
        {
            IList <Item> itemList = new List <Item>();
            int          length   = wordList.Count;
            int          numTags  = tagIndex.Size();

            // Per-sentence arrays, indexed by word position.
            words          = new int[length];
            taggedWordList = new IList[length];
            // Counts every tagging turned into an item; only used for the verbose log below.
            int terminalCount = 0;

            originalLabels = new CoreLabel[wordList.Count];
            for (int i = 0; i < length; i++)
            {
                taggedWordList[i] = new List <IntTaggedWord>(numTags);
                IHasWord wordObject = wordList[i];
                // Remember the input label only when it is a CoreLabel; for any other
                // input type this slot keeps the array's default value.
                if (wordObject is CoreLabel)
                {
                    originalLabels[i] = (CoreLabel)wordObject;
                }
                string wordStr = wordObject.Word();
                // Word context (e.g., morphosyntactic info); an empty string is treated as absent.
                string wordContextStr = null;
                if (wordObject is IHasContext)
                {
                    wordContextStr = ((IHasContext)wordObject).OriginalText();
                    if (string.Empty.Equals(wordContextStr))
                    {
                        wordContextStr = null;
                    }
                }
                // Words missing from the word index are replaced by the unknown-word symbol.
                if (!wordIndex.Contains(wordStr))
                {
                    wordStr = LexiconConstants.UnknownWord;
                }
                int word = wordIndex.IndexOf(wordStr);
                words[i] = word;
                // One item per tagging the lexicon proposes for this word at this position.
                for (IEnumerator <IntTaggedWord> tagI = lex.RuleIteratorByWord(word, i, wordContextStr); tagI.MoveNext();)
                {
                    IntTaggedWord tagging = tagI.Current;
                    int           tag     = tagging.tag;
                    //String curTagStr = tagIndex.get(tag);
                    //if (!tagStr.equals("") && !tagStr.equals(curTagStr))
                    //  continue;
                    // The state is looked up by the tag's entry in the tag index.
                    int state = stateIndex.IndexOf(tagIndex.Get(tag));
                    //itemList.add(makeInitialItem(i,tag,state,1.0*tagging.score));
                    // THIS WILL CAUSE BUGS!!!  Don't use with another A* scorer
                    // tempEdge is not a local: it is overwritten for each tagging with the
                    // single-word span [i, i + 1) and handed to the scorer.
                    tempEdge.state = state;
                    tempEdge.head  = i;
                    tempEdge.start = i;
                    tempEdge.end   = i + 1;
                    tempEdge.tag   = tag;
                    itemList.Add(MakeInitialItem(i, tag, state, scorer.IScore(tempEdge)));
                    terminalCount++;
                    taggedWordList[i].Add(new IntTaggedWord(word, tag));
                }
            }
            if (op.testOptions.verbose)
            {
                log.Info("Terminals (# of tag edges in chart): " + terminalCount);
            }
            return(itemList);
        }