Exemple #1
0
 /// <summary>
 /// Creates a word from its original text, its part-of-speech tag and an
 /// optional start index.
 /// </summary>
 /// <param name="original">Text stored in <c>Original</c>.</param>
 /// <param name="type">Part-of-speech tag stored in <c>Type</c>.</param>
 /// <param name="startIndex">Value stored in <c>StartIndex</c>; defaults to -1 when omitted.</param>
 public Word(string original, PosTaggerOutputType type, int startIndex = -1)
 {
     this.Original = original;
     this.Type = type;
     this.StartIndex = startIndex;

     // A newly constructed word starts with an empty error-code string.
     this.ErrorCodes = "";
 }
        /// <summary>
        /// Converts a part-of-speech tagger output type into its short text label.
        /// </summary>
        /// <param name="posTaggerOutputType">The tag to convert.</param>
        /// <returns>
        /// The abbreviated label for the tag, or "Other" for any value that has
        /// no dedicated case below.
        /// </returns>
        public static string ToText(this PosTaggerOutputType posTaggerOutputType)
        {
            string label;

            switch (posTaggerOutputType)
            {
                case PosTaggerOutputType.Adjective:           label = "Adj";            break;
                case PosTaggerOutputType.AdjectivePronoun:    label = "AdjPron";        break;
                case PosTaggerOutputType.Adverb:              label = "Adv";            break;
                case PosTaggerOutputType.AdverbialParticiple: label = "AdvPart";        break;
                case PosTaggerOutputType.AdverbialPronoun:    label = "AdvPron";        break;
                case PosTaggerOutputType.Article:             label = "Article";        break;
                case PosTaggerOutputType.AuxiliaryVerb:       label = "AuxVerb";        break;
                case PosTaggerOutputType.Conjunction:         label = "Conj";           break;
                case PosTaggerOutputType.Gerund:              label = "Gerund";         break;
                case PosTaggerOutputType.Infinitive:          label = "Inf";            break;
                case PosTaggerOutputType.Interjection:        label = "Intr";           break;
                case PosTaggerOutputType.ModalVerb:           label = "ModalVerb";      break;
                case PosTaggerOutputType.Noun:                label = "Noun";           break;
                case PosTaggerOutputType.Numeral:             label = "Num";            break;
                case PosTaggerOutputType.Participle:          label = "Part";           break;
                case PosTaggerOutputType.Particle:            label = "Pr";             break;
                case PosTaggerOutputType.PastParticiple:      label = "PastParticiple"; break;
                case PosTaggerOutputType.PossessivePronoun:   label = "PosPron";        break;
                case PosTaggerOutputType.Predicate:           label = "Pred";           break;
                case PosTaggerOutputType.Preposition:         label = "Prep";           break;
                case PosTaggerOutputType.Pronoun:             label = "Pron";           break;
                case PosTaggerOutputType.Punctuation:         label = "Punct";          break;
                case PosTaggerOutputType.Verb:                label = "Verb";           break;

                // Covers PosTaggerOutputType.Other and every value not listed above.
                default:                                      label = "Other";          break;
            }

            return label;
        }
        /// <summary>
        /// Converts a part-of-speech tagger output type into its single-letter
        /// CRF tag, returned as a byte.
        /// </summary>
        /// <param name="posTaggerOutputType">The tag to convert.</param>
        /// <returns>
        /// The byte value of the tag letter, or the byte value of 'O' for any
        /// value that has no dedicated case below. Note that
        /// <c>PosTaggerOutputType.Predicate</c> has no case here and therefore
        /// also yields 'O'.
        /// </returns>
        public static byte ToCrfByte(this PosTaggerOutputType posTaggerOutputType)
        {
            char tag;

            switch (posTaggerOutputType)
            {
                case PosTaggerOutputType.Article:             tag = 'A'; break;
                case PosTaggerOutputType.PastParticiple:      tag = 'B'; break;
                case PosTaggerOutputType.Conjunction:         tag = 'C'; break;
                case PosTaggerOutputType.Adverb:              tag = 'D'; break;
                case PosTaggerOutputType.Preposition:         tag = 'E'; break;
                case PosTaggerOutputType.Infinitive:          tag = 'F'; break;
                case PosTaggerOutputType.AuxiliaryVerb:       tag = 'G'; break;
                case PosTaggerOutputType.AdverbialPronoun:    tag = 'H'; break;
                case PosTaggerOutputType.Interjection:        tag = 'I'; break;
                case PosTaggerOutputType.Adjective:           tag = 'J'; break;
                case PosTaggerOutputType.ModalVerb:           tag = 'K'; break;
                case PosTaggerOutputType.Gerund:              tag = 'L'; break;
                case PosTaggerOutputType.Numeral:             tag = 'M'; break;
                case PosTaggerOutputType.Noun:                tag = 'N'; break;
                case PosTaggerOutputType.AdjectivePronoun:    tag = 'R'; break;
                case PosTaggerOutputType.PossessivePronoun:   tag = 'S'; break;
                case PosTaggerOutputType.Punctuation:         tag = 'T'; break;
                case PosTaggerOutputType.Verb:                tag = 'V'; break;
                case PosTaggerOutputType.Particle:            tag = 'W'; break;
                case PosTaggerOutputType.AdverbialParticiple: tag = 'X'; break;
                case PosTaggerOutputType.Pronoun:             tag = 'Y'; break;
                case PosTaggerOutputType.Participle:          tag = 'Z'; break;

                // Covers PosTaggerOutputType.Other and every value not listed above.
                default:                                      tag = 'O'; break;
            }

            // Every tag letter is ASCII, so narrowing to byte is lossless.
            return (byte)tag;
        }
        /// <summary>
        /// Maps a fine-grained tagger output type onto a part-of-speech value.
        /// </summary>
        /// <param name="posTaggerOutputType">The tagger output type to map.</param>
        /// <returns>
        /// The matching <c>PartOfSpeechEnum</c> value, or <c>null</c> when the
        /// tagger type has no case below.
        /// </returns>
        internal static PartOfSpeechEnum? ToPartOfSpeech(PosTaggerOutputType posTaggerOutputType)
        {
            PartOfSpeechEnum? partOfSpeech;

            switch (posTaggerOutputType)
            {
                // One-to-one mappings.
                case PosTaggerOutputType.Adjective:    partOfSpeech = PartOfSpeechEnum.Adjective;    break;
                case PosTaggerOutputType.Adverb:       partOfSpeech = PartOfSpeechEnum.Adverb;       break;
                case PosTaggerOutputType.Article:      partOfSpeech = PartOfSpeechEnum.Article;      break;
                case PosTaggerOutputType.Conjunction:  partOfSpeech = PartOfSpeechEnum.Conjunction;  break;
                case PosTaggerOutputType.Interjection: partOfSpeech = PartOfSpeechEnum.Interjection; break;
                case PosTaggerOutputType.Noun:         partOfSpeech = PartOfSpeechEnum.Noun;         break;
                case PosTaggerOutputType.Numeral:      partOfSpeech = PartOfSpeechEnum.Numeral;      break;
                case PosTaggerOutputType.Other:        partOfSpeech = PartOfSpeechEnum.Other;        break;
                case PosTaggerOutputType.Particle:     partOfSpeech = PartOfSpeechEnum.Particle;     break;
                case PosTaggerOutputType.Predicate:    partOfSpeech = PartOfSpeechEnum.Predicate;    break;
                case PosTaggerOutputType.Preposition:  partOfSpeech = PartOfSpeechEnum.Preposition;  break;

                // All pronoun flavours (including adjective and adverbial pronouns) map to Pronoun.
                case PosTaggerOutputType.Pronoun:
                case PosTaggerOutputType.PossessivePronoun:
                case PosTaggerOutputType.AdjectivePronoun:
                case PosTaggerOutputType.AdverbialPronoun:
                    partOfSpeech = PartOfSpeechEnum.Pronoun;
                    break;

                // These verb-related forms map to Verb.
                case PosTaggerOutputType.Verb:
                case PosTaggerOutputType.Infinitive:
                case PosTaggerOutputType.AdverbialParticiple:
                case PosTaggerOutputType.AuxiliaryVerb:
                case PosTaggerOutputType.Participle:
                    partOfSpeech = PartOfSpeechEnum.Verb;
                    break;

                // Unlisted tagger types yield null rather than an exception.
                default:
                    partOfSpeech = null;
                    break;
            }

            return partOfSpeech;
        }
        /// <summary>
        /// Translates a tagger output type into a part-of-speech value.
        /// </summary>
        /// <param name="posTaggerOutputType">The tagger output type to translate.</param>
        /// <returns>
        /// The corresponding <c>PartOfSpeechEnum</c> value, or <c>null</c> for
        /// tagger types that have no case below.
        /// </returns>
        internal static PartOfSpeechEnum? ToPartOfSpeech(PosTaggerOutputType posTaggerOutputType)
        {
            switch (posTaggerOutputType)
            {
                // Several tagger types share the Pronoun value.
                case PosTaggerOutputType.AdjectivePronoun:
                case PosTaggerOutputType.AdverbialPronoun:
                case PosTaggerOutputType.PossessivePronoun:
                case PosTaggerOutputType.Pronoun:
                    return PartOfSpeechEnum.Pronoun;

                // Several tagger types share the Verb value.
                case PosTaggerOutputType.AdverbialParticiple:
                case PosTaggerOutputType.AuxiliaryVerb:
                case PosTaggerOutputType.Infinitive:
                case PosTaggerOutputType.Participle:
                case PosTaggerOutputType.Verb:
                    return PartOfSpeechEnum.Verb;

                // The remaining listed types map one-to-one.
                case PosTaggerOutputType.Adjective:
                    return PartOfSpeechEnum.Adjective;
                case PosTaggerOutputType.Adverb:
                    return PartOfSpeechEnum.Adverb;
                case PosTaggerOutputType.Article:
                    return PartOfSpeechEnum.Article;
                case PosTaggerOutputType.Conjunction:
                    return PartOfSpeechEnum.Conjunction;
                case PosTaggerOutputType.Interjection:
                    return PartOfSpeechEnum.Interjection;
                case PosTaggerOutputType.Noun:
                    return PartOfSpeechEnum.Noun;
                case PosTaggerOutputType.Numeral:
                    return PartOfSpeechEnum.Numeral;
                case PosTaggerOutputType.Other:
                    return PartOfSpeechEnum.Other;
                case PosTaggerOutputType.Particle:
                    return PartOfSpeechEnum.Particle;
                case PosTaggerOutputType.Predicate:
                    return PartOfSpeechEnum.Predicate;
                case PosTaggerOutputType.Preposition:
                    return PartOfSpeechEnum.Preposition;

                // Anything not listed above has no part-of-speech equivalent here.
                default:
                    return null;
            }
        }
 /// <summary>
 /// Returns the CRF tag of a tagger output type as a character.
 /// </summary>
 /// <param name="posTaggerOutputType">The tag to convert.</param>
 /// <returns>The result of <c>ToCrfByte</c> converted to <c>char</c>.</returns>
 public static char ToCrfChar(this PosTaggerOutputType posTaggerOutputType)
 {
     byte crfByte = posTaggerOutputType.ToCrfByte();

     return (char)crfByte;
 }
Exemple #7
0
        /// <summary>
        /// Builds the list of CRF attribute strings for one word, for use by the model builder.
        /// The list starts with the word's CRF tag character, followed by one string per
        /// applicable n-gram, and optionally ends with a begin/end-of-sentence marker.
        /// </summary>
        /// <param name="wordIndex">Index of the word within the current sentence.</param>
        /// <param name="wordsCount">Number of words in the current sentence.</param>
        /// <param name="posTaggerOutputType">Tag of the word; emitted as the first list entry.</param>
        /// <returns>
        /// The shared <c>_Result4ModelBuilder</c> list. It is cleared and refilled on every call,
        /// so the contents are overwritten by the next call.
        /// </returns>
        unsafe private List <string> GetPosTaggerAttributes4ModelBuilder(
            int wordIndex, int wordsCount, PosTaggerOutputType posTaggerOutputType)
        {
            var wordsCount_Minus1 = wordsCount - 1;

            // The result list is a reused field, so drop whatever the previous call left in it.
            _Result4ModelBuilder.Clear();

            // First entry: the CRF tag character of the word, as a one-character string.
            _Result4ModelBuilder.Add(/*_Words[ wordIndex ].*/ posTaggerOutputType.ToCrfChar().ToString());

            var ngrams = _CrfTemplateFile.GetCRFNgramsWhichCanTemplateBeApplied(wordIndex, wordsCount);

            for (int i = 0, ngramsLength = ngrams.Length; i < ngramsLength; i++)
            {
                var ngram = ngrams[i];

                // Restart writing at the buffer base for each n-gram.
                // Presumably the returned pointer is the position just past the copied header - TODO confirm.
                _AttributeBufferPtr = ngram.CopyAttributesHeaderChars(_AttributeBufferPtrBase);

                // Attribute values are written one after another, separated by VERTICAL_SLASH.
                // Lengths 1..3 are unrolled; any other length uses the generic loop.
                // Assumes AppendAttrValue advances _AttributeBufferPtr past what it writes - TODO confirm.
                #region [.build attr-values.]
                switch (ngram.CRFAttributesLength)
                {
                case 1:
                    #region
                {
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                }
                    #endregion
                    break;

                case 2:
                    #region
                {
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_0); *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_1);
                }
                    #endregion
                    break;

                case 3:
                    #region
                {
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_0); *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_1); *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_2);
                }
                    #endregion
                    break;

                default:
                    #region
                {
                    // Generic path: a separator is written after every value, including the last one.
                    for (var j = 0; j < ngram.CRFAttributesLength; j++)
                    {
                        AppendAttrValue(wordIndex, ngram.CRFAttributes[j]); *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                    }
                    // Remove the trailing '|'.
                    // NOTE(review): this decrement is unconditional; if CRFAttributesLength were 0 the loop
                    // would write nothing and the pointer would step back over the character before it.
                    // Confirm that a zero-length n-gram cannot reach this branch.
                    _AttributeBufferPtr--;
                }
                    #endregion
                    break;
                }
                #endregion

                // Materialize the characters between the buffer base and the current write position.
                var crfValue = new string( _AttributeBufferPtrBase, 0, (int)(_AttributeBufferPtr - _AttributeBufferPtrBase));
                _Result4ModelBuilder.Add(crfValue);
            }

            // Sentence-boundary marker: the first word gets the begin marker, the last word the end marker.
            // NOTE(review): because of the else-if, a single-word sentence (wordIndex == 0 == wordsCount - 1)
            // only receives the begin marker - confirm this is intended.
            if (wordIndex == 0)
            {
                _Result4ModelBuilder.Add(xlat_Unsafe.BEGIN_OF_SENTENCE);
            }
            else
            if (wordIndex == wordsCount_Minus1)
            {
                _Result4ModelBuilder.Add(xlat_Unsafe.END_OF_SENTENCE);
            }

            return(_Result4ModelBuilder);
        }
Exemple #8
0
 /// <summary>
 /// Creates a word from its original text and its part-of-speech tag.
 /// </summary>
 /// <param name="original">Text stored in <c>Original</c>.</param>
 /// <param name="type">Part-of-speech tag stored in <c>Type</c>.</param>
 public Word(string original, PosTaggerOutputType type)
 {
     this.Original = original;
     this.Type = type;

     // Every word starts with its own empty error list.
     this.Errors = new List<Error>();
 }