/// <summary>
/// Initializes a word from its original text, its POS tagger output type
/// and an optional start index.
/// </summary>
/// <param name="original">Value assigned to <c>Original</c>.</param>
/// <param name="type">Value assigned to <c>Type</c>.</param>
/// <param name="startIndex">Value assigned to <c>StartIndex</c>; -1 when the caller omits it.</param>
public Word(string original, PosTaggerOutputType type, int startIndex = -1)
{
    this.Original   = original;
    this.Type       = type;
    this.StartIndex = startIndex;

    // A new word always starts with an empty error-code string.
    this.ErrorCodes = "";
}
/// <summary>
/// Returns the short text label for a POS tagger output type.
/// </summary>
/// <param name="posTaggerOutputType">The tag to convert.</param>
/// <returns>
/// The abbreviation assigned to the tag below; every value without an explicit
/// case label (this includes <c>PosTaggerOutputType.Other</c>) yields "Other".
/// </returns>
public static string ToText(this PosTaggerOutputType posTaggerOutputType)
{
    string label;

    switch (posTaggerOutputType)
    {
        case PosTaggerOutputType.Article:             label = "Article";        break;
        case PosTaggerOutputType.Adjective:           label = "Adj";            break;
        case PosTaggerOutputType.AdjectivePronoun:    label = "AdjPron";        break;
        case PosTaggerOutputType.Adverb:              label = "Adv";            break;
        case PosTaggerOutputType.AdverbialParticiple: label = "AdvPart";        break;
        case PosTaggerOutputType.AdverbialPronoun:    label = "AdvPron";        break;
        case PosTaggerOutputType.AuxiliaryVerb:       label = "AuxVerb";        break;
        case PosTaggerOutputType.Conjunction:         label = "Conj";           break;
        case PosTaggerOutputType.Gerund:              label = "Gerund";         break;
        case PosTaggerOutputType.Infinitive:          label = "Inf";            break;
        case PosTaggerOutputType.Interjection:        label = "Intr";           break;
        case PosTaggerOutputType.ModalVerb:           label = "ModalVerb";      break;
        case PosTaggerOutputType.Noun:                label = "Noun";           break;
        case PosTaggerOutputType.Numeral:             label = "Num";            break;
        case PosTaggerOutputType.PastParticiple:      label = "PastParticiple"; break;
        case PosTaggerOutputType.Participle:          label = "Part";           break;
        case PosTaggerOutputType.Particle:            label = "Pr";             break;
        case PosTaggerOutputType.PossessivePronoun:   label = "PosPron";        break;
        case PosTaggerOutputType.Predicate:           label = "Pred";           break;
        case PosTaggerOutputType.Preposition:         label = "Prep";           break;
        case PosTaggerOutputType.Pronoun:             label = "Pron";           break;
        case PosTaggerOutputType.Punctuation:         label = "Punct";          break;
        case PosTaggerOutputType.Verb:                label = "Verb";           break;

        // PosTaggerOutputType.Other and anything not listed above.
        default:                                      label = "Other";          break;
    }

    return label;
}
/// <summary>
/// Returns the single-letter code of a POS tagger output type as a byte.
/// </summary>
/// <param name="posTaggerOutputType">The tag to convert.</param>
/// <returns>
/// The byte value of the upper-case letter assigned to the tag below; every value
/// without an explicit case label (this includes <c>PosTaggerOutputType.Other</c>)
/// yields the byte value of 'O'.
/// </returns>
public static byte ToCrfByte(this PosTaggerOutputType posTaggerOutputType)
{
    char code;

    switch (posTaggerOutputType)
    {
        case PosTaggerOutputType.Adjective:           code = 'J'; break;
        case PosTaggerOutputType.AdjectivePronoun:    code = 'R'; break;
        case PosTaggerOutputType.Adverb:              code = 'D'; break;
        case PosTaggerOutputType.AdverbialParticiple: code = 'X'; break;
        case PosTaggerOutputType.AdverbialPronoun:    code = 'H'; break;
        case PosTaggerOutputType.Article:             code = 'A'; break;
        case PosTaggerOutputType.AuxiliaryVerb:       code = 'G'; break;
        case PosTaggerOutputType.Conjunction:         code = 'C'; break;
        case PosTaggerOutputType.Gerund:              code = 'L'; break;
        case PosTaggerOutputType.Infinitive:          code = 'F'; break;
        case PosTaggerOutputType.Interjection:        code = 'I'; break;
        case PosTaggerOutputType.ModalVerb:           code = 'K'; break;
        case PosTaggerOutputType.Noun:                code = 'N'; break;
        case PosTaggerOutputType.Numeral:             code = 'M'; break;
        case PosTaggerOutputType.PastParticiple:      code = 'B'; break;
        case PosTaggerOutputType.Participle:          code = 'Z'; break;
        case PosTaggerOutputType.Particle:            code = 'W'; break;
        case PosTaggerOutputType.PossessivePronoun:   code = 'S'; break;
        case PosTaggerOutputType.Preposition:         code = 'E'; break;
        case PosTaggerOutputType.Pronoun:             code = 'Y'; break;
        case PosTaggerOutputType.Punctuation:         code = 'T'; break;
        case PosTaggerOutputType.Verb:                code = 'V'; break;

        // PosTaggerOutputType.Other and anything not listed above.
        default:                                      code = 'O'; break;
    }

    // Every code above is an ASCII letter, so the narrowing cast is lossless.
    return (byte)code;
}
/// <summary>
/// Converts a POS tagger output type to the corresponding part of speech.
/// </summary>
/// <param name="posTaggerOutputType">The tag to convert.</param>
/// <returns>
/// The mapped <c>PartOfSpeechEnum</c> value, or <c>null</c> when the tag has no
/// case label below. Unmapped tags do not throw.
/// </returns>
internal static PartOfSpeechEnum? ToPartOfSpeech(PosTaggerOutputType posTaggerOutputType)
{
    // Stays null for every tag that has no case label below.
    PartOfSpeechEnum? partOfSpeech = null;

    switch (posTaggerOutputType)
    {
        case PosTaggerOutputType.Adjective:
            partOfSpeech = PartOfSpeechEnum.Adjective;
            break;

        case PosTaggerOutputType.Adverb:
            partOfSpeech = PartOfSpeechEnum.Adverb;
            break;

        case PosTaggerOutputType.Article:
            partOfSpeech = PartOfSpeechEnum.Article;
            break;

        case PosTaggerOutputType.Conjunction:
            partOfSpeech = PartOfSpeechEnum.Conjunction;
            break;

        case PosTaggerOutputType.Interjection:
            partOfSpeech = PartOfSpeechEnum.Interjection;
            break;

        case PosTaggerOutputType.Noun:
            partOfSpeech = PartOfSpeechEnum.Noun;
            break;

        case PosTaggerOutputType.Numeral:
            partOfSpeech = PartOfSpeechEnum.Numeral;
            break;

        case PosTaggerOutputType.Other:
            partOfSpeech = PartOfSpeechEnum.Other;
            break;

        case PosTaggerOutputType.Particle:
            partOfSpeech = PartOfSpeechEnum.Particle;
            break;

        case PosTaggerOutputType.Predicate:
            partOfSpeech = PartOfSpeechEnum.Predicate;
            break;

        case PosTaggerOutputType.Preposition:
            partOfSpeech = PartOfSpeechEnum.Preposition;
            break;

        // The adjective-pronoun and adverbial-pronoun tags are grouped with the
        // pronouns, not with adjectives/adverbs.
        case PosTaggerOutputType.Pronoun:
        case PosTaggerOutputType.PossessivePronoun:
        case PosTaggerOutputType.AdjectivePronoun:
        case PosTaggerOutputType.AdverbialPronoun:
            partOfSpeech = PartOfSpeechEnum.Pronoun;
            break;

        // All verb-like tags listed here collapse to Verb.
        case PosTaggerOutputType.Verb:
        case PosTaggerOutputType.Infinitive:
        case PosTaggerOutputType.AdverbialParticiple:
        case PosTaggerOutputType.AuxiliaryVerb:
        case PosTaggerOutputType.Participle:
            partOfSpeech = PartOfSpeechEnum.Verb;
            break;
    }

    return partOfSpeech;
}
/// <summary>
/// Maps a POS tagger output type onto a part of speech.
/// </summary>
/// <param name="posTaggerOutputType">The tag to map.</param>
/// <returns>
/// The matching <c>PartOfSpeechEnum</c> value; <c>null</c> for any tag that is not
/// listed in the switch below.
/// </returns>
internal static PartOfSpeechEnum? ToPartOfSpeech(PosTaggerOutputType posTaggerOutputType)
{
    switch (posTaggerOutputType)
    {
        // Several pronoun-like tags share one part of speech.
        case PosTaggerOutputType.Pronoun:
        case PosTaggerOutputType.PossessivePronoun:
        case PosTaggerOutputType.AdjectivePronoun:
        case PosTaggerOutputType.AdverbialPronoun:
            return PartOfSpeechEnum.Pronoun;

        // Several verb-like tags share one part of speech.
        case PosTaggerOutputType.Verb:
        case PosTaggerOutputType.Infinitive:
        case PosTaggerOutputType.AdverbialParticiple:
        case PosTaggerOutputType.AuxiliaryVerb:
        case PosTaggerOutputType.Participle:
            return PartOfSpeechEnum.Verb;

        // One-to-one mappings.
        case PosTaggerOutputType.Adjective:    return PartOfSpeechEnum.Adjective;
        case PosTaggerOutputType.Adverb:       return PartOfSpeechEnum.Adverb;
        case PosTaggerOutputType.Article:      return PartOfSpeechEnum.Article;
        case PosTaggerOutputType.Conjunction:  return PartOfSpeechEnum.Conjunction;
        case PosTaggerOutputType.Interjection: return PartOfSpeechEnum.Interjection;
        case PosTaggerOutputType.Noun:         return PartOfSpeechEnum.Noun;
        case PosTaggerOutputType.Numeral:      return PartOfSpeechEnum.Numeral;
        case PosTaggerOutputType.Other:        return PartOfSpeechEnum.Other;
        case PosTaggerOutputType.Particle:     return PartOfSpeechEnum.Particle;
        case PosTaggerOutputType.Predicate:    return PartOfSpeechEnum.Predicate;
        case PosTaggerOutputType.Preposition:  return PartOfSpeechEnum.Preposition;

        // No mapping for the remaining tags.
        default:
            return null;
    }
}
/// <summary>
/// Returns the CRF code of a POS tagger output type as a character.
/// </summary>
/// <param name="posTaggerOutputType">The tag to convert.</param>
/// <returns>The result of <c>ToCrfByte</c> for the tag, converted to <c>char</c>.</returns>
public static char ToCrfChar(this PosTaggerOutputType posTaggerOutputType)
{
    byte crfByte = posTaggerOutputType.ToCrfByte();
    return (char)crfByte;
}
/// <summary>
/// Builds the list of attribute strings for one word, for use by the model builder.
/// </summary>
/// <remarks>
/// The shared <c>_Result4ModelBuilder</c> list is cleared, refilled and returned,
/// so the result of a previous call is overwritten by the next one.
/// Contents, in order:
/// <list type="number">
/// <item>the CRF character of <paramref name="posTaggerOutputType"/> as a one-character string;</item>
/// <item>one string per n-gram returned by the template file for this word position:
/// the n-gram header followed by its attribute values separated by <c>VERTICAL_SLASH</c>;</item>
/// <item>the begin-of-sentence marker when <paramref name="wordIndex"/> is 0, otherwise the
/// end-of-sentence marker when it is the last index. A one-word sentence therefore gets
/// only the begin-of-sentence marker.</item>
/// </list>
/// </remarks>
/// <param name="wordIndex">Index of the word the attributes are built for.</param>
/// <param name="wordsCount">Number of words in the sentence.</param>
/// <param name="posTaggerOutputType">Tag whose CRF character becomes the first list item.</param>
/// <returns>The shared <c>_Result4ModelBuilder</c> instance.</returns>
private unsafe List<string> GetPosTaggerAttributes4ModelBuilder(
    int wordIndex, int wordsCount, PosTaggerOutputType posTaggerOutputType)
{
    _Result4ModelBuilder.Clear();
    _Result4ModelBuilder.Add(posTaggerOutputType.ToCrfChar().ToString());

    var templates     = _CrfTemplateFile.GetCRFNgramsWhichCanTemplateBeApplied(wordIndex, wordsCount);
    var templateCount = templates.Length;
    for (var t = 0; t < templateCount; t++)
    {
        var template = templates[t];

        // Start every value at the buffer base: header first, attribute values after it.
        _AttributeBufferPtr = template.CopyAttributesHeaderChars(_AttributeBufferPtrBase);

        // NOTE(review): AppendAttrValue presumably writes at _AttributeBufferPtr and
        // advances it; the length computation below relies on that - confirm.
        var attributeCount = template.CRFAttributesLength;
        if (1 <= attributeCount && attributeCount <= 3)
        {
            // Up to three attributes are read from the dedicated CRFAttribute_N members.
            AppendAttrValue(wordIndex, template.CRFAttribute_0);

            if (2 <= attributeCount)
            {
                *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                AppendAttrValue(wordIndex, template.CRFAttribute_1);
            }

            if (attributeCount == 3)
            {
                *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                AppendAttrValue(wordIndex, template.CRFAttribute_2);
            }
        }
        else
        {
            // Any other count goes through the CRFAttributes collection.
            for (var a = 0; a < attributeCount; a++)
            {
                AppendAttrValue(wordIndex, template.CRFAttributes[a]);
                *(_AttributeBufferPtr++) = VERTICAL_SLASH;
            }

            // Remove the trailing '|'.
            _AttributeBufferPtr--;
        }

        var valueLength = (int)(_AttributeBufferPtr - _AttributeBufferPtrBase);
        _Result4ModelBuilder.Add(new string(_AttributeBufferPtrBase, 0, valueLength));
    }

    // Sentence-boundary marker; the first-word check wins over the last-word check.
    var lastWordIndex = wordsCount - 1;
    if (wordIndex == 0)
    {
        _Result4ModelBuilder.Add(xlat_Unsafe.BEGIN_OF_SENTENCE);
    }
    else if (wordIndex == lastWordIndex)
    {
        _Result4ModelBuilder.Add(xlat_Unsafe.END_OF_SENTENCE);
    }

    return _Result4ModelBuilder;
}
/// <summary>
/// Initializes a word from its original text and its POS tagger output type.
/// </summary>
/// <param name="original">Value assigned to <c>Original</c>.</param>
/// <param name="type">Value assigned to <c>Type</c>.</param>
public Word(string original, PosTaggerOutputType type)
{
    this.Original = original;
    this.Type     = type;

    // Every word gets its own, initially empty, error list.
    this.Errors   = new List<Error>();
}