C# (CSharp) CoreLabel.GetString 예제들

프로그래밍 언어: C# (CSharp)

클래스/타입: CoreLabel

메소드/함수: GetString

hotexamples.com에서의 예제들: 10

C# (CSharp) CoreLabel.GetString - 10개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 C# (CSharp)의 CoreLabel.GetString에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

Set(30)

Word(30)

Get(30)

SetWord(27)

SetValue(27)

Lemma(15)

SetTag(13)

Tag(12)

ContainsKey(11)

GetString(10)

Index(9)

SetIndex(9)

Value(9)

Factory(9)

Remove(8)

SetNER(7)

BeginPosition(6)

SetLemma(6)

SetOriginalText(5)

ToString(4)

get(4)

SetBeginPosition(4)

OriginalText(4)

SetEndPosition(4)

KeySet(3)

ToShorterString(3)

value(2)

Ner(2)

IsNewline(2)

EndPosition(2)

toString(1)

set(1)

lemma(1)

index(1)

SetCategory(1)

endPosition(1)

beginPosition(1)

GetHashCode(1)

Size(1)

LabelFactory(1)

Category(1)

word(1)

예제 #1

파일 보기

        // end featuresCpC
        /// <summary>
        /// Builds the "CnC" clique features from the characters around a position.
        /// </summary>
        /// <remarks>
        /// For a CRF this shouldn't be necessary, since the features duplicate
        /// those from CpC, but Huihsin found some valuable, presumably because
        /// it modified the regularization a bit.
        /// </remarks>
        /// <param name="cInfo">The list of characters</param>
        /// <param name="loc">Position of the current character in the list</param>
        /// <returns>
        /// Collection of string features (sparse set of boolean features);
        /// empty when flags.useWordn is not set.
        /// </returns>
        protected internal virtual ICollection <string> FeaturesCnC <_T0>(PaddedList <_T0> cInfo, int loc)
            where _T0 : CoreLabel
        {
            ICollection <string> result = new List <string>();

            if (!flags.useWordn)
            {
                return(result);
            }

            // Labels at offsets 0, +1, -1 and -2 relative to loc.
            CoreLabel current  = cInfo[loc];
            CoreLabel next     = cInfo[loc + 1];
            CoreLabel previous = cInfo[loc - 1];
            CoreLabel earlier  = cInfo[loc - 2];

            string curChar     = current.GetString <CoreAnnotations.CharAnnotation>();
            string nextChar    = next.GetString <CoreAnnotations.CharAnnotation>();
            string prevChar    = previous.GetString <CoreAnnotations.CharAnnotation>();
            string earlierChar = earlier.GetString <CoreAnnotations.CharAnnotation>();

            // Unigram features.
            result.Add(curChar + "c");
            result.Add(nextChar + "c2");
            result.Add(prevChar + "p");
            result.Add(earlierChar + "p2");

            // Bigram features; the hyphen in "pc2" separates the two non-adjacent characters.
            result.Add(earlierChar + prevChar + "p2p");
            result.Add(prevChar + curChar + "pc");
            result.Add(curChar + nextChar + "cc2");
            result.Add(prevChar + "-" + nextChar + "pc2");

            // Marker feature identifying this clique.
            result.Add("cliqueCnC");
            return(result);
        }

예제 #2

파일 보기

파일: FeatureFactory.cs 프로젝트: zerouid/Stanford.CoreNLP.NET

        /// <summary>Returns the word of a CoreLabel after any configured word function.</summary>
        /// <remarks>
        /// Convenience method for subclasses which use CoreLabel. Reads the
        /// TextAnnotation of the label and, when flags.wordFunction is set,
        /// passes that text through the function.
        /// </remarks>
        /// <param name="label">A CoreLabel</param>
        /// <returns>
        /// The TextAnnotation of the label, perhaps after passing it through
        /// a function (flags.wordFunction)
        /// </returns>
        protected internal virtual string GetWord(CoreLabel label)
        {
            string text = label.GetString <CoreAnnotations.TextAnnotation>();

            // No transformation configured: return the raw text.
            if (flags.wordFunction == null)
            {
                return(text);
            }
            return(flags.wordFunction.Apply(text));
        }

예제 #3

파일 보기

        //is EnglishPU
        /// <summary>
        /// Adds dictionary features for the current, previous and next labels.
        /// </summary>
        /// <remarks>
        /// For each of the three labels, the string values stored under the
        /// begin/middle/end annotation keys are read and added to
        /// <paramref name="features"/>, each followed by
        /// <paramref name="dictSuffix"/> and a tag naming the label and field.
        /// </remarks>
        /// <param name="lbeginFieldName">Annotation key for the "begin" field</param>
        /// <param name="lmiddleFieldName">Annotation key for the "middle" field</param>
        /// <param name="lendFieldName">Annotation key for the "end" field</param>
        /// <param name="dictSuffix">Text inserted between each value and its tag</param>
        /// <param name="features">Collection that receives the generated features</param>
        /// <param name="p">The previous label</param>
        /// <param name="c">The current label</param>
        /// <param name="c2">The next label</param>
        private static void DictionaryFeaturesC(Type lbeginFieldName, Type lmiddleFieldName, Type lendFieldName, string dictSuffix, ICollection <string> features, CoreLabel p, CoreLabel c, CoreLabel c2)
        {
            // Current label.
            string curBegin  = c.GetString(lbeginFieldName);
            string curMiddle = c.GetString(lmiddleFieldName);
            string curEnd    = c.GetString(lendFieldName);
            features.Add(curBegin + dictSuffix + "-lb");
            features.Add(curMiddle + dictSuffix + "-lm");
            features.Add(curEnd + dictSuffix + "-le");

            // Previous label.
            string prevBegin  = p.GetString(lbeginFieldName);
            string prevMiddle = p.GetString(lmiddleFieldName);
            string prevEnd    = p.GetString(lendFieldName);
            features.Add(prevBegin + dictSuffix + "-plb");
            features.Add(prevMiddle + dictSuffix + "-plm");
            features.Add(prevEnd + dictSuffix + "-ple");

            // Next label.
            string nextBegin  = c2.GetString(lbeginFieldName);
            string nextMiddle = c2.GetString(lmiddleFieldName);
            string nextEnd    = c2.GetString(lendFieldName);
            features.Add(nextBegin + dictSuffix + "-c2lb");
            features.Add(nextMiddle + dictSuffix + "-c2lm");
            features.Add(nextEnd + dictSuffix + "-c2le");
        }

예제 #4

파일 보기

        /// <summary>
        /// Adds dictionary features for the current, previous and next labels,
        /// plus optional conjunction features that also involve the label two
        /// positions back.
        /// </summary>
        /// <param name="lbeginFieldName">Annotation key for the "begin" field</param>
        /// <param name="lmiddleFieldName">Annotation key for the "middle" field</param>
        /// <param name="lendFieldName">Annotation key for the "end" field</param>
        /// <param name="dictSuffix">Text inserted between each value and its tag</param>
        /// <param name="features">Collection that receives the generated features</param>
        /// <param name="p2">The label two positions before the current one</param>
        /// <param name="p">The previous label</param>
        /// <param name="c">The current label</param>
        /// <param name="c2">The next label</param>
        private void DictionaryFeaturesCpC(Type lbeginFieldName, Type lmiddleFieldName, Type lendFieldName, string dictSuffix, ICollection <string> features, CoreLabel p2, CoreLabel p, CoreLabel c, CoreLabel c2)
        {
            // Current label.
            string lbegin  = c.GetString(lbeginFieldName);
            string lmiddle = c.GetString(lmiddleFieldName);
            string lend    = c.GetString(lendFieldName);

            features.Add(lbegin + dictSuffix + "-lb");
            features.Add(lmiddle + dictSuffix + "-lm");
            features.Add(lend + dictSuffix + "-le");

            // Previous label. All three fields are read with GetString, like
            // every other lookup in this method (including the same field in
            // the conjunction section below).
            lbegin  = p.GetString(lbeginFieldName);
            lmiddle = p.GetString(lmiddleFieldName);
            lend    = p.GetString(lendFieldName);
            features.Add(lbegin + dictSuffix + "-plb");
            features.Add(lmiddle + dictSuffix + "-plm");
            features.Add(lend + dictSuffix + "-ple");

            // Next label.
            lbegin  = c2.GetString(lbeginFieldName);
            lmiddle = c2.GetString(lmiddleFieldName);
            lend    = c2.GetString(lendFieldName);
            features.Add(lbegin + dictSuffix + "-c2lb");
            features.Add(lmiddle + dictSuffix + "-c2lm");
            features.Add(lend + dictSuffix + "-c2le");

            if (flags.useDictionaryConjunctions)
            {
                // The working variables above now hold c2's values, so the
                // fields needed for the conjunctions are read again.
                string p2Lend   = p2.GetString(lendFieldName);
                string pLend    = p.GetString(lendFieldName);
                string pLbegin  = p.GetString(lbeginFieldName);
                string cLbegin  = c.GetString(lbeginFieldName);
                string cLmiddle = c.GetString(lmiddleFieldName);
                if (flags.useDictionaryConjunctions3)
                {
                    features.Add(pLend + cLbegin + cLmiddle + dictSuffix + "-pcLconj1");
                }
                features.Add(p2Lend + pLend + cLbegin + cLmiddle + dictSuffix + "-p2pcLconj1");
                features.Add(p2Lend + pLend + pLbegin + cLbegin + cLmiddle + dictSuffix + "-p2pcLconj2");
            }
        }

예제 #5

파일 보기

        /// <summary>
        /// Computes the true-cased form of a token from its TrueCaseAnnotation
        /// and stores it on the token.
        /// </summary>
        /// <remarks>
        /// The result is always written to TrueCaseTextAnnotation. When
        /// overwriteText is set, it also replaces the token's TextAnnotation and
        /// ValueAnnotation. For any true-case label other than "UPPER", "LOWER",
        /// "INIT_UPPER" or "O", the original text is kept.
        /// </remarks>
        /// <param name="l">The token to update</param>
        private void SetTrueCaseText(CoreLabel l)
        {
            string trueCase     = l.GetString <CoreAnnotations.TrueCaseAnnotation>();
            string text         = l.Word();
            string trueCaseText = text;

            switch (trueCase)
            {
            case "UPPER":
            {
                trueCaseText = text.ToUpper();
                break;
            }

            case "LOWER":
            {
                trueCaseText = text.ToLower();
                break;
            }

            case "INIT_UPPER":
            {
                // Guard against an empty token: indexing text[0] would throw.
                // An empty text is left unchanged.
                if (text.Length > 0)
                {
                    // NOTE(review): char.ToTitleCase is kept as in the original;
                    // confirm the project provides it.
                    trueCaseText = char.ToTitleCase(text[0]) + Sharpen.Runtime.Substring(text, 1).ToLower();
                }
                break;
            }

            case "O":
            {
                // The model predicted mixed case, so lookup the map:
                string lower = text.ToLower();
                if (mixedCaseMap.Contains(lower))
                {
                    trueCaseText = mixedCaseMap[lower];
                }
                // else leave it as it was?
                break;
            }
            }
            // System.err.println(text + " was classified as " + trueCase + " and so became " + trueCaseText);
            l.Set(typeof(CoreAnnotations.TrueCaseTextAnnotation), trueCaseText);
            if (overwriteText)
            {
                l.Set(typeof(CoreAnnotations.TextAnnotation), trueCaseText);
                l.Set(typeof(CoreAnnotations.ValueAnnotation), trueCaseText);
            }
        }

예제 #6

파일 보기

        // end featuresCpCp2C
        /// <summary>
        /// Builds the four-label clique features from the current label, the
        /// next label and the three preceding labels.
        /// </summary>
        /// <param name="cInfo">The list of characters</param>
        /// <param name="loc">Position of the current character in the list</param>
        /// <returns>
        /// Collection of string features; empty unless flags.use4Clique is set
        /// and flags.maxLeft is at least 3.
        /// </returns>
        protected internal virtual ICollection <string> FeaturesCpCp2Cp3C <_T0>(PaddedList <_T0> cInfo, int loc)
            where _T0 : CoreLabel
        {
            ICollection <string> features = new List <string>();

            if (flags.use4Clique && flags.maxLeft >= 3)
            {
                CoreLabel c       = cInfo[loc];
                CoreLabel c2      = cInfo[loc + 1];
                CoreLabel p       = cInfo[loc - 1];
                CoreLabel p2      = cInfo[loc - 2];
                CoreLabel p3      = cInfo[loc - 3];
                string    charc   = c.GetString <CoreAnnotations.CharAnnotation>();
                string    charp   = p.GetString <CoreAnnotations.CharAnnotation>();
                string    charp2  = p2.GetString <CoreAnnotations.CharAnnotation>();
                string    charp3  = p3.GetString <CoreAnnotations.CharAnnotation>();
                // The UType values are held as nullable ints so that the null
                // checks below are meaningful: with a plain int the comparison
                // is always true and the string.Empty fallback is unreachable.
                // NOTE(review): assumes Get(...) yields a value assignable to
                // int? for this key - confirm against the CoreLabel API.
                int?      cI      = c.Get(typeof(CoreAnnotations.UTypeAnnotation));
                string    uTypec  = (cI != null ? cI.ToString() : string.Empty);
                int?      c2I     = c2.Get(typeof(CoreAnnotations.UTypeAnnotation));
                string    uTypec2 = (c2I != null ? c2I.ToString() : string.Empty);
                int?      pI      = p.Get(typeof(CoreAnnotations.UTypeAnnotation));
                string    uTypep  = (pI != null ? pI.ToString() : string.Empty);
                int?      p2I     = p2.Get(typeof(CoreAnnotations.UTypeAnnotation));
                string    uTypep2 = (p2I != null ? p2I.ToString() : string.Empty);
                int?      p3I     = p3.Get(typeof(CoreAnnotations.UTypeAnnotation));
                string    uTypep3 = (p3I != null ? p3I.ToString() : string.Empty);
                if (flags.useLongSequences)
                {
                    // Character 4-gram ending at the current position.
                    features.Add(charp3 + charp2 + charp + charc + "p3p2pc");
                }
                if (flags.useUnicodeType4gram || flags.useUnicodeType5gram)
                {
                    // The 4-gram is also emitted when only the 5-gram flag is set.
                    features.Add(uTypep3 + "-" + uTypep2 + "-" + uTypep + "-" + uTypec + "-uType4");
                }
                if (flags.useUnicodeType5gram)
                {
                    features.Add(uTypep3 + "-" + uTypep2 + "-" + uTypep + "-" + uTypec + "-" + uTypec2 + "-uType5");
                }
                // Marker feature identifying this clique.
                features.Add("cliqueCpCp2Cp3C");
            }
            return(features);
        }

예제 #7

파일 보기

파일: MUCMentionExtractor.cs 프로젝트: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Reads the next DOC element from the file contents, starting at the
        /// current offset, and builds a document from it.
        /// </summary>
        /// <remarks>
        /// Steps, in order: locate the next DOC element and its DOCNO; tokenize
        /// each sentence-like element and repair known tokenization problems;
        /// walk the tokens, separating real words from the SGML tags that open
        /// and close named entities and COREF mentions; assign a gold coref
        /// cluster id to every gold mention by following REF links; run the
        /// annotation pipeline on the sentences; check that the annotated
        /// tokens line up with the extracted tokens; extract predicted
        /// mentions and arrange the result.
        /// </remarks>
        /// <returns>
        /// The arranged document, or null when no further DOC element is found
        /// at or after the current offset.
        /// </returns>
        /// <exception cref="System.Exception">
        /// Thrown when a closing named-entity tag does not match the open one.
        /// </exception>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when the annotated sentences or tokens do not line up with
        /// the tokens extracted from the file.
        /// </exception>
        public override Document NextDoc()
        {
            IList <IList <CoreLabel> > allWords      = new List <IList <CoreLabel> >();
            IList <Tree>             allTrees        = new List <Tree>();
            IList <IList <Mention> > allGoldMentions = new List <IList <Mention> >();
            IList <IList <Mention> > allPredictedMentions;
            IList <ICoreMap>         allSentences = new List <ICoreMap>();
            Annotation docAnno         = new Annotation(string.Empty);
            Pattern    docPattern      = Pattern.Compile("<DOC>(.*?)</DOC>", Pattern.Dotall + Pattern.CaseInsensitive);
            Pattern    sentencePattern = Pattern.Compile("(<s>|<hl>|<dd>|<DATELINE>)(.*?)(</s>|</hl>|</dd>|</DATELINE>)", Pattern.Dotall + Pattern.CaseInsensitive);
            Matcher    docMatcher      = docPattern.Matcher(fileContents);

            // No further document in the file.
            if (!docMatcher.Find(currentOffset))
            {
                return(null);
            }
            // Remember where this document ends so the next call continues from there.
            currentOffset = docMatcher.End();
            string  doc             = docMatcher.Group(1);
            Matcher sentenceMatcher = sentencePattern.Matcher(doc);
            // Name of the named-entity tag currently open, or null when none is open.
            string  ner             = null;
            //Maintain current document ID.
            Pattern docIDPattern = Pattern.Compile("<DOCNO>(.*?)</DOCNO>", Pattern.Dotall + Pattern.CaseInsensitive);
            Matcher docIDMatcher = docIDPattern.Matcher(doc);

            if (docIDMatcher.Find())
            {
                currentDocumentID = docIDMatcher.Group(1);
            }
            else
            {
                // No DOCNO element: derive an id from the previous document's id.
                currentDocumentID = "documentAfter " + currentDocumentID;
            }
            while (sentenceMatcher.Find())
            {
                string            sentenceString = sentenceMatcher.Group(2);
                IList <CoreLabel> words          = tokenizerFactory.GetTokenizer(new StringReader(sentenceString)).Tokenize();
                // FIXING TOKENIZATION PROBLEMS
                // NOTE(review): Remove is called with an int index below; presumably an
                // index-based removal helper is in scope - confirm it does not resolve
                // to remove-by-item.
                for (int i = 0; i < words.Count; i++)
                {
                    CoreLabel w = words[i];
                    if (i > 0 && w.Word().Equals("$"))
                    {
                        if (!words[i - 1].Word().EndsWith("PRP") && !words[i - 1].Word().EndsWith("WP"))
                        {
                            continue;
                        }
                        // Glue the "$" back onto the preceding token and drop it.
                        words[i - 1].Set(typeof(CoreAnnotations.TextAnnotation), words[i - 1].Word() + "$");
                        words.Remove(i);
                        i--;
                    }
                    else if (w.Word().Equals("\\/"))
                    {
                        // NOTE(review): words[i - 1] and words[i + 1] are read without a
                        // bounds check - verify this token never appears first or last.
                        if (words[i - 1].Word().Equals("</COREF>"))
                        {
                            continue;
                        }
                        // Merge the neighbouring tokens into this one and drop them.
                        w.Set(typeof(CoreAnnotations.TextAnnotation), words[i - 1].Word() + "\\/" + words[i + 1].Word());
                        words.Remove(i + 1);
                        words.Remove(i - 1);
                    }
                }
                // END FIXING TOKENIZATION PROBLEMS
                IList <CoreLabel> sentence = new List <CoreLabel>();
                // MUC accepts embedded coref mentions, so we need to keep a stack for the mentions currently open
                Stack <Mention> stack    = new Stack <Mention>();
                IList <Mention> mentions = new List <Mention>();
                allWords.Add(sentence);
                allGoldMentions.Add(mentions);
                foreach (CoreLabel word in words)
                {
                    string w = word.Get(typeof(CoreAnnotations.TextAnnotation));
                    if (!w.StartsWith("<") && w.Contains("\\/") && w.LastIndexOf("\\/") != w.Length - 2)
                    {
                        // found regular token: WORD/POS
                        int    i_1 = w.LastIndexOf("\\/");
                        string w1  = Sharpen.Runtime.Substring(w, 0, i_1);
                        // we do NOT set POS info here. We take the POS tags from the parser!
                        word.Set(typeof(CoreAnnotations.TextAnnotation), w1);
                        word.Remove(typeof(CoreAnnotations.OriginalTextAnnotation));
                        sentence.Add(word);
                    }
                    else if (w.StartsWith("<") && !w.StartsWith("<COREF") && !w.StartsWith("</"))
                    {
                        // found the start SGML tag for a NE, e.g., "<ORGANIZATION>"
                        Pattern nerPattern = Pattern.Compile("<(.*?)>");
                        Matcher m          = nerPattern.Matcher(w);
                        m.Find();
                        ner = m.Group(1);
                    }
                    else if (w.StartsWith("</") && !w.StartsWith("</COREF"))
                    {
                        // found the end SGML tag for a NE, e.g., "</ORGANIZATION>"
                        Pattern nerPattern = Pattern.Compile("</(.*?)>");
                        Matcher m          = nerPattern.Matcher(w);
                        m.Find();
                        string ner1 = m.Group(1);
                        if (ner != null && !ner.Equals(ner1))
                        {
                            throw new Exception("Unmatched NE labels in MUC file: " + ner + " v. " + ner1);
                        }
                        ner = null;
                    }
                    else if (w.StartsWith("<COREF"))
                    {
                        // found the start SGML tag for a coref mention
                        Mention mention = new Mention();
                        // position of this mention in the sentence
                        mention.startIndex = sentence.Count;
                        // extract GOLD info about this coref chain. needed for eval
                        Pattern idPattern  = Pattern.Compile("ID=\"(.*?)\"");
                        Pattern refPattern = Pattern.Compile("REF=\"(.*?)\"");
                        Matcher m          = idPattern.Matcher(w);
                        m.Find();
                        mention.mentionID = System.Convert.ToInt32(m.Group(1));
                        m = refPattern.Matcher(w);
                        if (m.Find())
                        {
                            mention.originalRef = System.Convert.ToInt32(m.Group(1));
                        }
                        // open mention. keep track of all open mentions using the stack
                        stack.Push(mention);
                    }
                    else if (w.Equals("</COREF>"))
                    {
                        // found the end SGML tag for a coref mention
                        Mention mention = stack.Pop();
                        mention.endIndex = sentence.Count;
                        // this is a closed mention. add it to the final list of mentions
                        // System.err.printf("Found MENTION: ID=%d, REF=%d\n", mention.mentionID, mention.originalRef);
                        mentions.Add(mention);
                    }
                    else
                    {
                        // Anything else is kept as an ordinary token.
                        word.Remove(typeof(CoreAnnotations.OriginalTextAnnotation));
                        sentence.Add(word);
                    }
                }
                // Number the tokens (1-based) and rebuild the sentence text, space separated.
                StringBuilder textContent = new StringBuilder();
                for (int i_2 = 0; i_2 < sentence.Count; i_2++)
                {
                    CoreLabel w = sentence[i_2];
                    w.Set(typeof(CoreAnnotations.IndexAnnotation), i_2 + 1);
                    w.Set(typeof(CoreAnnotations.UtteranceAnnotation), 0);
                    if (i_2 > 0)
                    {
                        textContent.Append(" ");
                    }
                    textContent.Append(w.GetString <CoreAnnotations.TextAnnotation>());
                }
                ICoreMap sentCoreMap = new Annotation(textContent.ToString());
                allSentences.Add(sentCoreMap);
                sentCoreMap.Set(typeof(CoreAnnotations.TokensAnnotation), sentence);
            }
            // assign goldCorefClusterID
            IDictionary <int, Mention> idMention = Generics.NewHashMap();

            // temporary use
            foreach (IList <Mention> goldMentions in allGoldMentions)
            {
                foreach (Mention m in goldMentions)
                {
                    idMention[m.mentionID] = m;
                }
            }
            foreach (IList <Mention> goldMentions_1 in allGoldMentions)
            {
                foreach (Mention m in goldMentions_1)
                {
                    if (m.goldCorefClusterID == -1)
                    {
                        if (m.originalRef == -1)
                        {
                            // No REF: this mention starts its own cluster.
                            m.goldCorefClusterID = m.mentionID;
                        }
                        else
                        {
                            // Follow the REF links until a mention with a known cluster,
                            // or one without a REF, is reached.
                            int @ref = m.originalRef;
                            while (true)
                            {
                                Mention m2 = idMention[@ref];
                                if (m2.goldCorefClusterID != -1)
                                {
                                    m.goldCorefClusterID = m2.goldCorefClusterID;
                                    break;
                                }
                                else if (m2.originalRef == -1)
                                {
                                    m2.goldCorefClusterID = m2.mentionID;
                                    m.goldCorefClusterID  = m2.goldCorefClusterID;
                                    break;
                                }
                                else
                                {
                                    @ref = m2.originalRef;
                                }
                            }
                        }
                    }
                }
            }
            docAnno.Set(typeof(CoreAnnotations.SentencesAnnotation), allSentences);
            stanfordProcessor.Annotate(docAnno);
            if (allSentences.Count != allWords.Count)
            {
                throw new InvalidOperationException("allSentences != allWords");
            }
            for (int i_3 = 0; i_3 < allSentences.Count; i_3++)
            {
                IList <CoreLabel> annotatedSent   = allSentences[i_3].Get(typeof(CoreAnnotations.TokensAnnotation));
                IList <CoreLabel> unannotatedSent = allWords[i_3];
                IList <Mention>   mentionInSent   = allGoldMentions[i_3];
                foreach (Mention m in mentionInSent)
                {
                    m.dependency = allSentences[i_3].Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
                }
                if (annotatedSent.Count != unannotatedSent.Count)
                {
                    throw new InvalidOperationException("annotatedSent != unannotatedSent");
                }
                // Both lists were just checked to have the same length, so either
                // count bounds the loop.
                for (int j = 0; j < annotatedSent.Count; j++)
                {
                    CoreLabel annotatedWord   = annotatedSent[j];
                    CoreLabel unannotatedWord = unannotatedSent[j];
                    if (!annotatedWord.Get(typeof(CoreAnnotations.TextAnnotation)).Equals(unannotatedWord.Get(typeof(CoreAnnotations.TextAnnotation))))
                    {
                        throw new InvalidOperationException("annotatedWord != unannotatedWord");
                    }
                }
                // Replace the extracted tokens with their annotated counterparts.
                // NOTE(review): Set(index, value) on an IList is presumably an
                // extension helper - confirm it is in scope.
                allWords.Set(i_3, annotatedSent);
                allTrees.Add(allSentences[i_3].Get(typeof(TreeCoreAnnotations.TreeAnnotation)));
            }
            // extract predicted mentions
            allPredictedMentions = mentionFinder.ExtractPredictedMentions(docAnno, maxID, dictionaries);
            // add the relevant fields to mentions and order them for coref
            return(Arrange(docAnno, allWords, allTrees, allPredictedMentions, allGoldMentions, true));
        }

예제 #8

파일 보기

        //end of CnC
        /// <summary>Second order clique features</summary>
        /// <remarks>
        /// Produces, depending on the flags: character n-gram features,
        /// shape-string features and shape/character conjunctions, and radical
        /// features. The clique marker feature is always added last.
        /// </remarks>
        /// <param name="cInfo">The list of characters</param>
        /// <param name="loc">Position of the current character in the list</param>
        /// <returns>Collection of string features (sparse set of boolean features)</returns>
        protected internal virtual ICollection <string> FeaturesCpCp2C <_T0>(PaddedList <_T0> cInfo, int loc)
            where _T0 : CoreLabel
        {
            ICollection <string> result = new List <string>();

            // Labels at offsets 0, +1, +2, -1, -2 and -3 relative to loc.
            CoreLabel cur   = cInfo[loc];
            CoreLabel next1 = cInfo[loc + 1];
            CoreLabel next2 = cInfo[loc + 2];
            CoreLabel prev1 = cInfo[loc - 1];
            CoreLabel prev2 = cInfo[loc - 2];
            CoreLabel prev3 = cInfo[loc - 3];

            string ch0  = cur.GetString <CoreAnnotations.CharAnnotation>();
            string chN1 = next1.GetString <CoreAnnotations.CharAnnotation>();
            string chN2 = next2.GetString <CoreAnnotations.CharAnnotation>();
            string chP1 = prev1.GetString <CoreAnnotations.CharAnnotation>();
            string chP2 = prev2.GetString <CoreAnnotations.CharAnnotation>();
            string chP3 = prev3.GetString <CoreAnnotations.CharAnnotation>();

            // N-gram features. N is up to 3
            if (flags.useWord3)
            {
                result.Add(ch0 + "::c");
                result.Add(chN1 + "::n");
                result.Add(chP1 + "::p");
                result.Add(chP2 + "::p2");
                // trying to restore the features that Huihsin described in SIGHAN 2005 paper
                result.Add(ch0 + chN1 + "::cn");
                result.Add(ch0 + chN1 + chN2 + "::cnn2");
                result.Add(chP1 + ch0 + "::pc");
                result.Add(chP1 + chN1 + "::pn");
                result.Add(chP2 + chP1 + "::p2p");
                result.Add(chP3 + chP2 + chP1 + "::p3p2p");
                result.Add(chP2 + ch0 + "::p2c");
                result.Add(ch0 + chN2 + "::cn2");
            }

            if (flags.useShapeStrings)
            {
                // Shape strings are looked up once here and reused by every
                // shape-based feature below.
                string shP2 = prev2.GetString <CoreAnnotations.ShapeAnnotation>();
                string shP1 = prev1.GetString <CoreAnnotations.ShapeAnnotation>();
                string sh0  = cur.GetString <CoreAnnotations.ShapeAnnotation>();
                string shN1 = next1.GetString <CoreAnnotations.ShapeAnnotation>();
                string shN2 = next2.GetString <CoreAnnotations.ShapeAnnotation>();

                if (flags.useShapeStrings1)
                {
                    result.Add(shP1 + "ps");
                    result.Add(sh0 + "cs");
                    result.Add(shN1 + "c2s");
                }
                if (flags.useShapeStrings3)
                {
                    result.Add(shP1 + sh0 + shN1 + "pscsc2s");
                }
                if (flags.useShapeStrings4)
                {
                    result.Add(shP2 + shP1 + sh0 + shN1 + "p2spscsc2s");
                }
                if (flags.useShapeStrings5)
                {
                    result.Add(shP2 + shP1 + sh0 + shN1 + shN2 + "p2spscsc2sc3s");
                }
                if (flags.useWordShapeConjunctions2)
                {
                    result.Add(shP1 + ch0 + "pscc");
                    result.Add(chP1 + sh0 + "pccs");
                }
                if (flags.useWordShapeConjunctions3)
                {
                    result.Add(shP2 + shP1 + ch0 + "p2spscc");
                    result.Add(shP1 + ch0 + shN1 + "psccc2s");
                    result.Add(ch0 + shN1 + shN2 + "ccc2sc3s");
                }
            }

            /*
             * Radical N-gram features. N is up to 4.
             * Smoothing method of N-gram, because there are too many characters in Chinese.
             * (It works better than N-gram when they are used individually. less sparse)
             */
            // An empty character string gets the placeholder radical 'n'.
            char radC  = (ch0.Length == 0) ? 'n' : RadicalMap.GetRadical(ch0[0]);
            char radN1 = (chN1.Length == 0) ? 'n' : RadicalMap.GetRadical(chN1[0]);
            char radP1 = (chP1.Length == 0) ? 'n' : RadicalMap.GetRadical(chP1[0]);
            char radP2 = (chP2.Length == 0) ? 'n' : RadicalMap.GetRadical(chP2[0]);

            // NOTE(review): where two or more chars are added before the string
            // literal, the chars are summed as integers first, so the feature
            // starts with a number rather than with the radical characters.
            // This is preserved exactly as in the original, since feature
            // strings may be relied on by existing models - confirm before
            // changing.
            if (flags.useRad2)
            {
                result.Add(radC + "rc");
                result.Add(radN1 + "rc2");
                result.Add(radP1 + "rp");
                result.Add(radP1 + radC + "rprc");
                result.Add(radC + radN1 + "rcrc2");
                result.Add(radP1 + radC + radN1 + "rprcrc2");
            }
            if (flags.useRad2b)
            {
                result.Add(radC + "rc");
                result.Add(radN1 + "rc2");
                result.Add(radP1 + "rp");
                result.Add(radP1 + radC + "rprc");
                result.Add(radC + radN1 + "rcrc2");
                result.Add(radP2 + radP1 + "rp2rp");
            }

            // Marker feature identifying this clique.
            result.Add("cliqueCpCp2C");
            return(result);
        }

예제 #9

파일 보기

        /// <summary>
        /// Builds the string features for the "CpC" clique: the character at
        /// <paramref name="loc"/> together with its neighbours, from three positions
        /// before it to two positions after it.
        /// </summary>
        /// <remarks>
        /// Which feature groups are emitted is controlled entirely by <c>flags</c>.
        /// Features are appended to a list in a fixed order and the same string may be
        /// added more than once (for example when both <c>useRad2</c> and
        /// <c>useRad2b</c> are set). The marker <c>"cliqueCpC"</c> is always added last.
        /// NOTE(review): positions outside the sequence are presumably served by the
        /// padding element of <c>PaddedList</c> - confirm against that class.
        /// </remarks>
        /// <param name="cInfo">The list of characters</param>
        /// <param name="loc">Position of c in list</param>
        /// <returns>Collection of String features</returns>
        /// <exception cref="NotSupportedException">
        /// Thrown when <c>flags.useChPos</c> is set together with one of the
        /// <c>use*Char2</c> flags, but neither <c>useCTBChar2</c> nor <c>usePKChar2</c> is set.
        /// </exception>
        protected internal virtual ICollection <string> FeaturesCpC <_T0>(PaddedList <_T0> cInfo, int loc)
            where _T0 : CoreLabel
        {
            ICollection <string> features = new List <string>();
            CoreLabel            c        = cInfo[loc];
            CoreLabel            c2       = cInfo[loc + 1];
            CoreLabel            c3       = cInfo[loc + 2];
            CoreLabel            p        = cInfo[loc - 1];
            CoreLabel            p2       = cInfo[loc - 2];
            CoreLabel            p3       = cInfo[loc - 3];
            string charc   = c.GetString <CoreAnnotations.CharAnnotation>();
            string charc2  = c2.GetString <CoreAnnotations.CharAnnotation>();
            string charc3  = c3.GetString <CoreAnnotations.CharAnnotation>();
            string charp   = p.GetString <CoreAnnotations.CharAnnotation>();
            string charp2  = p2.GetString <CoreAnnotations.CharAnnotation>();
            string charp3  = p3.GetString <CoreAnnotations.CharAnnotation>();

            // The raw annotation values are held as object so that a missing annotation
            // (null) can really be detected; a plain int can never compare equal to null.
            object cI      = c.Get(typeof(CoreAnnotations.UTypeAnnotation));
            string uTypec  = (cI != null ? cI.ToString() : string.Empty);
            object c2I     = c2.Get(typeof(CoreAnnotations.UTypeAnnotation));
            string uTypec2 = (c2I != null ? c2I.ToString() : string.Empty);
            object c3I     = c3.Get(typeof(CoreAnnotations.UTypeAnnotation));
            string uTypec3 = (c3I != null ? c3I.ToString() : string.Empty);
            object pI      = p.Get(typeof(CoreAnnotations.UTypeAnnotation));
            string uTypep  = (pI != null ? pI.ToString() : string.Empty);
            object p2I     = p2.Get(typeof(CoreAnnotations.UTypeAnnotation));
            string uTypep2 = (p2I != null ? p2I.ToString() : string.Empty);

            /* Dictionary features (primary and secondary dictionary). */
            if (flags.dictionary != null || flags.serializedDictionary != null)
            {
                DictionaryFeaturesCpC(typeof(CoreAnnotations.LBeginAnnotation), typeof(CoreAnnotations.LMiddleAnnotation), typeof(CoreAnnotations.LEndAnnotation), string.Empty, features, p2, p, c, c2);
            }
            if (flags.dictionary2 != null)
            {
                DictionaryFeaturesCpC(typeof(CoreAnnotations.D2_LBeginAnnotation), typeof(CoreAnnotations.D2_LMiddleAnnotation), typeof(CoreAnnotations.D2_LEndAnnotation), "-D2-", features, p2, p, c, c2);
            }

            /*
             * N-gram features. N is up to 2.
             */
            if (flags.useWord2)
            {
                features.Add(charc + "::c");
                features.Add(charc2 + "::c1");
                features.Add(charp + "::p");
                features.Add(charp2 + "::p2");
                // trying to restore the features that Huihsin described in SIGHAN 2005 paper
                features.Add(charc + charc2 + "::cn"); // (*)
                features.Add(charp + charc + "::pc");
                features.Add(charp + charc2 + "::pn");
                features.Add(charp2 + charp + "::p2p");
                features.Add(charp2 + charc + "::p2c");
                // todo (from the original authors): this is messed up: it uses the same two
                // characters as (*); should be cn2 = charc + charc3 + "::cn2".
                // Left unchanged so that the emitted feature strings stay the same.
                features.Add(charc2 + charc + "::n2c");
            }
            if (flags.useFeaturesCpC4gram || flags.useFeaturesCpC5gram || flags.useFeaturesCpC6gram)
            {
                // todo (from the original authors): both these features repeat information
                // already covered by useWord2
                features.Add(charp2 + charp + "p2p");
                features.Add(charp2 + "p2");
            }
            if (flags.useFeaturesCpC5gram || flags.useFeaturesCpC6gram)
            {
                features.Add(charc3 + "c3");
                features.Add(charc2 + charc3 + "c2c3");
            }
            if (flags.useFeaturesCpC6gram)
            {
                features.Add(charp3 + "p3");
                features.Add(charp3 + charp2 + "p3p2");
            }
            if (flags.useGoodForNamesCpC)
            {
                // these 2 features should be distinctively good at biasing from
                // picking up a Chinese family name in the p2 or p3 positions:
                // familyName X X startWord AND familyName X startWord
                // But actually they seem to have negative value.
                features.Add(charp2 + "p2");
                features.Add(charp3 + "p3");
            }

            /* Unicode type n-grams and their conjunctions with characters. */
            if (flags.useUnicodeType || flags.useUnicodeType4gram || flags.useUnicodeType5gram)
            {
                features.Add(uTypep + "-" + uTypec + "-" + uTypec2 + "-uType3");
            }
            if (flags.useUnicodeType4gram || flags.useUnicodeType5gram)
            {
                features.Add(uTypep2 + "-" + uTypep + "-" + uTypec + "-" + uTypec2 + "-uType4");
            }
            if (flags.useUnicodeType5gram)
            {
                features.Add(uTypep2 + "-" + uTypep + "-" + uTypec + "-" + uTypec2 + "-" + uTypec3 + "-uType5");
            }
            if (flags.useWordUTypeConjunctions2)
            {
                features.Add(uTypep + charc + "putcc");
                features.Add(charp + uTypec + "pccut");
            }
            if (flags.useWordUTypeConjunctions3)
            {
                features.Add(uTypep2 + uTypep + charc + "p2utputcc");
                features.Add(uTypep + charc + uTypec2 + "putccc2ut");
                features.Add(charc + uTypec2 + uTypec3 + "ccc2utc3ut");
            }
            if (flags.useUnicodeBlock)
            {
                features.Add(p.GetString <CoreAnnotations.UBlockAnnotation>() + "-" + c.GetString <CoreAnnotations.UBlockAnnotation>() + "-" + c2.GetString <CoreAnnotations.UBlockAnnotation>() + "-uBlock");
            }

            /* Shape features; the conjunction flags only apply when useShapeStrings is set. */
            if (flags.useShapeStrings)
            {
                // Read each shape annotation once instead of repeating the lookup in every feature.
                string shapep2 = p2.GetString <CoreAnnotations.ShapeAnnotation>();
                string shapep  = p.GetString <CoreAnnotations.ShapeAnnotation>();
                string shapec  = c.GetString <CoreAnnotations.ShapeAnnotation>();
                string shapec2 = c2.GetString <CoreAnnotations.ShapeAnnotation>();
                string shapec3 = c3.GetString <CoreAnnotations.ShapeAnnotation>();

                if (flags.useShapeStrings1)
                {
                    features.Add(shapep + "ps");
                    features.Add(shapec + "cs");
                    features.Add(shapec2 + "c2s");
                }
                if (flags.useShapeStrings3)
                {
                    features.Add(shapep + shapec + shapec2 + "pscsc2s");
                }
                if (flags.useShapeStrings4)
                {
                    features.Add(shapep2 + shapep + shapec + shapec2 + "p2spscsc2s");
                }
                if (flags.useShapeStrings5)
                {
                    features.Add(shapep2 + shapep + shapec + shapec2 + shapec3 + "p2spscsc2sc3s");
                }
                if (flags.useWordShapeConjunctions2)
                {
                    features.Add(shapep + charc + "pscc");
                    features.Add(charp + shapec + "pccs");
                }
                if (flags.useWordShapeConjunctions3)
                {
                    features.Add(shapep2 + shapep + charc + "p2spscc");
                    features.Add(shapep + charc + shapec2 + "psccc2s");
                    features.Add(charc + shapec2 + shapec3 + "ccc2sc3s");
                }
            }

            /*
             * Radical N-gram features. N is up to 4.
             * Smoothing method of N-gram, because there are too many characters in Chinese.
             * (It works better than N-gram when they are used individually. less sparse)
             * An empty character string is represented by the placeholder radical 'n'.
             */
            char rcharc  = (charc.Length == 0 ? 'n' : RadicalMap.GetRadical(charc[0]));
            char rcharc2 = (charc2.Length == 0 ? 'n' : RadicalMap.GetRadical(charc2[0]));
            char rcharp  = (charp.Length == 0 ? 'n' : RadicalMap.GetRadical(charp[0]));
            char rcharp2 = (charp2.Length == 0 ? 'n' : RadicalMap.GetRadical(charp2[0]));

            // NOTE(review): where two or three chars are added before the string suffix,
            // the chars are summed as integers first, so the feature starts with a number
            // rather than with the radicals themselves. Kept as is so that the emitted
            // feature strings do not change - confirm this is intended.
            if (flags.useRad2)
            {
                features.Add(rcharc + "rc");
                features.Add(rcharc2 + "rc2");
                features.Add(rcharp + "rp");
                features.Add(rcharp + rcharc + "rprc");
                features.Add(rcharc + rcharc2 + "rcrc2");
                features.Add(rcharp + rcharc + rcharc2 + "rprcrc2");
            }
            if (flags.useRad2b)
            {
                features.Add(rcharc + "rc");
                features.Add(rcharc2 + "rc2");
                features.Add(rcharp + "rp");
                features.Add(rcharp + rcharc + "rprc");
                features.Add(rcharc + rcharc2 + "rcrc2");
                features.Add(rcharp2 + rcharp + "rp2rp");
            }

            /* Non-word dictionary: SEEN bi-gram marked as non-word.
             * This is frickin' useful.  I hadn't realized.  CDM Oct 2007.
             */
            if (flags.useDict2)
            {
                NonDict2 nd = new NonDict2(flags);
                features.Add(nd.CheckDic(charp + charc, flags) + "nondict");
            }
            if (flags.useOutDict2)
            {
                // The dictionary is created on first use.
                if (outDict == null)
                {
                    CreateOutDict();
                }
                // The trailing comments give the offsets, relative to loc, of the characters looked up.
                features.Add(outDict.GetW(charp + charc) + "outdict");                    // -1 0
                features.Add(outDict.GetW(charc + charc2) + "outdict");                   // 0 1
                features.Add(outDict.GetW(charp2 + charp) + "outdict");                   // -2 -1
                features.Add(outDict.GetW(charp2 + charp + charc) + "outdict");           // -2 -1 0
                features.Add(outDict.GetW(charp3 + charp2 + charp) + "outdict");          // -3 -2 -1
                features.Add(outDict.GetW(charp + charc + charc2) + "outdict");           // -1 0 1
                features.Add(outDict.GetW(charc + charc2 + charc3) + "outdict");          // 0 1 2
                features.Add(outDict.GetW(charp + charc + charc2 + charc3) + "outdict");  // -1 0 1 2
            }

            /*
             * (CTB/ASBC/HK/PK/MSR) POS information of each characters.
             * If a character falls into some function categories,
             * it is very likely there is a boundary.
             * A lot of Chinese function words belong to single characters.
             * This feature is also good for numbers and punctuations.
             * DE* are grouped into DE.
             */
            if (flags.useCTBChar2 || flags.useASBCChar2 || flags.useHKChar2 || flags.usePKChar2 || flags.useMSRChar2)
            {
                string[] tagsets;
                if (!flags.useChPos)
                {
                    // derived features
                    tagsets = new string[] { "2", "3", "4" };
                }
                else if (flags.useCTBChar2)
                {
                    // the "useChPos" now only works for CTB and PK
                    tagsets = new string[] { "AD", "AS", "BA", "CC", "CD", "CS", "DE", "DT", "ETC", "IJ", "JJ", "LB", "LC", "M", "NN", "NR", "NT", "OD", "P", "PN", "PU", "SB", "SP", "VA", "VC", "VE", "VV" };
                }
                else if (flags.usePKChar2)
                {
                    tagsets = new string[] { "2", "3", "4" };
                }
                else
                {
                    throw new NotSupportedException("only support settings for CTB and PK now.");
                }
                // The detector is created on first use.
                if (taDetector == null)
                {
                    CreateTADetector();
                }
                foreach (string tag in tagsets)
                {
                    features.Add(taDetector.CheckDic(tag + "p", charp) + taDetector.CheckDic(tag + "i", charp) + taDetector.CheckDic(tag + "s", charc) + taDetector.CheckInDic(charp) + taDetector.CheckInDic(charc) + tag + "prep-sufc");
                }
            }

            /*
             * In error analysis, we found English words and numbers are often separated.
             * Rule 1: isNumber feature: check if the current and previous char is a number.
             * Rule 2: Disambiguation of time point and time duration.
             * Rule 3: isEnglish feature: check if the current and previous character is an english letter.
             * Rule 4: English name feature: check if the current char is a conjunct pu for English first and last name, since there is no space between two names.
             * Most of PUs are a good indicator for word boundary, but - and . is a strong indicator that there is no boundary within a previous, a following char and it.
             */
            if (flags.useRule2)
            {
                /* Reduplication features */
                // previous character == current character
                if (charp.Equals(charc))
                {
                    features.Add("11-R2");
                }
                // previous character == next character
                if (charp.Equals(charc2))
                {
                    features.Add("22-R2");
                }
                // current character == next character
                // fire only when usePk and useHk are both false.
                // Notice: this should be (almost) the same as the "22" feature, but we keep it for now.
                if (!flags.usePk && !flags.useHk && charc.Equals(charc2))
                {
                    features.Add("33-R2");
                }

                // First char of each string, or a blank when the string is empty
                // (actually their length must be either 0 or 1).
                char cur1 = (charc2.Length > 0 ? charc2[0] : ' ');
                char cur2 = (charc3.Length > 0 ? charc3[0] : ' ');
                char cur  = (charc.Length > 0 ? charc[0] : ' ');
                char pre  = (charp.Length > 0 ? charp[0] : ' ');
                // the radical of previous character
                string prer = rcharp.ToString();

                Pattern E  = Pattern.Compile("[a-zA-Z]");
                Pattern N  = Pattern.Compile("[0-9]");
                Matcher m  = E.Matcher(charp);
                Matcher ce = E.Matcher(charc);
                Matcher pe = E.Matcher(charp2);
                Matcher cn = N.Matcher(charc);
                Matcher pn = N.Matcher(charp2);

                bool curIsDigit = (cur >= '0' && cur <= '9');
                bool preIsDigit = (pre >= '0' && pre <= '9');

                if (curIsDigit && preIsDigit)
                {
                    // current and previous characters are both numbers
                    if (cur == '9' && pre == '1' && cur1 == '9' && cur2 >= '0' && cur2 <= '9')
                    {
                        //199x
                        features.Add("YR-R2");
                    }
                    else
                    {
                        features.Add("2N-R2");
                    }
                }
                else if (preIsDigit)
                {
                    // only the previous char is a number,
                    // i.e. patterns like "1N" , "2A", etc
                    features.Add("1N-R2");
                }
                else if (m.Matches())
                {
                    // previous character is an English character
                    features.Add("E-R2");
                }
                else if (prer.Equals(".") && charp.Length == 1)
                {
                    // the previous character contains no radical (and it exists)
                    if (ce.Matches())
                    {
                        features.Add("PU+E-R2");
                    }
                    if (pe.Matches())
                    {
                        features.Add("E+PU-R2");
                    }
                    if (cn.Matches())
                    {
                        features.Add("PU+N-R2");
                    }
                    if (pn.Matches())
                    {
                        features.Add("N+PU-R2");
                    }
                    features.Add("PU-R2");
                }

                string engType = IsEnglish(charp, charc);
                string engPU   = IsEngPU(charp);
                if (!engType.Equals(string.Empty))
                {
                    features.Add(engType);
                }
                if (!engPU.Equals(string.Empty) && !engType.Equals(string.Empty))
                {
                    features.Add(engPU + engType + "R2");
                }
            }
            //end of use rule

            // Feature based on the Unicode general category of the original character
            // (a blank is used when the original-character string is empty).
            string origS = c.GetString <CoreAnnotations.OriginalCharAnnotation>();
            char   origC = (origS.Length > 0 ? origS[0] : ' ');

            switch (char.GetUnicodeCategory(origC))
            {
            case System.Globalization.UnicodeCategory.UppercaseLetter:
            case System.Globalization.UnicodeCategory.LowercaseLetter:
            {
                // A-Z and full-width A-Z
                // a-z and full-width a-z
                features.Add("CHARTYPE-LETTER");
                break;
            }

            case System.Globalization.UnicodeCategory.DecimalDigitNumber:
            {
                features.Add("CHARTYPE-DECIMAL_DIGIT_NUMBER");
                break;
            }

            case System.Globalization.UnicodeCategory.OtherLetter:
            {
                // mostly chinese chars
                features.Add("CHARTYPE-OTHER_LETTER");
                break;
            }

            default:
            {
                // other types
                features.Add("CHARTYPE-MISC");
                break;
            }
            }
            features.Add("cliqueCpC");
            return(features);
        }

예제 #10

파일 보기

        /// <summary>
        /// Builds the string features for the "C" clique: the character at
        /// <paramref name="loc"/> together with its neighbours, from three positions
        /// before it to two positions after it.
        /// </summary>
        /// <remarks>
        /// Which feature groups are emitted is controlled entirely by <c>flags</c>.
        /// Features are appended to a list in a fixed order; the marker
        /// <c>"cliqueC"</c> is always added last.
        /// NOTE(review): positions outside the sequence are presumably served by the
        /// padding element of <c>PaddedList</c> - confirm against that class.
        /// </remarks>
        /// <param name="cInfo">The list of characters</param>
        /// <param name="loc">Position of c in list</param>
        /// <returns>Collection of String features</returns>
        protected internal virtual ICollection <string> FeaturesC <_T0>(PaddedList <_T0> cInfo, int loc)
            where _T0 : CoreLabel
        {
            ICollection <string> features = new List <string>();
            CoreLabel            c        = cInfo[loc];
            CoreLabel            c2       = cInfo[loc + 1];
            CoreLabel            c3       = cInfo[loc + 2];
            CoreLabel            p        = cInfo[loc - 1];
            CoreLabel            p2       = cInfo[loc - 2];
            CoreLabel            p3       = cInfo[loc - 3];
            string charc   = c.GetString <CoreAnnotations.CharAnnotation>();
            string charc2  = c2.GetString <CoreAnnotations.CharAnnotation>();
            string charc3  = c3.GetString <CoreAnnotations.CharAnnotation>();
            string charp   = p.GetString <CoreAnnotations.CharAnnotation>();
            string charp2  = p2.GetString <CoreAnnotations.CharAnnotation>();
            string charp3  = p3.GetString <CoreAnnotations.CharAnnotation>();

            // The raw annotation values are held as object so that a missing annotation
            // (null) can really be detected; a plain int can never compare equal to null.
            object cI      = c.Get(typeof(CoreAnnotations.UTypeAnnotation));
            string uTypec  = (cI != null ? cI.ToString() : string.Empty);
            object c2I     = c2.Get(typeof(CoreAnnotations.UTypeAnnotation));
            string uTypec2 = (c2I != null ? c2I.ToString() : string.Empty);
            object c3I     = c3.Get(typeof(CoreAnnotations.UTypeAnnotation));
            string uTypec3 = (c3I != null ? c3I.ToString() : string.Empty);
            object pI      = p.Get(typeof(CoreAnnotations.UTypeAnnotation));
            string uTypep  = (pI != null ? pI.ToString() : string.Empty);
            object p2I     = p2.Get(typeof(CoreAnnotations.UTypeAnnotation));
            string uTypep2 = (p2I != null ? p2I.ToString() : string.Empty);

            /* N-gram features. N is up to 2. */
            if (flags.useWord1)
            {
                features.Add(charc + "::c");
                features.Add(charc2 + "::c2");
                features.Add(charp + "::p");
                features.Add(charp2 + "::p2");
                // trying to restore the features that Huihsin described in SIGHAN 2005 paper
                features.Add(charc + charc2 + "::cn");
                features.Add(charc + charc3 + "::cn2");
                features.Add(charp + charc + "::pc");
                features.Add(charp + charc2 + "::pn");
                features.Add(charp2 + charp + "::p2p");
                features.Add(charp2 + charc + "::p2c");
                features.Add(charc2 + charc + "::n2c");
            }

            /* Dictionary features (primary and secondary dictionary). */
            if (flags.dictionary != null || flags.serializedDictionary != null)
            {
                DictionaryFeaturesC(typeof(CoreAnnotations.LBeginAnnotation), typeof(CoreAnnotations.LMiddleAnnotation), typeof(CoreAnnotations.LEndAnnotation), string.Empty, features, p, c, c2);
            }
            if (flags.dictionary2 != null)
            {
                DictionaryFeaturesC(typeof(CoreAnnotations.D2_LBeginAnnotation), typeof(CoreAnnotations.D2_LMiddleAnnotation), typeof(CoreAnnotations.D2_LEndAnnotation), "-D2-", features, p, c, c2);
            }

            /* Wider character n-grams; each larger flag also enables the smaller groups. */
            if (flags.useFeaturesC4gram || flags.useFeaturesC5gram || flags.useFeaturesC6gram)
            {
                features.Add(charp2 + charp + "p2p");
                features.Add(charp2 + "p2");
            }
            if (flags.useFeaturesC5gram || flags.useFeaturesC6gram)
            {
                features.Add(charc3 + "c3");
                features.Add(charc2 + charc3 + "c2c3");
            }
            if (flags.useFeaturesC6gram)
            {
                features.Add(charp3 + "p3");
                features.Add(charp3 + charp2 + "p3p2");
            }

            /* Unicode type n-grams. */
            if (flags.useUnicodeType || flags.useUnicodeType4gram || flags.useUnicodeType5gram)
            {
                features.Add(uTypep + "-" + uTypec + "-" + uTypec2 + "-uType3");
            }
            if (flags.useUnicodeType4gram || flags.useUnicodeType5gram)
            {
                features.Add(uTypep2 + "-" + uTypep + "-" + uTypec + "-" + uTypec2 + "-uType4");
            }
            if (flags.useUnicodeType5gram)
            {
                features.Add(uTypep2 + "-" + uTypep + "-" + uTypec + "-" + uTypec2 + "-" + uTypec3 + "-uType5");
            }
            if (flags.useUnicodeBlock)
            {
                features.Add(p.GetString <CoreAnnotations.UBlockAnnotation>() + "-" + c.GetString <CoreAnnotations.UBlockAnnotation>() + "-" + c2.GetString <CoreAnnotations.UBlockAnnotation>() + "-uBlock");
            }

            /* Shape features. */
            if (flags.useShapeStrings)
            {
                // Read each shape annotation once instead of repeating the lookup in every feature.
                string shapep2 = p2.GetString <CoreAnnotations.ShapeAnnotation>();
                string shapep  = p.GetString <CoreAnnotations.ShapeAnnotation>();
                string shapec  = c.GetString <CoreAnnotations.ShapeAnnotation>();
                string shapec2 = c2.GetString <CoreAnnotations.ShapeAnnotation>();
                string shapec3 = c3.GetString <CoreAnnotations.ShapeAnnotation>();

                if (flags.useShapeStrings1)
                {
                    features.Add(shapep + "ps");
                    features.Add(shapec + "cs");
                    features.Add(shapec2 + "c2s");
                }
                if (flags.useShapeStrings3)
                {
                    features.Add(shapep + shapec + shapec2 + "pscsc2s");
                }
                if (flags.useShapeStrings4)
                {
                    features.Add(shapep2 + shapep + shapec + shapec2 + "p2spscsc2s");
                }
                if (flags.useShapeStrings5)
                {
                    features.Add(shapep2 + shapep + shapec + shapec2 + shapec3 + "p2spscsc2sc3s");
                }
            }
            features.Add("cliqueC");
            return(features);
        }