Пример #1
0
        //the system must know the B (begin-chunk), I (in-chunk), O (out-chunk) information for computing f-score
        //since such BIO information is task-dependent, tagIndex.txt is required
        /// <summary>
        /// Rebuilds <c>chunkTagMap</c> from the file "tagIndex.txt" (relative path).
        /// </summary>
        /// <remarks>
        /// Every non-empty line is split on <c>Global.blankAry</c>: field 0 is the tag text and
        /// field 1 is the integer tag index. Field 0 is further split on <c>Global.starAry</c>
        /// and only its last piece is kept as the current tag. Tags starting with "I" or "O"
        /// are collapsed to plain "I" / "O"; any other tag is stored unchanged.
        /// </remarks>
        /// <exception cref="FormatException">
        /// A line has fewer than two fields, has no tag piece, or its index is not an integer.
        /// </exception>
        static void getChunkTagMap()
        {
            chunkTagMap.Clear();

            //read tagIndex.txt for chunk tag information
            //the using-statement guarantees the file handle is released even if reading throws
            string content;
            using (StreamReader reader = new StreamReader("tagIndex.txt"))
            {
                content = reader.ReadToEnd();
            }

            //normalise Windows line endings before splitting into lines
            content = content.Replace("\r", "");
            string[] lines = content.Split(Global.lineEndAry, StringSplitOptions.RemoveEmptyEntries);
            foreach (string line in lines)
            {
                string[] fields = line.Split(Global.blankAry, StringSplitOptions.RemoveEmptyEntries);
                if (fields.Length < 2)
                {
                    throw new FormatException("tagIndex.txt: expected '<tag> <index>' but found: '" + line + "'");
                }

                //the index is machine-written data, so parse it independently of the current culture
                int index = int.Parse(fields[1], System.Globalization.CultureInfo.InvariantCulture);

                string[] tagParts = fields[0].Split(Global.starAry, StringSplitOptions.RemoveEmptyEntries);
                if (tagParts.Length == 0)
                {
                    throw new FormatException("tagIndex.txt: missing tag in line: '" + line + "'");
                }

                string tag = tagParts[tagParts.Length - 1];//the last tag is the current tag

                //merge I-tag/O-tag: no need to use diversified I-tag/O-tag in computing F-score
                //ordinal comparison: tags are identifiers, not linguistic text
                if (tag.StartsWith("I", StringComparison.Ordinal))
                {
                    tag = "I";
                }
                else if (tag.StartsWith("O", StringComparison.Ordinal))
                {
                    tag = "O";
                }

                chunkTagMap[index] = tag;
            }
        }
Пример #2
0
        //the system must know the B (begin-chunk), I (in-chunk), O (out-chunk) information for computing f-score
        //since such BIO information is task-dependent, it should be explicitly coded here
        /// <summary>
        /// Rebuilds <c>chunkTagMap</c> with the hard-coded tag-index to B/I/O mapping of the task
        /// selected by <c>Global.taskBasedChunkInfo</c> ("np.chunk", "bio.ner" or "wd.seg").
        /// </summary>
        /// <remarks>
        /// The map is cleared before the task name is checked, so it is left empty when the
        /// task name is not recognised.
        /// </remarks>
        /// <exception cref="NotSupportedException">
        /// <c>Global.taskBasedChunkInfo</c> is not one of the supported task names.
        /// </exception>
        static void getChunkTagMap()
        {
            chunkTagMap.Clear();

            switch (Global.taskBasedChunkInfo)
            {
                /*
                 * Noun phrase chunk task (Sun et al. COLING 2008)'s BIO information
                 * O    0
                 * I-NP    1
                 * B-NP    2
                 */
                case "np.chunk":
                    chunkTagMap["0"] = "O";
                    chunkTagMap["1"] = "I";
                    chunkTagMap["2"] = "B";
                    break;

                /*
                 * biomedical named entity recognition task (Sun et al. IJCAI 2009)'s BIO information
                 * I-RNA    0
                 * O    1
                 * B-protein    2
                 * B-RNA    3
                 * B-cell_type    4
                 * B-cell_line    5
                 * B-DNA    6
                 * I-protein    7
                 * I-DNA    8
                 * I-cell_type    9
                 * I-cell_line    10
                 */
                case "bio.ner":
                    //every I-* tag collapses to "I"; each B-* tag keeps a distinct value (B1..B5)
                    chunkTagMap["0"]  = "I";
                    chunkTagMap["1"]  = "O";
                    chunkTagMap["2"]  = "B1";
                    chunkTagMap["3"]  = "B2";
                    chunkTagMap["4"]  = "B3";
                    chunkTagMap["5"]  = "B4";
                    chunkTagMap["6"]  = "B5";
                    chunkTagMap["7"]  = "I";
                    chunkTagMap["8"]  = "I";
                    chunkTagMap["9"]  = "I";
                    chunkTagMap["10"] = "I";
                    break;

                /*
                 * Chinese word segmentation task (Sun et al. ACL 2012)'s BIO information
                 * B    0
                 * E    1
                 * I    2
                 */
                case "wd.seg":
                    //the E tag (index 1) is mapped to "I", same as the I tag (index 2)
                    chunkTagMap["0"] = "B";
                    chunkTagMap["1"] = "I";
                    chunkTagMap["2"] = "I";
                    break;

                default:
                    //name the offending value so a misconfiguration can be diagnosed from the message alone
                    throw new NotSupportedException(
                        "Unknown Global.taskBasedChunkInfo value '" + Global.taskBasedChunkInfo +
                        "'; expected \"np.chunk\", \"bio.ner\" or \"wd.seg\".");
            }
        }