Example #1
0
        /// <summary>
        /// Creates one node per feature string, appends each to this list, and then
        /// runs the pronunciation, digit, accent-phrase, accent-type and
        /// unvoiced-vowel passes over the list.
        /// </summary>
        /// <param name="tFeature">Feature strings; each element is handed to NJDNode.Load.</param>
        /// <returns>
        /// <c>true</c> once every pass has been run; <c>false</c> when
        /// <paramref name="tFeature"/> is null (nothing is processed in that case).
        /// </returns>
        public bool Analyze(string[] tFeature)
        {
            // Without this guard a null array throws on tFeature.Length below.
            if (tFeature == null)
            {
                Debug.LogWarning("ERROR: Analyze(): feature array is not specified.");
                return false;
            }

            Debug.LogWarning("要素数:" + tFeature.Length);

            for (int i = 0; i < tFeature.Length; i++)
            {
                Debug.LogWarning("要素[ " + i + " ] " + tFeature[i]);

                NJDNode tNode = new NJDNode();
                tNode.Load(tFeature[i]);

                PushNode(tNode);
            }

            Debug.Log("====================================");

            // The passes are invoked in this fixed order.
            SetPronunciation();
            SetDigit();
            SetAccentPhrase();
            SetAccentType();
            SetUnvoicedVowel();

            return true;
        }
Example #2
0
        //---------------------------------------------------------------------------

        /// <summary>
        /// Looks up the word of <paramref name="node"/> in
        /// njd_set_digit_rule_numeral_list1 and returns the numeric value stored
        /// for it.
        /// </summary>
        /// <param name="node">Node to inspect; only considered when its PosGroup1 equals NJD_SET_DIGIT_KAZU.</param>
        /// <param name="convert_flag">
        /// When 1, a matching node has its Word and Orig overwritten with the
        /// third field of the matching record.
        /// </param>
        /// <returns>Atoi of the record's second field, or -1 when the node does not match.</returns>
        private int GetDigit(NJDNode node, int convert_flag)
        {
            string word = node.Word;

            if (word == "*")
            {
                return -1;
            }

            if (node.PosGroup1 != NJD_SET_DIGIT_KAZU)
            {
                return -1;
            }

            string[] table = njd_set_digit_rule_numeral_list1;

            // Records occupy three consecutive slots (key, value, replacement).
            // The table is expected to end with a null key; the length check keeps
            // the scan inside the array even if that sentinel is missing or the
            // last record is truncated.
            for (int i = 0; i + 2 < table.Length && table[i] != null; i += 3)
            {
                if (table[i] != word)
                {
                    continue;
                }

                if (convert_flag == 1)
                {
                    node.Word = table[i + 2];
                    node.Orig = table[i + 2];
                }

                return Atoi(table[i + 1]);
            }

            return -1;
        }
Example #3
0
        /// <summary>
        /// Unlinks <paramref name="node"/> from this list, calls Initialize() on it,
        /// and returns the node that followed it.
        /// </summary>
        /// <param name="node">Node to remove; expected to be a member of this list.</param>
        /// <returns>
        /// The node that came after the removed one, or null when the removed node
        /// was the tail (including the case where it was the only node).
        /// </returns>
        private NJDNode remove_node(NJDNode node)
        {
            bool isHead = (node == this.head);
            bool isTail = (node == this.tail);
            NJDNode successor;

            if (isHead && isTail)
            {
                // Only element: the list becomes empty.
                this.head = null;
                this.tail = null;
                successor = null;
            }
            else if (isHead)
            {
                // Drop the first element; its follower becomes the new head.
                this.head = node.next;
                this.head.prev = null;
                successor = this.head;
            }
            else if (isTail)
            {
                // Drop the last element; its predecessor becomes the new tail.
                this.tail = node.prev;
                this.tail.next = null;
                successor = null;
            }
            else
            {
                // Interior element: bridge its two neighbours.
                NJDNode before = node.prev;
                NJDNode after  = node.next;

                before.next = after;
                after.prev  = before;
                successor   = after;
            }

            // Initialize() also clears the removed node's own prev/next links.
            node.Initialize();

            return successor;
        }
Example #4
0
        /// <summary>
        /// Rewrites the leading part of <paramref name="node2"/>'s Pron according to
        /// the conversion type that <paramref name="list"/> assigns to the word of
        /// <paramref name="node1"/>.
        /// </summary>
        /// <param name="list">
        /// Flat table of (word, type) pairs, expected to end with a null word.
        /// Type 1 selects njd_set_digit_rule_voiced_sound_symbol_list, type 2
        /// selects njd_set_digit_rule_semivoiced_sound_symbol_list; any other
        /// type leaves node2 untouched.
        /// </param>
        /// <param name="node1">Node whose Word is looked up in <paramref name="list"/>.</param>
        /// <param name="node2">Node whose Pron is rewritten when a prefix matches.</param>
        private void ConvertNumerativePron(string[] list, NJDNode node1, NJDNode node2)
        {
            string word = node1.Word;

            if (word == "*")
            {
                return;
            }

            // Step 1: find the conversion type for node1's word (0 = not listed).
            // The length check keeps the scan inside the array even when the
            // terminating null is missing or the last pair is truncated.
            int type = 0;
            for (int i = 0; i + 1 < list.Length && list[i] != null; i += 2)
            {
                if (list[i] == word)
                {
                    type = Atoi(list[i + 1]);
                    break;
                }
            }

            // Step 2: choose the (prefix, replacement) table for that type.
            string[] symbols;
            if (type == 1)
            {
                symbols = njd_set_digit_rule_voiced_sound_symbol_list;
            }
            else if (type == 2)
            {
                symbols = njd_set_digit_rule_semivoiced_sound_symbol_list;
            }
            else
            {
                return;
            }

            // Step 3: replace the first matching prefix of node2's pronunciation.
            // NOTE(review): StrTopCmp is defined elsewhere; a non-negative result is
            // used here as the offset from which the rest of Pron is kept - confirm.
            string pron = node2.Pron;
            for (int i = 0; i + 1 < symbols.Length && symbols[i] != null; i += 2)
            {
                int offset = StrTopCmp(pron, symbols[i]);
                if (offset >= 0)
                {
                    node2.Pron = symbols[i + 1] + pron.Substring(offset);
                    break;
                }
            }
        }
Example #5
0
        /// <summary>
        /// Copies all feature fields from <paramref name="tNode2"/> into
        /// <paramref name="tNode1"/>. The prev/next links of either node are not
        /// touched.
        /// </summary>
        /// <param name="tNode1">Destination node.</param>
        /// <param name="tNode2">Source node.</param>
        private void Copy(NJDNode tNode1, NJDNode tNode2)
        {
            // Word and its original / reading / pronunciation fields.
            tNode1.m_Word = tNode2.m_Word;
            tNode1.m_Orig = tNode2.m_Orig;
            tNode1.m_Read = tNode2.m_Read;
            tNode1.m_Pron = tNode2.m_Pron;

            // Part-of-speech fields.
            tNode1.m_Pos = tNode2.m_Pos;
            tNode1.m_PosGroup1 = tNode2.m_PosGroup1;
            tNode1.m_PosGroup2 = tNode2.m_PosGroup2;
            tNode1.m_PosGroup3 = tNode2.m_PosGroup3;

            // CType / CForm fields.
            tNode1.m_CType = tNode2.m_CType;
            tNode1.m_CForm = tNode2.m_CForm;

            // Accent, mora size and chaining fields.
            tNode1.m_Acc = tNode2.m_Acc;
            tNode1.m_MoraSize = tNode2.m_MoraSize;
            tNode1.m_ChainRule = tNode2.m_ChainRule;
            tNode1.m_ChainFlag = tNode2.m_ChainFlag;
        }
Example #6
0
        /// <summary>
        /// Appends <paramref name="node"/> to the end of this list. If the node
        /// already has followers chained through next, the whole chain is appended
        /// and the last node of that chain becomes the new tail.
        /// </summary>
        /// <param name="node">First node of the chain to append.</param>
        private void PushNode(NJDNode node)
        {
            if (this.head != null)
            {
                // Non-empty list: hook the chain behind the current tail.
                this.tail.next = node;
                node.prev      = this.tail;
            }
            else
            {
                // Empty list: the chain's first node becomes the head.
                this.head = node;
            }

            // Walk to the end of the appended chain to find the new tail.
            NJDNode last = node;
            while (last.next != null)
            {
                last = last.next;
            }

            this.tail = last;
        }
Example #7
0
        /// <summary>
        /// Reports whether the word of <paramref name="node"/> appears in
        /// <paramref name="list"/>.
        /// </summary>
        /// <param name="list">
        /// Words to search. A null element is treated as the end of the list;
        /// the scan also stops at the end of the array when no null is present.
        /// </param>
        /// <param name="node">Node whose Word is looked up.</param>
        /// <returns>1 when the word is listed; 0 otherwise, including when the word is "*".</returns>
        private int SearchNumerativeClass(string[] list, NJDNode node)
        {
            string word = node.Word;

            if (word == "*")
            {
                return 0;
            }

            // foreach is bounded by the array itself, so an empty table or one
            // without the trailing null sentinel ends the search instead of
            // indexing past the end.
            foreach (string entry in list)
            {
                if (entry == null)
                {
                    break;
                }

                if (entry == word)
                {
                    return 1;
                }
            }

            return 0;
        }
Example #8
0
        /// <summary>
        /// Resets this node to a blank state: every string field becomes null,
        /// m_ChainFlag becomes -1, m_Acc and m_MoraSize become 0, and the node is
        /// detached from its neighbours.
        /// </summary>
        public void Initialize()
        {
            // Detach from the surrounding list.
            this.prev = this.next = null;

            // Word and its original / reading / pronunciation fields.
            m_Word = null;
            m_Orig = null;
            m_Read = null;
            m_Pron = null;

            // Part-of-speech fields.
            m_Pos = null;
            m_PosGroup1 = null;
            m_PosGroup2 = null;
            m_PosGroup3 = null;

            // CType / CForm fields.
            m_CType = null;
            m_CForm = null;

            // Chaining, accent and mora size.
            m_ChainRule = null;
            m_ChainFlag = -1;
            m_Acc = 0;
            m_MoraSize = 0;
        }
Example #9
0
        /// <summary>
        /// Looks up the word of <paramref name="node"/> in <paramref name="list"/>
        /// and, on a match, overwrites the node's Pron, Acc and MoraSize with the
        /// values stored in that record.
        /// </summary>
        /// <param name="list">
        /// Flat table of four-slot records (word, Pron, Acc, MoraSize), expected to
        /// end with a null word. Acc and MoraSize are converted with Atoi.
        /// </param>
        /// <param name="node">Node to update; left unchanged when its word is "*" or not listed.</param>
        private void ConvertDigitPron(string[] list, NJDNode node)
        {
            string word = node.Word;

            if (word == "*")
            {
                return;
            }

            // The length check keeps i + 1 .. i + 3 inside the array even when the
            // terminating null is missing or the last record is truncated.
            for (int i = 0; i + 3 < list.Length && list[i] != null; i += 4)
            {
                if (list[i] != word)
                {
                    continue;
                }

                node.Pron     = list[i + 1];
                node.Acc      = Atoi(list[i + 2]);
                node.MoraSize = Atoi(list[i + 3]);
                return;
            }
        }
Example #10
0
        /// <summary>
        /// Splices <paramref name="node"/>, together with any nodes already chained
        /// after it through next, between <paramref name="prev"/> and
        /// <paramref name="next"/>.
        /// </summary>
        /// <param name="prev">Node that will precede the inserted chain.</param>
        /// <param name="next">Node that will follow the inserted chain.</param>
        /// <param name="node">First node of the chain to insert.</param>
        /// <returns>
        /// The last node of the inserted chain, or null (after logging a warning)
        /// when any of the three arguments is null.
        /// </returns>
        public static NJDNode Insert(NJDNode prev, NJDNode next, NJDNode node)
        {
            // node is validated alongside its neighbours: walking node.next below
            // would otherwise throw on a null node.
            if (prev == null || next == null || node == null)
            {
                Debug.LogWarning("ERROR: NJDNode_insert() in njd_node.c: NJDNodes are not specified.");
                return null;
            }

            // Find the end of the chain being inserted.
            NJDNode tail = node;
            while (tail.next != null)
            {
                tail = tail.next;
            }

            prev.next = node;
            node.prev = prev;
            next.prev = tail;
            tail.next = next;

            return tail;
        }
        /// <summary>
        /// Walks the node list and rebuilds each node's Pron mora by mora, appending
        /// NJD_SET_UNVOICED_VOWEL_QUOTATION after every mora whose unvoiced flag
        /// ends up as 1.
        /// </summary>
        /// <remarks>
        /// A three-mora window (current, next, next-next) is maintained and carried
        /// across node boundaries. If no mora can be obtained at the current
        /// position, an error is logged and the method returns immediately; the
        /// node being processed at that moment and all following nodes keep their
        /// existing Pron.
        /// </remarks>
        public void SetUnvoicedVowel()
        {
            NJDNode node;
            int     index;
            int     len;
            string  buff;
            string  str;

            // Look-ahead window: suffix 1 = current mora, 2 = next, 3 = next-next.
            // For each slot: mora text, owning node, consumed length, unvoiced flag,
            // mora index and accent type as reported by GetMoraInformation.
            string  mora1 = null, mora2 = null, mora3 = null;
            NJDNode nlink1 = null, nlink2 = null, nlink3 = null;
            int     size1 = 0, size2 = 0, size3 = 0;
            int     flag1 = -1, flag2 = -1, flag3 = -1;                 // unknown: -1, voiced: 0, unvoiced: 1
            int     midx1 = 0, midx2 = 1, midx3 = 2;
            int     atype1 = 0, atype2 = 0, atype3 = 0;

            for (node = this.head; node != null; node = node.next)
            {
                // New pronunciation for this node, assembled one mora at a time.
                buff = "";

                // get pronunciation
                str = node.Pron;
                len = str.Length;

                // parse pronunciation; index is advanced at the bottom of the body
                // by the size of the mora that was just consumed
                for (index = 0; index < len;)
                {
                    // Fill whichever window slots are empty. Slots 1 and 2 normally
                    // still hold the values shifted down at the end of the previous
                    // iteration, so usually only slot 3 is fetched here.
                    if (mora1 == null)
                    {
                        GetMoraInformation(node, index, ref mora1, ref nlink1, ref flag1, ref size1, ref midx1, ref atype1);
                    }

                    // No current mora at this position: give up on the whole pass.
                    if (mora1 == null)
                    {
                        Debug.LogError("WARNING: set_unvoiced_vowel() in njd_set_unvoiced_vowel.c: Wrong pron.");
                        return;
                    }

                    if (mora2 == null)
                    {
                        // Seed with values derived from slot 1; GetMoraInformation
                        // replaces them when the mora begins a new, unchained node.
                        midx2  = midx1 + 1;
                        atype2 = atype1;
                        GetMoraInformation(node, index + size1, ref mora2, ref nlink2, ref flag2, ref size2, ref midx2, ref atype2);
                    }

                    if (mora3 == null)
                    {
                        // Same seeding as above, one slot further along.
                        midx3  = midx2 + 1;
                        atype3 = atype2;
                        GetMoraInformation(node, index + size1 + size2, ref mora3, ref nlink3, ref flag3, ref size3, ref midx3, ref atype3);
                    }

                    // rule 1: look-ahead for 'masu' and 'desu'
                    // Applies when moras 1 and 2 belong to the same node, mora 3
                    // belongs to a different one, and the node's Pos is one of the
                    // three listed values.
                    if
                    (
                        mora2 != null && mora3 != null && nlink1 == nlink2 && nlink2 != nlink3 &&
                        (
                            mora1 == NJD_SET_UNVOICED_VOWEL_MA ||
                            mora1 == NJD_SET_UNVOICED_VOWEL_DE
                        ) &&
                        mora2 == NJD_SET_UNVOICED_VOWEL_SU &&
                        (
                            nlink2.Pos == NJD_SET_UNVOICED_VOWEL_DOUSHI ||
                            nlink2.Pos == NJD_SET_UNVOICED_VOWEL_JODOUSHI ||
                            nlink2.Pos == NJD_SET_UNVOICED_VOWEL_KANDOUSHI
                        )
                    )
                    {
                        // Mora 2 stays voiced when the following node's Pron is the
                        // QUESTION or CHOUON constant; otherwise it is unvoiced.
                        if (nlink3.Pron == NJD_SET_UNVOICED_VOWEL_QUESTION || nlink3.Pron == NJD_SET_UNVOICED_VOWEL_CHOUON)
                        {
                            flag2 = 0;
                        }
                        else
                        {
                            flag2 = 1;
                        }
                    }

                    // rule 2: look-ahead for 'shi'
                    // Only considered while mora 2 is still undecided and neither
                    // neighbour is already unvoiced.
                    if
                    (
                        flag1 != 1 && flag2 == -1 && flag3 != 1 && mora2 != null &&
                        nlink2.Pron == NJD_SET_UNVOICED_VOWEL_SHI &&
                        (
                            nlink2.Pos == NJD_SET_UNVOICED_VOWEL_DOUSHI ||
                            nlink2.Pos == NJD_SET_UNVOICED_VOWEL_JODOUSHI ||
                            nlink2.Pos == NJD_SET_UNVOICED_VOWEL_JOSHI
                        )
                    )
                    {
                        if (atype2 == midx2 + 1)
                        {
                            // rule 4: accent type equals mora index + 1, so keep voiced
                            flag2 = 0;
                        }
                        else
                        {
                            // rule 5: decide from the mora pair
                            flag2 = ApplyUnvoiceRule(mora2, mora3);
                        }

                        // Once mora 2 is unvoiced, any still-undecided neighbour is
                        // fixed as voiced.
                        if (flag2 == 1)
                        {
                            if (flag1 == -1)
                            {
                                flag1 = 0;
                            }
                            if (flag3 == -1)
                            {
                                flag3 = 0;
                            }
                        }
                    }

                    // estimate unvoice for the current mora if still undecided
                    if (flag1 == -1)
                    {
                        if (nlink1.Pos == NJD_SET_UNVOICED_VOWEL_FILLER)
                        {
                            // rule 0: the node's Pos is FILLER, keep voiced
                            flag1 = 0;
                        }
                        else
                        if (flag2 == 1)
                        {
                            // rule 3: the next mora is already unvoiced, keep voiced
                            flag1 = 0;
                        }
                        else
                        if (atype1 == midx1 + 1)
                        {
                            // rule 4: accent type equals mora index + 1, keep voiced
                            flag1 = 0;
                        }
                        else
                        {
                            // rule 5: decide from the mora pair
                            flag1 = ApplyUnvoiceRule(mora1, mora2);
                        }
                    }

                    // An unvoiced current mora fixes an undecided next mora as voiced.
                    if (flag1 == 1 && flag2 == -1)
                    {
                        flag2 = 0;
                    }

                    // store pronunciation

                    buff += mora1;

                    // Mark an unvoiced mora by appending the quotation constant.
                    if (flag1 == 1)
                    {
                        buff += NJD_SET_UNVOICED_VOWEL_QUOTATION;
                    }

                    // prepare next step: consume mora 1 and shift the window down
                    index += size1;

                    mora1  = mora2;
                    nlink1 = nlink2;
                    size1  = size2;
                    flag1  = flag2;
                    midx1  = midx2;
                    atype1 = atype2;

                    mora2  = mora3;
                    nlink2 = nlink3;
                    size2  = size3;
                    flag2  = flag3;
                    midx2  = midx3;
                    atype2 = atype3;

                    // Empty slot 3 so that it is fetched again on the next iteration.
                    mora3  = null;
                    nlink3 = null;
                    size3  = 0;
                    flag3  = -1;
                    midx3  = 0;
                    atype3 = 0;
                }

                // Replace the node's pronunciation with the rebuilt string.
                node.Pron = buff;
            }
        }
        //---------------------------------------------------------------------------


        /// <summary>
        /// Reads the mora that starts at <paramref name="index"/> in the Pron of
        /// <paramref name="node"/>, moving on to following nodes when the index
        /// lies at or beyond the end of the current node's Pron.
        /// </summary>
        /// <param name="node">Node at which the search starts.</param>
        /// <param name="index">Offset into node.Pron; the excess is carried over into the next node.</param>
        /// <param name="mora">Receives the matched mora, or null when nothing matched or the list ended.</param>
        /// <param name="nlink">Receives the node the offset resolved to, or null when the list ended.</param>
        /// <param name="flag">
        /// Receives 1 when NJD_SET_UNVOICED_VOWEL_QUOTATION follows the mora, 0 for
        /// the TOUTEN / QUESTION symbols, and -1 otherwise.
        /// </param>
        /// <param name="size">Receives the amount by which the caller should advance its index (0 when nothing was read).</param>
        /// <param name="midx">
        /// Set to 0 when the mora is at offset 0 of a node whose ChainFlag is not 1,
        /// or when the list ended; otherwise left as passed in.
        /// </param>
        /// <param name="atype">
        /// Set to node.Acc under the same offset/ChainFlag condition, to 0 when the
        /// list ended; otherwise left as passed in.
        /// </param>
        private void GetMoraInformation(NJDNode node, int index, ref string mora, ref NJDNode nlink, ref int flag, ref int size, ref int midx, ref int atype)
        {
            string pron = node.Pron;

            // Advance node by node until the offset falls inside a pronunciation.
            while (index >= pron.Length)
            {
                if (node.next == null)
                {
                    // Ran past the last node: report "no mora".
                    mora  = null;
                    nlink = null;
                    flag  = -1;
                    size  = 0;
                    midx  = 0;
                    atype = 0;
                    return;
                }

                index -= pron.Length;
                node   = node.next;
                pron   = node.Pron;
            }

            nlink = node;

            // A mora at the very start of an unchained node restarts the mora
            // index and takes that node's accent value.
            bool startsNewWord = (index == 0) && (node.ChainFlag != 1);
            if (startsNewWord)
            {
                midx  = 0;
                atype = node.Acc;
            }

            // Nodes whose whole Pron is one of the two special symbols are
            // returned as a single voiced mora.
            if (pron == NJD_SET_UNVOICED_VOWEL_TOUTEN)
            {
                mora = NJD_SET_UNVOICED_VOWEL_TOUTEN;
                size = NJD_SET_UNVOICED_VOWEL_TOUTEN.Length;
                flag = 0;
                return;
            }

            if (pron == NJD_SET_UNVOICED_VOWEL_QUESTION)
            {
                mora = NJD_SET_UNVOICED_VOWEL_QUESTION;
                size = NJD_SET_UNVOICED_VOWEL_QUESTION.Length;
                flag = 0;
                return;
            }

            // Start from "nothing matched".
            mora = null;
            flag = -1;
            size = 0;

            // Take the first entry of the mora list that matches at this offset.
            for (int i = 0; njd_set_unvoiced_vowel_mora_list[i] != null; i++)
            {
                int hit = StrTopCmp(pron, index, njd_set_unvoiced_vowel_mora_list[i]);
                if (hit > 0)
                {
                    mora = njd_set_unvoiced_vowel_mora_list[i];
                    size = hit;
                    break;
                }
            }

            // A quotation constant right after the mora marks it as unvoiced and
            // is counted as part of the consumed length.
            int mark = StrTopCmp(pron, index + size, NJD_SET_UNVOICED_VOWEL_QUOTATION);
            if (mark > 0)
            {
                flag  = 1;
                size += mark;
            }
        }
Example #13
0
        public void SetDigit()
        {
            int     i, j;
            NJDNode s = null;
            NJDNode e = null;
            NJDNode node;
            int     find = 0;

            // convert digit sequence
            for (node = this.head; node != null; node = node.next)
            {
                if (find == 0 && node.PosGroup1 == NJD_SET_DIGIT_KAZU)
                {
                    find = 1;
                }
                if (GetDigit(node, 1) >= 0)
                {
                    if (s == null)
                    {
                        s = node;
                    }
                    if (node == this.tail)
                    {
                        e = node;
                    }
                }
                else
                {
                    if (s != null)
                    {
                        e = node.prev;
                    }
                }

                if (s != null && e != null)
                {
                    ConvertDigitSequence(s, e);
                    s = e = null;
                }
            }

            if (find == 0)
            {
                return;
            }

            this.remove_silent_node();

            if (this.head == null)
            {
                return;
            }

            for (node = this.head.next; node != null && node.next != null; node = node.next)
            {
                if
                (
                    node.Word != "*" &&
                    node.prev.Word != "*" &&
                    (node.Word == NJD_SET_DIGIT_TEN1 || node.Word == NJD_SET_DIGIT_TEN2) &&
                    (node.prev.PosGroup1 == NJD_SET_DIGIT_KAZU) &&
                    (node.next.PosGroup1 == NJD_SET_DIGIT_KAZU)
                )
                {
                    node.Load(NJD_SET_DIGIT_TEN_FEATURE);
                    node.ChainFlag = 1;

                    if (node.prev.Word == NJD_SET_DIGIT_ZERO1 || node.prev.Word == NJD_SET_DIGIT_ZERO2)
                    {
                        node.prev.Pron     = NJD_SET_DIGIT_ZERO_BEFORE_DP;
                        node.prev.MoraSize = 2;
                    }
                    else
                    if (node.prev.Word == NJD_SET_DIGIT_TWO)
                    {
                        node.prev.Pron     = NJD_SET_DIGIT_TWO_BEFORE_DP;
                        node.prev.MoraSize = 2;
                    }
                    else
                    if (node.prev.Word == NJD_SET_DIGIT_FIVE)
                    {
                        node.prev.Pron     = NJD_SET_DIGIT_FIVE_BEFORE_DP;
                        node.prev.MoraSize = 2;
                    }
                }
            }

            for (node = this.head.next; node != null; node = node.next)
            {
                if (node.prev.PosGroup1 == NJD_SET_DIGIT_KAZU)
                {
                    if (node.PosGroup2 == NJD_SET_DIGIT_JOSUUSHI || node.PosGroup1 == NJD_SET_DIGIT_FUKUSHIKANOU)
                    {
                        // convert digit pron
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class1b, node) == 1)
                        {
                            ConvertDigitPron(njd_set_digit_rule_conv_table1b, node.prev);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class1c1, node) == 1)
                        {
                            ConvertDigitPron(njd_set_digit_rule_conv_table1c1, node.prev);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class1c2, node) == 1)
                        {
                            ConvertDigitPron(njd_set_digit_rule_conv_table1c2, node.prev);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class1d, node) == 1)
                        {
                            ConvertDigitPron(njd_set_digit_rule_conv_table1d, node.prev);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class1e, node) == 1)
                        {
                            ConvertDigitPron(njd_set_digit_rule_conv_table1e, node.prev);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class1f, node) == 1)
                        {
                            ConvertDigitPron(njd_set_digit_rule_conv_table1f, node.prev);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class1g, node) == 1)
                        {
                            ConvertDigitPron(njd_set_digit_rule_conv_table1g, node.prev);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class1h, node) == 1)
                        {
                            ConvertDigitPron(njd_set_digit_rule_conv_table1h, node.prev);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class1i, node) == 1)
                        {
                            ConvertDigitPron(njd_set_digit_rule_conv_table1i, node.prev);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class1j, node) == 1)
                        {
                            ConvertDigitPron(njd_set_digit_rule_conv_table1j, node.prev);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class1k, node) == 1)
                        {
                            ConvertDigitPron(njd_set_digit_rule_conv_table1k, node.prev);
                        }

                        // convert numerative pron
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class2b, node) == 1)
                        {
                            ConvertNumerativePron(njd_set_digit_rule_conv_table2b, node.prev, node);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class2c, node) == 1)
                        {
                            ConvertNumerativePron(njd_set_digit_rule_conv_table2c, node.prev, node);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class2d, node) == 1)
                        {
                            ConvertNumerativePron(njd_set_digit_rule_conv_table2d, node.prev, node);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class2e, node) == 1)
                        {
                            ConvertNumerativePron(njd_set_digit_rule_conv_table2e, node.prev, node);
                        }
                        else
                        if (SearchNumerativeClass(njd_set_digit_rule_numerative_class2f, node) == 1)
                        {
                            ConvertNumerativePron(njd_set_digit_rule_conv_table2f, node.prev, node);
                        }

                        // modify accent phrase
                        node.prev.ChainFlag = 0;
                        node.ChainFlag      = 1;
                    }
                }
            }

            for (node = this.head.next; node != null; node = node.next)
            {
                if (node.prev.PosGroup1 == NJD_SET_DIGIT_KAZU)
                {
                    if (node.PosGroup1 == NJD_SET_DIGIT_KAZU && node.prev.Word != null && node.Word != null)
                    {
                        // modify accent phrase
                        find = 0;
                        for (i = 0; njd_set_digit_rule_numeral_list4[i] != null; i++)
                        {
                            if (node.prev.Word == njd_set_digit_rule_numeral_list4[i])
                            {
                                for (j = 0; njd_set_digit_rule_numeral_list5[j] != null; j++)
                                {
                                    if (node.Word == njd_set_digit_rule_numeral_list5[j])
                                    {
                                        node.prev.ChainFlag = 0;
                                        node.ChainFlag      = 1;
                                        find = 1;
                                        break;
                                    }
                                }
                                break;
                            }
                        }

                        if (find == 0)
                        {
                            for (i = 0; njd_set_digit_rule_numeral_list5[i] != null; i++)
                            {
                                if (node.prev.Word == njd_set_digit_rule_numeral_list5[i])
                                {
                                    for (j = 0; njd_set_digit_rule_numeral_list4[j] != null; j++)
                                    {
                                        if (node.Word == njd_set_digit_rule_numeral_list4[j])
                                        {
                                            node.ChainFlag = 0;
                                            break;
                                        }
                                    }
                                    break;
                                }
                            }
                        }
                    }

                    if (SearchNumerativeClass(njd_set_digit_rule_numeral_list8, node) == 1)
                    {
                        ConvertDigitPron(njd_set_digit_rule_numeral_list9, node.prev);
                    }

                    if (SearchNumerativeClass(njd_set_digit_rule_numeral_list10, node) == 1)
                    {
                        ConvertDigitPron(njd_set_digit_rule_numeral_list11, node.prev);
                    }

                    if (SearchNumerativeClass(njd_set_digit_rule_numeral_list6, node) == 1)
                    {
                        ConvertNumerativePron(njd_set_digit_rule_numeral_list7, node.prev, node);
                    }
                }
            }

            for (node = this.head; node != null; node = node.next)
            {
                if
                (
                    node.next != null &&
                    node.next.Word != "*" &&
                    (node.PosGroup1 == NJD_SET_DIGIT_KAZU) &&
                    (node.prev == null || node.prev.PosGroup1 != NJD_SET_DIGIT_KAZU) &&
                    (node.next.PosGroup2 == NJD_SET_DIGIT_JOSUUSHI || node.next.PosGroup1 == NJD_SET_DIGIT_FUKUSHIKANOU)
                )
                {
                    // convert class3
                    for (i = 0; njd_set_digit_rule_numerative_class3[i] != null; i += 2)
                    {
                        if (node.next.Word == njd_set_digit_rule_numerative_class3[i] && node.next.Read == njd_set_digit_rule_numerative_class3[i + 1])
                        {
                            for (j = 0; njd_set_digit_rule_conv_table3[j] != null; j += 4)
                            {
                                if (node.Word == njd_set_digit_rule_conv_table3[j])
                                {
                                    node.Read     = njd_set_digit_rule_conv_table3[j + 1];
                                    node.Pron     = njd_set_digit_rule_conv_table3[j + 1];
                                    node.Acc      = Atoi(njd_set_digit_rule_conv_table3[j + 2]);
                                    node.MoraSize = Atoi(njd_set_digit_rule_conv_table3[j + 3]);
                                    break;
                                }
                            }
                            break;
                        }
                    }

                    // person
                    if (node.next.Word == NJD_SET_DIGIT_NIN)
                    {
                        for (i = 0; njd_set_digit_rule_conv_table4[i] != null; i += 2)
                        {
                            if (node.Word == njd_set_digit_rule_conv_table4[i])
                            {
                                node.Load(njd_set_digit_rule_conv_table4[i + 1]);
                                node.next.Pron = null;
                                break;
                            }
                        }
                    }

                    // the day of month
                    if (node.next.Word == NJD_SET_DIGIT_NICHI && node.Word != "*")
                    {
                        if (node.prev != null && node.prev.Word.IndexOf(NJD_SET_DIGIT_GATSU) == 0 && node.Word == NJD_SET_DIGIT_ONE)
                        {
                            node.Load(NJD_SET_DIGIT_TSUITACHI);
                            node.next.Pron = null;
                        }
                        else
                        {
                            for (i = 0; njd_set_digit_rule_conv_table5[i] != null; i += 2)
                            {
                                if (node.Word == njd_set_digit_rule_conv_table5[i])
                                {
                                    node.Load(njd_set_digit_rule_conv_table5[i + 1]);
                                    node.next.Pron = null;
                                    break;
                                }
                            }
                        }
                    }
                    else
                    if (node.next.Word == NJD_SET_DIGIT_NICHIKAN)
                    {
                        for (i = 0; njd_set_digit_rule_conv_table6[i] != null; i += 2)
                        {
                            if (node.Word == njd_set_digit_rule_conv_table6[i])
                            {
                                node.Load(njd_set_digit_rule_conv_table6[i + 1]);
                                node.next.Pron = null;
                                break;
                            }
                        }
                    }
                }
            }

            for (node = this.head; node != null; node = node.next)
            {
                if ((node.prev == null || node.prev.PosGroup1 != NJD_SET_DIGIT_KAZU) && node.next != null && node.next.next != null)
                {
                    if (node.Word == NJD_SET_DIGIT_TEN && node.next.Word == NJD_SET_DIGIT_FOUR)
                    {
                        if (node.next.next.Word == NJD_SET_DIGIT_NICHI)
                        {
                            node.Load(NJD_SET_DIGIT_JUYOKKA);
                            node.next.Pron      = null;
                            node.next.next.Pron = null;
                        }
                        else
                        if (node.next.next.Word == NJD_SET_DIGIT_NICHIKAN)
                        {
                            node.Load(NJD_SET_DIGIT_JUYOKKAKAN);
                            node.next.Pron      = null;
                            node.next.next.Pron = null;
                        }
                    }
                    else
                    if (node.Word == NJD_SET_DIGIT_TWO && node.next.Word == NJD_SET_DIGIT_TEN)
                    {
                        if (node.next.next.Word == NJD_SET_DIGIT_NICHI)
                        {
                            node.Load(NJD_SET_DITIT_HATSUKA);
                            node.next.Pron      = null;
                            node.next.next.Pron = null;
                        }
                        else
                        if (node.next.next.Word == NJD_SET_DIGIT_NICHIKAN)
                        {
                            node.Load(NJD_SET_DIGIT_HATSUKAKAN);
                            node.next.Pron      = null;
                            node.next.next.Pron = null;
                        }
                        else
                        if (node.next.next.Word == NJD_SET_DIGIT_FOUR && node.next.next.next != null)
                        {
                            if (node.next.next.next.Word == NJD_SET_DIGIT_NICHI)
                            {
                                node.Load(NJD_SET_DIGIT_NIJU);
                                node.next.Load(NJD_SET_DITIT_YOKKA);
                                node.next.next.Pron      = null;
                                node.next.next.next.Pron = null;
                            }
                            else
                            if (node.next.next.next.Word == NJD_SET_DIGIT_NICHIKAN)
                            {
                                node.Load(NJD_SET_DIGIT_NIJU);
                                node.next.Load(NJD_SET_DIGIT_YOKKAKAN);
                                node.next.next.Pron      = null;
                                node.next.next.next.Pron = null;
                            }
                        }
                    }
                }
            }

            this.remove_silent_node();
            if (this.head == null)
            {
                return;
            }
        }
Example #14
0
        /// <summary>
        /// Rewrites the run of nodes from <paramref name="start"/> to <paramref name="end"/>
        /// (inclusive) in place.
        /// </summary>
        /// <remarks>
        /// Runs of fewer than two nodes are left alone. When
        /// <see cref="GetDigitSequenceScore"/> is negative, only pronunciation, mora size
        /// and chain attributes of the existing nodes are adjusted. Otherwise nodes loaded
        /// from <c>njd_set_digit_rule_numeral_list2</c> / <c>njd_set_digit_rule_numeral_list3</c>
        /// are inserted after individual digits (presumably the place-value words; confirm
        /// against the rule tables).
        /// </remarks>
        /// <param name="start">First node of the run.</param>
        /// <param name="end">Last node of the run.</param>
        private void ConvertDigitSequence(NJDNode start, NJDNode end)
        {
            NJDNode cursor;
            int     length = 0;

            // Count the nodes in [start, end].
            cursor = start;
            while (cursor != end.next)
            {
                length++;
                cursor = cursor.next;
            }

            if (length <= 1)
            {
                return;
            }

            if (GetDigitSequenceScore(start, end) < 0)
            {
                // Negative score: keep every node, only touch pronunciation and chaining.
                int position = 0;
                for (cursor = start; cursor != end.next; cursor = cursor.next, position++)
                {
                    string word        = cursor.Word;
                    string altPron     = null;
                    bool   hasAltPron  = true;

                    if (word == NJD_SET_DIGIT_ZERO1 || word == NJD_SET_DIGIT_ZERO2)
                    {
                        altPron = NJD_SET_DIGIT_ZERO_AFTER_DP;
                    }
                    else if (word == NJD_SET_DIGIT_TWO)
                    {
                        altPron = NJD_SET_DIGIT_TWO_AFTER_DP;
                    }
                    else if (word == NJD_SET_DIGIT_FIVE)
                    {
                        altPron = NJD_SET_DIGIT_FIVE_AFTER_DP;
                    }
                    else
                    {
                        hasAltPron = false;
                    }

                    if (hasAltPron)
                    {
                        cursor.Pron     = altPron;
                        cursor.MoraSize = 2;
                    }

                    cursor.ChainRule = null;

                    // Even positions start a pair; odd positions chain onto the node before them.
                    if (position % 2 != 0)
                    {
                        cursor.ChainFlag = 1;
                        cursor.prev.Acc  = 3;
                    }
                    else
                    {
                        cursor.ChainFlag = 0;
                    }
                }
                return;
            }

            // slot : position inside the current group of four digits (3 .. 0)
            // group: index into njd_set_digit_rule_numeral_list3 for the current group
            int slot = length % 4;
            if (slot == 0)
            {
                slot = 4;
            }

            int group = (length > slot) ? (length - slot) / 4 : 0;
            slot--;

            if (group > 17)
            {
                return;
            }

            // True once a digit in the current group has produced output.
            bool groupHasValue = false;

            // end.next is re-read on every pass on purpose: nodes are inserted while iterating.
            for (cursor = start; cursor != end.next; cursor = cursor.next)
            {
                int     value = GetDigit(cursor, 0);
                NJDNode inserted;

                if (slot != 0)
                {
                    if (value <= 1)
                    {
                        cursor.Pron     = null;
                        cursor.Acc      = 0;
                        cursor.MoraSize = 0;
                    }

                    if (value > 0)
                    {
                        inserted = new NJDNode();
                        inserted.Load(njd_set_digit_rule_numeral_list2[slot]);
                        cursor        = NJDNode.Insert(cursor, cursor.next, inserted);
                        groupHasValue = true;
                    }
                }
                else
                {
                    if (value != 0)
                    {
                        groupHasValue = true;
                    }
                    else
                    {
                        cursor.Pron     = null;
                        cursor.Acc      = 0;
                        cursor.MoraSize = 0;
                    }

                    if (groupHasValue)
                    {
                        if (group > 0)
                        {
                            inserted = new NJDNode();
                            inserted.Load(njd_set_digit_rule_numeral_list3[group]);
                            cursor = NJDNode.Insert(cursor, cursor.next, inserted);
                        }
                        groupHasValue = false;
                    }

                    group--;
                }

                // Step to the next slot, wrapping 0 -> 3.
                slot = (slot == 0) ? 3 : slot - 1;
            }
        }
Example #15
0
        /// <summary>
        /// Scores a run of nodes by inspecting the nodes immediately before
        /// <paramref name="start"/> and after <paramref name="end"/>.
        /// </summary>
        /// <remarks>
        /// Part-of-speech groups and specific neighbouring words add to or subtract from
        /// the score. <see cref="ConvertDigitSequence"/> takes a different path when the
        /// result is negative.
        /// </remarks>
        /// <param name="start">First node of the run.</param>
        /// <param name="end">Last node of the run.</param>
        /// <returns>The accumulated score; 0 when the run has no neighbours.</returns>
        private int GetDigitSequenceScore(NJDNode start, NJDNode end)
        {
            int total = 0;

            // ---- context before the run ----
            NJDNode before = start.prev;
            if (before != null)
            {
                string  group1       = before.PosGroup1;
                string  group2       = before.PosGroup2;
                string  word         = before.Word;
                NJDNode beforeBefore = before.prev;

                if (group1 == NJD_SET_DIGIT_SUUSETSUZOKU)
                {
                    total += 2;
                }

                if (group2 == NJD_SET_DIGIT_JOSUUSHI || group1 == NJD_SET_DIGIT_FUKUSHIKANOU)
                {
                    total += 1;
                }

                if (word != null)
                {
                    if (word == NJD_SET_DIGIT_TEN1 || word == NJD_SET_DIGIT_TEN2)
                    {
                        // Penalised only when the node before it belongs to the KAZU group.
                        if (beforeBefore != null && beforeBefore.PosGroup1 == NJD_SET_DIGIT_KAZU)
                        {
                            total -= 5;
                        }
                    }
                    else if (word == NJD_SET_DIGIT_HAIHUN1 ||
                             word == NJD_SET_DIGIT_HAIHUN2 ||
                             word == NJD_SET_DIGIT_HAIHUN3 ||
                             word == NJD_SET_DIGIT_HAIHUN4 ||
                             word == NJD_SET_DIGIT_HAIHUN5)
                    {
                        total -= 2;
                    }
                    else if (word == NJD_SET_DIGIT_KAKKO1)
                    {
                        if (beforeBefore != null && beforeBefore.PosGroup1 == NJD_SET_DIGIT_KAZU)
                        {
                            total -= 2;
                        }
                    }
                    else if (word == NJD_SET_DIGIT_KAKKO2 || word == NJD_SET_DIGIT_BANGOU)
                    {
                        total -= 2;
                    }
                }

                // The word two positions back is checked independently of the one above.
                if (beforeBefore != null && beforeBefore.Word == NJD_SET_DIGIT_BANGOU)
                {
                    total -= 2;
                }
            }

            // ---- context after the run ----
            NJDNode after = end.next;
            if (after != null)
            {
                string group1 = after.PosGroup1;
                string group2 = after.PosGroup2;
                string word   = after.Word;

                if (group2 == NJD_SET_DIGIT_JOSUUSHI || group1 == NJD_SET_DIGIT_FUKUSHIKANOU)
                {
                    total += 2;
                }

                if (word != null)
                {
                    if (word == NJD_SET_DIGIT_HAIHUN1 ||
                        word == NJD_SET_DIGIT_HAIHUN2 ||
                        word == NJD_SET_DIGIT_HAIHUN3 ||
                        word == NJD_SET_DIGIT_HAIHUN4 ||
                        word == NJD_SET_DIGIT_HAIHUN5 ||
                        word == NJD_SET_DIGIT_KAKKO1)
                    {
                        total -= 2;
                    }
                    else if (word == NJD_SET_DIGIT_KAKKO2)
                    {
                        NJDNode afterAfter = after.next;
                        if (afterAfter != null && afterAfter.PosGroup1 == NJD_SET_DIGIT_KAZU)
                        {
                            total -= 2;
                        }
                    }
                    else if (word == NJD_SET_DIGIT_BANGOU)
                    {
                        total -= 2;
                    }
                    else if (word == NJD_SET_DIGIT_TEN1 || word == NJD_SET_DIGIT_TEN2)
                    {
                        total += 4;
                    }
                }
            }

            return total;
        }
Example #16
0
        /// <summary>
        /// Fills this node from one comma-separated feature string.
        /// </summary>
        /// <remarks>
        /// Fields are read in this order: word, pos, pos group 1-3, conjugation type,
        /// conjugation form, orig, read, pron, accent, chain rule, chain flag.
        /// <para>
        /// The accent field has the form <c>acc/mora</c>. When it contains several
        /// <c>acc/mora</c> entries separated by <c>':'</c>, the entry is a chained word:
        /// orig, read and pron are split on <c>':'</c> as well, this node receives the
        /// first part, and one new node per remaining part is created and linked through
        /// <c>prev</c>/<c>next</c>.
        /// </para>
        /// <para>
        /// An accent field that is null or empty, contains <c>'*'</c>, or has no
        /// <c>'/'</c> is treated as a symbol: accent and mora size are set to 0.
        /// </para>
        /// </remarks>
        /// <param name="tString">The feature string to parse.</param>
        public void Load(string tString)
        {
            int i;
            int count;

            string tOrig;
            string tRead;
            string tPron;
            string tAcc;

            int tWordOffset;
            int tOrigOffset;
            int tReadOffset;
            int tPronOffset;
            int tAccOffset;

            NJDNode prev = null;

            //----------------------------------

            int    tOffset = 0;
            string tToken;

            //----------------------------------

            string tWord = GetTokenFromString(tString, ref tOffset, ',');

            m_Pos       = GetTokenFromString(tString, ref tOffset, ',');
            m_PosGroup1 = GetTokenFromString(tString, ref tOffset, ',');
            m_PosGroup2 = GetTokenFromString(tString, ref tOffset, ',');
            m_PosGroup3 = GetTokenFromString(tString, ref tOffset, ',');

            m_CType = GetTokenFromString(tString, ref tOffset, ',');
            m_CForm = GetTokenFromString(tString, ref tOffset, ',');

            tOrig = GetTokenFromString(tString, ref tOffset, ',');
            tRead = GetTokenFromString(tString, ref tOffset, ',');
            tPron = GetTokenFromString(tString, ref tOffset, ',');
            tAcc  = GetTokenFromString(tString, ref tOffset, ',');

            m_ChainRule = GetTokenFromString(tString, ref tOffset, ',');

            tToken = GetTokenFromString(tString, ref tOffset, ',');
            int tChainFlag;
            if (int.TryParse(tToken, out tChainFlag) == false)
            {
                tChainFlag = -1;        // default is -1
            }
            m_ChainFlag = tChainFlag;

            // for symbol
            // Char overloads of IndexOf compare ordinally; the null/empty guard avoids a
            // NullReferenceException when the accent field is missing.
            if (string.IsNullOrEmpty(tAcc) == true || tAcc.IndexOf('*') >= 0 || tAcc.IndexOf('/') < 0)
            {
                m_Word     = tWord;
                m_Orig     = tOrig;
                m_Read     = tRead;
                m_Pron     = tPron;
                m_Acc      = 0;
                m_MoraSize = 0;
                return;
            }

            // One '/' per "acc/mora" entry, so this is the number of chained parts.
            count = 0;
            for (i = 0; i < tAcc.Length; i++)
            {
                if (tAcc[i] == '/')
                {
                    count++;
                }
            }

            // for single word
            if (count == 1)
            {
                m_Word = tWord;
                m_Orig = tOrig;
                m_Read = tRead;
                m_Pron = tPron;

                tAccOffset = 0;

                tToken = GetTokenFromString(tAcc, ref tAccOffset, '/');
                m_Acc  = ParseAccentNumber(tToken, "WARNING: NJDNode_load() in njd_node.c: Accent is empty.");

                tToken     = GetTokenFromString(tAcc, ref tAccOffset, ':');
                m_MoraSize = ParseAccentNumber(tToken, "WARNING: NJDNode_load() in njd_node.c: Mora size is empty.");

                return;
            }

            // parse chained word
            tWordOffset = 0;
            tOrigOffset = 0;
            tReadOffset = 0;
            tPronOffset = 0;
            tAccOffset  = 0;

            NJDNode node = this;

            for (i = 0; i < count; i++)
            {
                if (i > 0)
                {
                    // Each further part gets its own node, seeded from the previous one.
                    // NOTE(review): Copy is declared elsewhere; assumed to copy prev into node.
                    node = new NJDNode();
                    node.Copy(node, prev);
                    node.m_ChainFlag = 0;
                    node.prev        = prev;
                    prev.next        = node;
                }

                // orig
                // The sub-fields are ':'-separated. tOrig/tRead/tPron were themselves cut at
                // ',' and so can never contain one; splitting on ',' returned the whole field.
                tToken      = GetTokenFromString(tOrig, ref tOrigOffset, ':');
                node.m_Orig = tToken;   // was assigned to this.m_Orig, clobbering the head node

                // string
                if (i + 1 < count)
                {
                    // Non-final parts use their orig as the surface string.
                    node.m_Word  = tToken;
                    tWordOffset += tToken.Length;
                }
                else
                {
                    // The final part takes whatever remains of the surface string; guard
                    // against the orig parts being longer than the surface string.
                    node.m_Word = (tWordOffset <= tWord.Length) ? tWord.Substring(tWordOffset) : "";
                }

                // read
                tToken      = GetTokenFromString(tRead, ref tReadOffset, ':');
                node.m_Read = tToken;

                // pron
                tToken      = GetTokenFromString(tPron, ref tPronOffset, ':');
                node.m_Pron = tToken;

                // acc
                tToken     = GetTokenFromString(tAcc, ref tAccOffset, '/');
                node.m_Acc = ParseAccentNumber(tToken, "WARNING: NJDNode_load() in njd_node.c: Accent is empty.");

                // mora size
                tToken          = GetTokenFromString(tAcc, ref tAccOffset, ':');
                node.m_MoraSize = ParseAccentNumber(tToken, "WARNING: NJDNode_load() in njd_node.c: Mora size is empty.");

                prev = node;
            }
        }

        /// <summary>
        /// Parses one numeric part of the accent field.
        /// </summary>
        /// <param name="tToken">The token to parse.</param>
        /// <param name="tEmptyWarning">Warning logged when the token is null or empty.</param>
        /// <returns>The parsed value, or 0 when the token is empty or not a number.</returns>
        private static int ParseAccentNumber(string tToken, string tEmptyWarning)
        {
            if (string.IsNullOrEmpty(tToken) == true)
            {
                Debug.LogWarning(tEmptyWarning);
                return 0;
            }

            int tValue;
            if (int.TryParse(tToken, out tValue) == false)
            {
                tValue = 0;
            }
            return tValue;
        }
        /// <summary>
        /// Walks the node list and updates accent values according to each node's chain
        /// rule, then applies extra adjustments for chained nodes in the KAZU group.
        /// </summary>
        /// <remarks>
        /// Nodes whose <c>Word</c> is null are skipped entirely. A node that is the list
        /// head, or whose <c>ChainFlag</c> is not 1, becomes the "top" node and resets the
        /// running mora count; rule results are written to that top node's <c>Acc</c>.
        /// A chained node met before any top node exists is also treated as a top node,
        /// which avoids dereferencing a null top node when the list begins with skipped
        /// nodes.
        /// </remarks>
        public void SetAccentType()
        {
            NJDNode node;
            NJDNode top_node  = null;
            string  rule      = "";
            int     add_type  = 0;
            int     mora_size = 0;

            if (this.head == null)
            {
                return;
            }

            for (node = this.head; node != null; node = node.next)
            {
                if (node.Word == null)
                {
                    continue;
                }

                if (node == this.head || node.ChainFlag != 1 || top_node == null)
                {
                    // store the top node
                    top_node  = node;
                    mora_size = 0;
                }
                else
                if (node.prev != null && node.ChainFlag == 1)
                {
                    // get accent change type
                    GetRule(node.ChainRule, node.prev.Pos, ref rule, ref add_type);

                    // change accent type
                    switch (rule)
                    {
                        case "F2":
                            if (top_node.Acc == 0)
                            {
                                top_node.Acc = mora_size + add_type;
                            }
                            break;

                        case "F3":
                            if (top_node.Acc != 0)
                            {
                                top_node.Acc = mora_size + add_type;
                            }
                            break;

                        case "F4":
                            top_node.Acc = mora_size + add_type;
                            break;

                        case "F5":
                        case "C4":
                        case "P6":
                            top_node.Acc = 0;
                            break;

                        case "C1":
                            // for noun
                            top_node.Acc = mora_size + node.Acc;
                            break;

                        case "C2":
                            top_node.Acc = mora_size + 1;
                            break;

                        case "C3":
                            top_node.Acc = mora_size;
                            break;

                        case "P1":
                            // for postfix
                            if (node.Acc == 0)
                            {
                                top_node.Acc = 0;
                            }
                            else
                            {
                                top_node.Acc = mora_size + node.Acc;
                            }
                            break;

                        case "P2":
                            if (node.Acc == 0)
                            {
                                top_node.Acc = mora_size + 1;
                            }
                            else
                            {
                                top_node.Acc = mora_size + node.Acc;
                            }
                            break;

                        case "P14":
                            if (node.Acc != 0)
                            {
                                top_node.Acc = mora_size + node.Acc;
                            }
                            break;

                        default:
                            // "*" (no change), "F1" (ancillary word), "C5" and any
                            // unrecognised rule leave the accent untouched.
                            break;
                    }
                }

                // change accent type for digit
                if
                (
                    (node.prev != null) &&
                    (node.ChainFlag == 1) &&
                    (node.prev.PosGroup1 == NJD_SET_ACCENT_TYPE_KAZU) &&
                    (node.PosGroup1 == NJD_SET_ACCENT_TYPE_KAZU)
                )
                {
                    NJDNode prev_node = node.prev;
                    string  prev_word = prev_node.Word;

                    if (node.Word == NJD_SET_ACCENT_TYPE_JYUU)
                    {
                        // 10^1
                        // The former SAN/YON/KYUU/NAN/SUU test assigned 1 in both of its
                        // branches, so the assignment is unconditional.
                        prev_node.Acc = 1;

                        if
                        (
                            prev_word != null &&
                            (
                                prev_word == NJD_SET_ACCENT_TYPE_GO ||
                                prev_word == NJD_SET_ACCENT_TYPE_ROKU ||
                                prev_word == NJD_SET_ACCENT_TYPE_HACHI
                            )
                        )
                        {
                            if
                            (
                                node.next != null && node.next.Word != null &&
                                (
                                    node.next.Word == NJD_SET_ACCENT_TYPE_ICHI ||
                                    node.next.Word == NJD_SET_ACCENT_TYPE_NI ||
                                    node.next.Word == NJD_SET_ACCENT_TYPE_SAN ||
                                    node.next.Word == NJD_SET_ACCENT_TYPE_YON ||
                                    node.next.Word == NJD_SET_ACCENT_TYPE_GO ||
                                    node.next.Word == NJD_SET_ACCENT_TYPE_ROKU ||
                                    node.next.Word == NJD_SET_ACCENT_TYPE_NANA ||
                                    node.next.Word == NJD_SET_ACCENT_TYPE_HACHI ||
                                    node.next.Word == NJD_SET_ACCENT_TYPE_KYUU
                                )
                            )
                            {
                                prev_node.Acc = 0;
                            }
                        }
                    }
                    else
                    if (node.Word == NJD_SET_ACCENT_TYPE_HYAKU)
                    {
                        // 10^2
                        if (prev_word != null && prev_word == NJD_SET_ACCENT_TYPE_NANA)
                        {
                            prev_node.Acc = 2;
                        }
                        else
                        if
                        (
                            prev_word != null &&
                            (
                                prev_word == NJD_SET_ACCENT_TYPE_SAN ||
                                prev_word == NJD_SET_ACCENT_TYPE_YON ||
                                prev_word == NJD_SET_ACCENT_TYPE_KYUU ||
                                prev_word == NJD_SET_ACCENT_TYPE_NAN
                            )
                        )
                        {
                            prev_node.Acc = 1;
                        }
                        else
                        {
                            prev_node.Acc = prev_node.MoraSize + node.MoraSize;
                        }
                    }
                    else
                    if (node.Word == NJD_SET_ACCENT_TYPE_SEN || node.Word == NJD_SET_ACCENT_TYPE_MAN)
                    {
                        // 10^3 and 10^4
                        prev_node.Acc = prev_node.MoraSize + 1;
                    }
                    else
                    if (node.Word == NJD_SET_ACCENT_TYPE_OKU)
                    {
                        // 10^8
                        if
                        (
                            prev_word != null &&
                            (
                                prev_word == NJD_SET_ACCENT_TYPE_ICHI ||
                                prev_word == NJD_SET_ACCENT_TYPE_ROKU ||
                                prev_word == NJD_SET_ACCENT_TYPE_NANA ||
                                prev_word == NJD_SET_ACCENT_TYPE_HACHI ||
                                prev_word == NJD_SET_ACCENT_TYPE_IKU
                            )
                        )
                        {
                            prev_node.Acc = 2;
                        }
                        else
                        {
                            prev_node.Acc = 1;
                        }
                    }
                    else
                    if (node.Word == NJD_SET_ACCENT_TYPE_CHOU)
                    {
                        // 10^12
                        if
                        (
                            prev_word != null &&
                            (
                                prev_word == NJD_SET_ACCENT_TYPE_ROKU ||
                                prev_word == NJD_SET_ACCENT_TYPE_NANA
                            )
                        )
                        {
                            prev_node.Acc = 2;
                        }
                        else
                        {
                            prev_node.Acc = 1;
                        }
                    }
                }

                // An unchained JYUU followed by a KAZU-group node has its accent cleared.
                if
                (
                    node.Word == NJD_SET_ACCENT_TYPE_JYUU &&
                    node.ChainFlag != 1 &&
                    node.next != null &&
                    node.next.PosGroup1 == NJD_SET_ACCENT_TYPE_KAZU
                )
                {
                    node.Acc = 0;
                }

                mora_size += node.MoraSize;
            }
        }
Example #18
0
 /// <summary>
 /// Resets the list to empty by clearing both end references.
 /// </summary>
 public void Initialize()
 {
     this.tail = null;
     this.head = null;
 }