Ejemplo n.º 1
0
        /// <summary>
        /// Loads the sieve called <paramref name="sievename"/>, either by constructing it or by reading
        /// it from a stored model, depending on the classifier type that
        /// <c>HybridCorefProperties.GetClassifierType</c> reports for that name.
        /// </summary>
        /// <remarks>
        /// <c>Rule</c>: the class <c>edu.stanford.nlp.coref.hybrid.sieve.&lt;sievename&gt;</c> is instantiated
        /// reflectively through its parameterless constructor, then its <c>props</c> and <c>lang</c> fields are set.
        /// <c>Rf</c>: an <c>RFSieve</c> is read from the path returned by <c>HybridCorefProperties.GetPathModel</c>
        /// and its <c>thresMerge</c> is set from <c>HybridCorefProperties.GetMergeThreshold</c>.
        /// <c>Oracle</c>: a new <c>OracleSieve</c> is constructed and its <c>props</c> field is set.
        /// </remarks>
        /// <param name="props">Configuration that is queried for the classifier type, language, model path and merge threshold.</param>
        /// <param name="sievename">Name of the sieve; for rule sieves it is also the simple class name to instantiate.</param>
        /// <returns>The sieve instance configured as described above.</returns>
        /// <exception cref="System.ArgumentException">The classifier type is not Rule, Rf or Oracle.</exception>
        /// <exception cref="System.Exception">Nothing is caught here, so any failure raised by the called helpers propagates unchanged.</exception>
        public static Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve LoadSieve(Properties props, string sievename)
        {
            switch (HybridCorefProperties.GetClassifierType(props, sievename))
            {
            case Sieve.ClassifierType.Rule:
            {
                // Rule sieves are located by name: the sieve name doubles as the class name.
                DeterministicCorefSieve sieve = (DeterministicCorefSieve)Sharpen.Runtime.GetType("edu.stanford.nlp.coref.hybrid.sieve." + sievename).GetConstructor().NewInstance();
                sieve.props = props;
                sieve.lang  = HybridCorefProperties.GetLanguage(props);
                return sieve;
            }

            case Sieve.ClassifierType.Rf:
            {
                // Look the model path up once and reuse it for both the log line and the load.
                string modelPath = HybridCorefProperties.GetPathModel(props, sievename);
                log.Info("Loading sieve: " + sievename + " from " + modelPath + " ... ");
                RFSieve rfsieve = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(modelPath);
                rfsieve.thresMerge = HybridCorefProperties.GetMergeThreshold(props, sievename);
                log.Info("done. Merging threshold: " + rfsieve.thresMerge);
                return rfsieve;
            }

            case Sieve.ClassifierType.Oracle:
            {
                OracleSieve oracleSieve = new OracleSieve(props, sievename);
                oracleSieve.props = props;
                return oracleSieve;
            }

            default:
            {
                // A specific exception type (still caught by handlers for System.Exception) that
                // names the offending sieve, instead of a bare System.Exception.
                throw new ArgumentException("no sieve type specified for sieve: " + sievename, "sievename");
            }
            }
        }
        /// <summary>
        /// Produces a diagnostic report for mention <paramref name="m"/> when the decision implied by
        /// <paramref name="probs"/> is judged wrong, and an empty string when it is judged right.
        /// </summary>
        /// <remarks>
        /// The decision counts as correct when either no candidate scores above <c>sieve.thresMerge</c> and
        /// <c>IsFirstMention</c> holds for the mention, or some candidate scores above the threshold and
        /// <c>Sieve.IsReallyCoref</c> accepts the <c>Counters.Argmax</c> candidate.
        /// The report lists sentences 0 through <c>m.sentNum</c> twice (under "GOLD CLUSTER ID" and
        /// "MENTION ID"), the mention's attributes, its context parse tree when present, and one row per
        /// candidate in <c>Counters.ToSortedList(probs)</c> order showing the oracle verdict, the first
        /// matching dcoref sieve, the candidate's position in the dcoref antecedent ordering, and its score.
        /// </remarks>
        /// <param name="m">Mention being analysed.</param>
        /// <param name="document">Document supplying sentences, clusters and predicted mentions.</param>
        /// <param name="probs">Score per candidate antecedent, keyed by mention ID.</param>
        /// <param name="mIdx">Mention index forwarded to <c>Sieve.GetOrderedAntecedents</c>.</param>
        /// <param name="dict">Dictionaries forwarded to the antecedent ordering and to the dcoref sieve checks.</param>
        /// <param name="sieve">Sieve whose <c>mType</c>, <c>aType</c>, <c>maxSentDist</c> and <c>thresMerge</c> are read.</param>
        /// <returns>The formatted report, or <c>string.Empty</c> when the decision is judged correct.</returns>
        /// <exception cref="System.Exception"/>
        public static string PrintErrorLog(Mention m, Document document, ICounter <int> probs, int mIdx, Dictionaries dict, RFSieve sieve)
        {
            StringBuilder report = new StringBuilder();

            report.Append("\nERROR START-----------------------------------------------------------------------\n");
            report.Append("RESOLVER TYPE: mType: " + sieve.mType + ", aType: " + sieve.aType + "\n");
            report.Append("DOCUMENT: " + document.docInfo["DOC_ID"] + ", " + document.docInfo["DOC_PART"] + "\n");

            // First pass over sentences 0..m.sentNum, rendered with both flags set to true.
            report.Append("\nGOLD CLUSTER ID\n");
            for (int goldIdx = 0, goldDist = m.sentNum; goldDist >= 0; goldIdx++, goldDist--)
            {
                if (goldDist == sieve.maxSentDist)
                {
                    report.Append("\tstart compare from here-------------\n");
                }
                report.Append("\tSENT " + goldIdx + "\t" + SentenceStringWithMention(goldIdx, document, true, true) + "\n");
            }

            // Second pass over the same sentences, rendered with both flags set to false.
            report.Append("\nMENTION ID\n");
            for (int predIdx = 0, predDist = m.sentNum; predDist >= 0; predIdx++, predDist--)
            {
                if (predDist == sieve.maxSentDist)
                {
                    report.Append("\tstart compare from here-------------\n");
                }
                report.Append("\tSENT " + predIdx + "\t" + SentenceStringWithMention(predIdx, document, false, false) + "\n");
            }

            // Collect the dcoref antecedent ordering, nearest sentence first, then index it by mention ID.
            IList<Mention> rankedAntecedents = new List<Mention>();
            int farthestDist = Math.Min(sieve.maxSentDist, m.sentNum);
            for (int dist = 0; dist <= farthestDist; dist++)
            {
                Sharpen.Collections.AddAll(rankedAntecedents, Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.GetOrderedAntecedents(m, m.sentNum - dist, mIdx, document.predictedMentions, dict));
            }
            IDictionary<int, int> rankByMentionId = Generics.NewHashMap();
            for (int rank = 0; rank < rankedAntecedents.Count; rank++)
            {
                rankByMentionId[rankedAntecedents[rank].mentionID] = rank;
            }

            CorefCluster mentionCluster  = document.corefClusters[m.corefClusterID];
            bool         firstMention    = IsFirstMention(m, document);
            bool         antecedentFound = probs.Size() > 0 && Counters.Max(probs) > sieve.thresMerge;
            // With an antecedent found, correctness is the oracle's verdict on the best candidate;
            // without one, the decision is correct exactly when this is a first mention.
            bool decisionCorrect = antecedentFound
                ? Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.IsReallyCoref(document, m.mentionID, Counters.Argmax(probs))
                : firstMention;
            bool barePlural = m.originalSpan.Count == 1 && m.headWord.Tag().Equals("NNS");

            if (decisionCorrect)
            {
                return string.Empty;
            }

            report.Append("\nMENTION: " + m.SpanToString() + " (" + m.mentionID + ")");
            report.Append("\tperson: " + m.person);
            report.Append("\tsingleton? " + (!m.hasTwin));
            report.Append("\t\tisFirstMention? " + firstMention);
            report.Append("\t\tfoundAnt? " + antecedentFound);
            report.Append("\t\tcorrectDecision? " + decisionCorrect);
            report.Append("\tbarePlural? " + barePlural);

            report.Append("\n\ttype: " + m.mentionType);
            report.Append("\tHeadword: " + m.headWord.Word());
            report.Append("\tNEtype: " + m.nerString);
            report.Append("\tnumber: " + m.number);
            report.Append("\tgender: " + m.gender);
            report.Append("\tanimacy: " + m.animacy + "\n");

            if (m.contextParseTree != null)
            {
                report.Append(m.contextParseTree.PennString());
            }

            report.Append("\n\n\t\tOracle\t\tDcoref\t\t\tRF\t\tAntecedent\n");
            foreach (int candidateId in Counters.ToSortedList(probs))
            {
                Mention      candidate        = document.predictedMentionsByID[candidateId];
                CorefCluster candidateCluster = document.corefClusters[candidate.corefClusterID];
                bool         goldCoref        = Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.IsReallyCoref(document, m.mentionID, candidateId);
                double       score            = probs.GetCount(candidateId);
                int          rankInDcoref     = rankByMentionId[candidateId];

                string oracleLabel;
                if (goldCoref)
                {
                    oracleLabel = "coref   ";
                }
                else
                {
                    oracleLabel = "notcoref";
                }

                // Report the first dcoref sieve, in this fixed order, that links the pair.
                string dcorefLabel;
                if (dcorefDiscourse.Coreferent(document, mentionCluster, candidateCluster, m, candidate, dict, null))
                {
                    dcorefLabel = "coref-discourse";
                }
                else if (dcorefExactString.Coreferent(document, mentionCluster, candidateCluster, m, candidate, dict, null))
                {
                    dcorefLabel = "coref-exactString";
                }
                else if (dcorefRelaxedExactString.Coreferent(document, mentionCluster, candidateCluster, m, candidate, dict, null))
                {
                    dcorefLabel = "coref-relaxedExact";
                }
                else if (dcorefPreciseConstructs.Coreferent(document, mentionCluster, candidateCluster, m, candidate, dict, null))
                {
                    dcorefLabel = "coref-preciseConstruct";
                }
                else if (dcorefHead1.Coreferent(document, mentionCluster, candidateCluster, m, candidate, dict, null))
                {
                    dcorefLabel = "coref-head1";
                }
                else if (dcorefHead2.Coreferent(document, mentionCluster, candidateCluster, m, candidate, dict, null))
                {
                    dcorefLabel = "coref-head2";
                }
                else if (dcorefHead3.Coreferent(document, mentionCluster, candidateCluster, m, candidate, dict, null))
                {
                    dcorefLabel = "coref-head3";
                }
                else if (dcorefHead4.Coreferent(document, mentionCluster, candidateCluster, m, candidate, dict, null))
                {
                    dcorefLabel = "coref-head4";
                }
                else if (dcorefRelaxedHead.Coreferent(document, mentionCluster, candidateCluster, m, candidate, dict, null))
                {
                    dcorefLabel = "coref-relaxedHead";
                }
                else if (dcorefPronounSieve.Coreferent(document, mentionCluster, candidateCluster, m, candidate, dict, null))
                {
                    dcorefLabel = "coref-pronounSieve";
                }
                else if (dcorefSpeaker.Coreferent(document, mentionCluster, candidateCluster, m, candidate, dict, null))
                {
                    dcorefLabel = "coref-speaker";
                }
                else
                {
                    dcorefLabel = "notcoref";
                }

                string dcorefColumn = dcorefLabel + "\t" + rankInDcoref.ToString();
                string scoreColumn  = df.Format(score);
                report.Append("\t\t" + oracleLabel + "\t" + dcorefColumn + "\t" + scoreColumn + "\t\t" + candidate.SpanToString() + " (" + candidate.mentionID + ")\n");
            }
            report.Append("ERROR END -----------------------------------------------------------------------\n");
            return report.ToString();
        }