/*
  * private void loadGenderNumber(String file, String neutralWordsFile) {
  * try {
  * getWordsFromFile(neutralWordsFile, neutralWords, false);
  * Map<List<String>, Gender> temp = IOUtils.readObjectFromURLOrClasspathOrFileSystem(file);
  * genderNumber.putAll(temp);
  * } catch (IOException e) {
  * throw new RuntimeIOException(e);
  * } catch (ClassNotFoundException e) {
  * throw new RuntimeIOException(e);
  * }
  * }
  */
 /// <summary>Load the Bergsma and Lin (2006) gender and number list into <c>genderNumber</c>.</summary>
 /// <remarks>
 /// Each input line is split on a tab into a phrase and a count field, and the count field is
 /// split on single spaces into male, female and neutral counts. A gender is assigned when its
 /// count is greater than 2 and half of that count exceeds the sum of the other two counts
 /// (checked in the order male, female, neutral). Lines for which no gender qualifies are
 /// skipped. The neutral word list is loaded through <c>GetWordsFromFile</c> right after the
 /// reader has been opened.
 /// </remarks>
 /// <param name="file">Location of the gender/number list, passed to <c>IOUtils.ReaderFromString</c>.</param>
 /// <param name="neutralWordsFile">Location of the neutral word list, passed to <c>GetWordsFromFile</c>.</param>
 private void LoadGenderNumber(string file, string neutralWordsFile)
 {
     try
     {
         using (BufferedReader input = IOUtils.ReaderFromString(file))
         {
             GetWordsFromFile(neutralWordsFile, neutralWords, false);
             // Scratch arrays handed to SplitOnChar on every line: [phrase, counts] and [male, female, neutral].
             string[] columns = new string[2];
             string[] counts  = new string[3];
             string   row;
             while ((row = input.ReadLine()) != null)
             {
                 StringUtils.SplitOnChar(columns, row, '\t');
                 StringUtils.SplitOnChar(counts, columns[1], ' ');
                 int maleCount    = System.Convert.ToInt32(counts[0]);
                 int femaleCount  = System.Convert.ToInt32(counts[1]);
                 int neutralCount = System.Convert.ToInt32(counts[2]);

                 // A gender wins only when it has more than two observations and
                 // half of its count still exceeds the other two counts combined.
                 Dictionaries.Gender label = Dictionaries.Gender.Unknown;
                 if (maleCount * 0.5 > femaleCount + neutralCount && maleCount > 2)
                 {
                     label = Dictionaries.Gender.Male;
                 }
                 else if (femaleCount * 0.5 > maleCount + neutralCount && femaleCount > 2)
                 {
                     label = Dictionaries.Gender.Female;
                 }
                 else if (neutralCount * 0.5 > maleCount + femaleCount && neutralCount > 2)
                 {
                     label = Dictionaries.Gender.Neutral;
                 }

                 // Only phrases with a decided gender are recorded, keyed by their space-separated tokens.
                 if (label != Dictionaries.Gender.Unknown)
                 {
                     IList <string> phrase = Arrays.AsList(columns[0].Split(" "));
                     genderNumber[phrase] = label;
                 }
             }
         }
     }
     catch (IOException ioe)
     {
         throw new RuntimeIOException(ioe);
     }
 }
 /// <summary>This constructor is used to recreate a CorefMention following serialization.</summary>
 /// <remarks>Every argument is stored unchanged in the field of the same name.</remarks>
 /// <param name="mentionType">Value for the <c>mentionType</c> field.</param>
 /// <param name="number">Value for the <c>number</c> field.</param>
 /// <param name="gender">Value for the <c>gender</c> field.</param>
 /// <param name="animacy">Value for the <c>animacy</c> field.</param>
 /// <param name="startIndex">Value for the <c>startIndex</c> field.</param>
 /// <param name="endIndex">Value for the <c>endIndex</c> field.</param>
 /// <param name="headIndex">Value for the <c>headIndex</c> field.</param>
 /// <param name="corefClusterID">Value for the <c>corefClusterID</c> field.</param>
 /// <param name="mentionID">Value for the <c>mentionID</c> field.</param>
 /// <param name="sentNum">Value for the <c>sentNum</c> field.</param>
 /// <param name="position">Value for the <c>position</c> field.</param>
 /// <param name="mentionSpan">Value for the <c>mentionSpan</c> field.</param>
 public CorefMention(
     Dictionaries.MentionType mentionType,
     Dictionaries.Number number,
     Dictionaries.Gender gender,
     Dictionaries.Animacy animacy,
     int startIndex,
     int endIndex,
     int headIndex,
     int corefClusterID,
     int mentionID,
     int sentNum,
     IntTuple position,
     string mentionSpan)
 {
     // Identifiers.
     this.corefClusterID = corefClusterID;
     this.mentionID = mentionID;

     // Linguistic attributes.
     this.mentionType = mentionType;
     this.number = number;
     this.gender = gender;
     this.animacy = animacy;

     // Location of the mention.
     this.sentNum = sentNum;
     this.startIndex = startIndex;
     this.endIndex = endIndex;
     this.headIndex = headIndex;
     this.position = position;

     // Text of the mention.
     this.mentionSpan = mentionSpan;
 }
 /// <summary>This constructor builds the external CorefMention class from the internal Mention.</summary>
 /// <remarks>
 /// Type, number, gender, animacy, cluster id, mention id and span text are copied from
 /// <paramref name="m"/> unchanged. The start, end and head indices, the sentence number and both
 /// components of <paramref name="pos"/> are stored incremented by one (index starts from 1).
 /// As a side effect, the cluster id is written onto <c>m.headWord</c> under
 /// <c>CorefCoreAnnotations.CorefClusterIdAnnotation</c>.
 /// </remarks>
 /// <param name="m">Internal mention to copy from; its head word is annotated with the cluster id.</param>
 /// <param name="pos">Position tuple whose elements 0 and 1 are read and shifted by one.</param>
 public CorefMention(Mention m, IntTuple pos)
 {
     // Values taken over as-is from the internal mention.
     this.mentionType = m.mentionType;
     this.number = m.number;
     this.gender = m.gender;
     this.animacy = m.animacy;
     this.corefClusterID = m.corefClusterID;
     this.mentionID = m.mentionID;
     this.mentionSpan = m.SpanToString();

     // index starts from 1
     this.startIndex = m.startIndex + 1;
     this.endIndex = m.endIndex + 1;
     this.headIndex = m.headIndex + 1;
     this.sentNum = m.sentNum + 1;

     IntTuple shifted = new IntTuple(2);
     shifted.Set(0, pos.Get(0) + 1);
     shifted.Set(1, pos.Get(1) + 1);
     this.position = shifted;

     // Record the cluster id on the internal mention's head word.
     m.headWord.Set(typeof(CorefCoreAnnotations.CorefClusterIdAnnotation), this.corefClusterID);
 }
        /// <summary>Loads the CorefChain objects from the serialized buffer.</summary>
        /// <remarks>
        /// Expected layout, one record per line:
        /// <list type="bullet">
        /// <item><description>first line: the number of clusters; if it is empty after trimming, <c>null</c> is returned;</description></item>
        /// <item><description>per cluster, a header line holding the cluster id and the number of mentions;</description></item>
        /// <item><description>per mention, a line of whitespace-separated fields: two integers forming the map key,
        /// the representative flag ("1" means representative), mention type, number, gender, animacy, start index,
        /// end index, head index, cluster id, mention id, sentence number, the length of the position tuple,
        /// that many position elements, and finally the escaped mention span.</description></item>
        /// </list>
        /// One additional line is consumed after the last cluster. When several mentions of a cluster carry
        /// the representative flag, the last one read is used.
        /// </remarks>
        /// <param name="reader">the buffer</param>
        /// <returns>A map from cluster id to clusters, or <c>null</c> when the first line is empty</returns>
        /// <exception cref="System.IO.IOException"/>
        private static IDictionary <int, CorefChain> LoadCorefChains(BufferedReader reader)
        {
            // Java's String.split takes a regular expression, whereas .NET's string.Split(string) treats
            // its argument as a literal separator. Split through Regex so "\s" really means whitespace.
            // ECMAScript mode keeps \s limited to ASCII whitespace, as in Java, so that e.g. a
            // non-breaking space inside a span does not start a new field.
            const string whitespace = "\\s";

            string line = reader.ReadLine().Trim();

            if (line.Length == 0)
            {
                return(null);
            }
            int clusterCount = System.Convert.ToInt32(line);
            IDictionary <int, CorefChain> chains = Generics.NewHashMap();

            // read each cluster
            for (int c = 0; c < clusterCount; c++)
            {
                line = reader.ReadLine().Trim();
                string[] bits = System.Text.RegularExpressions.Regex.Split(
                    line, whitespace, System.Text.RegularExpressions.RegexOptions.ECMAScript);
                int cid          = System.Convert.ToInt32(bits[0]);
                int mentionCount = System.Convert.ToInt32(bits[1]);
                IDictionary <IntPair, ICollection <CorefChain.CorefMention> > mentionMap = Generics.NewHashMap();
                CorefChain.CorefMention representative = null;
                // read each mention in this cluster
                for (int m = 0; m < mentionCount; m++)
                {
                    // Mention lines are deliberately not trimmed, matching the original reader.
                    line = reader.ReadLine();
                    bits = System.Text.RegularExpressions.Regex.Split(
                        line, whitespace, System.Text.RegularExpressions.RegexOptions.ECMAScript);
                    IntPair key = new IntPair(System.Convert.ToInt32(bits[0]), System.Convert.ToInt32(bits[1]));
                    bool    rep = bits[2].Equals("1");
                    Dictionaries.MentionType mentionType = ParseMentionType(bits[3]);
                    Dictionaries.Number      number      = ParseNumber(bits[4]);
                    Dictionaries.Gender      gender      = ParseGender(bits[5]);
                    Dictionaries.Animacy     animacy     = ParseAnimacy(bits[6]);
                    int   startIndex = System.Convert.ToInt32(bits[7]);
                    int   endIndex   = System.Convert.ToInt32(bits[8]);
                    int   headIndex  = System.Convert.ToInt32(bits[9]);
                    int   clusterID  = System.Convert.ToInt32(bits[10]);
                    int   mentionID  = System.Convert.ToInt32(bits[11]);
                    int   sentNum    = System.Convert.ToInt32(bits[12]);
                    // The position tuple is variable-length: its size comes first, then its elements.
                    int   posLen     = System.Convert.ToInt32(bits[13]);
                    int[] posElems   = new int[posLen];
                    for (int i = 0; i < posLen; i++)
                    {
                        posElems[i] = System.Convert.ToInt32(bits[14 + i]);
                    }
                    IntTuple position = new IntTuple(posElems);
                    // The span is the field right after the position elements.
                    string   span     = UnescapeSpace(bits[14 + posLen]);
                    CorefChain.CorefMention mention = new CorefChain.CorefMention(mentionType, number, gender, animacy, startIndex, endIndex, headIndex, clusterID, mentionID, sentNum, position, span);

                    // The IDictionary indexer throws for an absent key (Java's Map.get returned null),
                    // so probe with TryGetValue before creating the bucket for this key.
                    ICollection <CorefChain.CorefMention> mentionsWithThisHead;
                    if (!mentionMap.TryGetValue(key, out mentionsWithThisHead) || mentionsWithThisHead == null)
                    {
                        mentionsWithThisHead = Generics.NewHashSet();
                        mentionMap[key]      = mentionsWithThisHead;
                    }
                    mentionsWithThisHead.Add(mention);
                    if (rep)
                    {
                        representative = mention;
                    }
                }
                // construct the cluster
                CorefChain chain = new CorefChain(cid, mentionMap, representative);
                chains[cid] = chain;
            }
            // Consume the line that follows the last cluster.
            reader.ReadLine();
            return(chains);
        }
        // static class
        /// <summary>
        /// Reads a tab-separated gender count list and writes the resulting phrase-to-gender map
        /// with <c>IOUtils.WriteObjectToFile</c>.
        /// </summary>
        /// <remarks>
        /// Each input line is split on a tab into a phrase and a count field; the count field is split on
        /// spaces into male, female and neutral counts. A gender is assigned when its count is greater
        /// than 2 and half of that count exceeds the sum of the other two; lines with no qualifying
        /// gender are skipped.
        /// </remarks>
        /// <param name="args">
        /// Flag/value pairs: <c>-input</c> followed by the list to read and <c>-output</c> followed by the
        /// destination. Flags are matched case-insensitively and both are required.
        /// </param>
        /// <exception cref="System.ArgumentException">
        /// An unknown flag was given, a flag has no value after it, or <c>-input</c> / <c>-output</c> is missing.
        /// </exception>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            string input  = null;
            string output = null;

            // Arguments come in flag/value pairs, so advance two at a time.
            for (int argIndex = 0; argIndex < args.Length; argIndex += 2)
            {
                string flag     = args[argIndex];
                bool   isInput  = Sharpen.Runtime.EqualsIgnoreCase(flag, "-input");
                bool   isOutput = !isInput && Sharpen.Runtime.EqualsIgnoreCase(flag, "-output");
                if (!isInput && !isOutput)
                {
                    throw new ArgumentException("Unknown argument " + flag);
                }
                // A trailing flag without a value used to read past the end of args.
                if (argIndex + 1 >= args.Length)
                {
                    throw new ArgumentException("Missing value for argument " + flag);
                }
                if (isInput)
                {
                    input = args[argIndex + 1];
                }
                else
                {
                    output = args[argIndex + 1];
                }
            }
            if (input == null)
            {
                throw new ArgumentException("Must specify input with -input");
            }
            if (output == null)
            {
                throw new ArgumentException("Must specify output with -output");
            }
            IDictionary <IList <string>, Dictionaries.Gender> genderNumber = Generics.NewHashMap();

            // Dispose the reader once the list has been consumed, including when a line fails to parse.
            using (BufferedReader reader = IOUtils.ReaderFromString(input))
            {
                for (string line; (line = reader.ReadLine()) != null;)
                {
                    string[]            split    = line.Split("\t");
                    string[]            countStr = split[1].Split(" ");
                    int                 male     = System.Convert.ToInt32(countStr[0]);
                    int                 female   = System.Convert.ToInt32(countStr[1]);
                    int                 neutral  = System.Convert.ToInt32(countStr[2]);
                    Dictionaries.Gender gender   = Dictionaries.Gender.Unknown;
                    // A gender wins only with more than two observations and when half of its
                    // count still exceeds the other two counts combined.
                    if (male * 0.5 > female + neutral && male > 2)
                    {
                        gender = Dictionaries.Gender.Male;
                    }
                    else if (female * 0.5 > male + neutral && female > 2)
                    {
                        gender = Dictionaries.Gender.Female;
                    }
                    else if (neutral * 0.5 > male + female && neutral > 2)
                    {
                        gender = Dictionaries.Gender.Neutral;
                    }
                    if (gender == Dictionaries.Gender.Unknown)
                    {
                        continue;
                    }
                    // Key the entry by the phrase's space-separated tokens.
                    string[]       words  = split[0].Split(" ");
                    IList <string> tokens = Arrays.AsList(words);
                    genderNumber[tokens] = gender;
                }
            }
            IOUtils.WriteObjectToFile(genderNumber, output);
        }