/*
 * private void loadGenderNumber(String file, String neutralWordsFile) {
 *   try {
 *     getWordsFromFile(neutralWordsFile, neutralWords, false);
 *     Map<List<String>, Gender> temp = IOUtils.readObjectFromURLOrClasspathOrFileSystem(file);
 *     genderNumber.putAll(temp);
 *   } catch (IOException e) {
 *     throw new RuntimeIOException(e);
 *   } catch (ClassNotFoundException e) {
 *     throw new RuntimeIOException(e);
 *   }
 * }
 */
/// <summary>Load Bergsma and Lin (2006) gender and number list.</summary>
/// <remarks>
/// Every line of <paramref name="file"/> is split on a tab into a phrase and a
/// counts column; the counts column is split on spaces into male, female and
/// neutral counts. A gender is assigned only when its count is greater than 2
/// and half of it exceeds the sum of the other two counts; all other lines are
/// skipped. Accepted phrases are split on spaces and stored in
/// <c>genderNumber</c> keyed by the resulting token list.
/// The neutral word list is loaded through <c>GetWordsFromFile</c> once the
/// reader has been opened.
/// </remarks>
/// <param name="file">Location of the gender/number count list.</param>
/// <param name="neutralWordsFile">Location of the neutral word list.</param>
/// <exception cref="RuntimeIOException">Wraps any IOException raised while reading.</exception>
private void LoadGenderNumber(string file, string neutralWordsFile)
{
    try
    {
        using (BufferedReader reader = IOUtils.ReaderFromString(file))
        {
            GetWordsFromFile(neutralWordsFile, neutralWords, false);

            // Scratch buffers that SplitOnChar fills in place for every line.
            string[] columns = new string[2];
            string[] counts = new string[3];

            string line;
            while ((line = reader.ReadLine()) != null)
            {
                StringUtils.SplitOnChar(columns, line, '\t');
                StringUtils.SplitOnChar(counts, columns[1], ' ');

                int male = System.Convert.ToInt32(counts[0]);
                int female = System.Convert.ToInt32(counts[1]);
                int neutral = System.Convert.ToInt32(counts[2]);

                Dictionaries.Gender gender;
                if (male * 0.5 > female + neutral && male > 2)
                {
                    gender = Dictionaries.Gender.Male;
                }
                else if (female * 0.5 > male + neutral && female > 2)
                {
                    gender = Dictionaries.Gender.Female;
                }
                else if (neutral * 0.5 > male + female && neutral > 2)
                {
                    gender = Dictionaries.Gender.Neutral;
                }
                else
                {
                    // No gender dominates: the entry stays unknown and is not stored.
                    continue;
                }

                string[] phraseWords = columns[0].Split(" ");
                IList<string> phraseTokens = Arrays.AsList(phraseWords);
                genderNumber[phraseTokens] = gender;
            }
        }
    }
    catch (IOException e)
    {
        throw new RuntimeIOException(e);
    }
}
/// <summary>This constructor is used to recreate a CorefMention following serialization.</summary>
/// <remarks>Every argument is stored unchanged in the field of the same name.</remarks>
/// <param name="mentionType">Value for the <c>mentionType</c> field.</param>
/// <param name="number">Value for the <c>number</c> field.</param>
/// <param name="gender">Value for the <c>gender</c> field.</param>
/// <param name="animacy">Value for the <c>animacy</c> field.</param>
/// <param name="startIndex">Value for the <c>startIndex</c> field.</param>
/// <param name="endIndex">Value for the <c>endIndex</c> field.</param>
/// <param name="headIndex">Value for the <c>headIndex</c> field.</param>
/// <param name="corefClusterID">Value for the <c>corefClusterID</c> field.</param>
/// <param name="mentionID">Value for the <c>mentionID</c> field.</param>
/// <param name="sentNum">Value for the <c>sentNum</c> field.</param>
/// <param name="position">Value for the <c>position</c> field.</param>
/// <param name="mentionSpan">Value for the <c>mentionSpan</c> field.</param>
public CorefMention(
    Dictionaries.MentionType mentionType,
    Dictionaries.Number number,
    Dictionaries.Gender gender,
    Dictionaries.Animacy animacy,
    int startIndex,
    int endIndex,
    int headIndex,
    int corefClusterID,
    int mentionID,
    int sentNum,
    IntTuple position,
    string mentionSpan)
{
    // Identifiers.
    this.mentionID = mentionID;
    this.corefClusterID = corefClusterID;

    // Linguistic attributes.
    this.mentionType = mentionType;
    this.number = number;
    this.gender = gender;
    this.animacy = animacy;

    // Location of the mention.
    this.sentNum = sentNum;
    this.startIndex = startIndex;
    this.endIndex = endIndex;
    this.headIndex = headIndex;
    this.position = position;

    // Surface text.
    this.mentionSpan = mentionSpan;
}
/// <summary>This constructor builds the external CorefMention class from the internal Mention.</summary>
/// <remarks>
/// Type, number, gender, animacy, cluster id and mention id are copied as-is.
/// The start/end/head indices, the sentence number and both components of
/// <paramref name="pos"/> are stored with 1 added (indices start from 1 here).
/// As a side effect, the cluster id is written onto <c>m.headWord</c> under
/// <c>CorefCoreAnnotations.CorefClusterIdAnnotation</c>.
/// </remarks>
/// <param name="m">The internal mention to copy from; its head word is annotated.</param>
/// <param name="pos">Two-element position of the mention; not modified.</param>
public CorefMention(Mention m, IntTuple pos)
{
    // Attributes taken over unchanged.
    this.mentionType = m.mentionType;
    this.number = m.number;
    this.gender = m.gender;
    this.animacy = m.animacy;
    this.corefClusterID = m.corefClusterID;
    this.mentionID = m.mentionID;

    // index starts from 1
    this.startIndex = 1 + m.startIndex;
    this.endIndex = 1 + m.endIndex;
    this.headIndex = 1 + m.headIndex;
    this.sentNum = 1 + m.sentNum;

    this.mentionSpan = m.SpanToString();

    // index starts from 1
    IntTuple shifted = new IntTuple(2);
    for (int i = 0; i < 2; i++)
    {
        shifted.Set(i, pos.Get(i) + 1);
    }
    this.position = shifted;

    m.headWord.Set(typeof(CorefCoreAnnotations.CorefClusterIdAnnotation), this.corefClusterID);
}
/// <summary>Loads the CorefChain objects from the serialized buffer.</summary>
/// <remarks>
/// Layout consumed by this method, as established by the reads below:
/// a line holding the cluster count; then, per cluster, a header line
/// "clusterId mentionCount" followed by one line per mention; finally one
/// more line is read and discarded. Mention fields are separated by single
/// whitespace characters, in this order: two integers forming the map key,
/// a representative flag ("1" means representative), mention type, number,
/// gender, animacy, start index, end index, head index, cluster id,
/// mention id, sentence number, the length of the position tuple, the
/// position elements, and the space-escaped mention span.
/// </remarks>
/// <param name="reader">the buffer</param>
/// <returns>
/// A map from cluster id to clusters, or null when the first line is blank
/// or the reader is already at end of stream.
/// </returns>
/// <exception cref="System.IO.IOException">
/// Thrown (as <see cref="System.IO.EndOfStreamException"/>) when the buffer
/// ends before all announced clusters and mentions have been read.
/// </exception>
private static IDictionary<int, CorefChain> LoadCorefChains(BufferedReader reader)
{
    string line = reader.ReadLine();
    if (line == null)
    {
        // Nothing left to read: treat exactly like a blank count line.
        return null;
    }
    line = line.Trim();
    if (line.Length == 0)
    {
        return null;
    }

    int clusterCount = System.Convert.ToInt32(line);
    IDictionary<int, CorefChain> chains = Generics.NewHashMap();

    // read each cluster
    for (int c = 0; c < clusterCount; c++)
    {
        line = reader.ReadLine();
        if (line == null)
        {
            throw new System.IO.EndOfStreamException(
                "Unexpected end of input while reading the header of coref cluster " + c);
        }
        line = line.Trim();

        // "\\s" is a regular expression (any single whitespace character).
        // string.Split(string) would look for the literal two characters
        // backslash + 's', so the split has to go through Regex.
        string[] bits = System.Text.RegularExpressions.Regex.Split(line, "\\s");
        int cid = System.Convert.ToInt32(bits[0]);
        int mentionCount = System.Convert.ToInt32(bits[1]);

        IDictionary<IntPair, ICollection<CorefChain.CorefMention>> mentionMap = Generics.NewHashMap();
        CorefChain.CorefMention representative = null;

        // read each mention in this cluster
        for (int m = 0; m < mentionCount; m++)
        {
            line = reader.ReadLine();
            if (line == null)
            {
                throw new System.IO.EndOfStreamException(
                    "Unexpected end of input while reading mention " + m + " of coref cluster " + cid);
            }
            bits = System.Text.RegularExpressions.Regex.Split(line, "\\s");

            IntPair key = new IntPair(System.Convert.ToInt32(bits[0]), System.Convert.ToInt32(bits[1]));
            bool rep = bits[2].Equals("1");
            Dictionaries.MentionType mentionType = ParseMentionType(bits[3]);
            Dictionaries.Number number = ParseNumber(bits[4]);
            Dictionaries.Gender gender = ParseGender(bits[5]);
            Dictionaries.Animacy animacy = ParseAnimacy(bits[6]);
            int startIndex = System.Convert.ToInt32(bits[7]);
            int endIndex = System.Convert.ToInt32(bits[8]);
            int headIndex = System.Convert.ToInt32(bits[9]);
            int clusterID = System.Convert.ToInt32(bits[10]);
            int mentionID = System.Convert.ToInt32(bits[11]);
            int sentNum = System.Convert.ToInt32(bits[12]);

            // Variable-length position tuple: a length field followed by that many ints.
            int posLen = System.Convert.ToInt32(bits[13]);
            int[] posElems = new int[posLen];
            for (int i = 0; i < posLen; i++)
            {
                posElems[i] = System.Convert.ToInt32(bits[14 + i]);
            }
            IntTuple position = new IntTuple(posElems);

            // The span is the field right after the position elements; its
            // spaces were escaped so that it survives the whitespace split.
            string span = UnescapeSpace(bits[14 + posLen]);

            CorefChain.CorefMention mention = new CorefChain.CorefMention(
                mentionType, number, gender, animacy,
                startIndex, endIndex, headIndex,
                clusterID, mentionID, sentNum, position, span);

            // A dictionary indexer throws for a missing key instead of
            // returning null, so look the bucket up with TryGetValue. The
            // extra null test keeps maps that store/return null working too.
            ICollection<CorefChain.CorefMention> mentionsWithThisHead;
            if (!mentionMap.TryGetValue(key, out mentionsWithThisHead) || mentionsWithThisHead == null)
            {
                mentionsWithThisHead = Generics.NewHashSet();
                mentionMap[key] = mentionsWithThisHead;
            }
            mentionsWithThisHead.Add(mention);

            if (rep)
            {
                representative = mention;
            }
        }

        // construct the cluster
        CorefChain chain = new CorefChain(cid, mentionMap, representative);
        chains[cid] = chain;
    }

    // Consume the line that follows the last cluster.
    // NOTE(review): presumably a separator emitted by the writer - confirm.
    reader.ReadLine();
    return chains;
}
// static class
/// <summary>
/// Command-line entry point: reads a gender/number count list and writes the
/// derived phrase-to-gender map to a file.
/// </summary>
/// <remarks>
/// Recognised arguments (matched case-insensitively): <c>-input PATH</c> and
/// <c>-output PATH</c>; both are required. Every input line is split on a tab
/// into a phrase and a counts column; the counts column is split on spaces
/// into male, female and neutral counts. A gender is assigned only when its
/// count is greater than 2 and half of it exceeds the sum of the other two
/// counts; other lines are skipped. The resulting map, keyed by the phrase's
/// space-separated tokens, is passed to <c>IOUtils.WriteObjectToFile</c>.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
/// <exception cref="System.ArgumentException">
/// An argument is unknown, a flag has no value after it, or a required
/// argument is missing.
/// </exception>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    string input = null;
    string output = null;

    // Arguments come in "flag value" pairs.
    for (int argIndex = 0; argIndex < args.Length; argIndex += 2)
    {
        string flag = args[argIndex];
        bool isInput = Sharpen.Runtime.EqualsIgnoreCase(flag, "-input");
        bool isOutput = !isInput && Sharpen.Runtime.EqualsIgnoreCase(flag, "-output");
        if (!isInput && !isOutput)
        {
            throw new ArgumentException("Unknown argument " + flag);
        }
        if (argIndex + 1 >= args.Length)
        {
            // Without this check a trailing flag would index past the end of args.
            throw new ArgumentException("Missing value for argument " + flag);
        }
        if (isInput)
        {
            input = args[argIndex + 1];
        }
        else
        {
            output = args[argIndex + 1];
        }
    }

    if (input == null)
    {
        throw new ArgumentException("Must specify input with -input");
    }
    if (output == null)
    {
        throw new ArgumentException("Must specify output with -output");
    }

    IDictionary<IList<string>, Dictionaries.Gender> genderNumber = Generics.NewHashMap();

    // Dispose the reader even when a malformed line makes parsing throw.
    using (BufferedReader reader = IOUtils.ReaderFromString(input))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string[] split = line.Split("\t");
            string[] countStr = split[1].Split(" ");
            int male = System.Convert.ToInt32(countStr[0]);
            int female = System.Convert.ToInt32(countStr[1]);
            int neutral = System.Convert.ToInt32(countStr[2]);

            Dictionaries.Gender gender;
            if (male * 0.5 > female + neutral && male > 2)
            {
                gender = Dictionaries.Gender.Male;
            }
            else if (female * 0.5 > male + neutral && female > 2)
            {
                gender = Dictionaries.Gender.Female;
            }
            else if (neutral * 0.5 > male + female && neutral > 2)
            {
                gender = Dictionaries.Gender.Neutral;
            }
            else
            {
                // No gender dominates: the entry stays unknown and is not stored.
                continue;
            }

            string[] words = split[0].Split(" ");
            IList<string> tokens = Arrays.AsList(words);
            genderNumber[tokens] = gender;
        }
    }

    IOUtils.WriteObjectToFile(genderNumber, output);
}