/// <summary>
/// Pulls lines from <c>mDataReader</c> until it returns an empty (or null) token and
/// rebuilds <c>mEvents</c> with one training event per well-formed line.
/// </summary>
/// <remarks>
/// Each line is split on single spaces. Only lines that yield exactly three fields are
/// used; they are stored, in order, as token, tag and predicate. The predicate is the
/// outcome of the generated event.
/// </remarks>
private void AddNewEvents()
{
    List<string> tokenList = new List<string>();
    List<string> tagList = new List<string>();
    List<string> predicateList = new List<string>();

    // A null token ends the loop just like an empty one, instead of failing with a
    // NullReferenceException on line.Length.
    // NOTE(review): presumably the reader returns null once its input is exhausted - confirm.
    for (string line = mDataReader.NextToken(); !string.IsNullOrEmpty(line); line = mDataReader.NextToken())
    {
        string[] parts = line.Split(' ');
        if (parts.Length == 3)
        {
            tokenList.Add(parts[0]);
            tagList.Add(parts[1]);
            predicateList.Add(parts[2]);
        }
        // Any other field count marks a line that is in error; it is skipped.
    }

    mEvents = new SharpEntropy.TrainingEvent[tokenList.Count];

    string[] tokens = tokenList.ToArray();
    string[] tags = tagList.ToArray();
    string[] predicates = predicateList.ToArray();

    for (int eventIndex = 0, eventCount = mEvents.Length; eventIndex < eventCount; eventIndex++)
    {
        mEvents[eventIndex] = new SharpEntropy.TrainingEvent(
            predicates[eventIndex],
            mContextGenerator.GetContext(eventIndex, tokens, tags, predicates));
    }
}
/// <summary>
/// Returns the buffered event at the current read position and advances that position.
/// </summary>
/// <returns>
/// The event stored at <c>mEventIndex</c> at the moment the method was called.
/// </returns>
/// <remarks>
/// When the event handed out was the last one in <c>mEvents</c>, the buffer is cleared
/// and the read position is reset to zero.
/// </remarks>
public virtual SharpEntropy.TrainingEvent ReadNextEvent()
{
    SharpEntropy.TrainingEvent current = mEvents[mEventIndex];
    mEventIndex += 1;

    bool bufferExhausted = mEvents.Count == mEventIndex;
    if (!bufferExhausted)
    {
        return current;
    }

    // Nothing left to hand out: drop the consumed events and rewind.
    mEvents.Clear();
    mEventIndex = 0;
    return current;
}
/// <summary>
/// Converts one annotated line into training events and appends them to <c>_eventList</c>.
/// </summary>
/// <param name="line">
/// The annotated line; it is split into tokens and outcomes by <c>ConvertAnnotatedString</c>.
/// </param>
/// <remarks>
/// One event is produced per token. The context for a token is generated from the full
/// token sequence and from the outcomes of the tokens that precede it.
/// </remarks>
private void AddEvents(string line)
{
    var linePair = ConvertAnnotatedString(line);
    var tokens = linePair.Item1;
    var outcomes = linePair.Item2;

    // The token sequence does not change while the line is processed, so it is copied
    // to an array once rather than once per token.
    // NOTE(review): assumes GetContext does not modify the token array it receives - confirm.
    var tokenArray = tokens.ToArray();
    var tags = new List<string>();

    for (int currentToken = 0; currentToken < tokens.Count; currentToken++)
    {
        // tags holds only the outcomes of earlier tokens at this point.
        string[] context = _contextGenerator.GetContext(currentToken, tokenArray, tags.ToArray(), null);
        var outcome = outcomes[currentToken];

        var posTrainingEvent = new SharpEntropy.TrainingEvent(outcome, context);
        tags.Add(outcome);
        _eventList.Add(posTrainingEvent);
    }
}
/// <summary>
/// Converts one annotated line into training events and appends them to <c>mEventList</c>.
/// </summary>
/// <param name="line">
/// The annotated line; it is split into tokens and outcomes by <c>ConvertAnnotatedString</c>.
/// </param>
/// <remarks>
/// One event is produced per token. The context for a token is generated from the full
/// token sequence and from the outcomes of the tokens that precede it.
/// </remarks>
private void AddEvents(string line)
{
    Util.Pair<ArrayList, ArrayList> linePair = ConvertAnnotatedString(line);
    ArrayList tokens = linePair.FirstValue;
    ArrayList outcomes = linePair.SecondValue;

    // The token sequence does not change while the line is processed, so it is copied
    // to an array once rather than once per token.
    // NOTE(review): assumes GetContext does not modify the token array it receives - confirm.
    object[] tokenArray = tokens.ToArray();
    List<string> tags = new List<string>();

    for (int currentToken = 0; currentToken < tokens.Count; currentToken++)
    {
        // tags holds only the outcomes of earlier tokens at this point.
        string[] context = mContextGenerator.GetContext(currentToken, tokenArray, tags.ToArray(), null);
        string outcome = (string)outcomes[currentToken];

        SharpEntropy.TrainingEvent posTrainingEvent = new SharpEntropy.TrainingEvent(outcome, context);
        tags.Add(outcome);
        mEventList.Add(posTrainingEvent);
    }
}
/// <summary>
/// Returns the buffered event at the current read position and advances that position.
/// </summary>
/// <returns>
/// The event stored at <c>_currentEvent</c> at the moment the method was called.
/// </returns>
/// <remarks>
/// When the event handed out was the last one in <c>_eventList</c>, the buffer is reset
/// and one further line is read from <c>_textReader</c>; a non-null line is passed to
/// <c>AddEvents</c> to refill the buffer.
/// </remarks>
public virtual SharpEntropy.TrainingEvent ReadNextEvent()
{
    SharpEntropy.TrainingEvent current = _eventList[_currentEvent];
    _currentEvent += 1;

    if (_currentEvent != _eventList.Count)
    {
        return current;
    }

    // Buffer fully consumed: rewind, discard it, and try to load the next line.
    _currentEvent = 0;
    _eventList.Clear();

    string upcomingLine = _textReader.ReadLine();
    if (upcomingLine != null)
    {
        AddEvents(upcomingLine);
    }

    return current;
}
/// <summary>
/// Returns the buffered event at the current read position and advances that position.
/// </summary>
/// <returns>
/// The event stored at <c>mCurrentEvent</c> at the moment the method was called.
/// </returns>
/// <remarks>
/// When the event handed out was the last one in <c>mEventList</c>, the buffer is reset
/// and one further line is read from <c>mTextReader</c>; a non-null line is passed to
/// <c>AddEvents</c> to refill the buffer.
/// </remarks>
public virtual SharpEntropy.TrainingEvent ReadNextEvent()
{
    SharpEntropy.TrainingEvent current = mEventList[mCurrentEvent];
    mCurrentEvent += 1;

    if (mCurrentEvent != mEventList.Count)
    {
        return current;
    }

    // Buffer fully consumed: rewind, discard it, and try to load the next line.
    mCurrentEvent = 0;
    mEventList.Clear();

    string upcomingLine = mTextReader.ReadLine();
    if (upcomingLine != null)
    {
        AddEvents(upcomingLine);
    }

    return current;
}
/// <summary>
/// Builds the name-finder training events for one sentence and stores them in <c>mEvents</c>.
/// </summary>
/// <param name="sentence">
/// The space-separated sentence, in which <c>&lt;START&gt;</c> and <c>&lt;END&gt;</c>
/// markers delimit names.
/// </param>
/// <remarks>
/// The markers themselves produce no events. The first token after a start marker gets
/// the Start outcome and the tokens after it get Continue, until an end marker switches
/// back to Other. After all events are built, <c>mPreviousTags</c> is updated with the
/// outcome of every token in the sentence.
/// </remarks>
private void AddEvents(string sentence)
{
    List<string> tokens = new List<string>();
    List<string> outcomesList = new List<string>();
    string pendingOutcome = MaximumEntropyNameFinder.Other;

    foreach (string part in sentence.Split(' '))
    {
        switch (part)
        {
            case "<START>":
                pendingOutcome = MaximumEntropyNameFinder.Start;
                break;

            case "<END>":
                pendingOutcome = MaximumEntropyNameFinder.Other;
                break;

            default:
                // An ordinary token: record it with whatever outcome is pending.
                tokens.Add(part);
                outcomesList.Add(pendingOutcome);
                if (pendingOutcome == MaximumEntropyNameFinder.Start)
                {
                    // Only the first token of a name is a Start.
                    pendingOutcome = MaximumEntropyNameFinder.Continue;
                }
                break;
        }
    }

    int total = tokens.Count;
    mEvents = new SharpEntropy.TrainingEvent[total];

    // Contexts are generated against the tags known before this sentence...
    for (int i = 0; i < total; i++)
    {
        mEvents[i] = new SharpEntropy.TrainingEvent(
            outcomesList[i],
            mContextGenerator.GetContext(i, tokens, outcomesList, mPreviousTags));
    }

    // ...and only afterwards are this sentence's outcomes recorded.
    for (int i = 0; i < total; i++)
    {
        mPreviousTags[tokens[i]] = outcomesList[i];
    }
}
// Methods --------------------

/// <summary>
/// Generates one training event per character of every word on the line and appends
/// them to <c>_eventList</c>.
/// </summary>
/// <param name="line">
/// A line of space-separated words in which <c>_tokenSeparator</c> marks split positions.
/// </param>
/// <remarks>
/// For each word the separators are removed. A character position gets the outcome
/// <c>"T"</c> when a separator stood immediately before it in the original word, and
/// <c>"F"</c> otherwise.
/// </remarks>
private void AddEvents(string line)
{
    foreach (string wordWithSeparatorToken in line.Split(' '))
    {
        var parts = wordWithSeparatorToken.Split(_tokenSeparator);

        // Offsets, within the separator-free word, at which a separator was removed.
        // A running total replaces re-summing all preceding parts for every separator,
        // and the set gives constant-time lookups in the per-character loop below.
        var separatorOffsets = new HashSet<int>();
        var offset = 0;
        for (var i = 0; i < parts.Length - 1; i++)
        {
            offset += parts[i].Length;
            separatorOffsets.Add(offset);
        }

        var word = string.Join("", parts);
        for (int index = 0; index < word.Length; index++)
        {
            string[] context = ContextGenerator.GetContext(new Tuple<string, int>(word, index));
            var outcome = separatorOffsets.Contains(index) ? "T" : "F";
            var trainingEvent = new SharpEntropy.TrainingEvent(outcome, context);
            _eventList.Add(trainingEvent);
        }
    }
}
/// <summary>
/// Pulls lines from <c>mDataReader</c> until it returns an empty (or null) token and
/// rebuilds <c>mEvents</c> with one training event per well-formed line.
/// </summary>
/// <remarks>
/// Each line is split on single spaces. Only lines that yield exactly three fields are
/// used; they are stored, in order, as token, tag and predicate. The predicate is the
/// outcome of the generated event.
/// </remarks>
private void AddNewEvents()
{
    List<string> tokenList = new List<string>();
    List<string> tagList = new List<string>();
    List<string> predicateList = new List<string>();

    // A null token ends the loop just like an empty one, instead of failing with a
    // NullReferenceException on line.Length.
    // NOTE(review): presumably the reader returns null once its input is exhausted - confirm.
    for (string line = mDataReader.NextToken(); !string.IsNullOrEmpty(line); line = mDataReader.NextToken())
    {
        string[] parts = line.Split(' ');
        if (parts.Length == 3)
        {
            tokenList.Add(parts[0]);
            tagList.Add(parts[1]);
            predicateList.Add(parts[2]);
        }
        // Any other field count marks a line that is in error; it is skipped.
    }

    mEvents = new SharpEntropy.TrainingEvent[tokenList.Count];

    string[] tokens = tokenList.ToArray();
    string[] tags = tagList.ToArray();
    string[] predicates = predicateList.ToArray();

    for (int eventIndex = 0, eventCount = mEvents.Length; eventIndex < eventCount; eventIndex++)
    {
        mEvents[eventIndex] = new SharpEntropy.TrainingEvent(
            predicates[eventIndex],
            mContextGenerator.GetContext(eventIndex, tokens, tags, predicates));
    }
}
/// <summary>
/// Converts one annotated line into training events and appends them to <c>mEventList</c>.
/// </summary>
/// <param name="line">
/// The annotated line; it is split into tokens and outcomes by <c>ConvertAnnotatedString</c>.
/// </param>
/// <remarks>
/// One event is produced per token. The context for a token is generated from the full
/// token sequence and from the outcomes of the tokens that precede it.
/// </remarks>
private void AddEvents(string line)
{
    Util.Pair<ArrayList, ArrayList> linePair = ConvertAnnotatedString(line);
    ArrayList tokens = linePair.FirstValue;
    ArrayList outcomes = linePair.SecondValue;

    // The token sequence does not change while the line is processed, so it is copied
    // to an array once rather than once per token.
    // NOTE(review): assumes GetContext does not modify the token array it receives - confirm.
    object[] tokenArray = tokens.ToArray();
    List<string> tags = new List<string>();

    for (int currentToken = 0; currentToken < tokens.Count; currentToken++)
    {
        // tags holds only the outcomes of earlier tokens at this point.
        string[] context = mContextGenerator.GetContext(currentToken, tokenArray, tags.ToArray(), null);
        string outcome = (string)outcomes[currentToken];

        SharpEntropy.TrainingEvent posTrainingEvent = new SharpEntropy.TrainingEvent(outcome, context);
        tags.Add(outcome);
        mEventList.Add(posTrainingEvent);
    }
}
/// <summary>
/// Builds the name-finder training events for one sentence and stores them in <c>mEvents</c>.
/// </summary>
/// <param name="sentence">
/// The space-separated sentence, in which <c>&lt;START&gt;</c> and <c>&lt;END&gt;</c>
/// markers delimit names.
/// </param>
/// <remarks>
/// The markers themselves produce no events. The first token after a start marker gets
/// the Start outcome and the tokens after it get Continue, until an end marker switches
/// back to Other. After all events are built, <c>mPreviousTags</c> is updated with the
/// outcome of every token in the sentence.
/// </remarks>
private void AddEvents(string sentence)
{
    List<string> tokens = new List<string>();
    List<string> outcomesList = new List<string>();
    string pendingOutcome = MaximumEntropyNameFinder.Other;

    foreach (string part in sentence.Split(' '))
    {
        switch (part)
        {
            case "<START>":
                pendingOutcome = MaximumEntropyNameFinder.Start;
                break;

            case "<END>":
                pendingOutcome = MaximumEntropyNameFinder.Other;
                break;

            default:
                // An ordinary token: record it with whatever outcome is pending.
                tokens.Add(part);
                outcomesList.Add(pendingOutcome);
                if (pendingOutcome == MaximumEntropyNameFinder.Start)
                {
                    // Only the first token of a name is a Start.
                    pendingOutcome = MaximumEntropyNameFinder.Continue;
                }
                break;
        }
    }

    int total = tokens.Count;
    mEvents = new SharpEntropy.TrainingEvent[total];

    // Contexts are generated against the tags known before this sentence...
    for (int i = 0; i < total; i++)
    {
        mEvents[i] = new SharpEntropy.TrainingEvent(
            outcomesList[i],
            mContextGenerator.GetContext(i, tokens, outcomesList, mPreviousTags));
    }

    // ...and only afterwards are this sentence's outcomes recorded.
    for (int i = 0; i < total; i++)
    {
        mPreviousTags[tokens[i]] = outcomesList[i];
    }
}
// Methods --------------------

/// <summary>
/// Generates one training event per character of every word on the line and appends
/// them to <c>_eventList</c>.
/// </summary>
/// <param name="line">
/// A line of space-separated words in which <c>_tokenSeparator</c> marks split positions.
/// </param>
/// <remarks>
/// For each word the separators are removed. A character position gets the outcome
/// <c>"T"</c> when a separator stood immediately before it in the original word, and
/// <c>"F"</c> otherwise.
/// </remarks>
private void AddEvents(string line)
{
    foreach (string wordWithSeparatorToken in line.Split(' '))
    {
        var parts = wordWithSeparatorToken.Split(_tokenSeparator);

        // Offsets, within the separator-free word, at which a separator was removed.
        // A running total replaces re-summing all preceding parts for every separator,
        // and the set gives constant-time lookups in the per-character loop below.
        var separatorOffsets = new HashSet<int>();
        var offset = 0;
        for (var i = 0; i < parts.Length - 1; i++)
        {
            offset += parts[i].Length;
            separatorOffsets.Add(offset);
        }

        var word = string.Join("", parts);
        for (int index = 0; index < word.Length; index++)
        {
            string[] context = ContextGenerator.GetContext(new Tuple<string, int>(word, index));
            var outcome = separatorOffsets.Contains(index) ? "T" : "F";
            var trainingEvent = new SharpEntropy.TrainingEvent(outcome, context);
            _eventList.Add(trainingEvent);
        }
    }
}
/// <summary>
/// Converts one annotated line into training events and appends them to <c>_eventList</c>.
/// </summary>
/// <param name="line">
/// The annotated line; it is split into tokens and outcomes by <c>ConvertAnnotatedString</c>.
/// </param>
/// <remarks>
/// One event is produced per token. The context for a token is generated from the full
/// token sequence and from the outcomes of the tokens that precede it.
/// </remarks>
private void AddEvents(string line)
{
    var linePair = ConvertAnnotatedString(line);
    var tokens = linePair.Item1;
    var outcomes = linePair.Item2;

    // The token sequence does not change while the line is processed, so it is copied
    // to an array once rather than once per token.
    // NOTE(review): assumes GetContext does not modify the token array it receives - confirm.
    var tokenArray = tokens.ToArray();
    var tags = new List<string>();

    for (int currentToken = 0; currentToken < tokens.Count; currentToken++)
    {
        // tags holds only the outcomes of earlier tokens at this point.
        string[] context = _contextGenerator.GetContext(currentToken, tokenArray, tags.ToArray(), null);
        var outcome = outcomes[currentToken];

        var posTrainingEvent = new SharpEntropy.TrainingEvent(outcome, context);
        tags.Add(outcome);
        _eventList.Add(posTrainingEvent);
    }
}