/// <summary>
/// Splits a string into sentences.
/// </summary>
/// <param name="input">
/// The string to be processed.
/// </param>
/// <param name="positions">
/// Receives one (start index, length) pair per returned sentence.
/// </param>
/// <returns>
/// The individual sentences, in order of appearance. When no boundary is
/// detected the whole input is returned as a single sentence.
/// </returns>
public string[] SentenceDetect(string input, out Util.Pair<int, int>[] positions)
{
    int[] boundaries = SentencePositionDetect(input);

    if (boundaries.Length == 0)
    {
        // No boundary found: the whole input is one sentence.
        positions = new Util.Pair<int, int>[] { new Util.Pair<int, int>(0, input.Length) };
        return new string[] { input };
    }

    int boundaryCount = boundaries.Length;

    // Text after the final boundary forms one extra, trailing sentence.
    bool hasTrailingText = boundaries[boundaryCount - 1] != input.Length;
    int sentenceCount = hasTrailingText ? boundaryCount + 1 : boundaryCount;

    string[] sentences = new string[sentenceCount];
    positions = new Util.Pair<int, int>[sentenceCount];

    // Each boundary closes the sentence that began at the previous boundary
    // (or at index 0 for the first one).
    int begin = 0;
    for (int i = 0; i < boundaryCount; i++)
    {
        int length = boundaries[i] - begin;
        positions[i] = new Util.Pair<int, int>(begin, length);
        sentences[i] = input.Substring(begin, length);
        begin = boundaries[i];
    }

    if (hasTrailingText)
    {
        positions[sentenceCount - 1] = new Util.Pair<int, int>(begin, input.Length - begin);
        sentences[sentenceCount - 1] = input.Substring(begin);
    }

    return sentences;
}
/// <summary>
/// Builds a Parse structure from a tree-bank style parse string.
/// </summary>
/// <param name="parse">
/// A tree-bank style parse string.
/// </param>
/// <returns>
/// The root Parse, with one child constituent inserted for every bracketed
/// node other than the top node and "-NONE-" nodes.
/// </returns>
public static Parse FromParseString(string parse)
{
    StringBuilder plainText = new StringBuilder();
    int offset = 0;
    Stack<Util.Pair<string, int>> openNodes = new Stack<Util.Pair<string, int>>();
    List<Util.Pair<string, Util.Span>> constituents = new List<Util.Pair<string, Util.Span>>();

    for (int i = 0; i < parse.Length; i++)
    {
        switch (parse[i])
        {
            case '(':
            {
                string remainder = parse.Substring(i + 1);
                string nodeType = GetType(remainder);
                if (nodeType == null)
                {
                    throw new ParseException("null type for: " + remainder);
                }

                string token = GetToken(remainder);

                // Remember where this node starts in the reconstructed text.
                openNodes.Push(new Util.Pair<string, int>(nodeType, offset));

                if (token != null && nodeType != "-NONE-")
                {
                    Util.Span tokenSpan = new Util.Span(offset, offset + token.Length);
                    constituents.Add(new Util.Pair<string, Util.Span>(MaximumEntropyParser.TokenNode, tokenSpan));
                    plainText.Append(token).Append(" ");
                    offset += token.Length + 1;
                }
                break;
            }

            case ')':
            {
                Util.Pair<string, int> opened = openNodes.Pop();
                if (opened.FirstValue != "-NONE-")
                {
                    // offset - 1 discounts the separator space appended after the last token.
                    Util.Span nodeSpan = new Util.Span(opened.SecondValue, offset - 1);
                    constituents.Add(new Util.Pair<string, Util.Span>(opened.FirstValue, nodeSpan));
                }
                break;
            }
        }
    }

    string text = plainText.ToString();
    Parse root = new Parse(text, new Util.Span(0, text.Length), MaximumEntropyParser.TopNode, 1);

    foreach (Util.Pair<string, Util.Span> constituent in constituents)
    {
        string nodeType = constituent.FirstValue;
        if (nodeType == MaximumEntropyParser.TopNode)
        {
            continue;
        }

        root.Insert(new Parse(text, constituent.SecondValue, nodeType, 1));
    }

    return root;
}
/// <summary>
/// Creates a decimation filter: a low-pass Butterworth anti-aliasing filter
/// followed by a down-sampling stage.
/// </summary>
/// <param name="factor">
/// Decimation factor; the low-pass corner frequency is set to PI / factor and
/// the same factor is passed to the down-sampling stage.
/// </param>
/// <param name="filterOrder">Order of the Butterworth filter.</param>
/// <param name="bidirectional">
/// When true, the anti-aliasing filter is wrapped in a BidirectionalFilter.
/// </param>
public DecimationFilter(int factor, int filterOrder, bool bidirectional)
{
    this.factor = factor;

    double cornerFrequency = Math.PI / factor;
    FilterOrderSpec butterworthSpec = FilterOrderSpec.CreateButterworthSpec(
        new Util.Pair<double, double>(cornerFrequency, 0.0),
        filterOrder,
        BandType.LowPass);

    Filter lowPass = FilterFactory.CreateIirFilter(butterworthSpec);
    Filter antiAlias = bidirectional ? new BidirectionalFilter(lowPass) : lowPass;

    filter = new FilterChain(new Filter[] { antiAlias, new DownSampleFilter(factor) });
}
/// <summary>
/// Converts one annotated line into training events, one per token, and
/// appends them to the event list.
/// </summary>
/// <param name="line">
/// A line in the format accepted by ConvertAnnotatedString.
/// </param>
private void AddEvents(string line)
{
    Util.Pair<ArrayList, ArrayList> annotated = ConvertAnnotatedString(line);
    ArrayList outcomes = annotated.SecondValue;

    // The token list does not change while the line is processed, so copy it
    // to an array once instead of re-copying it for every token.
    object[] tokens = annotated.FirstValue.ToArray();

    // Outcomes of the tokens already processed; passed as the prior decisions
    // when generating the context for the next token.
    List<string> priorTags = new List<string>(tokens.Length);

    for (int currentToken = 0; currentToken < tokens.Length; currentToken++)
    {
        string[] context = mContextGenerator.GetContext(currentToken, tokens, priorTags.ToArray(), null);
        string outcome = (string)outcomes[currentToken];

        mEventList.Add(new SharpEntropy.TrainingEvent(outcome, context));
        priorTags.Add(outcome);
    }
}
/// <summary>
/// Tokenizes an annotated line and separates every token into its two parts
/// using Split.
/// </summary>
/// <param name="input">The annotated line.</param>
/// <returns>
/// A pair of parallel lists: the first values of every split token, and the
/// second values of every split token.
/// </returns>
public static Util.Pair<ArrayList, ArrayList> ConvertAnnotatedString(string input)
{
    ArrayList words = new ArrayList();
    ArrayList tags = new ArrayList();

    Util.StringTokenizer tokenizer = new Util.StringTokenizer(input);
    for (string rawToken = tokenizer.NextToken(); rawToken != null; rawToken = tokenizer.NextToken())
    {
        Util.Pair<string, string> parts = Split(rawToken);
        words.Add(parts.FirstValue);
        tags.Add(parts.SecondValue);
    }

    return new Util.Pair<ArrayList, ArrayList>(words, tags);
}
/// <summary>
/// Builds the list of features for one position in a string.
/// </summary>
/// <param name="pair">
/// The string, and the index of the position under investigation. The index
/// must refer to an existing character.
/// </param>
/// <returns>The generated features, in a fixed order.</returns>
public virtual string[] GetContext(Util.Pair<string, int> pair)
{
    string text = pair.FirstValue;
    int position = pair.SecondValue;
    List<string> features = new List<string>();

    // Everything before the position, and everything from the position on.
    features.Add("p=" + text.Substring(0, position));
    features.Add("s=" + text.Substring(position));

    // Up to two characters preceding the position; "bok" marks a missing one.
    if (position <= 0)
    {
        features.Add("p1=bok");
    }
    else
    {
        AddCharPredicates("p1", text[position - 1], features);

        if (position <= 1)
        {
            features.Add("p2=bok");
        }
        else
        {
            AddCharPredicates("p2", text[position - 2], features);
            features.Add("p21=" + text[position - 2] + text[position - 1]);
        }

        features.Add("p1f1=" + text[position - 1] + text[position]);
    }

    // The character at the position and the one following it.
    AddCharPredicates("f1", text[position], features);

    bool hasFollowingChar = position + 1 < text.Length;
    if (hasFollowingChar)
    {
        AddCharPredicates("f2", text[position + 1], features);
        features.Add("f12=" + text[position] + text[position + 1]);
    }
    else
    {
        features.Add("f2=bok");
    }

    // Character code: the whole string starts with '&' and ends with ';'.
    if (text[0] == '&' && text[text.Length - 1] == ';')
    {
        features.Add("cc");
    }

    return features.ToArray();
}
/// <summary>
/// Creates sentence detection events for one sentence and appends them to the
/// linked list of pending events.
/// </summary>
/// <param name="token">
/// The sentence text. The first word of the following sentence (mNext), if
/// any, is appended so that candidates are seen with their right-hand context.
/// </param>
private void AddNewEvents(string token)
{
    System.Text.StringBuilder buffer = mBuffer;
    buffer.Append(token.Trim());

    // Index of the last character of the trimmed sentence: the true boundary.
    int sentenceEndPosition = buffer.Length - 1;

    // Add the following word to the buffer.
    if (mNext != null && token.Length > 0)
    {
        // Ordinal character search: the culture-sensitive IndexOf(string)
        // overload applies linguistic matching rules, which is not wanted
        // when splitting on a literal space.
        int positionAfterFirstWordInNext = mNext.IndexOf(' ');
        if (positionAfterFirstWordInNext != -1)
        {
            // should maybe change this so that it usually adds a space
            // before the next sentence, but sometimes leaves no space.
            buffer.Append(' ');
            buffer.Append(mNext, 0, positionAfterFirstWordInNext);
        }
    }

    foreach (int candidate in mScanner.GetPositions(buffer))
    {
        // Only the candidate at the end of the sentence is a true boundary.
        string type = (candidate == sentenceEndPosition) ? "T" : "F";
        Util.Pair<System.Text.StringBuilder, int> pair =
            new Util.Pair<System.Text.StringBuilder, int>(buffer, candidate);
        SentenceDetectionEvent sentenceEvent =
            new SentenceDetectionEvent(type, mContextGenerator.GetContext(pair));

        if (mTail != null)
        {
            // Usual case: append after the current tail.
            mTail.NextEvent = sentenceEvent;
            mTail = sentenceEvent;
        }
        else if (mHead == null)
        {
            // Empty list: the event becomes the head; the tail stays unset.
            mHead = sentenceEvent;
        }
        else if (mHead.NextEvent == null)
        {
            // Single-element list: link the second element and start tracking the tail.
            mHead.NextEvent = mTail = sentenceEvent;
        }
        // NOTE(review): when mTail is null but mHead already has a successor,
        // the event is not linked anywhere - confirm this state cannot occur.
    }

    // The buffer is shared between calls; leave it empty.
    buffer.Length = 0;
}
/// <summary>
/// Detects sentence boundaries in a string.
/// </summary>
/// <param name="input">
/// The string to be processed. When unicode mapping is enabled, the string is
/// first passed through Utils.MapUnicodeChars and all positions refer to the
/// mapped string.
/// </param>
/// <returns>
/// One position per accepted sentence break: the first non-whitespace index
/// found after the whitespace that follows the break character, as computed
/// by GetFirstWhitespace and GetFirstNonWhitespace. The probability of each
/// accepted break is stored in mSentenceProbs in the same order.
/// </returns>
public virtual int[] SentencePositionDetect(string input)
{
    if (mUnicodeMapping)
    {
        input = Utils.MapUnicodeChars(input);
    }

    mSentenceProbs.Clear();

    System.Text.StringBuilder buffer = new System.Text.StringBuilder(input);
    List<int> endersList = mScanner.GetPositions(input);
    List<int> positions = new List<int>(endersList.Count);

    // Start index of the sentence currently being scanned.
    int index = 0;

    for (int currentEnder = 0, enderCount = endersList.Count; currentEnder < enderCount; currentEnder++)
    {
        int candidate = endersList[currentEnder];

        // Skip over the leading parts of non-token-final delimiters: if another
        // candidate follows before the next whitespace, only that later one is evaluated.
        int firstWhiteSpace = GetFirstWhitespace(input, candidate + 1);
        if ((currentEnder + 1) < enderCount && endersList[currentEnder + 1] < firstWhiteSpace)
        {
            continue;
        }

        Util.Pair<System.Text.StringBuilder, int> pair =
            new Util.Pair<System.Text.StringBuilder, int>(buffer, candidate);
        double[] probabilities = mModel.Evaluate(mContextGenerator.GetContext(pair));
        string bestOutcome = mModel.GetBestOutcome(probabilities);

        // Looked up once; the previous code repeated this lookup and also
        // multiplied it into an accumulator that was never read.
        double bestProbability = probabilities[mModel.GetOutcomeIndex(bestOutcome)];

        if (bestOutcome.Equals("T") && IsAcceptableBreak(input, index, candidate))
        {
            if (index != candidate)
            {
                positions.Add(GetFirstNonWhitespace(input, firstWhiteSpace));
                mSentenceProbs.Add(bestProbability);
            }

            index = candidate + 1;
        }
    }

    return positions.ToArray();
}
/// <summary>
/// Evaluates a POS model against annotated sentences, one sentence per line.
/// </summary>
/// <param name="posModel">
/// The model to evaluate; it is stored in mPosModel before tagging starts.
/// </param>
/// <param name="reader">
/// Source of the annotated lines. Only its BaseStream is used: the stream is
/// re-read through a new reader using the UTF-7 encoding.
/// </param>
/// <param name="accuracy">
/// Receives the fraction of tags that equal the annotated outcome (NaN when
/// no tag was evaluated).
/// </param>
/// <param name="sentenceAccuracy">
/// Receives the fraction of lines for which every tag was correct (NaN when
/// no line was read).
/// </param>
public virtual void LocalEvaluate(SharpEntropy.IMaximumEntropyModel posModel, System.IO.StreamReader reader, out double accuracy, out double sentenceAccuracy)
{
    mPosModel = posModel;

    // Integer counters: a float counter stops incrementing once it reaches
    // 2^24 (16,777,216), which would silently distort results on large corpora.
    long totalTags = 0;
    long correctTags = 0;
    long sentenceCount = 0;
    long correctSentences = 0;

    // NOTE(review): UTF-7 is kept from the original implementation; confirm it
    // matches the evaluation data. The reader is deliberately not disposed,
    // since disposing it would close the caller's stream.
    System.IO.StreamReader sentenceReader = new System.IO.StreamReader(reader.BaseStream, System.Text.Encoding.UTF7);

    string line;
    while ((line = sentenceReader.ReadLine()) != null)
    {
        sentenceCount++;

        Util.Pair<ArrayList, ArrayList> annotatedPair = PosEventReader.ConvertAnnotatedString(line);
        ArrayList words = annotatedPair.FirstValue;
        ArrayList outcomes = annotatedPair.SecondValue;
        ArrayList tags = new ArrayList(Beam.BestSequence(words, null).Outcomes);

        bool isSentenceOK = true;
        for (int position = 0; position < tags.Count; position++)
        {
            totalTags++;
            if ((string)tags[position] == (string)outcomes[position])
            {
                correctTags++;
            }
            else
            {
                isSentenceOK = false;
            }
        }

        if (isSentenceOK)
        {
            correctSentences++;
        }
    }

    accuracy = (double)correctTags / totalTags;
    sentenceAccuracy = (double)correctSentences / sentenceCount;
}
/// <summary>
/// Renders the text file SRC into a PDF and appends a table of contents on a
/// new page at the end.
/// </summary>
/// <param name="dest">Path of the PDF file to create.</param>
/// <remarks>
/// The first line of the source, and every line following an empty line, is
/// rendered as a title and recorded for the table of contents. The first
/// recorded title is left out of the table of contents.
/// </remarks>
public virtual void CreatePdf(String dest)
{
    // Initialize PDF document
    PdfDocument pdf = new PdfDocument(new PdfWriter(dest));

    // Initialize document
    Document document = new Document(pdf);
    PdfFont font = PdfFontFactory.CreateFont(StandardFonts.TIMES_ROMAN);
    PdfFont bold = PdfFontFactory.CreateFont(StandardFonts.HELVETICA_BOLD);
    document.SetTextAlignment(TextAlignment.JUSTIFIED)
        .SetHyphenation(new HyphenationConfig("en", "uk", 3, 3))
        .SetFont(font)
        .SetFontSize(11);

    // Destination name -> (title text, page number) for every title.
    IList<Util.Pair<String, Util.Pair<String, int>>> toc = new List<Util.Pair<String, Util.Pair<String, int>>>();

    // The using block releases the source file even if rendering fails.
    using (StreamReader sr = File.OpenText(SRC))
    {
        String line;
        bool title = true;
        int counter = 0;

        while ((line = sr.ReadLine()) != null)
        {
            Paragraph p = new Paragraph(line);
            p.SetKeepTogether(true);

            if (title)
            {
                String name = String.Format("title{0:00}", counter++);
                Util.Pair<String, int> titlePage = new Util.Pair<string, int>(line, pdf.GetNumberOfPages());

                // The renderer receives the same pair that is stored in the TOC entry.
                p.SetFont(bold).SetFontSize(12).SetKeepWithNext(true).SetDestination(name).SetNextRenderer(new UpdatePageRenderer(p, titlePage));
                title = false;
                document.Add(p);
                toc.Add(new Util.Pair<string, Util.Pair<string, int>>(name, titlePage));
            }
            else
            {
                p.SetFirstLineIndent(36);
                if (String.IsNullOrEmpty(line))
                {
                    // An empty line ends the section; the next line is a title.
                    p.SetMarginBottom(12);
                    title = true;
                }
                else
                {
                    p.SetMarginBottom(0);
                }

                document.Add(p);
            }
        }
    }

    document.Add(new AreaBreak(AreaBreakType.NEXT_PAGE));
    Paragraph heading = new Paragraph().SetFont(bold).Add("Table of Contents").SetDestination("toc");
    document.Add(heading);

    // Drop the first recorded title; guarded so an empty source file does not throw.
    if (toc.Count > 0)
    {
        toc.RemoveAt(0);
    }

    IList<TabStop> tabstops = new List<TabStop>();
    tabstops.Add(new TabStop(580, TabAlignment.RIGHT, new DottedLine()));

    foreach (Util.Pair<String, Util.Pair<String, int>> entry in toc)
    {
        Util.Pair<String, int> text = entry.Value;
        Paragraph tocLine = new Paragraph().AddTabStops(tabstops).Add(text.Key).Add(new Tab()).Add(text.Value.ToString()).SetAction(PdfAction.CreateGoTo(entry.Key));
        document.Add(tocLine);
    }

    // Close document
    document.Close();
}
/// <summary>
/// Creates a renderer for the given paragraph that keeps a reference to the
/// entry associated with it.
/// </summary>
/// <param name="modelElement">The paragraph passed to the base renderer.</param>
/// <param name="entry">The (text, number) pair stored in this renderer.</param>
public UpdatePageRenderer(Paragraph modelElement, Util.Pair<String, int> entry)
    : base(modelElement)
{
    this.entry = entry;
}
/// <summary>
/// Builds up the list of features, anchored around a position within the
/// StringBuilder.
/// </summary>
/// <param name="pair">
/// The text buffer, and the character offset of the end-of-sentence (eos)
/// candidate within it.
/// </param>
/// <returns>
/// The features for the candidate, in this order: space-before / space-after
/// markers, the eos character, then features for the prefix ("x"), previous
/// token ("v"), suffix ("s") and next token ("n").
/// </returns>
public virtual string[] GetContext(Util.Pair<System.Text.StringBuilder, int> pair)
{
    System.Text.StringBuilder buffer = pair.FirstValue;
    int position = pair.SecondValue; // character offset of the eos character
    int lastIndex = buffer.Length - 1;

    // Space-previous and space-next features.
    if (position > 0 && buffer[position - 1] == ' ')
    {
        mCollectFeatures.Add("sp");
    }
    if (position < lastIndex && buffer[position + 1] == ' ')
    {
        mCollectFeatures.Add("sn");
    }
    mCollectFeatures.Add("eos=" + buffer[position]);

    // Prefix: text preceding the eos character inside its token. Walk left
    // from the candidate and stop at the nearest eos character, otherwise at
    // the preceding space.
    int prefixStart = PreviousSpaceIndex(buffer, position);
    for (int scan = position - 1; scan > prefixStart; scan--)
    {
        if (IsEndOfSentenceCharacterAt(buffer, scan))
        {
            prefixStart = scan;
            break;
        }
    }
    string prefix = buffer.ToString(prefixStart, position - prefixStart).Trim();

    // Previous token: space-delimited text preceding the prefix.
    int previousStart = PreviousSpaceIndex(buffer, prefixStart);
    string previousToken = buffer.ToString(previousStart, prefixStart - previousStart).Trim();

    // Suffix: text following the eos character inside its token. Walk right
    // and stop at the nearest eos character, otherwise at the following space.
    int suffixEnd = NextSpaceIndex(buffer, position, lastIndex);
    for (int scan = position + 1; scan < suffixEnd; scan++)
    {
        if (IsEndOfSentenceCharacterAt(buffer, scan))
        {
            suffixEnd = scan;
            break;
        }
    }

    // Next token: space-delimited text following the suffix.
    int nextEnd = NextSpaceIndex(buffer, suffixEnd + 1, lastIndex + 1);

    string suffix;
    string nextToken;
    if (position == lastIndex)
    {
        // The candidate is the last character: nothing follows it.
        suffix = "";
        nextToken = "";
    }
    else
    {
        suffix = buffer.ToString(position + 1, suffixEnd - (position + 1)).Trim();
        nextToken = buffer.ToString(suffixEnd + 1, nextEnd - (suffixEnd + 1)).Trim();
    }

    AppendLabeledFeature("x=", prefix);
    if (prefix.Length > 0)
    {
        // The prefix additionally contributes its length as a feature.
        mCollectFeatures.Add(System.Convert.ToString(prefix.Length, System.Globalization.CultureInfo.InvariantCulture));
        AddCapAndAbbrevFeatures(prefix, "xcap", "xabbrev");
    }

    AppendLabeledFeature("v=", previousToken);
    if (previousToken.Length > 0)
    {
        AddCapAndAbbrevFeatures(previousToken, "vcap", "vabbrev");
    }

    AppendLabeledFeature("s=", suffix);
    if (suffix.Length > 0)
    {
        AddCapAndAbbrevFeatures(suffix, "scap", "sabbrev");
    }

    AppendLabeledFeature("n=", nextToken);
    if (nextToken.Length > 0)
    {
        AddCapAndAbbrevFeatures(nextToken, "ncap", "nabbrev");
    }

    // The feature list is shared between calls: hand out a copy and reset it.
    string[] context = mCollectFeatures.ToArray();
    mCollectFeatures.Clear();
    return context;
}

/// <summary>
/// Tells whether the character at the given buffer index equals one of the
/// configured end-of-sentence characters.
/// </summary>
private bool IsEndOfSentenceCharacterAt(System.Text.StringBuilder buffer, int index)
{
    for (int current = 0, count = mEndOfSentenceCharacters.Length; current < count; current++)
    {
        if (buffer[index] == mEndOfSentenceCharacters[current])
        {
            return true;
        }
    }

    return false;
}

/// <summary>
/// Adds the feature "label + token", composing it in the shared scratch buffer
/// and leaving that buffer empty afterwards.
/// </summary>
private void AppendLabeledFeature(string label, string token)
{
    mBuffer.Append(label);
    mBuffer.Append(token);
    mCollectFeatures.Add(mBuffer.ToString());
    mBuffer.Length = 0;
}

/// <summary>
/// Adds capFeature when IsFirstUpper accepts the token, and abbrevFeature when
/// the token is contained in the induced abbreviations.
/// </summary>
private void AddCapAndAbbrevFeatures(string token, string capFeature, string abbrevFeature)
{
    if (IsFirstUpper(token))
    {
        mCollectFeatures.Add(capFeature);
    }
    if (mInducedAbbreviations.Contains(token))
    {
        mCollectFeatures.Add(abbrevFeature);
    }
}
/// <summary>
/// Renders the text file SRC into a PDF with custom page labels, preceded by
/// three placeholder pages and followed by a table of contents on a new page.
/// </summary>
/// <param name="dest">Path of the PDF file to create.</param>
/// <remarks>
/// The first line of the source, and every line following an empty line, is
/// rendered as a title and recorded for the table of contents. The first
/// recorded title is left out of the table of contents.
/// </remarks>
public virtual void CreatePdf(String dest)
{
    PdfDocument pdf = new PdfDocument(new PdfWriter(dest));
    pdf.GetCatalog().SetPageLayout(PdfName.TwoColumnRight);
    pdf.GetCatalog().SetPageMode(PdfName.UseThumbs);

    // Front matter is labelled with lower-case roman numerals.
    PdfPage page = pdf.AddNewPage();
    page.SetPageLabel(PageLabelNumberingStyle.LOWERCASE_ROMAN_NUMERALS, null);

    Document document = new Document(pdf);
    for (int blankPage = 0; blankPage < 3; blankPage++)
    {
        document.Add(new Paragraph().Add("Page left blank intentionally"));
        document.Add(new AreaBreak());
    }

    // The page reached after the last break switches to arabic labels starting at 1.
    page = pdf.GetLastPage();
    page.SetPageLabel(PageLabelNumberingStyle.DECIMAL_ARABIC_NUMERALS, null, 1);

    PdfFont font = PdfFontFactory.CreateFont(StandardFonts.TIMES_ROMAN);
    PdfFont bold = PdfFontFactory.CreateFont(StandardFonts.HELVETICA_BOLD);
    document.SetTextAlignment(TextAlignment.JUSTIFIED)
        .SetHyphenation(new HyphenationConfig("en", "uk", 3, 3))
        .SetFont(font)
        .SetFontSize(11);

    // Destination name -> (title text, page number) for every title.
    IList<Util.Pair<String, Util.Pair<String, int>>> toc = new List<Util.Pair<String, Util.Pair<String, int>>>();

    // The using block releases the source file even if rendering fails.
    using (StreamReader sr = File.OpenText(SRC))
    {
        String line;
        bool title = true;
        int counter = 0;

        while ((line = sr.ReadLine()) != null)
        {
            Paragraph p = new Paragraph(line);
            p.SetKeepTogether(true);

            if (title)
            {
                String name = String.Format("title{0:00}", counter++);
                p.SetFont(bold).SetFontSize(12).SetKeepWithNext(true).SetDestination(name);
                title = false;
                document.Add(p);

                // The page count is read after the title has been added.
                toc.Add(new Util.Pair<string, Util.Pair<string, int>>(name, new Util.Pair<string, int>(line, pdf.GetNumberOfPages())));
            }
            else
            {
                p.SetFirstLineIndent(36);
                if (String.IsNullOrEmpty(line))
                {
                    // An empty line ends the section; the next line is a title.
                    p.SetMarginBottom(12);
                    title = true;
                }
                else
                {
                    p.SetMarginBottom(0);
                }

                document.Add(p);
            }
        }
    }

    document.Add(new AreaBreak(AreaBreakType.NEXT_PAGE));
    Paragraph heading = new Paragraph().SetFont(bold).Add("Table of Contents").SetDestination("toc");
    document.Add(heading);

    // The table-of-contents page gets its own "TOC" label.
    page = pdf.GetLastPage();
    page.SetPageLabel(null, "TOC", 1);

    // Drop the first recorded title; guarded so an empty source file does not throw.
    if (toc.Count > 0)
    {
        toc.RemoveAt(0);
    }

    IList<TabStop> tabstops = new List<TabStop>();
    tabstops.Add(new TabStop(580, TabAlignment.RIGHT, new DottedLine()));

    foreach (Util.Pair<String, Util.Pair<String, int>> entry in toc)
    {
        Util.Pair<String, int> text = entry.Value;
        Paragraph tocLine = new Paragraph().AddTabStops(tabstops).Add(text.Key).Add(new Tab()).Add(text.Value.ToString()).SetAction(PdfAction.CreateGoTo(entry.Key));
        document.Add(tocLine);
    }

    document.Close();
}
/// <summary>
/// Detects sentence boundaries in a string.
/// </summary>
/// <param name="input">
/// The string to be processed. When unicode mapping is enabled, the string is
/// first passed through Utils.MapUnicodeChars and all positions refer to the
/// mapped string.
/// </param>
/// <returns>
/// One position per accepted sentence break: the first non-whitespace index
/// found after the whitespace that follows the break character, as computed
/// by GetFirstWhitespace and GetFirstNonWhitespace. The probability of each
/// accepted break is stored in mSentenceProbs in the same order.
/// </returns>
public virtual int[] SentencePositionDetect(string input)
{
    if (mUnicodeMapping)
    {
        input = Utils.MapUnicodeChars(input);
    }

    mSentenceProbs.Clear();

    System.Text.StringBuilder buffer = new System.Text.StringBuilder(input);
    List<int> endersList = mScanner.GetPositions(input);
    List<int> positions = new List<int>(endersList.Count);

    // Start index of the sentence currently being scanned.
    int sentenceStart = 0;
    int enderCount = endersList.Count;

    for (int currentEnder = 0; currentEnder < enderCount; currentEnder++)
    {
        int candidate = endersList[currentEnder];

        // Skip over the leading parts of non-token-final delimiters: if another
        // candidate follows before the next whitespace, only that later one is evaluated.
        int firstWhiteSpace = GetFirstWhitespace(input, candidate + 1);
        bool laterCandidateInSameToken =
            (currentEnder + 1) < enderCount && endersList[currentEnder + 1] < firstWhiteSpace;
        if (laterCandidateInSameToken)
        {
            continue;
        }

        Util.Pair<System.Text.StringBuilder, int> pair =
            new Util.Pair<System.Text.StringBuilder, int>(buffer, candidate);
        double[] probabilities = mModel.Evaluate(mContextGenerator.GetContext(pair));
        string bestOutcome = mModel.GetBestOutcome(probabilities);

        // Looked up once; the previous code repeated this lookup and also
        // multiplied it into an accumulator that was never read.
        double bestProbability = probabilities[mModel.GetOutcomeIndex(bestOutcome)];

        if (bestOutcome.Equals("T") && IsAcceptableBreak(input, sentenceStart, candidate))
        {
            if (sentenceStart != candidate)
            {
                positions.Add(GetFirstNonWhitespace(input, firstWhiteSpace));
                mSentenceProbs.Add(bestProbability);
            }

            sentenceStart = candidate + 1;
        }
    }

    return positions.ToArray();
}
/// <summary>
/// Splits a string into sentences.
/// </summary>
/// <param name="input">
/// The string to be processed.
/// </param>
/// <param name="positions">
/// Receives one (start index, length) pair per returned sentence.
/// </param>
/// <returns>
/// The individual sentences, in order of appearance. When no boundary is
/// detected the whole input is returned as a single sentence.
/// </returns>
public string[] SentenceDetect(string input, out Util.Pair<int, int>[] positions)
{
    int[] breakPoints = SentencePositionDetect(input);

    if (breakPoints.Length == 0)
    {
        // No boundary found: the whole input is one sentence.
        positions = new Util.Pair<int, int>[] { new Util.Pair<int, int>(0, input.Length) };
        return new string[] { input };
    }

    int breakCount = breakPoints.Length;

    // Text after the final break point forms one extra, trailing sentence.
    bool hasRemainder = breakPoints[breakCount - 1] != input.Length;
    int resultCount = hasRemainder ? breakCount + 1 : breakCount;

    string[] sentences = new string[resultCount];
    positions = new Util.Pair<int, int>[resultCount];

    // Each break point closes the sentence that began at the previous break
    // point (or at index 0 for the first one).
    int start = 0;
    for (int current = 0; current < breakCount; current++)
    {
        int length = breakPoints[current] - start;
        positions[current] = new Util.Pair<int, int>(start, length);
        sentences[current] = input.Substring(start, length);
        start = breakPoints[current];
    }

    if (hasRemainder)
    {
        positions[resultCount - 1] = new Util.Pair<int, int>(start, input.Length - start);
        sentences[resultCount - 1] = input.Substring(start);
    }

    return sentences;
}
/// <summary>
/// Creates sentence detection events for one sentence and appends them to the
/// linked list of pending events.
/// </summary>
/// <param name="token">
/// The sentence text. The first word of the following sentence (mNext), if
/// any, is appended so that candidates are seen with their right-hand context.
/// </param>
private void AddNewEvents(string token)
{
    System.Text.StringBuilder buffer = mBuffer;
    buffer.Append(token.Trim());

    // Index of the last character of the trimmed sentence: the true boundary.
    int sentenceEndPosition = buffer.Length - 1;

    // Add the following word to the buffer.
    if (mNext != null && token.Length > 0)
    {
        // Ordinal character search: the culture-sensitive IndexOf(string)
        // overload applies linguistic matching rules, which is not wanted
        // when splitting on a literal space.
        int firstWordLength = mNext.IndexOf(' ');
        if (firstWordLength != -1)
        {
            // should maybe change this so that it usually adds a space
            // before the next sentence, but sometimes leaves no space.
            buffer.Append(' ');
            buffer.Append(mNext, 0, firstWordLength);
        }
    }

    foreach (int candidate in mScanner.GetPositions(buffer))
    {
        // Only the candidate at the end of the sentence is a true boundary.
        string type = (candidate == sentenceEndPosition) ? "T" : "F";
        Util.Pair<System.Text.StringBuilder, int> pair =
            new Util.Pair<System.Text.StringBuilder, int>(buffer, candidate);
        SentenceDetectionEvent sentenceEvent =
            new SentenceDetectionEvent(type, mContextGenerator.GetContext(pair));

        if (mTail != null)
        {
            // Usual case: append after the current tail.
            mTail.NextEvent = sentenceEvent;
            mTail = sentenceEvent;
        }
        else if (mHead == null)
        {
            // Empty list: the event becomes the head; the tail stays unset.
            mHead = sentenceEvent;
        }
        else if (mHead.NextEvent == null)
        {
            // Single-element list: link the second element and start tracking the tail.
            mHead.NextEvent = mTail = sentenceEvent;
        }
        // NOTE(review): when mTail is null but mHead already has a successor,
        // the event is not linked anywhere - confirm this state cannot occur.
    }

    // The buffer is shared between calls; leave it empty.
    buffer.Length = 0;
}