Exemple #1
0
        /// <summary>
        /// Tokenizes a line of text, builds a flat parse holding one token node per
        /// token, and hands it to the given parser.
        /// </summary>
        /// <param name="line">The raw line; it is first rewritten by the two <c>untokenizedParentPattern</c> expressions.</param>
        /// <param name="parser">The parser that receives the token-level parse.</param>
        /// <param name="numParses">How many parses to request; exactly 1 selects the single-result overload.</param>
        /// <returns>The parses produced by <paramref name="parser"/>.</returns>
        /// <remarks>
        /// A line that yields no tokens leaves the buffer empty, so trimming the trailing
        /// separator asks for a negative length and fails.
        /// </remarks>
        public static Parse[] ParseLine(string line, IParser parser, int numParses)
        {
            var normalized = untokenizedParentPattern1.Replace(line, "$1 $2");
            normalized = untokenizedParentPattern2.Replace(normalized, "$1 $2");

            var words  = new List <string>();
            var joined = new StringBuilder();

            for (var tokenizer = new StringTokenizer(normalized); tokenizer.HasMoreTokens;)
            {
                var word = tokenizer.NextToken;
                words.Add(word);
                joined.Append(word);
                joined.Append(" ");
            }

            // Every token was followed by a separator; cut off the last one.
            var text = joined.ToString(0, joined.Length - 1);
            var root = new Parse(text, new Span(0, text.Length), AbstractBottomUpParser.INC_NODE, 0, 0);

            var offset   = 0;
            var position = 0;

            foreach (var entry in words)
            {
                var end = offset + entry.Length;
                root.Insert(new Parse(text, new Span(offset, end), AbstractBottomUpParser.TOK_NODE, 0, position));

                // Skip the token and the single space that follows it in the joined text.
                offset = end + 1;
                position++;
            }

            if (numParses == 1)
            {
                return new[] { parser.Parse(root) };
            }

            return parser.Parse(root, numParses);
        }
        /// <summary>
        /// Reads the outcome patterns from the underlying data.
        /// </summary>
        /// <remarks>
        /// The number of patterns is obtained from <c>ReadInt</c>. Each pattern is then
        /// obtained from <c>ReadString</c> as a space-separated list of integers.
        /// The integers are parsed with the invariant culture so the result does not
        /// depend on the regional settings of the current thread.
        /// </remarks>
        /// <returns>
        /// A jagged array with one entry per pattern, each holding the integers of
        /// that pattern in the order they were read.
        /// </returns>
        protected int[][] GetOutcomePatterns()
        {
            var numOCTypes      = ReadInt();
            var outcomePatterns = new int[numOCTypes][];

            for (var i = 0; i < numOCTypes; i++)
            {
                var tok      = new StringTokenizer(ReadString(), " ");
                var infoInts = new int[tok.CountTokens];
                for (var j = 0; tok.HasMoreTokens; j++)
                {
                    // Machine-readable data: never parse it with the user's culture.
                    infoInts[j] = int.Parse(tok.NextToken, System.Globalization.CultureInfo.InvariantCulture);
                }
                outcomePatterns[i] = infoInts;
            }
            return(outcomePatterns);
        }
Exemple #3
0
        /// <summary>
        /// Reads one line from the underlying reader and turns it into an event.
        /// The first token of the line becomes the outcome and the remaining
        /// tokens become the context.
        /// </summary>
        /// <returns>
        /// The event built from the next line, or null when the reader has no more lines.
        /// </returns>
        public virtual Event Read()
        {
            Event result = null;
            string line  = Reader.ReadLine();

            if (line != null)
            {
                var tokenizer  = new StringTokenizer(line);
                string outcome = tokenizer.NextToken;

                // The array is sized from the tokens left after the outcome was taken.
                var context = new string[tokenizer.CountTokens];
                for (var index = 0; index < context.Length; index++)
                {
                    context[index] = tokenizer.NextToken;
                }

                result = new Event(outcome, context);
            }

            return(result);
        }
Exemple #4
0
        /// <summary>
        /// Splits a line into tokens, wraps them in a flat token-level parse and
        /// asks the parser for one or several full parses of it.
        /// </summary>
        /// <param name="line">The input line; both <c>untokenizedParentPattern</c> replacements are applied before tokenizing.</param>
        /// <param name="parser">The parser used to produce the result.</param>
        /// <param name="numParses">The number of parses requested; 1 uses the single-parse overload.</param>
        /// <returns>The parses returned by <paramref name="parser"/>.</returns>
        /// <remarks>
        /// If the line contains no tokens the buffer stays empty and removing the
        /// trailing space fails with a negative length.
        /// </remarks>
        public static Parse[] ParseLine(string line, IParser parser, int numParses) {
            line = untokenizedParentPattern2.Replace(untokenizedParentPattern1.Replace(line, "$1 $2"), "$1 $2");

            var tokenizer = new StringTokenizer(line);
            var tokens = new List<string>();
            var buffer = new StringBuilder();
            while (tokenizer.HasMoreTokens) {
                var token = tokenizer.NextToken;
                tokens.Add(token);
                buffer.Append(token + " ");
            }

            // The buffer ends with one separator too many; leave it out of the text.
            var text = buffer.ToString(0, buffer.Length - 1);
            var root = new Parse(text, new Span(0, text.Length), AbstractBottomUpParser.INC_NODE, 0, 0);

            var position = 0;
            for (var index = 0; index < tokens.Count; index++) {
                var length = tokens[index].Length;
                var tokenSpan = new Span(position, position + length);
                root.Insert(new Parse(text, tokenSpan, AbstractBottomUpParser.TOK_NODE, 0, index));

                // Advance past this token and the single space that follows it.
                position += length + 1;
            }

            if (numParses != 1) {
                return parser.Parse(root, numParses);
            }

            return new[] { parser.Parse(root) };
        }
Exemple #5
0
        /// <summary>
        /// Appends the token(s) derived from a single leaf to the four parallel output lists.
        /// </summary>
        /// <remarks>
        /// The tag is the leaf's functional tag, or the lexeme when there is none. When
        /// <c>includeFeatures</c> is set and the leaf has a morphological tag, that tag is
        /// appended after a space. The tag is then passed through <c>RegExReplace</c> with the
        /// space expression and "=".
        /// <para>
        /// A lexeme containing "_" (when <c>expandMe</c> is set) is either kept whole and
        /// marked "P" if its tag is "prop", or split on "_" into B-/I- tagged tokens.
        /// A lexeme longer than one character that contains the hyphen is split according
        /// to <c>hyphenRegex</c> when it matches. Every other lexeme is emitted as a single
        /// token whose tag is passed through <c>AddGender</c>.
        /// </para>
        /// </remarks>
        /// <param name="leaf">The leaf to process; nothing is appended when it is null.</param>
        /// <param name="sentence">Receives the text of each emitted token.</param>
        /// <param name="tags">Receives the tag of each emitted token.</param>
        /// <param name="con">Receives the contraction marker ("B", "E" or null) of each emitted token.</param>
        /// <param name="prop">Receives "P" for an unexpanded "prop" lexeme and null for every other emitted token.</param>
        private void ProcessLeaf(AdLeaf leaf, List <string> sentence, List <string> tags, List <string> con, List <string> prop)
        {
            if (leaf == null)
            {
                return;
            }

            var word = leaf.Lexeme;

            // Guillemets are only rewritten to a plain double quote while callsCount is even.
            if (("«" == word || "»" == word) && callsCount % 2 == 0)
            {
                word = "\"";
            }

            var label = leaf.FunctionalTag;

            // "B" / "E" depending on which contraction marker the secondary tag carries.
            string marker  = null;
            var secondary  = leaf.SecondaryTag;

            if (secondary != null)
            {
                marker = secondary.Contains("<sam->")
                    ? "B"
                    : (secondary.Contains("<-sam>") ? "E" : null);
            }

            label = label ?? word;

            if (includeFeatures && !string.IsNullOrEmpty(leaf.MorphologicalTag))
            {
                label = label + " " + leaf.MorphologicalTag;
            }

            label = label.RegExReplace(Expressions.Expression.Space, "=") ?? word;

            if (expandMe && word.Contains("_"))
            {
                var splitter = new StringTokenizer(word, "_");

                if (label == "prop")
                {
                    // "prop" lexemes stay in one piece and are flagged in the prop list.
                    sentence.Add(word);
                    tags.Add(label);
                    con.Add(null);
                    prop.Add("P");
                    return;
                }

                if (splitter.CountTokens > 0)
                {
                    var parts    = new List <string>(splitter.CountTokens);
                    var partTags = new List <string>(splitter.CountTokens);

                    parts.Add(splitter.NextToken);
                    partTags.Add("B-" + label);

                    while (splitter.HasMoreTokens)
                    {
                        parts.Add(splitter.NextToken);
                        partTags.Add("I-" + label);
                    }

                    // One slot per part; only the last one carries the contraction marker (if any).
                    var slots = new string[parts.Count];
                    slots[slots.Length - 1] = marker;
                    con.AddRange(slots);

                    sentence.AddRange(parts);
                    tags.AddRange(partTags);
                    prop.AddRange(new string[parts.Count]);
                    return;
                }

                // No tokens came out of the split: fall through and emit the lexeme as it is.
            }
            else if (word.Contains(hyphen) && word.Length > 1)
            {
                var hyphenMatch = hyphenRegex.Match(word);

                if (hyphenMatch.Success)
                {
                    var groups = hyphenMatch.Groups;

                    string head   = null;
                    string middle = null;
                    string tail   = null;

                    if (groups[1].Success)
                    {
                        head = groups[2].Value;
                    }
                    else if (groups[3].Success)
                    {
                        middle = groups[4].Value;
                        tail   = groups[5].Value;
                    }
                    else if (groups[6].Success)
                    {
                        head   = groups[7].Value;
                        middle = groups[8].Value;
                        tail   = groups[9].Value;
                    }
                    else
                    {
                        throw new InvalidFormatException("Wrong hyphen pattern.");
                    }

                    // Emission order: first part, the hyphen itself, second part, remainder.
                    var segments = new string[] { head, hyphen, middle, tail };

                    for (var slot = 0; slot < segments.Length; slot++)
                    {
                        var segment = segments[slot];
                        if (string.IsNullOrEmpty(segment))
                        {
                            continue;
                        }

                        sentence.Add(segment);
                        // The hyphen (slot 1) is tagged with itself; all other segments use the leaf's tag.
                        tags.Add(slot == 1 ? hyphen : label);
                        prop.Add(null);
                        con.Add(marker);
                    }
                    return;
                }

                // The hyphen expression did not match: fall through and emit the lexeme as it is.
            }
            else
            {
                label = AddGender(label, leaf.MorphologicalTag);
            }

            sentence.Add(word);
            tags.Add(label);
            prop.Add(null);
            con.Add(marker);
        }