Esempio n. 1
0
        /// <summary>
        /// Reads a two-column mapping file and stores each pair in <c>universalMap</c>,
        /// using the first token of a line as the key and the second as the value.
        /// </summary>
        /// <remarks>
        /// Blank lines are skipped. A missing file or a read error is reported on the
        /// standard error stream and does not propagate to the caller.
        /// </remarks>
        /// <param name="path">Path of the mapping file to read.</param>
        /// <exception cref="System.Exception">
        /// Thrown when a non-blank line does not consist of exactly two whitespace-separated tokens.
        /// </exception>
        private void LoadUniversalMap(string path)
        {
            LineNumberReader reader = null;

            try
            {
                reader = new LineNumberReader(new FileReader(path));
                try
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        string trimmed = line.Trim();
                        if (trimmed.Length == 0)
                        {
                            continue;
                        }
                        // "\\s+" is a regular expression. string.Split would look for that literal
                        // text, so the split has to go through the regex engine.
                        string[] toks = System.Text.RegularExpressions.Regex.Split(trimmed, "\\s+");
                        if (toks.Length != 2)
                        {
                            throw new Exception("Invalid mapping line: " + line);
                        }
                        universalMap[toks[0]] = toks[1];
                    }
                }
                finally
                {
                    // Release the file even when a malformed line or a read error aborts the loop.
                    reader.Close();
                }
            }
            catch (FileNotFoundException)
            {
                System.Console.Error.Printf("%s: File not found %s%n", this.GetType().FullName, path);
            }
            catch (IOException e)
            {
                // The reader is still null if the failure happened while opening the file.
                int lineId = (reader == null) ? -1 : reader.GetLineNumber();
                System.Console.Error.Printf("%s: Error at line %d%n", this.GetType().FullName, lineId);
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Populates <c>tagMap</c> from the tag-map section of a mapping file.
        /// </summary>
        /// <remarks>
        /// Lines are ignored until one matches <c>startOfTagMap</c>. From that line on, lines
        /// starting with ";" are treated as comments, lines matched by <c>mapping</c> are stored
        /// (after <c>ProcessShortTag</c>) and reading stops at the first line matching
        /// <c>endOfTagMap</c>. Errors are reported on the standard error stream.
        /// </remarks>
        /// <param name="path">Mapping file; the method returns immediately if it is null or does not exist.</param>
        /// <param name="options">Not read by this implementation.</param>
        public virtual void Setup(File path, params string[] options)
        {
            if (path == null || !path.Exists())
            {
                return;
            }
            LineNumberReader reader = null;

            try
            {
                reader = new LineNumberReader(new FileReader(path));
                try
                {
                    bool insideTagMap = false;
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        line = line.Trim();
                        Matcher isStartSymbol = startOfTagMap.Matcher(line);
                        insideTagMap = (isStartSymbol.Matches() || insideTagMap);
                        if (!insideTagMap)
                        {
                            continue;
                        }
                        // Comment line
                        if (line.StartsWith(";"))
                        {
                            continue;
                        }
                        Matcher mappingLine = mapping.Matcher(line);
                        if (mappingLine.Find())
                        {
                            if (mappingLine.GroupCount() == numExpectedTokens)
                            {
                                string finalShortTag = ProcessShortTag(mappingLine.Group(1), mappingLine.Group(2));
                                tagMap[mappingLine.Group(1)] = finalShortTag;
                            }
                            else
                            {
                                System.Console.Error.Printf("%s: Skipping bad mapping in %s (line %d)%n", this.GetType().FullName, path.GetPath(), reader.GetLineNumber());
                            }
                        }
                        // The end marker is tested after the mapping check, so a line can be both.
                        Matcher isEndSymbol = endOfTagMap.Matcher(line);
                        if (isEndSymbol.Matches())
                        {
                            break;
                        }
                    }
                }
                finally
                {
                    // Close on every exit path, including exceptions raised while processing a line.
                    reader.Close();
                }
            }
            catch (FileNotFoundException)
            {
                System.Console.Error.Printf("%s: Could not open mapping file %s%n", this.GetType().FullName, path.GetPath());
            }
            catch (IOException)
            {
                int lineNum = (reader == null) ? -1 : reader.GetLineNumber();
                System.Console.Error.Printf("%s: Error reading %s (line %d)%n", this.GetType().FullName, path.GetPath(), lineNum);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Reads a split file and returns the set of its lines, each trimmed of surrounding whitespace.
        /// </summary>
        /// <remarks>
        /// The path is first passed through <c>DataFilePaths.Convert</c>. A missing file or a read
        /// error is reported on the standard error stream; the entries collected up to that point
        /// are still returned.
        /// </remarks>
        /// <param name="path">Path of the split file.</param>
        /// <returns>The trimmed lines read from the file; empty if nothing could be read.</returns>
        protected internal virtual ICollection <string> BuildSplitMap(string path)
        {
            path = DataFilePaths.Convert(path);
            ICollection <string> fileSet = Generics.NewHashSet();
            LineNumberReader     reader  = null;

            try
            {
                reader = new LineNumberReader(new FileReader(path));
                try
                {
                    // Loop on the ReadLine result instead of Ready(): ReadLine may return null
                    // even after Ready() reported true, and Trim() on null would throw.
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        fileSet.Add(line.Trim());
                    }
                }
                finally
                {
                    // Close on every exit path so a read error does not leak the file handle.
                    reader.Close();
                }
            }
            catch (FileNotFoundException)
            {
                System.Console.Error.Printf("%s: Could not open split file %s\n", this.GetType().FullName, path);
            }
            catch (IOException)
            {
                // The reader is still null if the failure happened while opening the file.
                int lineNum = (reader == null) ? -1 : reader.GetLineNumber();
                System.Console.Error.Printf("%s: Error reading split file %s (line %d)\n", this.GetType().FullName, path, lineNum);
            }
            return fileSet;
        }
Esempio n. 4
0
        /// <summary>
        /// Reads the given split file and returns the set of its lines, each trimmed of
        /// surrounding whitespace.
        /// </summary>
        /// <remarks>
        /// The file name is first passed through <c>DataFilePaths.Convert</c>. A missing file or a
        /// read error is reported (with a stack trace) and the entries collected so far are returned.
        /// </remarks>
        /// <param name="splitFileName">Name of the split file.</param>
        /// <returns>The trimmed lines read from the file; empty if nothing could be read.</returns>
        private ICollection <string> MakeSplitSet(string splitFileName)
        {
            splitFileName = DataFilePaths.Convert(splitFileName);
            ICollection <string> splitSet = Generics.NewHashSet();
            LineNumberReader     reader   = null;

            try
            {
                reader = new LineNumberReader(new FileReader(splitFileName));
                try
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        splitSet.Add(line.Trim());
                    }
                }
                finally
                {
                    // Close on every exit path so a read error does not leak the file handle.
                    reader.Close();
                }
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                // The reader is still null if the failure happened while opening the file.
                int lineNum = (reader == null) ? -1 : reader.GetLineNumber();
                System.Console.Error.Printf("%s: Error reading %s (line %d)%n", this.GetType().FullName, splitFileName, lineNum);
                Sharpen.Runtime.PrintStackTrace(e);
            }
            return splitSet;
        }
Esempio n. 5
0
        /// <summary>
        /// Parses <c>configFile</c> into per-dataset <c>Properties</c> objects and appends them to
        /// <c>datasetList</c>.
        /// </summary>
        /// <remarks>
        /// Lines matched by <c>skipLine</c> are ignored. A line matching <c>setDelim</c> closes the
        /// dataset being collected. Every other line must match the first pattern of at least one
        /// entry in <c>patternsMap</c>; it is split on <c>Delim</c> into a name and a value, and the
        /// value is validated against the entry's second pattern when that pattern is not null.
        /// A missing file or a read error is reported on the standard error stream.
        /// </remarks>
        /// <exception cref="System.ArgumentException">
        /// Thrown when a line matches none of the patterns in <c>patternsMap</c>.
        /// </exception>
        public virtual void Parse()
        {
            // Tracked separately so the IOException handler can report a line without touching the reader.
            int lineNum = 0;

            try
            {
                LineNumberReader reader = new LineNumberReader(new FileReader(configFile));
                try
                {
                    Properties paramsForDataset = null;
                    // Loop on the ReadLine result instead of Ready(), so a null line never reaches the matchers.
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        lineNum = reader.GetLineNumber();
                        Matcher m = skipLine.Matcher(line);
                        if (m.LookingAt())
                        {
                            continue;
                        }
                        m = setDelim.Matcher(line);
                        if (m.Matches() && paramsForDataset != null)
                        {
                            // Delimiter after at least one parameter: the current dataset is complete.
                            datasetList.Add(paramsForDataset);
                            paramsForDataset = null;
                            continue;
                        }
                        if (paramsForDataset == null)
                        {
                            paramsForDataset = new Properties();
                        }
                        bool matched = false;
                        foreach (string param in patternsMap.Keys)
                        {
                            Pair <Pattern, Pattern> paramTemplate = patternsMap[param];
                            Matcher paramToken = paramTemplate.first.Matcher(line);
                            if (!paramToken.LookingAt())
                            {
                                continue;
                            }
                            matched = true;
                            string[] tokens = line.Split(Delim);
                            if (tokens.Length != 2)
                            {
                                System.Console.Error.Printf("%s: Skipping malformed parameter in %s (line %d)%n", this.GetType().FullName, configFile, reader.GetLineNumber());
                                break;
                            }
                            string actualParam = tokens[0].Trim();
                            string paramValue  = tokens[1].Trim();
                            if (paramTemplate.second == null)
                            {
                                // No value pattern registered: accept the value as-is.
                                paramsForDataset.SetProperty(actualParam, paramValue);
                                continue;
                            }
                            paramToken = paramTemplate.second.Matcher(paramValue);
                            if (!paramToken.Matches())
                            {
                                System.Console.Error.Printf("%s: Skipping illegal parameter value in %s (line %d)%n", this.GetType().FullName, configFile, reader.GetLineNumber());
                                break;
                            }
                            paramsForDataset.SetProperty(actualParam, paramValue);
                        }
                        if (!matched)
                        {
                            string error = this.GetType().FullName + ": Unknown token in " + configFile + " (line " + reader.GetLineNumber() + ")";
                            // Pass the text as an argument, not as the format string: the file name
                            // may contain '%' characters.
                            System.Console.Error.Printf("%s%n", error);
                            throw new ArgumentException(error);
                        }
                    }
                    if (paramsForDataset != null)
                    {
                        // Flush the last dataset when the file does not end with a delimiter.
                        datasetList.Add(paramsForDataset);
                    }
                }
                finally
                {
                    // Close on every exit path, including the ArgumentException above.
                    reader.Close();
                }
            }
            catch (FileNotFoundException)
            {
                System.Console.Error.Printf("%s: Cannot open file %s%n", this.GetType().FullName, configFile);
            }
            catch (IOException)
            {
                System.Console.Error.Printf("%s: Error reading %s (line %d)%n", this.GetType().FullName, configFile, lineNum);
            }
        }
        /// <summary>Reads typed dependencies from a text file.</summary>
        /// <remarks>
        /// Warning: the created typed dependencies are not backed by any sort of tree structure.
        /// Each dependency line is parsed as: relation name up to the first "(", governor up to
        /// the first ", ", and dependent up to (but excluding) the last character of the line.
        /// An empty line, or a line equal to "null(-0,-0)" or "null(-1,-1)", ends the current group.
        /// </remarks>
        /// <param name="filename">Path of the dependency file to read.</param>
        /// <returns>One collection of dependencies per group, in file order.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception">
        /// Thrown when a line cannot be parsed or names an unknown grammatical relation; the
        /// message carries the line number and the original exception is kept as inner exception.
        /// </exception>
        protected internal static IList <ICollection <TypedDependency> > ReadDeps(string filename)
        {
            LineNumberReader breader = new LineNumberReader(new FileReader(filename));

            try
            {
                IList <ICollection <TypedDependency> > readDeps = new List <ICollection <TypedDependency> >();
                ICollection <TypedDependency>          deps     = new List <TypedDependency>();

                for (string line = breader.ReadLine(); line != null; line = breader.ReadLine())
                {
                    if (line.Equals("null(-0,-0)") || line.Equals("null(-1,-1)"))
                    {
                        // relex parse error: the (possibly empty) group is still recorded
                        readDeps.Add(deps);
                        deps = new List <TypedDependency>();
                        continue;
                    }
                    try
                    {
                        if (line.Equals(string.Empty))
                        {
                            // A blank line closes the group, but empty groups are not recorded.
                            if (deps.Count != 0)
                            {
                                readDeps.Add(deps);
                                deps = new List <TypedDependency>();
                            }
                            continue;
                        }
                        int    firstParen        = line.IndexOf("(");
                        int    commaSpace        = line.IndexOf(", ");
                        string depName           = Sharpen.Runtime.Substring(line, 0, firstParen);
                        string govName           = Sharpen.Runtime.Substring(line, firstParen + 1, commaSpace);
                        string childName         = Sharpen.Runtime.Substring(line, commaSpace + 2, line.Length - 1);
                        GrammaticalRelation grel = GrammaticalRelation.ValueOf(depName);
                        // Prefixed relation names are resolved through EnglishGrammaticalRelations,
                        // using the text after the prefix.
                        if (depName.StartsWith("prep_"))
                        {
                            string prep = Sharpen.Runtime.Substring(depName, 5);
                            grel = EnglishGrammaticalRelations.GetPrep(prep);
                        }
                        if (depName.StartsWith("prepc_"))
                        {
                            string prepc = Sharpen.Runtime.Substring(depName, 6);
                            grel = EnglishGrammaticalRelations.GetPrepC(prepc);
                        }
                        if (depName.StartsWith("conj_"))
                        {
                            string conj = Sharpen.Runtime.Substring(depName, 5);
                            grel = EnglishGrammaticalRelations.GetConj(conj);
                        }
                        if (grel == null)
                        {
                            throw new Exception("Unknown grammatical relation '" + depName + "'");
                        }
                        IndexedWord govWord = new IndexedWord();
                        govWord.SetValue(NormalizeNumbers(govName));
                        govWord.SetWord(govWord.Value());
                        IndexedWord childWord = new IndexedWord();
                        childWord.SetValue(NormalizeNumbers(childName));
                        childWord.SetWord(childWord.Value());
                        TypedDependency dep = new DependencyScoring.TypedDependencyStringEquality(grel, govWord, childWord);
                        deps.Add(dep);
                    }
                    catch (Exception e)
                    {
                        // Keep the original exception attached so its stack trace is not lost.
                        throw new Exception("Error on line " + breader.GetLineNumber() + ":\n\n" + e, e);
                    }
                }
                if (deps.Count != 0)
                {
                    // Flush the last group when the file does not end with a separator line.
                    readDeps.Add(deps);
                }
                return readDeps;
            }
            finally
            {
                // Single close point: covers normal return, parse errors and read errors.
                breader.Close();
            }
        }
        /// <summary>
        /// Rewrites every file in <c>pathsToData</c> into the single UTF-8 output file
        /// <c>outFileName</c>, one sentence per line.
        /// </summary>
        /// <remarks>
        /// Each input line is split into tokens. Tokens in which <c>utf8ArabicChart</c> finds a
        /// match are passed through <c>escaper</c> and then <c>lexMapper</c>; other tokens are
        /// written unchanged. A summary of the lines read per file is appended to
        /// <c>toStringBuffer</c>. Errors are reported on the standard error stream.
        /// </remarks>
        public virtual void Build()
        {
            LineNumberReader infile        = null;
            PrintWriter      outfile       = null;
            string           currentInfile = string.Empty;

            try
            {
                outfile = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFileName), "UTF-8")));
                foreach (File path in pathsToData)
                {
                    infile        = new LineNumberReader(new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8")));
                    currentInfile = path.GetPath();
                    try
                    {
                        // Loop on the ReadLine result instead of Ready(), so Split is never called on null.
                        string line;
                        while ((line = infile.ReadLine()) != null)
                        {
                            // NOTE(review): "\\s+" looks like a regex; string.Split treats a string
                            // separator literally - confirm which Split overload is bound here.
                            List <Word> sent = SentenceUtils.ToUntaggedList(line.Split("\\s+"));
                            foreach (Word token in sent)
                            {
                                Matcher hasArabic = utf8ArabicChart.Matcher(token.Word());
                                if (hasArabic.Find())
                                {
                                    token.SetWord(escaper.Apply(token.Word()));
                                    token.SetWord(lexMapper.Map(null, token.Word()));
                                }
                            }
                            outfile.Println(SentenceUtils.ListToString(sent));
                        }
                        // string.Format uses composite ({0}) placeholders, not %d / %s.
                        toStringBuffer.Append(string.Format(" Read {0} input lines from {1}", infile.GetLineNumber(), path.GetPath()));
                    }
                    finally
                    {
                        // Close every input file, not only the last one, and also on failure.
                        infile.Close();
                    }
                }
            }
            catch (UnsupportedEncodingException e)
            {
                System.Console.Error.Printf("%s: Filesystem does not support UTF-8 output\n", this.GetType().FullName);
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException)
            {
                System.Console.Error.Printf("%s: Could not open %s for writing\n", this.GetType().FullName, outFileName);
            }
            catch (IOException)
            {
                // infile is still null if the failure happened before any input file was opened.
                int lineNum = (infile == null) ? -1 : infile.GetLineNumber();
                System.Console.Error.Printf("%s: Error reading from %s (line %d)\n", this.GetType().FullName, currentInfile, lineNum);
            }
            catch (Exception e)
            {
                int lineNum = (infile == null) ? -1 : infile.GetLineNumber();
                System.Console.Error.Printf("%s: Input sentence from %s contains token mapped to null (line %d)\n", this.GetType().FullName, currentInfile, lineNum);
                Sharpen.Runtime.PrintStackTrace(e);
            }
            finally
            {
                if (outfile != null)
                {
                    outfile.Close();
                }
            }
        }