private SimpleMatrix ConcatenateContextWords(SimpleMatrix childVec, IntPair span, IList <string> words) { // TODO: factor out getting the words SimpleMatrix left = (span.GetSource() < 0) ? dvModel.GetStartWordVector() : dvModel.GetWordVector(words[span.GetSource()]); SimpleMatrix right = (span.GetTarget() >= words.Count) ? dvModel.GetEndWordVector() : dvModel.GetWordVector(words[span.GetTarget()]); return(NeuralUtils.Concatenate(childVec, left, right)); }
public virtual void Evaluate(Tree guess, Tree gold, PrintWriter pw, double weight) { IList <ILabel> words = guess.Yield(); int pos = 0; foreach (ILabel word in words) { ++pos; SimpleMatrix wv = model.GetWordVector(word.Value()); // would be faster but more implementation-specific if we // removed wv.equals if (wv == unk || wv.Equals(unk)) { pw.Printf(" Unknown word in position %d: %s%n", pos, word.Value()); unkWords.Add(word.Value()); } } }
/// <exception cref="System.IO.IOException"/> public static void Main(string[] args) { string modelPath = null; string outputPath = null; string inputPath = null; string testTreebankPath = null; IFileFilter testTreebankFilter = null; IList <string> unusedArgs = Generics.NewArrayList(); for (int argIndex = 0; argIndex < args.Length;) { if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model")) { modelPath = args[argIndex + 1]; argIndex += 2; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output")) { outputPath = args[argIndex + 1]; argIndex += 2; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-input")) { inputPath = args[argIndex + 1]; argIndex += 2; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-testTreebank")) { Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-testTreebank"); argIndex = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1; testTreebankPath = treebankDescription.First(); testTreebankFilter = treebankDescription.Second(); } else { unusedArgs.Add(args[argIndex++]); } } } } } string[] newArgs = Sharpen.Collections.ToArray(unusedArgs, new string[unusedArgs.Count]); LexicalizedParser parser = ((LexicalizedParser)LexicalizedParser.LoadModel(modelPath, newArgs)); DVModel model = DVParser.GetModelFromLexicalizedParser(parser); File outputFile = new File(outputPath); FileSystem.CheckNotExistsOrFail(outputFile); FileSystem.MkdirOrFail(outputFile); int count = 0; if (inputPath != null) { Reader input = new BufferedReader(new FileReader(inputPath)); DocumentPreprocessor processor = new DocumentPreprocessor(input); foreach (IList <IHasWord> sentence in processor) { count++; // index from 1 IParserQuery pq = parser.ParserQuery(); if (!(pq is RerankingParserQuery)) { throw new ArgumentException("Expected a RerankingParserQuery"); } RerankingParserQuery rpq = (RerankingParserQuery)pq; if (!rpq.Parse(sentence)) { throw new Exception("Unparsable sentence: " + sentence); } IRerankerQuery reranker = rpq.RerankerQuery(); if (!(reranker is DVModelReranker.Query)) { throw new ArgumentException("Expected a DVModelReranker"); } DeepTree deepTree = ((DVModelReranker.Query)reranker).GetDeepTrees()[0]; IdentityHashMap <Tree, SimpleMatrix> vectors = deepTree.GetVectors(); foreach (KeyValuePair <Tree, SimpleMatrix> entry in vectors) { log.Info(entry.Key + " " + entry.Value); } FileWriter fout = new FileWriter(outputPath + File.separator + "sentence" + count + ".txt"); BufferedWriter bout = new BufferedWriter(fout); bout.Write(SentenceUtils.ListToString(sentence)); bout.NewLine(); bout.Write(deepTree.GetTree().ToString()); bout.NewLine(); foreach (IHasWord word in sentence) { OutputMatrix(bout, model.GetWordVector(word.Word())); } Tree rootTree = FindRootTree(vectors); OutputTreeMatrices(bout, rootTree, vectors); bout.Flush(); fout.Close(); } } }