// SentenceIndex.SentenceIteratorWithWords queryIndex(SurfacePattern pat){
//
//
// String[] n = pat.getSimplerTokensNext();
// String[] pr = pat.getSimplerTokensPrev();
// boolean rest = false;
// if(n!=null){
//   for(String e: n){
//     if(!specialWords.contains(e)){
//       rest = true;
//       break;
//     }
//   }
// }
// if(rest == false && pr!=null){
//   for(String e: pr){
//     if(!specialWords.contains(e) && !stopWords.contains(e)){
//       rest = true;
//       break;
//     }
//   }
// }
//
// }
/// <summary>
/// Returns the "sentid" field of every indexed document that matches all of the given words.
/// Each (key, word) pair in <paramref name="words"/> becomes a required term clause on the field
/// named by the key; words found in the stop-word set are skipped, but only for the
/// ProcessedTextAnnotation key.
/// </summary>
/// <param name="words">Map from index field name to the words that must occur in that field.</param>
/// <returns>The distinct "sentid" values of the matching documents.</returns>
/// <exception cref="System.InvalidOperationException">The query matched no documents.</exception>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Lucene.Queryparser.Classic.ParseException"/>
internal virtual ICollection<string> QueryIndexGetSentences(CollectionValuedMap<string, string> words)
{
    SetIndexReaderSearcher();

    BooleanQuery query = new BooleanQuery();
    string pkey = Token.GetKeyForClass(typeof(PatternsAnnotations.ProcessedTextAnnotation));
    foreach (KeyValuePair<string, ICollection<string>> en in words)
    {
        bool processedKey = en.Key.Equals(pkey);
        foreach (string en2 in en.Value)
        {
            // Stop words are dropped only for the processed-text key. The lookup lowercases with the
            // invariant culture so the result does not depend on the machine's current culture.
            if (!processedKey || !stopWords.Contains(en2.ToLowerInvariant()))
            {
                // Occur.Must on every clause: a document has to contain all of the terms.
                query.Add(new BooleanClause(new TermQuery(new Term(en.Key, en2)), BooleanClause.Occur.Must));
            }
        }
    }

    TopDocs tp = searcher.Search(query, int.MaxValue);
    if (tp.totalHits <= 0)
    {
        // An empty result is treated as an error rather than returned as an empty collection.
        throw new System.InvalidOperationException("how come no documents for " + words + ". Query formed is " + query);
    }

    // A set, so a sentence id shared by several hits is reported once.
    ICollection<string> sentids = new HashSet<string>();
    foreach (ScoreDoc s in tp.scoreDocs)
    {
        Org.Apache.Lucene.Document.Document d = searcher.Doc(s.doc);
        sentids.Add(d.Get("sentid"));
    }
    return sentids;
}
/// <summary>
/// Writes the "sentid" field of each indexed document to standard output, one per line.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void ListAllDocuments()
{
    SetIndexReaderSearcher();

    // NOTE(review): document ids 0 .. NumDocs()-1 are visited; presumably this assumes the index
    // has no deleted documents -- confirm against the Lucene reader semantics.
    int docId = 0;
    while (docId < reader.NumDocs())
    {
        Org.Apache.Lucene.Document.Document doc = searcher.Doc(docId);
        System.Console.Out.WriteLine(doc.Get("sentid"));
        docId++;
    }
}