/// <summary>
/// Searches for the document whose "sentid" field equals <paramref name="sentId"/> and
/// deserializes the object stored in that document's binary "patterns" field.
/// </summary>
/// <param name="sentId">Value matched against the "sentid" field.</param>
/// <returns>
/// The object read from the first hit's "patterns" bytes, cast to the pattern map type;
/// <c>null</c> when hits are reported but the score-doc list yields no element.
/// </returns>
/// <exception cref="System.Exception">
/// Thrown when the search reports no hits, and used to wrap an
/// <see cref="IOException"/> or <see cref="TypeLoadException"/> raised while reading.
/// </exception>
public override IDictionary<int, ICollection<E>> GetPatternsForAllTokens(string sentId)
{
    try
    {
        TermQuery bySentenceId = new TermQuery(new Term("sentid", sentId));
        TopDocs hits = searcher.Search(bySentenceId, 1);

        // Nothing indexed under this id: report it together with the index size.
        if (hits.totalHits <= 0)
        {
            throw new Exception("Why no patterns for sentid " + sentId + ". Number of documents in index are " + Size());
        }

        // Only one hit was requested, so the loop returns on its first element.
        foreach (ScoreDoc hit in hits.scoreDocs)
        {
            Org.Apache.Lucene.Document.Document stored = searcher.Doc(hit.doc);
            byte[] serialized = stored.GetBinaryValue("patterns").bytes;
            ObjectInputStream objectIn = new ObjectInputStream(new ByteArrayInputStream(serialized));
            return (IDictionary<int, ICollection<E>>)objectIn.ReadObject();
        }

        return null;
    }
    catch (IOException e)
    {
        throw new Exception(e);
    }
    catch (TypeLoadException e)
    {
        throw new Exception(e);
    }
}
/// <summary>
/// Builds one Lucene document for a sentence and passes it to the index writer.
/// </summary>
/// <remarks>
/// For every token, each key/value pair produced by <c>transformCoreLabeltoString</c> is added as a
/// stored <c>StringField</c>. When <paramref name="addProcessedText"/> is set, the token's processed
/// text is added as well unless its lower-cased form is contained in <c>stopWords</c>. The sentence id
/// is always stored under "sentid"; the serialized tokens are stored under "tokens" only when
/// <c>saveTokens</c> is set and <paramref name="tokens"/> is not null.
/// </remarks>
/// <param name="tokens">
/// Tokens of the sentence. May be null, in which case only the "sentid" field is indexed.
/// </param>
/// <param name="sentid">Value stored in the "sentid" field.</param>
/// <param name="addProcessedText">Whether to index each token's processed text.</param>
/// <exception cref="System.Exception">Wraps an <see cref="IOException"/> raised while indexing.</exception>
protected internal override void Add(IList<CoreLabel> tokens, string sentid, bool addProcessedText)
{
    try
    {
        SetIndexWriter();
        Org.Apache.Lucene.Document.Document doc = new Org.Apache.Lucene.Document.Document();

        // The null test must come before the enumeration: previously the list was iterated
        // first, so the later "tokens != null" check could never be reached with a null list.
        if (tokens != null)
        {
            foreach (CoreLabel l in tokens)
            {
                foreach (KeyValuePair<string, string> en in transformCoreLabeltoString.Apply(l))
                {
                    doc.Add(new StringField(en.Key, en.Value, Field.Store.Yes));
                }

                if (addProcessedText)
                {
                    string ptxt = l.Get(typeof(PatternsAnnotations.ProcessedTextAnnotation));
                    // Stop words are filtered by their lower-cased form; the stored value keeps its case.
                    if (!stopWords.Contains(ptxt.ToLower()))
                    {
                        doc.Add(new StringField(Token.GetKeyForClass(typeof(PatternsAnnotations.ProcessedTextAnnotation)), ptxt, Field.Store.Yes));
                    }
                }
            }
        }

        doc.Add(new StringField("sentid", sentid, Field.Store.Yes));

        if (tokens != null && saveTokens)
        {
            doc.Add(new Field("tokens", GetProtoBufAnnotation(tokens), LuceneFieldType.NotIndexed));
        }

        indexWriter.AddDocument(doc);
    }
    catch (IOException e)
    {
        throw new Exception(e);
    }
}
// SentenceIndex.SentenceIteratorWithWords queryIndex(SurfacePattern pat){
//
//
//    String[] n = pat.getSimplerTokensNext();
//    String[] pr = pat.getSimplerTokensPrev();
//    boolean rest = false;
//    if(n!=null){
//      for(String e: n){
//        if(!specialWords.contains(e)){
//          rest = true;
//          break;
//        }
//      }
//    }
//    if(rest == false && pr!=null){
//      for(String e: pr){
//        if(!specialWords.contains(e) && !stopWords.contains(e)){
//          rest = true;
//          break;
//        }
//      }
//    }
//
//  }

/// <summary>
/// Returns the "sentid" of every indexed document that matches all of the requested
/// field/value pairs.
/// </summary>
/// <remarks>
/// Each (key, value) pair in <paramref name="words"/> becomes a mandatory term clause. Values listed
/// under the processed-text key are left out of the query when their lower-cased form is contained
/// in <c>stopWords</c>.
/// </remarks>
/// <param name="words">Field names mapped to the values that must all occur in a document.</param>
/// <returns>The distinct "sentid" values of the matching documents.</returns>
/// <exception cref="System.Exception">Thrown when the search reports no hits.</exception>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Lucene.Queryparser.Classic.ParseException"/>
internal virtual ICollection<string> QueryIndexGetSentences(CollectionValuedMap<string, string> words)
{
    SetIndexReaderSearcher();

    BooleanQuery conjunction = new BooleanQuery();
    string processedTextKey = Token.GetKeyForClass(typeof(PatternsAnnotations.ProcessedTextAnnotation));

    foreach (KeyValuePair<string, ICollection<string>> entry in words)
    {
        bool isProcessedText = entry.Key.Equals(processedTextKey);
        foreach (string value in entry.Value)
        {
            // Stop words are only filtered for the processed-text field.
            if (isProcessedText && stopWords.Contains(value.ToLower()))
            {
                continue;
            }

            TermQuery mustMatch = new TermQuery(new Term(entry.Key, value));
            conjunction.Add(new BooleanClause(mustMatch, BooleanClause.Occur.Must));
        }
    }

    TopDocs hits = searcher.Search(conjunction, int.MaxValue);
    ICollection<string> sentids = new HashSet<string>();

    if (hits.totalHits <= 0)
    {
        throw new Exception("how come no documents for " + words + ". Query formed is " + conjunction);
    }

    foreach (ScoreDoc hit in hits.scoreDocs)
    {
        Org.Apache.Lucene.Document.Document matched = searcher.Doc(hit.doc);
        sentids.Add(matched.Get("sentid"));
    }

    return sentids;
}
/// <summary>
/// Writes the "sentid" field of each document to standard output, one value per line.
/// </summary>
/// <remarks>
/// NOTE(review): document ids are walked from 0 up to <c>reader.NumDocs()</c>; presumably this
/// assumes the index contains no deleted documents — confirm against the Lucene reader contract.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
public virtual void ListAllDocuments()
{
    SetIndexReaderSearcher();

    int docId = 0;
    while (docId < reader.NumDocs())
    {
        System.Console.Out.WriteLine(searcher.Doc(docId).Get("sentid"));
        docId++;
    }
}
/// <summary>
/// Indexes one document holding a sentence id and the serialized pattern map for that sentence.
/// </summary>
/// <param name="id">Value stored in the "sentid" field.</param>
/// <param name="p">Pattern map; its bytes (from <c>GetBytes</c>) go into the "patterns" field.</param>
/// <param name="commit">When true, the index writer is committed right after the document is added.</param>
/// <exception cref="System.Exception">Wraps an <see cref="IOException"/> raised while indexing.</exception>
private void AddPatterns(string id, IDictionary<int, ICollection<E>> p, bool commit)
{
    try
    {
        SetIndexWriter();

        Org.Apache.Lucene.Document.Document patternDoc = new Org.Apache.Lucene.Document.Document();

        StringField idField = new StringField("sentid", id, Field.Store.Yes);
        patternDoc.Add(idField);

        Field patternsField = new Field("patterns", GetBytes(p), LuceneFieldType.NotIndexed);
        patternDoc.Add(patternsField);

        indexWriter.AddDocument(patternDoc);

        if (!commit)
        {
            return;
        }

        indexWriter.Commit();
    }
    catch (IOException e)
    {
        throw new Exception(e);
    }
}