/// <summary>
/// Creates a processor bound to the given configuration and database, and
/// precomputes the property ordering and accumulated probabilities used later.
/// </summary>
/// <param name="config">Configuration whose properties drive the precomputation.</param>
/// <param name="database">Database instance stored for later use by this processor.</param>
public Processor(Configuration config, IDatabase database)
{
    _config = config;
    _database = database;
    _listeners = new List<IMatchListener>();
    _passthrough = new PassThroughFilter();
    _choosebest = new ChooseBestFilter();

    // Precomputation, step 1: collect every non-id property and order the
    // result with PropertyComparator.
    _proporder = new List<Property>();
    foreach (Property candidate in config.GetProperties())
    {
        if (candidate.IsIdProperty)
        {
            continue;
        }

        _proporder.Add(candidate);
    }
    _proporder.Sort(new PropertyComparator());

    // Precomputation, step 2: walk the ordered properties from last to first,
    // folding each HighProbability into a running value (seeded with 0.5) via
    // StandardUtils.ComputeBayes. Slot i therefore holds the combination of
    // the properties at positions i..end.
    int position = _proporder.Count;
    _accprob = new double[position];
    double accumulated = 0.5;
    while (position > 0)
    {
        position--;
        accumulated = StandardUtils.ComputeBayes(accumulated, _proporder[position].HighProbability);
        _accprob[position] = accumulated;
    }
}
private int _sizeix; // position in prevsizes

#endregion Fields

#region Constructors

/// <summary>
/// Creates a tracker that keeps the supplied search collaborators and limits,
/// and starts with a limit of 100 and a ten-slot size history.
/// </summary>
/// <param name="config">Configuration stored on the tracker.</param>
/// <param name="analyzer">Analyzer stored on the tracker.</param>
/// <param name="searcher">Index searcher stored on the tracker.</param>
/// <param name="maxSearchHits">Value stored as the maximum number of search hits.</param>
/// <param name="minRelevance">Value stored as the minimum relevance.</param>
public QueryResultTracker(Configuration config, Analyzer analyzer, IndexSearcher searcher, int maxSearchHits, float minRelevance)
{
    // Collaborators handed in by the caller.
    _config = config;
    _analyzer = analyzer;
    _searcher = searcher;

    // Caller-supplied search limits.
    _maxSearchHits = maxSearchHits;
    _minRelevance = minRelevance;

    // Fixed starting state.
    _prevsizes = new int[10];
    _limit = 100;
}
/// <summary>
/// Creates the Lucene-backed database: sets up the analyzer and search limits,
/// opens the indexes and searchers, and then creates the main query result tracker.
/// </summary>
/// <param name="config">Configuration stored on the database and passed to the tracker.</param>
/// <param name="overwrite">Forwarded to <c>OpenIndexes</c>.</param>
/// <param name="dbprops">Supplies <c>MaxSearchHits</c> and <c>MinRelevance</c>.</param>
public LuceneDatabase(Configuration config, bool overwrite, DatabaseProperties dbprops)
{
    _config = config;
    _analyzer = new StandardAnalyzer(Version.LUCENE_29);
    _maxSearchHits = dbprops.MaxSearchHits;
    _minRelevance = dbprops.MinRelevance;

    try
    {
        OpenIndexes(overwrite);
        OpenSearchers();
    }
    catch (Exception ex)
    {
        // Initialization failures are logged and not rethrown, so the
        // instance may be left only partially initialized.
        logger.Error("Error initializing object: {0}", ex.Message);
    }

    // The tracker receives the current value of _searcher, so it has to be
    // created after the searchers were opened; creating it first (as before)
    // handed it the value _searcher had before OpenSearchers() ran.
    // NOTE(review): presumably OpenSearchers() is what assigns _searcher - confirm.
    // Created outside the try block so _maintracker is always assigned, as before.
    _maintracker = new QueryResultTracker(config, _analyzer, _searcher, dbprops.MaxSearchHits, dbprops.MinRelevance);
}
/// <summary>
/// Creates a processor whose database is obtained by calling
/// <c>config.CreateDatabase(overwrite)</c>.
/// </summary>
/// <param name="config">Configuration to process with; also used to create the database.</param>
/// <param name="overwrite">Forwarded to <c>Configuration.CreateDatabase</c>.</param>
public Processor(Configuration config, bool overwrite)
    : this(config, config.CreateDatabase(overwrite))
{
}
/// <summary>
/// Creates a processor for the given configuration, delegating to the
/// (configuration, overwrite) constructor with <c>overwrite</c> set to <c>true</c>.
/// </summary>
/// <param name="config">Configuration to process with.</param>
public Processor(Configuration config)
    : this(config, true)
{
}
/// <summary>
/// Builds a <see cref="Configuration"/> from the XML configuration file at
/// <paramref name="file"/>: the threshold, the schema properties and any
/// <c>csv</c> data sources.
/// </summary>
/// <remarks>
/// <paramref name="file"/> is passed straight to <c>XElement.Load</c>; a
/// 'classpath:' prefix is NOT given any special treatment by this method.
/// Numeric values are parsed with the invariant culture, so they must use
/// '.' as the decimal separator regardless of the machine's regional settings.
/// </remarks>
/// <param name="file">Path or URI of the XML configuration file.</param>
/// <returns>The populated configuration.</returns>
public static Configuration Load(string file)
{
    // The configuration file is machine-readable, so numbers must not be
    // interpreted according to the current user's culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var cfg = new Configuration();
    var properties = new List<Property>();

    XElement xml = XElement.Load(file);

    // Threshold: first <threshold> under <schema>; 0 when none is present.
    cfg.Threshold = xml.Elements("schema")
        .Descendants("threshold")
        .Select(x => double.Parse(x.Value, invariant))
        .FirstOrDefault();

    // Every <property> element below a <schema> element.
    IEnumerable<XElement> xmlProperties =
        from s in xml.Elements("schema")
        from p in s.Descendants("property")
        select p;

    foreach (XElement xElement in xmlProperties)
    {
        string propName = xElement.Descendants("name").First().Value;
        var property = new Property(propName);

        // A property is an id property when it carries type="id". Any other
        // (or missing) type value is treated as an ordinary compared property.
        XAttribute typeAttribute = xElement.Attribute("type");
        bool isIdProperty = typeAttribute != null && typeAttribute.Value == "id";

        if (isIdProperty)
        {
            property.IsIdProperty = true;
        }
        else
        {
            string comparatorName = xElement.Descendants("comparator").FirstOrDefault().Value;
            property.Comparator = GetComparatorFromString(comparatorName);
            property.LowProbability = xElement.Descendants("low")
                .Select(x => double.Parse(x.Value, invariant))
                .FirstOrDefault();
            property.HighProbability = xElement.Descendants("high")
                .Select(x => double.Parse(x.Value, invariant))
                .FirstOrDefault();
        }

        // Register every property, id properties included. Previously the add
        // happened only in the non-id branch, so id properties were flagged
        // and then silently discarded.
        properties.Add(property);
    }

    cfg.SetProperties(properties);

    // Data sources: every top-level element other than <schema>. Only <csv>
    // elements are handled; anything else is skipped.
    IEnumerable<XElement> dataSources =
        from d in xml.Elements()
        where d.Name != "schema"
        select d;

    foreach (XElement dataSource in dataSources)
    {
        if (dataSource.Name != "csv")
        {
            continue;
        }

        var csvDs = new CsvDataSource();
        Hashtable csvParams = GetParametersTable(dataSource);

        csvDs.File = csvParams["input-file"].ToString();

        if (csvParams.Contains("header-line"))
        {
            csvDs.HasHeader = string.Equals(
                csvParams["header-line"].ToString(),
                "true",
                StringComparison.OrdinalIgnoreCase);
        }

        if (csvParams.Contains("skip-lines"))
        {
            // TryParse leaves skipLines at 0 when the text is not a valid
            // integer, which is the fallback we want.
            int skipLines;
            Int32.TryParse(csvParams["skip-lines"].ToString(), out skipLines);
            csvDs.SkipLines = skipLines;
        }

        csvDs.FileEncoding = csvParams.Contains("encoding")
            ? GetTextEncodingFromString(csvParams["encoding"].ToString())
            : Encoding.Default;

        foreach (Column column in GetDataSourceColumns(dataSource))
        {
            csvDs.AddColumn(column);
        }

        cfg.AddDataSource(0, csvDs);
    }

    return cfg;
}