Ejemplo n.º 1
0
        /// <summary>
        /// Creates a processor that works against the supplied configuration and database,
        /// and precomputes the property order and accumulated probabilities.
        /// </summary>
        /// <param name="config">Configuration whose properties are read via <c>GetProperties()</c>.</param>
        /// <param name="database">Database instance stored for later use.</param>
        public Processor(Configuration config, IDatabase database)
        {
            _config = config;
            _database = database;
            _listeners = new List<IMatchListener>();

            _passthrough = new PassThroughFilter();
            _choosebest = new ChooseBestFilter();

            // Precomputation, step 1: keep every property that is not an id property,
            // then order the survivors with PropertyComparator.
            _proporder = new List<Property>();
            foreach (Property candidate in _config.GetProperties())
            {
                if (candidate.IsIdProperty)
                    continue;

                _proporder.Add(candidate);
            }

            _proporder.Sort(new PropertyComparator());

            // Precomputation, step 2: walk the ordered properties from last to first,
            // folding each HighProbability into a running value that starts at 0.5.
            // _accprob[i] is the running value after folding in properties i..last.
            int propertyCount = _proporder.Count;
            _accprob = new double[propertyCount];

            double running = 0.5;
            for (int i = propertyCount; i-- > 0;)
            {
                running = StandardUtils.ComputeBayes(running, _proporder[i].HighProbability);
                _accprob[i] = running;
            }
        }
Ejemplo n.º 2
0
        private int _sizeix; // position in prevsizes

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Creates a tracker bound to the given configuration, analyzer and searcher.
        /// </summary>
        /// <param name="config">Configuration stored for later use.</param>
        /// <param name="analyzer">Analyzer stored for later use.</param>
        /// <param name="searcher">Index searcher stored for later use.</param>
        /// <param name="maxSearchHits">Value stored in <c>_maxSearchHits</c>.</param>
        /// <param name="minRelevance">Value stored in <c>_minRelevance</c>.</param>
        public QueryResultTracker(Configuration config, Analyzer analyzer, IndexSearcher searcher, int maxSearchHits, float minRelevance)
        {
            // Collaborators handed in by the caller.
            _config = config;
            _analyzer = analyzer;
            _searcher = searcher;

            // Caller-supplied search settings.
            _maxSearchHits = maxSearchHits;
            _minRelevance = minRelevance;

            // Internal state: starting limit of 100 and a ten-slot size history.
            _limit = 100;
            _prevsizes = new int[10];
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates a Lucene-backed database: stores the settings, tries to open the
        /// indexes and searchers, and then builds the main query result tracker.
        /// </summary>
        /// <param name="config">Configuration stored on the instance and passed to the tracker.</param>
        /// <param name="overwrite">Passed through to <c>OpenIndexes</c>.</param>
        /// <param name="dbprops">Supplies <c>MaxSearchHits</c> and <c>MinRelevance</c>.</param>
        /// <remarks>
        /// A failure while opening the indexes or searchers is logged and not rethrown,
        /// so the constructor still completes in that case.
        /// </remarks>
        public LuceneDatabase(Configuration config, bool overwrite, DatabaseProperties dbprops)
        {
            _config = config;
            _analyzer = new StandardAnalyzer(Version.LUCENE_29);
            _maxSearchHits = dbprops.MaxSearchHits;
            _minRelevance = dbprops.MinRelevance;

            try
            {
                OpenIndexes(overwrite);
                OpenSearchers();
            }
            catch (Exception ex)
            {
                logger.Error("Error initializing object: {0}", ex.Message);
            }

            // The tracker receives the current value of _searcher, not a live reference to
            // the field. Building it before the open calls handed it whatever _searcher held
            // beforehand (presumably null -- OpenSearchers is assumed to assign the field;
            // confirm). Building it here gives it the opened searcher when opening succeeded,
            // and still leaves _maintracker non-null when opening failed.
            _maintracker = new QueryResultTracker(config, _analyzer, _searcher, dbprops.MaxSearchHits,
                                                  dbprops.MinRelevance);
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Creates a processor for <paramref name="config"/>, using the database returned by
 /// <c>config.CreateDatabase(overwrite)</c>.
 /// </summary>
 /// <param name="config">Configuration to use; it is also asked to create the database.</param>
 /// <param name="overwrite">
 /// Passed unchanged to <c>CreateDatabase</c>.
 /// NOTE(review): presumably controls whether existing stored data is replaced -- confirm.
 /// </param>
 public Processor(Configuration config, bool overwrite)
     : this(config, config.CreateDatabase(overwrite))
 {
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Creates a processor for <paramref name="config"/> by delegating to the
 /// (config, overwrite) constructor with <c>overwrite</c> set to <c>true</c>.
 /// </summary>
 /// <param name="config">Configuration to use.</param>
 public Processor(Configuration config)
     : this(config, true)
 {
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds a <see cref="Configuration"/> from the XML file at <paramref name="file"/>.
        /// </summary>
        /// <param name="file">
        /// Path (or URI) handed directly to <c>XElement.Load</c>. No special prefix such as
        /// 'classpath:' is interpreted here.
        /// </param>
        /// <returns>
        /// A configuration holding the threshold, every property declared under a
        /// <c>schema</c> element (id properties included), and one CSV data source per
        /// top-level <c>csv</c> element.
        /// </returns>
        /// <remarks>
        /// Numeric values are parsed with the invariant culture, so "0.85" is read the same
        /// way regardless of the host's regional settings. A property element without a
        /// <c>name</c> child still throws <see cref="InvalidOperationException"/>.
        /// </remarks>
        public static Configuration Load(string file)
        {
            // Fully qualified so the block does not depend on a 'using System.Globalization;'.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            var cfg = new Configuration();
            var properties = new List<Property>();

            // Get the appropriate nodes using Linq to XML
            XElement xml = XElement.Load(file);

            // Threshold: first <threshold> found under a <schema>; 0 when none is present.
            double threshold =
                xml.Elements("schema")
                   .Descendants("threshold")
                   .Select(x => double.Parse(x.Value, invariant))
                   .FirstOrDefault();
            cfg.Threshold = threshold;

            // Get all of the properties
            IEnumerable<XElement> xmlProperties = from s in xml.Elements("schema")
                                                  from p in s.Descendants("property")
                                                  select p;

            foreach (XElement xElement in xmlProperties)
            {
                string propName = xElement.Descendants("name").First().Value;
                var property = new Property(propName);

                // Only type="id" marks an id property; any other (or missing) type attribute
                // means a regular property with comparator and probabilities.
                XAttribute typeAttribute = xElement.Attribute("type");
                if (typeAttribute != null && typeAttribute.Value == "id")
                {
                    property.IsIdProperty = true;
                }
                else
                {
                    // A missing <comparator> leaves the comparator unset instead of
                    // dereferencing a null element.
                    XElement comparatorElement = xElement.Descendants("comparator").FirstOrDefault();
                    if (comparatorElement != null)
                        property.Comparator = GetComparatorFromString(comparatorElement.Value);

                    property.LowProbability =
                        xElement.Descendants("low").Select(x => double.Parse(x.Value, invariant)).FirstOrDefault();
                    property.HighProbability =
                        xElement.Descendants("high").Select(x => double.Parse(x.Value, invariant)).FirstOrDefault();
                }

                // Every declared property is registered, id properties too; consumers that
                // only want comparable properties filter on IsIdProperty themselves.
                properties.Add(property);
            }

            cfg.SetProperties(properties);

            // Data sources: every top-level element other than <schema>.
            IEnumerable<XElement> dataSources = from d in xml.Elements()
                                                where d.Name != "schema"
                                                select d;

            foreach (XElement dataSource in dataSources)
            {
                if (dataSource.Name == "csv")
                {
                    var csvDs = new CsvDataSource();
                    Hashtable csvParams = GetParametersTable(dataSource);
                    csvDs.File = csvParams["input-file"].ToString();

                    if (csvParams.Contains("header-line"))
                    {
                        // Ordinal, case-insensitive match instead of a culture-sensitive ToLower().
                        csvDs.HasHeader = string.Equals(csvParams["header-line"].ToString(), "true",
                                                        StringComparison.OrdinalIgnoreCase);
                    }

                    if (csvParams.Contains("skip-lines"))
                    {
                        // An unparsable value falls back to 0 skipped lines.
                        int skipLines;
                        csvDs.SkipLines = Int32.TryParse(csvParams["skip-lines"].ToString(), out skipLines)
                                              ? skipLines
                                              : 0;
                    }

                    csvDs.FileEncoding = csvParams.Contains("encoding")
                                             ? GetTextEncodingFromString(csvParams["encoding"].ToString())
                                             : Encoding.Default;

                    List<Column> cols = GetDataSourceColumns(dataSource);
                    foreach (Column column in cols)
                    {
                        csvDs.AddColumn(column);
                    }

                    cfg.AddDataSource(0, csvDs);
                }
            }

            return cfg;
        }