public virtual void Consume(FeatureVectorList featureVectors)
        {
            if (featureVectors != null)
            {
                long timeSliceTicks = _model is TimeSliceDCM ? (_model as TimeSliceDCM).TimeSliceTicks : -1;

                using (StreamWriter instanceLocationsFile = new StreamWriter(TrainingInstanceLocationsPath, true))
                {
                    foreach (FeatureVector featureVector in featureVectors)
                    {
                        Point point = featureVector.DerivedFrom as Point;
                        long  slice = timeSliceTicks > 0 ? point.Time.Ticks / timeSliceTicks : 1;
                        int   row   = (int)((point.Location.Y - _model.TrainingArea.BoundingBox.MinY) / _model.TrainingPointSpacing);
                        int   col   = (int)((point.Location.X - _model.TrainingArea.BoundingBox.MinX) / _model.TrainingPointSpacing);
                        instanceLocationsFile.WriteLine(slice + " " + row + " " + col);

                        if (_numFeaturesInEachVector == -1)
                        {
                            _numFeaturesInEachVector = featureVector.Count;
                        }
                        else if (_numFeaturesInEachVector != featureVector.Count)
                        {
                            throw new Exception("Feature vectors do not contain the same number of features. This probably indicates missing features during the feature extraction process.");
                        }
                    }

                    instanceLocationsFile.Close();
                }
            }
        }
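For reference, a standalone sketch of the (slice, row, col) grid indexing written to the instance-locations file above. The slice width, bounding box, spacing, and point values are assumptions for illustration, not values taken from the model:

// Standalone sketch; all values below are assumed.
using System;

static class GridIndexSketch
{
    static void Main()
    {
        long timeSliceTicks = TimeSpan.FromHours(6).Ticks;        // assumed slice width
        DateTime pointTime = new DateTime(2012, 1, 1, 14, 30, 0); // assumed point time
        double minX = 0, minY = 0, spacing = 200;                 // assumed bounding box and point spacing
        double x = 1250, y = 430;                                 // assumed point location

        long slice = timeSliceTicks > 0 ? pointTime.Ticks / timeSliceTicks : 1;
        int row = (int)((y - minY) / spacing);
        int col = (int)((x - minX) / spacing);

        Console.WriteLine(slice + " " + row + " " + col);         // same line format as the instance-locations file
    }
}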
        public override void Consume(FeatureVectorList featureVectors)
        {
            base.Consume(featureVectors);

            if (featureVectors != null && featureVectors.Count > 0)
            {
                if (Model.IncidentTypes.Count != 1)
                {
                    throw new Exception("SvmRank cannot be used for multi-incident predictions. Select a single incident type.");
                }

                Dictionary<int, Point> idPoint = new Dictionary<int, Point>(featureVectors.Count);
                foreach (Point point in featureVectors.Select(vector => vector.DerivedFrom as Point))
                {
                    idPoint.Add(point.Id, point);
                }

                foreach (FeatureVector vector in featureVectors)
                {
                    Point point = vector.DerivedFrom as Point;
                    if (point == null)
                    {
                        throw new NullReferenceException("Expected Point object in DerivedFrom");
                    }

                    PostGIS.Point vectorLocation = point.Location;
                    int           count          = idPoint.Values.Count(p => p.Location.DistanceTo(vectorLocation) <= Model.TrainingPointSpacing / 2d && p.IncidentType != PointPrediction.NullLabel);
                    vector.DerivedFrom.TrueClass = count + " qid:1";
                }

                _svmRank.ConsumeTrainingVectors(featureVectors);
            }
        }
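A minimal sketch of how the SvmRank relevance target above is constructed: each point's label is the count of non-null incidents within half the training point spacing, all under a single query id. The Pt type, labels, and spacing are hypothetical stand-ins for Point, PointPrediction.NullLabel, and Model.TrainingPointSpacing:

// Standalone sketch; types and values below are assumed stand-ins.
using System;
using System.Collections.Generic;
using System.Linq;

static class RankLabelSketch
{
    sealed class Pt { public double X, Y; public string IncidentType; }

    static double Distance(Pt a, Pt b)
    {
        return Math.Sqrt((a.X - b.X) * (a.X - b.X) + (a.Y - b.Y) * (a.Y - b.Y));
    }

    static void Main()
    {
        double trainingPointSpacing = 200; // assumed
        List<Pt> points = new List<Pt>
        {
            new Pt { X = 0,  Y = 0,  IncidentType = "Burglary" },
            new Pt { X = 50, Y = 0,  IncidentType = "Burglary" },
            new Pt { X = 60, Y = 10, IncidentType = "NULL" }     // stand-in for PointPrediction.NullLabel
        };

        foreach (Pt p in points)
        {
            // relevance = number of labeled incidents within half the point spacing (includes the point itself)
            int count = points.Count(q => Distance(q, p) <= trainingPointSpacing / 2d && q.IncidentType != "NULL");
            Console.WriteLine(count + " qid:1"); // all vectors share a single ranking query
        }
    }
}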
        public override void Consume(FeatureVectorList featureVectors)
        {
            base.Consume(featureVectors);

            if (featureVectors != null && featureVectors.Count > 0)
            {
                using (StreamWriter trainingFile = new StreamWriter(RawTrainPath, true))
                {
                    foreach (FeatureVector vector in featureVectors)
                    {
                        trainingFile.Write(vector.DerivedFrom.TrueClass);
                        foreach (PTL.ATT.Models.Feature f in Model.Features.OrderBy(i => i.Id))
                        {
                            object value;
                            if (vector.TryGetValue(f.Id, out value))
                            {
                                trainingFile.Write("," + value);
                            }
                            else
                            {
                                trainingFile.Write(",0");
                            }
                        }
                        trainingFile.WriteLine();
                    }

                    trainingFile.Close();
                }
            }
        }
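A standalone sketch of the CSV row format written above: the true class first, then feature values ordered by feature ID, with 0 substituted for features missing from the vector. The feature IDs and values below are assumptions:

// Standalone sketch; feature IDs and values are assumed.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

static class TrainingRowSketch
{
    static void Main()
    {
        List<int> featureIds = new List<int> { 3, 7, 12 };                          // assumed model features
        Dictionary<int, double> vector = new Dictionary<int, double> { { 3, 1.5 }, { 12, 0.25 } }; // feature 7 missing

        StringBuilder row = new StringBuilder("Burglary");                          // TrueClass comes first
        foreach (int id in featureIds.OrderBy(id => id))
        {
            double value;
            row.Append(",").Append(vector.TryGetValue(id, out value) ? value.ToString() : "0"); // 0 for missing features
        }

        Console.WriteLine(row);                                                     // Burglary,1.5,0,0.25
    }
}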
        public void Train(FeatureVectorList featureVectors)
        {
            Consume(featureVectors);
            BuildModel();

            LAIR.IO.File.Compress(TrainingInstanceLocationsPath, CompressedTrainingInstanceLocationsPath, true);
            System.IO.File.Delete(TrainingInstanceLocationsPath);
        }
        public override void Consume(FeatureVectorList featureVectors)
        {
            base.Consume(featureVectors);

            if (featureVectors != null)
            {
                _libLinear.ConsumeTrainingVectors(featureVectors);
            }
        }
 public virtual void Classify(FeatureVectorList featureVectors)
 {
     if (featureVectors != null)
     {
         foreach (FeatureVector featureVector in featureVectors)
         {
             if (featureVector.Count != _numFeaturesInEachVector)
             {
                 throw new Exception("Expected " + _numFeaturesInEachVector + " features in each vector, but saw " + featureVector.Count + ".");
             }
         }
     }
 }
        public override void Classify(FeatureVectorList featureVectors)
        {
            base.Classify(featureVectors);

            if (Model.IncidentTypes.Count != 1)
                throw new Exception("SvmRank cannot be used for multi-incident predictions. Select a single incident type.");

            string incident = Model.IncidentTypes.First();

            _svmRank.Classify(featureVectors);

            int maxRank = featureVectors.Max(vector => int.Parse(vector.DerivedFrom.PredictionConfidenceScores.Keys.First())) + 1;

            foreach (FeatureVector vector in featureVectors)
            {
                int rank = int.Parse(vector.DerivedFrom.PredictionConfidenceScores.Keys.First()) + 1;
                float score = (maxRank - rank) / (float)maxRank;
                vector.DerivedFrom.PredictionConfidenceScores.Clear();
                vector.DerivedFrom.PredictionConfidenceScores.Add(incident, score);
            }
        }
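A minimal sketch of the rank-to-score conversion above: SvmRank's 0-based ranks are shifted to 1-based, and score = (maxRank - rank) / maxRank, so the best rank receives the highest confidence and the worst receives 0. The ranks below are assumed values:

// Standalone sketch; ranks are assumed.
using System;
using System.Linq;

static class RankToScoreSketch
{
    static void Main()
    {
        int[] parsedRanks = { 0, 1, 2, 3 };                  // assumed 0-based ranks parsed from SvmRank output
        int maxRank = parsedRanks.Max() + 1;

        foreach (int parsed in parsedRanks)
        {
            int rank = parsed + 1;
            float score = (maxRank - rank) / (float)maxRank; // 1 -> 0.75, 2 -> 0.5, 3 -> 0.25, 4 -> 0
            Console.WriteLine(rank + " -> " + score);
        }
    }
}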
        public override void Classify(FeatureVectorList featureVectors)
        {
            base.Classify(featureVectors);

            if (featureVectors != null && featureVectors.Count > 0)
            {
                using (StreamWriter predictionsFile = new StreamWriter(RawPredictionInstancesPath))
                {
                    predictionsFile.Write("Class");
                    foreach (PTL.ATT.Models.Feature f in Model.Features.OrderBy(i => i.Id))
                        predictionsFile.Write("," + f.Id);
                    predictionsFile.WriteLine();

                    foreach (FeatureVector vector in featureVectors)
                    {
                        predictionsFile.Write(vector.DerivedFrom.TrueClass);
                        foreach (PTL.ATT.Models.Feature f in Model.Features.OrderBy(i => i.Id))
                        {
                            object value;
                            if (vector.TryGetValue(f.Id, out value))
                                predictionsFile.Write("," + value);
                            else
                                predictionsFile.Write(",0");
                        }
                        predictionsFile.WriteLine();
                    }
                    predictionsFile.Close();
                }

                StringBuilder rCmd = new StringBuilder(@"
            predRaw=read.csv(""" + RawPredictionInstancesPath.Replace("\\", "/") + @""", header = TRUE, sep = ',')" + @"
            mxmn=read.csv(""" + ColumnMaxMinPath.Replace("\\", "/") + @""", header = FALSE, sep = ',')" + @"
            predNorm=predRaw
            for(i in 2:NCOL(predRaw)) {
              cmax=mxmn[1,i-1]
              cmin=mxmn[2,i-1]
              predNorm[,i] = (predRaw[,i]-((cmax+cmin)/2))/((cmax-cmin)/2)
            }
            predNorm[is.na(predNorm)]=0
            library(randomForest)
            load(file=""" + RandomForestModelPath.Replace("\\", "/") + @""")" + @"
            rf.pred=predict(rf, predNorm, norm.votes=TRUE, type='prob')
            dfp<-data.frame(rf.pred)
            names(dfp)[names(dfp)=='NULL.'] <- 'NULL'
            write.table(dfp, file=""" + PredictionsPath.Replace("\\", "/") + @""", row.names=FALSE, sep=',')" + @"
            ");
                string output, error;
                R.Execute(rCmd.ToString(), false, out output, out error);

                try
                {
                    using (StreamReader predictionsFile = new StreamReader(PredictionsPath))
                    {
                        string[] colnames = predictionsFile.ReadLine().Split(',');
                        int row = 0;
                        string line;

                        while ((line = predictionsFile.ReadLine()) != null)
                        {
                            string[] lines = line.Split(',');

                            for (int i = 0; i < colnames.Length; i++)
                            {
                                string label = colnames[i].Replace("\"", "").Replace(".", " "); // undo R's quoting and dot-substitution in column names
                                float prob = float.Parse(lines[i]);
                                featureVectors[row].DerivedFrom.PredictionConfidenceScores.Add(label, prob);
                            }
                            row++;
                        }

                        predictionsFile.Close();

                        if (row != featureVectors.Count)
                            throw new Exception("Number of predictions doesn't match number of input vectors");
                    }
                }
                catch (Exception ex)
                {
                    throw new Exception("ERROR:  RandomForest failed to classify points. Output and error messages follow:" + Environment.NewLine +
                                        "\tException message:  " + ex.Message + Environment.NewLine +
                                        "\tR output:  " + output + Environment.NewLine +
                                        "\tR error:  " + error);
                }
                finally
                {
                    try { File.Delete(ColumnMaxMinPath); }
                    catch { }
                    try { File.Delete(RandomForestModelPath); }
                    catch { }
                    try { File.Delete(RawPredictionInstancesPath); }
                    catch { }
                    try { File.Delete(PredictionsPath); }
                    catch { }
                }
            }
        }
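A standalone sketch of the normalization the R script applies to each feature column: subtracting the midrange and dividing by half the range maps training values in [cmin, cmax] onto [-1, 1]. The column max/min and inputs below are assumptions:

// Standalone sketch; column max/min and inputs are assumed.
using System;

static class MidrangeScalingSketch
{
    static void Main()
    {
        double cmax = 10, cmin = 2; // assumed per-column training max/min (read from ColumnMaxMinPath in the real code)
        foreach (double x in new[] { 2.0, 6.0, 10.0 })
        {
            double norm = (x - (cmax + cmin) / 2) / ((cmax - cmin) / 2); // maps [cmin, cmax] to [-1, 1]
            Console.WriteLine(x + " -> " + norm);                        // 2 -> -1, 6 -> 0, 10 -> 1
        }
    }
}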
        public override void Classify(FeatureVectorList featureVectors)
        {
            base.Classify(featureVectors);

            _libLinear.Classify(featureVectors);
        }
        public override void Classify(FeatureVectorList featureVectors)
        {
            base.Classify(featureVectors);

            if (featureVectors != null && featureVectors.Count > 0)
            {
                using (StreamWriter predictionsFile = new StreamWriter(RawPredictionInstancesPath))
                {
                    predictionsFile.Write("Class");
                    foreach (PTL.ATT.Models.Feature f in Model.Features.OrderBy(i => i.Id))
                    {
                        predictionsFile.Write("," + f.Id);
                    }
                    predictionsFile.WriteLine();

                    foreach (FeatureVector vector in featureVectors)
                    {
                        predictionsFile.Write(vector.DerivedFrom.TrueClass);
                        foreach (PTL.ATT.Models.Feature f in Model.Features.OrderBy(i => i.Id))
                        {
                            object value;
                            if (vector.TryGetValue(f.Id, out value))
                            {
                                predictionsFile.Write("," + value);
                            }
                            else
                            {
                                predictionsFile.Write(",0");
                            }
                        }
                        predictionsFile.WriteLine();
                    }
                    predictionsFile.Close();
                }

                StringBuilder rCmd = new StringBuilder(@"
predRaw=read.csv(""" + RawPredictionInstancesPath.Replace("\\", "/") + @""", header = TRUE, sep = ',')" + @"
mxmn=read.csv(""" + ColumnMaxMinPath.Replace("\\", "/") + @""", header = FALSE, sep = ',')" + @"
predNorm=predRaw
for(i in 2:NCOL(predRaw)) {
  cmax=mxmn[1,i-1]
  cmin=mxmn[2,i-1]
  predNorm[,i] = (predRaw[,i]-((cmax+cmin)/2))/((cmax-cmin)/2)
}
predNorm[is.na(predNorm)]=0
library(ada)
set.seed(99)
load(file=""" + ClassPath.Replace("\\", "/") + @""")" + @"
if(length(cls)==2) {
  load(file=""" + AdaModelPath.Replace("\\", "/") + @""")" + @"
  adb.pred<-predict(adb, newdata=predNorm, type='prob')
  mult<-data.frame(adb.pred)
  names(mult)<-sort(c(toString(cls[1]), toString(cls[2])))
} else {
  mult<-data.frame(matrix(0, ncol=1, nrow=NROW(predRaw)))
  names(mult)<-c('INIT_DF')
  for(i in 1:length(cls)) {
    load(file=paste('" + Path.Combine(Model.ModelDirectory, @"ada', i, '.RData', sep='')").Replace("\\", "/") + @")" + @"
    adb.pred<-predict(adb, newdata=predNorm, type='prob')
    abp<-data.frame(adb.pred)
    names(abp)<-sort(c(toString(cls[i]), 'REST'))
    abp<-subset(abp, select=-c(REST))
    mult<-cbind(mult, abp)
  }
  mult<-subset(mult, select=-c(INIT_DF))
  mult<-1/(1+exp(-1*mult))
  sums<-data.frame(rowSums(mult))
  for(j in 1:length(cls)) {
    mult[j]<-mult[j]/sums
  }
}
write.table(mult, file=""" + PredictionsPath.Replace("\\", "/") + @""", row.names=FALSE, sep=',')" + @"
");
                string output, error;
                R.Execute(rCmd.ToString(), false, out output, out error);

                try
                {
                    using (StreamReader predictionsFile = new StreamReader(PredictionsPath))
                    {
                        string[] colnames = predictionsFile.ReadLine().Split(',');
                        int      row      = 0;
                        string   line;

                        while ((line = predictionsFile.ReadLine()) != null)
                        {
                            string[] lines = line.Split(',');

                            for (int i = 0; i < colnames.Length; i++)
                            {
                                string label = colnames[i].Replace("\"", "");
                                float  prob  = float.Parse(lines[i]);
                                featureVectors[row].DerivedFrom.PredictionConfidenceScores.Add(label, prob);
                            }
                            row++;
                        }

                        predictionsFile.Close();

                        if (row != featureVectors.Count)
                        {
                            throw new Exception("Number of predictions doesn't match number of input vectors");
                        }
                    }
                }
                catch (Exception ex)
                {
                    throw new Exception("ERROR:  AdaBoost failed to classify points. Output and error messages follow:" + Environment.NewLine +
                                        "\tException message:  " + ex.Message + Environment.NewLine +
                                        "\tR output:  " + output + Environment.NewLine +
                                        "\tR error:  " + error);
                }
                finally
                {
                    try { File.Delete(ColumnMaxMinPath); }
                    catch { }
                    try { File.Delete(ClassPath); }
                    catch { }
                    try { File.Delete(AdaModelPath); }
                    catch { }
                    try { File.Delete(RawPredictionInstancesPath); }
                    catch { }
                    try { File.Delete(PredictionsPath); }
                    catch { }
                }
            }
        }
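A minimal sketch of the multiclass combination in the R script above: each class's one-vs-REST probability is squashed through the logistic function, then the row is normalized to sum to 1. The per-class probabilities below are assumed values:

// Standalone sketch; per-class probabilities are assumed.
using System;
using System.Linq;

static class OneVsRestSketch
{
    static void Main()
    {
        double[] perClass = { 0.9, 0.3, 0.1 };   // assumed one-vs-REST probabilities for a single point

        // squash each class's probability through the logistic function, then normalize the row
        double[] squashed = perClass.Select(p => 1.0 / (1.0 + Math.Exp(-p))).ToArray();
        double sum = squashed.Sum();
        double[] scores = squashed.Select(s => s / sum).ToArray();

        Console.WriteLine(string.Join(", ", scores)); // per-class confidence scores summing to 1
    }
}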
        /// <summary>
        /// Extracts feature vectors from points in a time range.
        /// </summary>
        /// <param name="prediction">Prediction to extract vectors for.</param>
        /// <param name="training">Whether or not this is the training phase.</param>
        /// <param name="start">Start time (points without a time are always included).</param>
        /// <param name="end">End time (points without a time are always included).</param>
        /// <returns>One or more lists of extracted feature vectors; additional lists may come from an external feature extractor, if one is configured.</returns>
        protected virtual IEnumerable<FeatureVectorList> ExtractFeatureVectors(Prediction prediction, bool training, DateTime start, DateTime end)
        {
            // this can be called concurrently (e.g., via the time slice model with one thread per slice), so lock on prediction to get the point objects and their vectors
            FeatureVectorList featureVectors;
            Dictionary<int, FeatureVector> pointIdFeatureVector;
            int numFeatures;
            lock (prediction)
            {
                prediction.ReleasePoints(); // so that we get new point objects each time -- their times might be modified by a sub-class (e.g., TimeSliceDCM).
                featureVectors = new FeatureVectorList(prediction.Points.Count);
                pointIdFeatureVector = new Dictionary<int, FeatureVector>(prediction.Points.Count);
                numFeatures = GetNumFeaturesExtractedFor(prediction);
                foreach (Point point in prediction.Points)
                    if (point.Time == DateTime.MinValue || (point.Time >= start && point.Time <= end))
                    {
                        point.TrueClass = point.IncidentType;
                        FeatureVector vector = new FeatureVector(point, numFeatures);
                        featureVectors.Add(vector);
                        pointIdFeatureVector.Add(point.Id, vector);
                    }
            }

            Area area = training ? prediction.Model.TrainingArea : prediction.PredictionArea;
            Set<Thread> threads = new Set<Thread>();

            #region spatial distance features
            List<Feature> spatialDistanceFeatures = Features.Where(f => f.EnumValue.Equals(FeatureType.MinimumDistanceToGeometry)).ToList();
            if (spatialDistanceFeatures.Count > 0)
            {
                Console.Out.WriteLine("Extracting spatial distance feature values");
                float distanceWhenBeyondThreshold = (float)Math.Sqrt(2.0 * Math.Pow(FeatureDistanceThreshold, 2)); // with a bounding box of FeatureDistanceThreshold around each point, the maximum distance between a point and some feature shapefile geometry would be sqrt(2*FeatureDistanceThreshold^2). That is, the feature shapefile geometry would be positioned in one of the corners of the bounding box.
                threads.Clear();
                for (int i = 0; i < Configuration.ProcessorCount; ++i)
                {
                    Thread t = new Thread(new ParameterizedThreadStart(o =>
                        {
                            int core = (int)o;
                            NpgsqlConnection threadConnection = DB.Connection.OpenConnection;
                            string pointTableName = Point.GetTableName(prediction);
                            foreach (Feature spatialDistanceFeature in spatialDistanceFeatures)
                            {
                                Shapefile shapefile = new Shapefile(int.Parse(training ? spatialDistanceFeature.TrainingResourceId : spatialDistanceFeature.PredictionResourceId));

                                NpgsqlCommand cmd = DB.Connection.NewCommand("SELECT points." + Point.Columns.Id + " as points_" + Point.Columns.Id + "," +
                                                                             "CASE WHEN COUNT(" + shapefile.GeometryTable + "." + ShapefileGeometry.Columns.Geometry + ")=0 THEN " + distanceWhenBeyondThreshold + " " +
                                                                             "ELSE min(st_distance(st_closestpoint(" + shapefile.GeometryTable + "." + ShapefileGeometry.Columns.Geometry + ",points." + Point.Columns.Location + "),points." + Point.Columns.Location + ")) " +
                                                                             "END as feature_value " +

                                                                             "FROM (SELECT *,st_expand(" + pointTableName + "." + Point.Columns.Location + "," + FeatureDistanceThreshold + ") as bounding_box " +
                                                                                   "FROM " + pointTableName + " " +
                                                                                   "WHERE " + pointTableName + "." + Point.Columns.Id + " % " + Configuration.ProcessorCount + " = " + core + " AND " +
                                                                                              "(" +
                                                                                                  pointTableName + "." + Point.Columns.Time + "='-infinity'::timestamp OR " +
                                                                                                  "(" +
                                                                                                      pointTableName + "." + Point.Columns.Time + ">=@point_start AND " +
                                                                                                      pointTableName + "." + Point.Columns.Time + "<=@point_end" +
                                                                                                  ")" +
                                                                                              ")" +
                                                                                   ") points " +

                                                                             "LEFT JOIN " + shapefile.GeometryTable + " " +

                                                                             "ON points.bounding_box && " + shapefile.GeometryTable + "." + ShapefileGeometry.Columns.Geometry + " AND " +
                                                                                 "(" +
                                                                                    shapefile.GeometryTable + "." + ShapefileGeometry.Columns.Time + "='-infinity'::timestamp OR " +
                                                                                    "(" +
                                                                                        shapefile.GeometryTable + "." + ShapefileGeometry.Columns.Time + ">=@geometry_start AND " +
                                                                                        shapefile.GeometryTable + "." + ShapefileGeometry.Columns.Time + "<=@geometry_end" +
                                                                                    ")" +
                                                                                 ")" +

                                                                             "GROUP BY points." + Point.Columns.Id, null, threadConnection);

                                DateTime spatialDistanceFeatureStart = start - spatialDistanceFeature.Parameters.GetTimeSpanValue(SpatialDistanceParameter.LagOffset);
                                DateTime spatialDistanceFeatureEnd = spatialDistanceFeatureStart + spatialDistanceFeature.Parameters.GetTimeSpanValue(SpatialDistanceParameter.LagDuration);

                                if (spatialDistanceFeatureEnd >= start)
                                    Console.Out.WriteLine("WARNING:  Spatial distance sample overlaps extraction period.");

                                if (spatialDistanceFeatureEnd < spatialDistanceFeatureStart)
                                    Console.Out.WriteLine("WARNING:  Spatial distance sample end precedes sample start.");

                                ConnectionPool.AddParameters(cmd, new Parameter("point_start", NpgsqlDbType.Timestamp, start),
                                                                  new Parameter("point_end", NpgsqlDbType.Timestamp, end),
                                                                  new Parameter("geometry_start", NpgsqlDbType.Timestamp, spatialDistanceFeatureStart),
                                                                  new Parameter("geometry_end", NpgsqlDbType.Timestamp, spatialDistanceFeatureEnd));

                                NpgsqlDataReader reader = cmd.ExecuteReader();
                                NumericFeature distanceFeature = _idNumericFeature[spatialDistanceFeature.Id];
                                while (reader.Read())
                                {
                                    FeatureVector vector;
                                    if (!pointIdFeatureVector.TryGetValue(Convert.ToInt32(reader["points_" + Point.Columns.Id]), out vector))  // above, we select all points that fall between point_start and point_end. the latter can be one tick short of the next minute, and npgsql rounds up causing points to appear in the reader that we didn't add to the pointIdFeatureVector collection.
                                        continue;

                                    double value = Convert.ToDouble(reader["feature_value"]);

                                    // value > threshold shouldn't happen here, since we excluded such objects from consideration above; however, the calculations aren't perfect in postgis, so we check again and reset appropriately
                                    if (value > distanceWhenBeyondThreshold)
                                        value = distanceWhenBeyondThreshold;

                                    vector.Add(distanceFeature, value, false); // don't update range due to concurrent access to the feature
                                }
                                reader.Close();
                            }

                            DB.Connection.Return(threadConnection);
                        }));

                    t.Start(i);
                    threads.Add(t);
                }

                foreach (Thread t in threads)
                    t.Join();
            }
            #endregion

            #region spatial density features
            List<Feature> spatialDensityFeatures = Features.Where(f => f.EnumValue.Equals(FeatureType.GeometryDensity)).ToList();
            if (spatialDensityFeatures.Count > 0)
            {
                List<PostGIS.Point> densityEvalPoints = featureVectors.Select(v => (v.DerivedFrom as Point).Location).ToList();
                Dictionary<string, List<float>> featureIdDensityEstimates = new Dictionary<string, List<float>>(spatialDensityFeatures.Count);
                threads.Clear();
                for (int i = 0; i < Configuration.ProcessorCount; ++i)
                {
                    Thread t = new Thread(new ParameterizedThreadStart(core =>
                        {
                            NpgsqlCommand command = DB.Connection.NewCommand(null);
                            for (int j = (int)core; j < spatialDensityFeatures.Count; j += Configuration.ProcessorCount)
                            {
                                Feature spatialDensityFeature = spatialDensityFeatures[j];

                                DateTime spatialDensityFeatureStart = start - spatialDensityFeature.Parameters.GetTimeSpanValue(SpatialDensityParameter.LagOffset);
                                DateTime spatialDensityFeatureEnd = spatialDensityFeatureStart + spatialDensityFeature.Parameters.GetTimeSpanValue(SpatialDensityParameter.LagDuration);

                                if (spatialDensityFeatureEnd >= start)
                                    Console.Out.WriteLine("WARNING:  Spatial density sample overlaps extraction period.");

                                if (spatialDensityFeatureEnd < spatialDensityFeatureStart)
                                    Console.Out.WriteLine("WARNING:  Spatial density sample end precedes sample start.");

                                Shapefile shapefile = new Shapefile(int.Parse(training ? spatialDensityFeature.TrainingResourceId : spatialDensityFeature.PredictionResourceId));
                                string geometryRecordWhereClause = "WHERE " + ShapefileGeometry.Columns.Time + "='-infinity'::timestamp OR (" + ShapefileGeometry.Columns.Time + ">=@geometry_start AND " + ShapefileGeometry.Columns.Time + "<=@geometry_end)";
                                Parameter geometryStart = new Parameter("geometry_start", NpgsqlDbType.Timestamp, spatialDensityFeatureStart);
                                Parameter geometryEnd = new Parameter("geometry_end", NpgsqlDbType.Timestamp, spatialDensityFeatureEnd);
                                List<PostGIS.Point> kdeInputPoints = Geometry.GetPoints(command, shapefile.GeometryTable, ShapefileGeometry.Columns.Geometry, ShapefileGeometry.Columns.Id, geometryRecordWhereClause, -1, geometryStart.NpgsqlParameter, geometryEnd.NpgsqlParameter).SelectMany(pointList => pointList).Select(p => new PostGIS.Point(p.X, p.Y, area.Shapefile.SRID)).ToList();

                                Console.Out.WriteLine("Computing spatial density of \"" + shapefile.Name + "\".");
                                int sampleSize = spatialDensityFeature.Parameters.GetIntegerValue(SpatialDensityParameter.SampleSize);
                                List<float> densityEstimates = KernelDensityDCM.GetDensityEstimate(kdeInputPoints, sampleSize, false, -1, -1, densityEvalPoints, false);

                                // the density might not be computable if too few points are provided -- use default value for all evaluation points in such cases
                                if (densityEstimates.Count != densityEvalPoints.Count)
                                {
                                    float defaultValue = spatialDensityFeature.Parameters.GetFloatValue(SpatialDensityParameter.DefaultValue);
                                    Console.Out.WriteLine("WARNING:  Using default value \"" + defaultValue + "\" for feature " + spatialDensityFeature);
                                    densityEstimates = Enumerable.Repeat(defaultValue, densityEvalPoints.Count).ToList();
                                }

                                lock (featureIdDensityEstimates) { featureIdDensityEstimates.Add(spatialDensityFeature.Id, densityEstimates); }
                            }

                            DB.Connection.Return(command.Connection);
                        }));

                    t.Start(i);
                    threads.Add(t);
                }

                foreach (Thread t in threads)
                    t.Join();

                foreach (string featureId in featureIdDensityEstimates.Keys)
                {
                    List<float> densityEstimates = featureIdDensityEstimates[featureId];
                    NumericFeature densityFeature = _idNumericFeature[featureId];
                    for (int i = 0; i < densityEstimates.Count; ++i)
                        featureVectors[i].Add(densityFeature, densityEstimates[i], false);  // don't update range due to concurrent access to the feature
                }
            }
            #endregion

            #region geometry attribute features
            List<Feature> geometryAttributeFeatures = Features.Where(f => f.EnumValue.Equals(FeatureType.GeometryAttribute)).ToList();
            if (geometryAttributeFeatures.Count > 0)
            {
                Console.Out.WriteLine("Extracting geometry attribute features.");
                threads.Clear();
                for (int i = 0; i < Configuration.ProcessorCount; ++i)
                {
                    Thread t = new Thread(new ParameterizedThreadStart(o =>
                        {
                            int core = (int)o;
                            NpgsqlConnection threadConnection = DB.Connection.OpenConnection;
                            string pointTableName = Point.GetTableName(prediction);
                            foreach (Feature geometryAttributeFeature in geometryAttributeFeatures)
                            {
                                Shapefile shapefile = new Shapefile(int.Parse(training ? geometryAttributeFeature.TrainingResourceId : geometryAttributeFeature.PredictionResourceId));
                                string attributeColumn = geometryAttributeFeature.Parameters.GetStringValue(GeometryAttributeParameter.AttributeColumn);
                                NpgsqlCommand cmd = DB.Connection.NewCommand("SELECT " + pointTableName + "." + Point.Columns.Id + " as point_id," + shapefile.GeometryTable + "." + attributeColumn + " as geometry_attribute " +
                                                                             "FROM " + pointTableName + " " +
                                                                             "LEFT JOIN " + shapefile.GeometryTable + " " + // the geometry might not overlap the point, in which case we'll use the default feature value below
                                                                             "ON st_intersects(" + pointTableName + "." + Point.Columns.Location + "," + shapefile.GeometryTable + "." + ShapefileGeometry.Columns.Geometry + ") " +
                                                                             "WHERE " + pointTableName + "." + Point.Columns.Id + " % " + Configuration.ProcessorCount + " = " + core + " AND " +
                                                                                        "(" +
                                                                                          pointTableName + "." + Point.Columns.Time + "='-infinity'::timestamp OR " +
                                                                                          "(" +
                                                                                            pointTableName + "." + Point.Columns.Time + ">=@point_start AND " +
                                                                                            pointTableName + "." + Point.Columns.Time + "<=@point_end" +
                                                                                          ")" +
                                                                                        ") " +
                                                                             "ORDER BY " + pointTableName + "." + Point.Columns.Id, null, threadConnection);

                                ConnectionPool.AddParameters(cmd, new Parameter("point_start", NpgsqlDbType.Timestamp, start),
                                                                  new Parameter("point_end", NpgsqlDbType.Timestamp, end));

                                LAIR.MachineLearning.Feature attributeFeature;
                                string attributeType = geometryAttributeFeature.Parameters.GetStringValue(GeometryAttributeParameter.AttributeType);
                                if (attributeType == "Numeric")
                                    attributeFeature = _idNumericFeature[geometryAttributeFeature.Id] as LAIR.MachineLearning.Feature;
                                else if (attributeType == "Nominal")
                                    attributeFeature = _idNominalFeature[geometryAttributeFeature.Id] as LAIR.MachineLearning.Feature;
                                else
                                    throw new NotImplementedException("Unrecognized geometry attribute feature type:  " + attributeType);

                                List<object> values = new List<object>();
                                int currPointId = -1;
                                int pointId = -1;

                                Action addFeatureToVector = new Action(() =>
                                    {
                                        if (values.Count > 0)
                                        {
                                            FeatureVector vector = pointIdFeatureVector[currPointId];
                                            if (attributeFeature is NumericFeature)
                                                vector.Add(attributeFeature, values.Select(v => Convert.ToSingle(v)).Average(), false);  // don't update range due to concurrent access to the feature
                                            else if (values.Count == 1)
                                                vector.Add(attributeFeature, Convert.ToString(values[0]), false);  // don't update range due to concurrent access to the feature
                                            else
                                                throw new Exception("Nominal geometry attribute \"" + attributeColumn + "\" of shapefile \"" + shapefile.GeometryTable + "\" has multiple non-numeric values at point \"" + (vector.DerivedFrom as Point).Location + "\".");
                                        }

                                        values.Clear();
                                        currPointId = pointId;
                                    });

                                NpgsqlDataReader reader = cmd.ExecuteReader();
                                string defaultValue = geometryAttributeFeature.Parameters.GetStringValue(GeometryAttributeParameter.DefaultValue);
                                while (reader.Read())
                                {
                                    pointId = Convert.ToInt32(reader["point_id"]);
                                    if (pointId != currPointId)
                                        addFeatureToVector();

                                    object value = reader["geometry_attribute"];
                                    if (value is DBNull)  // we did a left join above, so the value might be null meaning the geometry did not overlap the point
                                        value = defaultValue;

                                    values.Add(value);
                                }
                                reader.Close();

                                addFeatureToVector();
                            }

                            DB.Connection.Return(threadConnection);
                        }));

                    t.Start(i);
                    threads.Add(t);
                }

                foreach (Thread t in threads)
                    t.Join();
            }
            #endregion

            #region incident density features
            List<Feature> kdeFeatures = Features.Where(f => f.EnumValue.Equals(FeatureType.IncidentDensity)).ToList();
            if (kdeFeatures.Count > 0)
            {
                List<PostGIS.Point> densityEvalPoints = featureVectors.Select(v => (v.DerivedFrom as Point).Location).ToList();
                Dictionary<string, List<float>> featureIdDensityEstimates = new Dictionary<string, List<float>>(kdeFeatures.Count);
                threads.Clear();
                for (int i = 0; i < Configuration.ProcessorCount; ++i)
                {
                    Thread t = new Thread(new ParameterizedThreadStart(core =>
                        {
                            for (int j = (int)core; j < kdeFeatures.Count; j += Configuration.ProcessorCount)
                            {
                                Feature kdeFeature = kdeFeatures[j];

                                List<PostGIS.Point> kdeInputPoints = new List<PostGIS.Point>();
                                string incident = training ? kdeFeature.TrainingResourceId : kdeFeature.PredictionResourceId;
                                int lagCount = kdeFeature.Parameters.GetIntegerValue(IncidentDensityParameter.LagCount);
                                TimeSpan lagOffset = kdeFeature.Parameters.GetTimeSpanValue(IncidentDensityParameter.LagOffset);
                                TimeSpan lagDuration = kdeFeature.Parameters.GetTimeSpanValue(IncidentDensityParameter.LagDuration);
                                for (int k = 1; k <= lagCount; ++k)
                                {
                                    DateTime incidentSampleStart = start - new TimeSpan(k * lagOffset.Ticks);
                                    DateTime incidentSampleEnd = incidentSampleStart + lagDuration;

                                    if (incidentSampleEnd >= start)
                                        Console.Out.WriteLine("WARNING:  Incident density sample overlaps extraction period.");

                                    if (incidentSampleEnd < incidentSampleStart)
                                        Console.Out.WriteLine("WARNING:  Incident density sample end precedes sample start.");

                                    kdeInputPoints.AddRange(Incident.Get(incidentSampleStart, incidentSampleEnd, area, incident).Select(inc => inc.Location));
                                }

                                Console.Out.WriteLine("Computing spatial density of \"" + incident + "\" with " + lagCount + " lag(s) at offset " + lagOffset + ", each with duration " + lagDuration);
                                int sampleSize = kdeFeature.Parameters.GetIntegerValue(IncidentDensityParameter.SampleSize);
                                List<float> densityEstimates = KernelDensityDCM.GetDensityEstimate(kdeInputPoints, sampleSize, false, 0, 0, densityEvalPoints, false);

                                // the density might not be computable if too few points are provided -- use default density for all evaluation points in such cases
                                if (densityEstimates.Count != densityEvalPoints.Count)
                                {
                                    float defaultValue = kdeFeature.Parameters.GetFloatValue(IncidentDensityParameter.DefaultValue);
                                    Console.Out.WriteLine("WARNING:  Using default value \"" + defaultValue + "\" for feature " + kdeFeature);
                                    densityEstimates = Enumerable.Repeat(defaultValue, densityEvalPoints.Count).ToList();
                                }

                                lock (featureIdDensityEstimates) { featureIdDensityEstimates.Add(kdeFeature.Id, densityEstimates); }
                            }
                        }));

                    t.Start(i);
                    threads.Add(t);
                }

                foreach (Thread t in threads)
                    t.Join();

                foreach (string featureId in featureIdDensityEstimates.Keys)
                {
                    List<float> densityEstimates = featureIdDensityEstimates[featureId];
                    NumericFeature densityFeature = _idNumericFeature[featureId];
                    for (int i = 0; i < densityEstimates.Count; ++i)
                        featureVectors[i].Add(densityFeature, densityEstimates[i], false);  // don't update range due to concurrent access to the feature (e.g., via time slice model calling into this method)
                }
            }
            #endregion

            // update all feature ranges. this wasn't done above due to potential concurrent access, either within this method or from calls into this method. each feature needs to be locked here due to potential concurrent calls into this method (e.g., time slice model)
            foreach (FeatureVector vector in featureVectors)
                foreach (LAIR.MachineLearning.Feature f in vector)
                    lock (f)
                        f.UpdateRange(vector[f]);

            IFeatureExtractor externalFeatureExtractor = InitializeExternalFeatureExtractor(typeof(FeatureBasedDCM));
            if (externalFeatureExtractor == null)
                yield return featureVectors;
            else
                foreach (FeatureVectorList externalFeatureVectors in externalFeatureExtractor.ExtractFeatures(prediction, featureVectors, training, start, end, true))
                    yield return externalFeatureVectors;
        }
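A standalone sketch of the lag windows that the density features above sample from: window k starts at start - k * lagOffset and runs for lagDuration, and a warning is printed when a window overlaps the extraction period. All values below are assumptions:

// Standalone sketch; all values are assumed.
using System;

static class LagWindowSketch
{
    static void Main()
    {
        DateTime start = new DateTime(2012, 6, 1);    // assumed extraction start
        TimeSpan lagOffset = TimeSpan.FromDays(30);   // assumed
        TimeSpan lagDuration = TimeSpan.FromDays(14); // assumed
        int lagCount = 3;                             // assumed

        for (int k = 1; k <= lagCount; ++k)
        {
            DateTime sampleStart = start - new TimeSpan(k * lagOffset.Ticks);
            DateTime sampleEnd = sampleStart + lagDuration;

            if (sampleEnd >= start)
                Console.Out.WriteLine("WARNING:  sample overlaps extraction period.");

            Console.Out.WriteLine(sampleStart.ToShortDateString() + " - " + sampleEnd.ToShortDateString());
        }
    }
}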
        internal static List<Tuple<string, Parameter>> GetPointPredictionValues(FeatureVectorList featureVectors)
        {
            List<Tuple<string, Parameter>> pointPredictionValues = new List<Tuple<string, Parameter>>(featureVectors.Count);
            int pointNum = 0; // must use this instead of point IDs because point IDs get repeated for the timeslice model
            foreach (FeatureVector featureVector in featureVectors)
            {
                Point point = featureVector.DerivedFrom as Point;
                string timeParameterName = "@time_" + pointNum++;
                IEnumerable<KeyValuePair<string, double>> incidentScore = point.PredictionConfidenceScores.Where(kvp => kvp.Key != PointPrediction.NullLabel).Select(kvp => new KeyValuePair<string, double>(kvp.Key, kvp.Value));
                pointPredictionValues.Add(new Tuple<string, Parameter>(PointPrediction.GetValue(point.Id, timeParameterName, incidentScore, incidentScore.Sum(kvp => kvp.Value)), new Parameter(timeParameterName, NpgsqlDbType.Timestamp, point.Time)));
            }

            return pointPredictionValues;
        }