/// <summary>
/// Creates a new histogram, constructed with this histogram's language, and copies every
/// word/frequency entry of this histogram into it.
/// </summary>
/// <returns>The newly created histogram holding the copied entries.</returns>
public WordHistogram Clone()
{
    var copy = new WordHistogram(_language);

    foreach (var entry in _frequencies)
    {
        copy._frequencies[entry.Key] = entry.Value;
    }

    return copy;
}
/// <summary>
/// Compresses all keyword-aggregated label columns of the aggregate into a single keyword
/// histogram per PoI, and then removes the labels that have become obsolete from both the
/// PoIs and the PoI types.
/// </summary>
/// <param name="aggregationPolicies">Aggregation policy per label name.</param>
/// <param name="aggregate">Service whose PoIs and PoITypes are modified in place.</param>
private static void CompressColumns(Dictionary<string, AggregationPolicy> aggregationPolicies, PoiService aggregate)
{
    // Compress all keyword columns to a single column (this may be a setting later).
    // The query is materialized once; left deferred it would be re-evaluated for every PoI.
    List<string> labelsWithKeywordAggregation = aggregationPolicies
        .Where(kv => kv.Value.IsKeywordAggregation)
        .Select(kv => kv.Key)
        .ToList();

    foreach (BaseContent poi in aggregate.PoIs)
    {
        WordHistogram sumHistogram = null;
        foreach (string label in labelsWithKeywordAggregation)
        {
            string labelValue;
            if (!poi.Labels.TryGetValue(label, out labelValue))
            {
                continue;
            }

            WordHistogram wordHistogram = new WordHistogram();
            wordHistogram.Append(labelValue);
            if (sumHistogram == null)
            {
                sumHistogram = wordHistogram.Clone();
            }
            else
            {
                sumHistogram.Merge(wordHistogram);
            }
        }

        // Keywords are only overwritten when at least one keyword label was present on this PoI.
        if (sumHistogram != null)
        {
            poi.Keywords = sumHistogram;
        }
    }

    // Remove all columns that have been compressed to keywords.
    foreach (BaseContent poI in aggregate.PoIs)
    {
        foreach (KeyValuePair<string, AggregationPolicy> kv in aggregationPolicies)
        {
            if (IsCompressedToKeywords(kv.Value))
            {
                poI.Labels.Remove(kv.Key);
            }
        }
    }

    // Remove obsolete labels; only in the PoiTypes; they are already gone from the Pois.
    // A label is kept only when it has a policy, that policy is not "omit", and the label
    // was not compressed to keywords. The previous code tested DataIsNumeric without the
    // negation here, which kept exactly the labels that the loop above removes from the PoIs.
    foreach (BaseContent baseContent in aggregate.PoITypes)
    {
        // Collect first: the label dictionary cannot be modified while its keys are enumerated.
        List<string> toOmit = new List<string>();
        foreach (string label in baseContent.Labels.Keys)
        {
            AggregationPolicy policy;
            bool keep = aggregationPolicies.TryGetValue(label, out policy)
                        && !policy.IsOmit
                        && !IsCompressedToKeywords(policy);
            if (!keep)
            {
                toOmit.Add(label);
            }
        }

        foreach (string label in toOmit)
        {
            baseContent.Labels.Remove(label);
        }
    }
}

/// <summary>
/// Tells whether labels governed by <paramref name="policy"/> are compressed to keywords:
/// the data is non-numeric and the non-numeric aggregation policy is Keywords.
/// </summary>
private static bool IsCompressedToKeywords(AggregationPolicy policy)
{
    return !policy.DataIsNumeric
           && policy.NonNumericAggregationPolicy == AggregationPolicy.NonNumericAggregation.Keywords;
}
/// <summary>
/// Aggregates a sequence of non-numeric values into a single string according to
/// <c>NonNumericAggregationPolicy</c>.
/// </summary>
/// <param name="data">The values to aggregate.</param>
/// <returns>
/// Concatenate: all values appended without separator. KeepFirst: the first value, or null
/// when <paramref name="data"/> is empty. Keywords: the GeoJSON form of a word histogram built
/// from all values. Omit and any other policy: null.
/// </returns>
private string AggregateNonNumeric(IEnumerable<string> data)
{
    switch (NonNumericAggregationPolicy)
    {
        case NonNumericAggregation.Concatenate:
        {
            StringBuilder sb = new StringBuilder();
            foreach (string s in data)
            {
                sb.Append(s);
            }

            return sb.ToString();
        }

        case NonNumericAggregation.KeepFirst:
            return data.FirstOrDefault();

        case NonNumericAggregation.Keywords:
        {
            // Join the values with spaces so that words of adjacent values do not run together.
            StringBuilder allText = new StringBuilder();
            foreach (string text in data)
            {
                allText.Append(text).Append(" ");
            }

            WordHistogram histogram = new WordHistogram();
            // The collected text was previously never handed to the histogram, so the result
            // was always the serialization of an empty histogram.
            histogram.Append(allText.ToString());
            return histogram.ToGeoJson(); // Pity, we only support aggregation to strings.
        }

        case NonNumericAggregation.Omit:
        default:
            return null;
    }
}
/// <summary>
/// Populates this content item from its XML representation: identity and bookkeeping fields,
/// meta info, WKT, labels, keywords, style, media, position, models and points.
/// </summary>
/// <param name="res">The XML element to read from. It is only read, never reassigned.</param>
/// <param name="directoryName">
/// Directory passed on to the style reader; also included in the error log message.
/// </param>
/// <remarks>
/// A <see cref="SystemException"/> raised while reading is logged and not rethrown, so the
/// object may be left partially populated. A failure while reading the style is ignored and
/// leaves the current style untouched.
/// </remarks>
public void FromXmlBase(ref XElement res, string directoryName)
{
    try
    {
        Id = res.GetGuid("Id");
        if (Id == Guid.Empty)
        {
            Id = Guid.NewGuid();
        }

        var n = res.GetString("Name");
        ContentId = res.GetString("PoiId", "");
        if (String.IsNullOrEmpty(ContentId))
        {
            // Fall back to the name when no explicit PoiId is stored.
            ContentId = n;
        }

        Priority = res.GetInt("Priority", 2);
        UserId = res.GetString("UserId");
        DateLong = res.GetLong("Date", DateTime.Now.ToEpoch());
        UpdatedLong = res.GetLong("Updated", DateTime.Now.ToEpoch());
        Layer = res.GetString("Layer");
        MaxItems = res.GetNullInt("MaxItems");
        var xMid = res.Element("MetaInfoData");
        PoiTypeId = res.GetString("PoiTypeId", "");
        IsVisibleInMenu = res.GetBool("IsVisibleInMenu", true);
        Orientation = res.GetDouble("Orientation", 0.0);

        if (xMid != null)
        {
            var metaInfo = new MetaInfoCollection();
            foreach (var xMi in xMid.Elements())
            {
                var mi = new MetaInfo();
                mi.FromXml(xMi);
                metaInfo.Add(mi);
            }

            MetaInfo = metaInfo;
        }

        var xWkt = res.Element("WKT");
        if (xWkt != null)
        {
            WktText = xWkt.Value;
        }

        var xlabels = res.Element("Labels");
        if (xlabels != null)
        {
            Labels = new Dictionary<string, string>();
            foreach (var xk in xlabels.Elements())
            {
                var k = xk.Name.LocalName;
                // Restore keys starting with numbers or having % or '.
                k = k.Replace(LabelPercentSubst, "%");
                k = k.Replace(LabelQuoteSubst, "'");
                if (k.StartsWith(LabelNumPrefix))
                {
                    k = k.Substring(LabelNumPrefix.Length);
                }

                var s = xk.InnerXml();
                // Un-escape angle brackets that are still entity-encoded in the inner XML.
                // The previous replacements mapped "<" to "<" and ">" to ">", i.e. did nothing.
                Labels[k] = s.RestoreInvalidCharacters()
                    .Replace("&lt;", "<")
                    .Replace("&gt;", ">");
            }
        }

        var xkeywords = res.Element("Keywords");
        if (xkeywords != null)
        {
            Keywords = new WordHistogram();
            Keywords.FromXml(xkeywords);
        }

        var xStyle = res.Element("Style");
        if (xStyle != null)
        {
            try
            {
                var newStyle = new PoIStyle();
                newStyle.FromXml(xStyle, directoryName, false); // TODO REVIEW: Settings were ignored.
                Style = newStyle;
            }
            catch (Exception)
            {
                // OK, keep the old style.
            }
        }

        var media = res.Element("AllMedia");
        if (media != null)
        {
            AllMedia = new BindableCollection<Media>();
            foreach (var m in media.Elements())
            {
                var me = new Media { Content = this };
                me.FromXml(m);
                AllMedia.Add(me);
            }
        }

        var xpos = res.Element("Position");
        if (xpos != null)
        {
            // TODO Remember other Position attributes.
            Position = new Position(
                xpos.GetDouble(Position.LONG_LABEL),
                xpos.GetDouble(Position.LAT_LABEL),
                xpos.GetDouble(Position.ALT_LABEL));
        }

        var px = res.Element("Points");
        var mo = res.Element("Models");
        if (mo != null)
        {
            Models = new List<Model>();
            foreach (var xm in mo.Elements())
            {
                var m = new Model();
                m.FromXml(xm);
                Models.Add(m);
            }
        }

        if (px != null)
        {
            Points = new ObservableCollection<Point>();
            // Points are stored as space separated "x,y" pairs. Empty entries (empty element,
            // repeated or trailing spaces) are skipped; they used to abort the read with an
            // index error on the missing second coordinate.
            var ppo = px.Value.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            foreach (var poss in ppo)
            {
                var split = poss.Split(',');
                var pt = new Point(
                    Double.Parse(split[0], CultureInfo.InvariantCulture),
                    Double.Parse(split[1], CultureInfo.InvariantCulture));
                Points.Add(pt);
            }
        }
    }
    catch (SystemException e)
    {
        Logger.Log("DataServer.BaseContent", "Error reading XML " + res + " from " + directoryName, e.Message, Logger.Level.Error, true);
    }
}
/// <summary>
/// Reads a PoI from a GeoJSON feature object.
/// </summary>
/// <param name="geoJsonObject">
/// The feature to read; its "properties", optional "keywords" and "geometry" members are used.
/// </param>
/// <param name="newObject">
/// When true a new <see cref="PoI"/> is created and filled; when false this instance is filled.
/// </param>
/// <returns>The PoI that was filled: the new object, or this instance.</returns>
public IConvertibleGeoJson FromGeoJson(JObject geoJsonObject, bool newObject = true)
{
    var poi = newObject ? new PoI() : this;

    foreach (var prp in geoJsonObject["properties"].OfType<JProperty>())
    {
        // TODO REVIEW There are more properties of PoIs that should not end up in the labels, but should be set to a property. Mentioned under ToGeoJson:
        // Orientation, ContentId, Layer, Date, MaxItems.
        if (Equals(prp.Name, "Id"))
        {
            poi.PoiId = prp.Value.ToString();
        }
        else if (Equals(prp.Name, "FeatureTypeId"))
        {
            poi.PoiTypeId = prp.Value.ToString();
        }
        else
        {
            poi.Labels[prp.Name] = prp.Value.ToString().RestoreInvalidCharacters();
        }
    }

    JToken keywordToken;
    if (geoJsonObject.TryGetValue("keywords", out keywordToken))
    {
        var histogram = new WordHistogram();
        histogram.FromGeoJson(keywordToken.ToString(Formatting.None), false); // Not very efficient.
        // Assign to the PoI being filled. This used to set the keywords on this instance,
        // so a newly created PoI was returned without its keywords.
        poi.Keywords = histogram;
    }

    // TODO More efficiency if we allow JObjects as input.
    var wkt = new WellKnownTextIO()
        .FromGeoJson(geoJsonObject["geometry"].ToString(Formatting.None), false)
        .ToString();
    if (!string.IsNullOrEmpty(wkt))
    {
        poi.WktText = wkt;
    }

    return poi;
}
/// <summary>
/// Adds the word frequencies of <paramref name="other"/> to this histogram. Words already
/// present get their frequencies summed; words that are new are added with the other
/// histogram's frequency.
/// </summary>
/// <param name="other">The histogram whose frequencies are added to this one.</param>
public void Merge(WordHistogram other)
{
    foreach (var entry in other._frequencies)
    {
        int existing;
        _frequencies[entry.Key] = _frequencies.TryGetValue(entry.Key, out existing)
            ? existing + entry.Value
            : entry.Value;
    }
}
/// <summary>
/// Returns the distinct words of <paramref name="text"/>, as reported by a
/// <see cref="WordHistogram"/> built from it, joined by single spaces.
/// </summary>
/// <param name="text">The text to analyze.</param>
/// <param name="language">
/// The language to build the histogram with; when null it is detected from the text.
/// </param>
/// <returns>The distinct words separated by a single space.</returns>
public static string DistinctWordsInText(this string text, string language = null)
{
    // Slow approximate language detector; only consulted when no language was supplied.
    string effectiveLanguage = language ?? text.Language();

    var histogram = new WordHistogram(effectiveLanguage, text);
    IEnumerable<string> words = histogram.DistinctWords;
    return String.Join(" ", words);
}
/// <summary>
/// Builds a <see cref="WordHistogram"/> for <paramref name="text"/>, using the language
/// detected from the text itself.
/// </summary>
/// <param name="text">The text to build the histogram from.</param>
/// <returns>The histogram constructed from the detected language and the text.</returns>
public static WordHistogram Histogram(this string text)
{
    // Slow approximate language detector.
    var detectedLanguage = text.Language();
    return new WordHistogram(detectedLanguage, text);
}