Example #1
0
        public static TwitterTextConfiguration ConfigurationFromJson(string json, bool isResource)
        {
            TwitterTextConfiguration config = new TwitterTextConfiguration();

            try {
                if (isResource)
                {
                    string jsonRaw = GetEmbeddedResource("Resources/" + json, Assembly.GetExecutingAssembly());
                    ConfigurationProperties jsonDeserialized = JsonConvert.DeserializeObject <ConfigurationProperties>(jsonRaw);
                    config.Properties = jsonDeserialized;
                }
                else
                {
                    config.Properties = JsonConvert.DeserializeObject <ConfigurationProperties>(json);
                }
            } catch {
                return(GetDefaultConfig());
            }

            return(config);
        }
Example #2
0
        public override bool Equals(object obj)
        {
            if (this == obj)
            {
                return(true);
            }

            if (obj == null || GetType() != obj.GetType())
            {
                return(false);
            }

            TwitterTextConfiguration that = (TwitterTextConfiguration)obj;

            return(Properties.Version == that.Properties.Version &&
                   Properties.MaxWeightedTweetLength == that.Properties.MaxWeightedTweetLength &&
                   Properties.Scale == that.Properties.Scale &&
                   Properties.DefaultWeight == that.Properties.DefaultWeight &&
                   Properties.TransformedUrlLength == that.Properties.TransformedUrlLength &&
                   Properties.Ranges.Equals(that.Properties.Ranges));
        }
        public static TwitterTextParseResults ParseTweet(string tweet, TwitterTextConfiguration config, bool extractUrls, bool fixCountNewLine = true)
        {
            if (string.IsNullOrEmpty(tweet.Trim()))
            {
                return(EMPTY_TWITTER_TEXT_PARSE_RESULTS);
            }

            string normalizedTweet = tweet.Normalize(NormalizationForm.FormC);

            normalizedTweet = fixCountNewLine ? normalizedTweet.Replace(Environment.NewLine, "\n") : normalizedTweet;

            int tweetLength = normalizedTweet.Length;

            if (tweetLength == 0)
            {
                return(EMPTY_TWITTER_TEXT_PARSE_RESULTS);
            }

            int scale = config.Properties.Scale;
            int maxWeightedTweetLength             = config.Properties.MaxWeightedTweetLength;
            int scaledMaxWeightedTweetLength       = maxWeightedTweetLength * scale;
            int transformedUrlWeight               = config.Properties.TransformedUrlLength * scale;
            List <TwitterTextWeightedRange> ranges = config.Properties.Ranges;

            List <Entity> urlEntities = EXTRACTOR.ExtractUrlsWithIndices(normalizedTweet);

            bool hasInvalidCharacters = false;
            int  weightedCount        = 0;
            int  offset      = 0;
            int  validOffset = 0;

            while (offset < tweetLength)
            {
                int charWeight = config.Properties.DefaultWeight;

                if (extractUrls)
                {
                    List <Entity>        toBeRemoved       = new List <Entity>();
                    IEnumerator <Entity> urlEntityIterator = urlEntities.GetEnumerator();
                    while (urlEntityIterator.MoveNext())
                    {
                        Entity urlEntity = urlEntityIterator.Current;

                        if (urlEntity.Start == offset)
                        {
                            int urlLength = urlEntity.End - urlEntity.Start;
                            weightedCount += transformedUrlWeight;
                            offset        += urlLength;

                            if (weightedCount < -scaledMaxWeightedTweetLength)
                            {
                                validOffset += urlLength;
                            }

                            toBeRemoved.Add(urlEntity);
                            break;
                        }
                    }

                    foreach (var entity in toBeRemoved)
                    {
                        urlEntities.Remove(entity);
                    }
                }

                if (offset < tweetLength)
                {
                    int codePoint = normalizedTweet[offset];

                    foreach (var weightedRange in ranges)
                    {
                        if (weightedRange.getRange().IsInRange(codePoint))
                        {
                            charWeight = weightedRange.Weight;
                            break;
                        }
                    }

                    weightedCount += charWeight;

                    hasInvalidCharacters = hasInvalidCharacters || Validator.HasInvalidCharacters(normalizedTweet.Substring(offset, 1));

                    int CharCount(int codepoint) => (codepoint >= 0x10000 ? 2 : 1);

                    int charCount = CharCount(codePoint);
                    offset += charCount;

                    if (!hasInvalidCharacters && weightedCount <= scaledMaxWeightedTweetLength)
                    {
                        validOffset += charCount;
                    }
                }
            }

            int  normalizedTweetOffset = tweet.Length - normalizedTweet.Length;
            int  scaledWeightedLength  = weightedCount / scale;
            bool isValid    = !hasInvalidCharacters && (scaledWeightedLength <= maxWeightedTweetLength);
            int  permillage = scaledWeightedLength * 1000 / maxWeightedTweetLength;

            return(new TwitterTextParseResults {
                WeightedLength = scaledWeightedLength,
                Permillage = permillage,
                IsValid = isValid,
                DisplayTextRange = new Range(0, offset + normalizedTweetOffset - 1),
                ValidTextRange = new Range(0, validOffset + normalizedTweetOffset - 1)
            });
        }
 public static TwitterTextParseResults ParseTweet(string tweet, TwitterTextConfiguration config)
 => ParseTweet(tweet, config, true);