Exemplo n.º 1
0
        /// <summary>
        /// Routes a comma-separated line to the practice or prescription reader, chosen by its
        /// column count, and emits one key/value pair when the required fields are present.
        /// </summary>
        /// <param name="inputLine">Raw comma-separated input line.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            // The data set is recognised by column count because, according to the original
            // author, MapperContext.InputFileName was always empty:
            // 8 columns = practice, 9 columns = prescription.
            int columnCount = inputLine.Trim().Split(',').Length;

            if (columnCount == 8)
            {
                Practices practice = practiceLinereader.ExtractPracticesFromCsvLineFormat(inputLine);

                // A practice row is ignored unless reference id, name and post code are all present.
                bool incomplete = String.IsNullOrWhiteSpace(practice.ReferenceId)
                                  || String.IsNullOrWhiteSpace(practice.Name)
                                  || String.IsNullOrWhiteSpace(practice.PostCode);
                if (incomplete)
                {
                    return;
                }

                context.EmitKeyValue(practice.ReferenceId, Convert.ToString(practice.PostCode));
            }
            else if (columnCount == 9)
            {
                Prescription prescription = prescriptionLinereader.ExtractPrescriptionsFromCsvLineFormat(inputLine);

                // A prescription row is ignored when it carries no practice id.
                if (String.IsNullOrWhiteSpace(prescription.PracticeId))
                {
                    return;
                }

                // Practice id keyed with the actual cost, formatted to two decimals.
                context.EmitKeyValue(prescription.PracticeId, prescription.ActualCost.ToString("0.00"));
            }
        }
Exemplo n.º 2
0
            /// <summary>
            /// Tokenises a log line and emits the first field (followed by a tab) as key and the
            /// tab-terminated list of all fields as value.
            /// </summary>
            /// <remarks>
            /// A line is skipped when it has fewer than five fields or when its second field does
            /// not contain a parsable date. Previously such lines threw, although the original
            /// comment states that records without host name and date are not considered.
            /// </remarks>
            /// <param name="inputLine">Raw log line.</param>
            /// <param name="context">Mapper context that receives the emitted pair.</param>
            public override void Map(string inputLine, MapperContext context)
            {
                // Replace '- ' with empty value, so input lines are separated only by white space
                inputLine = inputLine.Replace("- ", "");

                // Split on spaces, keeping text enclosed in double quotes or in '[]' together.
                List<string> fields = System.Text.RegularExpressions.Regex.Matches(inputLine, @"[\""].+?[\""]|[[].+?[]]|[^ ]+")
                                      .Cast<System.Text.RegularExpressions.Match>()
                                      .Select(m => m.Value)
                                      .ToList();

                // Index 4 is read below, so at least five fields are needed. The former check
                // (Count > 1) let shorter records through and then failed on index 4.
                if (fields.Count < 5)
                {
                    return;
                }

                // The date is the text between the first character of field 1 and its first ':'.
                // The parsed value is used only to validate the record.
                string stamp = fields[1];
                int colon = stamp.IndexOf(':');
                DateTime parsedDate;
                if (colon < 1 || !DateTime.TryParse(stamp.Substring(1, colon - 1), out parsedDate))
                {
                    return;
                }

                if (!string.IsNullOrEmpty(fields[4]))
                {
                    // Every field is followed by a tab, including the last one.
                    string val = string.Join("\t", fields) + "\t";
                    context.EmitKeyValue(fields[0] + "\t", val);
                }
            }
        /// <summary>
        /// Reads a Thrift binary-encoded delivery from the input line and emits the vehicle id
        /// together with the spread between its highest and lowest odometer reading, provided
        /// that spread is greater than 0.1.
        /// </summary>
        /// <param name="inputLine">Serialized delivery; its UTF-8 bytes are fed to the Thrift reader.</param>
        /// <param name="context">Mapper context used for logging and for the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            var delivery = new Delivery();
            double drivenDistance = 0.0;

            // Diagnostic trace: the raw line and its encoded sizes.
            context.Log("MAPPER:::START");
            context.Log(inputLine);
            context.Log("UTF-8: " + Encoding.UTF8.GetBytes(inputLine).Length);
            context.Log("ASCII: " + Encoding.ASCII.GetBytes(inputLine).Length);

            // Wrap the UTF-8 bytes of the line in a stream for the Thrift transport.
            var payload = new MemoryStream(Encoding.UTF8.GetBytes(inputLine));
            using (var transport = new TStreamTransport(payload, null))
            {
                var protocol = new TBinaryProtocol(transport);
                delivery.Read(protocol);
                context.Log("MAPPER:::AFTER_READ");

                // Distance driven = highest odometer reading minus the lowest one
                // (kilometers, according to the original comment).
                var history = delivery.Vehicle.SensorHistory;
                var lowest = history.Min(d => d.OdoMeter);
                var highest = history.Max(d => d.OdoMeter);
                drivenDistance = highest - lowest;

                context.Log("MAPPER:::BEFORE_STREAM_CLOSE");
            }
            context.Log("MAPPER:::AFTER_STREAM_CLOSE");

            // Only vehicles that moved more than 0.1 are reported.
            bool moved = drivenDistance > 0.1;
            if (moved)
            {
                context.EmitKeyValue(delivery.Vehicle.VehicleId, drivenDistance.ToString(CultureInfo.InvariantCulture));
            }

            context.Log("MAPPER:::END");
        }
        /// <summary>
        /// Parses the input line as an XML document and emits one pair per <c>sensor</c> element:
        /// the element's <c>name</c> attribute as key and its text content as value.
        /// </summary>
        /// <remarks>
        /// Any exception is caught and its full text is written to the output with
        /// <c>EmitLine</c> instead of being rethrown.
        /// </remarks>
        /// <param name="inputLine">XML text to parse.</param>
        /// <param name="context">Mapper context that receives the emitted pairs.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            try
            {
                foreach (var element in XDocument.Parse(inputLine).Descendants("sensor"))
                {
                    // The cast yields null when the attribute is missing.
                    string sensorName = (string)element.Attribute("name");
                    string sensorValue = element.Value;

                    context.EmitKeyValue(sensorName, sensorValue);
                }
            }
            catch (Exception failure)
            {
                context.EmitLine(failure.ToString());
            }
        }
        /// <summary>
        /// Emits, for rows whose country is listed as European or North American, the continent
        /// name and the number of purchases made by new users.
        /// </summary>
        /// <remarks>
        /// Per the original notes, the input is the output of the AggregatePurchaseMapper job
        /// (which map reduced the 'clickstearm' data file): three tab-separated columns holding
        /// country, number of purchases and percentage of purchases made by new users.
        /// Rows with another column count, non-integer numbers or an unlisted country are ignored.
        /// </remarks>
        /// <param name="inputLine">Tab-separated input row.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            string[] columns = inputLine.Split('\t');
            if (columns.Length != 3)
            {
                return;
            }

            int purchases;
            int newUserPercent;
            if (!int.TryParse(columns[1], out purchases) || !int.TryParse(columns[2], out newUserPercent))
            {
                return;
            }

            string country = columns[0];
            string continent;
            if (_europeanCountries.Contains(country))
            {
                continent = "Europe";
            }
            else if (_northAmericanCountries.Contains(country))
            {
                continent = "North America";
            }
            else
            {
                // Countries outside both lists are not reported.
                return;
            }

            // Integer arithmetic: the result is truncated towards zero.
            int newUserPurchases = purchases * newUserPercent / 100;
            context.EmitKeyValue(continent, newUserPurchases.ToString(CultureInfo.InvariantCulture));
        }
Exemplo n.º 6
0
        /// <summary>
        /// Writes the input line to the console, then emits every space-separated token of the
        /// line as a key with the value "1".
        /// </summary>
        /// <param name="inputLine">Line to split on single spaces.</param>
        /// <param name="context">Mapper context that receives the emitted pairs.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            Console.WriteLine("inputLine" + inputLine);

            // Consecutive spaces produce empty tokens, which are emitted as well.
            foreach (string token in inputLine.Split(' '))
            {
                context.EmitKeyValue(token, "1");
            }
        }
        /// <summary>
        /// Maps a per-country purchase row to its continent (Europe or North America) and emits
        /// the continent with the number of purchases made by new users.
        /// </summary>
        /// <remarks>
        /// Per the original notes, the input is the output of the AggregatePurchaseMapper job
        /// (which map reduced the 'clickstearm' data file). Each row has three tab-separated
        /// columns: country, number of purchases, percentage of purchases made by new users.
        /// </remarks>
        /// <param name="inputLine">Tab-separated input row.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            string[] cells = inputLine.Split('\t');
            if (cells.Length != 3)
            {
                return;
            }

            int totalPurchases;
            int percentByNewUsers;
            bool numbersParsed = int.TryParse(cells[1], out totalPurchases)
                                 & int.TryParse(cells[2], out percentByNewUsers);
            if (!numbersParsed)
            {
                return;
            }

            // Europe is checked first; the North America list is consulted only on a miss.
            string countryName = cells[0];
            string continent = _europeanCountries.Contains(countryName)
                ? "Europe"
                : (_northAmericanCountries.Contains(countryName) ? "North America" : string.Empty);

            if (continent.Length == 0)
            {
                return;
            }

            // Integer division truncates the fractional part.
            int purchasesByNewUsers = totalPurchases * percentByNewUsers / 100;
            context.EmitKeyValue(continent, purchasesByNewUsers.ToString(CultureInfo.InvariantCulture));
        }
Exemplo n.º 8
0
        /// <summary>
        /// Re-keys a tab-separated row: column 0 becomes the key, columns 1 to 4 (joined by tabs)
        /// become the value.
        /// </summary>
        /// <param name="inputLine">Tab-separated row with at least five columns.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            string[] columns = inputLine.Split('\t');

            // The original note describes the key as the country code.
            string rowKey = columns[0];
            string rowValue = string.Join("\t", columns[1], columns[2], columns[3], columns[4]);

            context.EmitKeyValue(rowKey, rowValue);
        }
Exemplo n.º 9
0
            /// <summary>
            /// Tokenises a log line, reduces the field at index 4 to digits and underscores
            /// (or "0" when nothing remains), and emits the first field as key with the
            /// tab-joined fields as value.
            /// </summary>
            /// <remarks>
            /// Lines with fewer than five fields are skipped. Previously they failed with an
            /// out-of-range exception when index 4 was read.
            /// </remarks>
            /// <param name="inputLine">Raw log line.</param>
            /// <param name="context">Mapper context that receives the emitted pair.</param>
            public override void Map(string inputLine, MapperContext context)
            {
                // Replace '- ' with empty value, so input lines are separated only by white space
                inputLine = inputLine.Replace("- ", "");

                // Split on spaces, keeping text enclosed in double quotes or in '[]' together.
                List<string> line_fields = System.Text.RegularExpressions.Regex.Matches(inputLine, @"[\""].+?[\""]|[[].+?[]]|[^ ]+")
                                           .Cast<System.Text.RegularExpressions.Match>()
                                           .Select(m => m.Value)
                                           .ToList();

                // Index 4 (the byte column, per the original comment) must exist.
                if (line_fields.Count < 5)
                {
                    return;
                }

                // Remove every character other than digits and '_' from the byte column.
                line_fields[4] = System.Text.RegularExpressions.Regex.Replace(line_fields[4], "[^0-9_]+", "");

                // Fall back to "0" when nothing is left.
                if (string.IsNullOrEmpty(line_fields[4]))
                {
                    line_fields[4] = "0";
                }

                // Join the columns with tabs. TrimEnd keeps the former behaviour of also
                // stripping tabs that end the last field.
                string row = string.Join("\t", line_fields).TrimEnd('\t');

                // Host name (first field) is the key.
                context.EmitKeyValue(line_fields[0], row);
            }
Exemplo n.º 10
0
            /// <summary>
            /// Tokenises a log line and, when it has more than one field, emits the second field
            /// as key with all fields (each followed by a tab) as value.
            /// </summary>
            /// <param name="inputLine">Raw log line.</param>
            /// <param name="context">Mapper context that receives the emitted pair.</param>
            public override void Map(string inputLine, MapperContext context)
            {
                // Drop '- ' so that fields are separated by white space only.
                string cleaned = inputLine.Replace("- ", "");

                // Tokens are quoted strings, bracketed strings, or runs of non-space characters.
                var tokens = System.Text.RegularExpressions.Regex.Matches(cleaned, @"[\""].+?[\""]|[[].+?[]]|[^ ]+")
                             .Cast<System.Text.RegularExpressions.Match>()
                             .Select(m => m.Value)
                             .ToList();

                if (tokens.Count <= 1)
                {
                    return;
                }

                // Every column is terminated by a tab, the last one included.
                string row = string.Join("\t", tokens) + "\t";

                // Column 2 is the key; the whole row is the value.
                context.EmitKeyValue(tokens[1], row);
            }
Exemplo n.º 11
0
        /// <summary>
        /// Emits the BNF name and actual cost (two decimals) of prescription rows whose BNF name
        /// is "peppermint oil", compared case-insensitively.
        /// </summary>
        /// <remarks>
        /// Only lines with nine comma-separated columns are treated as prescriptions; per the
        /// original author the column count is used because MapperContext.InputFileName was
        /// always empty. The name comparison is ordinal and case-insensitive, so it does not
        /// depend on the machine culture and a missing name is skipped instead of throwing.
        /// </remarks>
        /// <param name="inputLine">Raw comma-separated input line.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            char[] delimiterChars = { ',' };

            //split up the passed in line
            string[] individualItems = inputLine.Trim().Split(delimiterChars);

            if (individualItems.Length != 9)
            {
                return;
            }

            Prescription prescription = reader.ExtractPrescriptionsFromCsvLineFormat(inputLine);

            //Ignore, practice id cannot be empty
            if (String.IsNullOrWhiteSpace(prescription.PracticeId))
            {
                return;
            }

            //Filter by peppermint oil; ignore the row if the filter is not matched
            if (!String.Equals(prescription.BNFName, "peppermint oil", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            context.EmitKeyValue(prescription.BNFName, prescription.ActualCost.ToString("0.00"));
        }
Exemplo n.º 12
0
        /// <summary>
        /// Splits the line with the <c>commaSplit</c> matcher and emits the second match, stripped
        /// of leading commas and surrounding double quotes, as key together with the <c>value</c>
        /// member.
        /// </summary>
        /// <remarks>
        /// Rows that do not yield exactly six matches, and the header row (key "LCR DESC"), are
        /// dropped silently. <c>commaSplit</c> and <c>value</c> are defined outside this method;
        /// the original comment describes the value as "1", used for counting.
        /// </remarks>
        /// <param name="inputLine">Raw input row.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            var fields = commaSplit.Matches(inputLine);

            // Simplistic structure check: anything but six fields is not a data row.
            bool wellFormed = fields != null && fields.Count == 6;
            if (!wellFormed)
            {
                return;
            }

            // Strip any leading comma and the surrounding quotation marks.
            string crime = fields[1].Value.TrimStart(',').Trim('\"');

            // Everything except the header row is passed on to be reduced.
            if (crime != "LCR DESC")
            {
                context.EmitKeyValue(crime, value);
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Emits one pair per data row: the second <c>commaSplit</c> match (without leading commas
        /// or surrounding double quotes) as key and the <c>value</c> member as value.
        /// </summary>
        /// <remarks>
        /// Rows without exactly six matches and the header row ("LCR DESC") produce no output.
        /// <c>commaSplit</c> and <c>value</c> are declared outside this method; the original
        /// comment says the value is "1" so that the reducer can count occurrences.
        /// </remarks>
        /// <param name="inputLine">Raw input row.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            var columns = commaSplit.Matches(inputLine);

            // Unexpected row shapes are skipped without logging.
            if (columns == null)
            {
                return;
            }
            if (columns.Count != 6)
            {
                return;
            }

            // Second column, cleaned of the separator and quoting.
            string rawColumn = columns[1].Value;
            string key = rawColumn.TrimStart(',').Trim('\"');

            // The header row must not reach the reducer.
            bool isHeaderRow = key == "LCR DESC";
            if (isHeaderRow)
            {
                return;
            }

            context.EmitKeyValue(key, value);
        }
        /// <summary>
        /// Emits the BNF name and actual cost (two decimals) for prescription rows whose BNF
        /// name equals "peppermint oil", ignoring case.
        /// </summary>
        /// <remarks>
        /// A line counts as a prescription when it has nine comma-separated columns; per the
        /// original author this is used because MapperContext.InputFileName was always empty.
        /// The name is compared with an ordinal, case-insensitive comparison: the result no
        /// longer depends on the machine culture and a null name is skipped rather than
        /// raising a NullReferenceException.
        /// </remarks>
        /// <param name="inputLine">Raw comma-separated input line.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            char[] delimiterChars = { ',' };

            //split up the passed in line
            string[] individualItems = inputLine.Trim().Split(delimiterChars);

            if (individualItems.Length == 9)
            {
                Prescription prescription = reader.ExtractPrescriptionsFromCsvLineFormat(inputLine);

                //Ignore, practice id cannot be empty
                if (String.IsNullOrWhiteSpace(prescription.PracticeId)) { return; }

                //Filter by peppermint oil; ignore the row if the filter is not matched
                bool isPeppermintOil = String.Equals(prescription.BNFName, "peppermint oil", StringComparison.OrdinalIgnoreCase);
                if (!isPeppermintOil) { return; }

                context.EmitKeyValue(prescription.BNFName, prescription.ActualCost.ToString("0.00"));
            }
        }
        /// <summary>
        /// Emits, for 16-column CSV rows, the key "column10,column13" and the value "start,end",
        /// where start and end are the dates parsed from columns 4 and 8.
        /// </summary>
        /// <remarks>
        /// Rows are skipped when they do not have 16 columns, when column 13 or column 8 is empty,
        /// or when either date does not match <c>dd/MM/yyyy HH:mm</c>. Dates are parsed with the
        /// invariant culture so that '/' and ':' are taken literally; passing a null provider
        /// used the current culture, whose separators may differ. Unexpected failures are logged
        /// instead of being silently discarded.
        /// </remarks>
        /// <param name="inputLine">Raw comma-separated input line.</param>
        /// <param name="context">Mapper context used for output and logging.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            try
            {
                var parts = inputLine.Split(',');
                if (parts.Length != 16 || parts[13].Equals("") || parts[8].Equals(""))
                {
                    return;
                }

                // avoid culture problems: fixed pattern, invariant separators
                const string formatString = "dd/MM/yyyy HH:mm";
                var invariant = System.Globalization.CultureInfo.InvariantCulture;
                var noStyles = System.Globalization.DateTimeStyles.None;

                DateTime start;
                DateTime end;
                if (!DateTime.TryParseExact(parts[4], formatString, invariant, noStyles, out start) ||
                    !DateTime.TryParseExact(parts[8], formatString, invariant, noStyles, out end))
                {
                    // Malformed dates are expected in the data; skip the row.
                    return;
                }

                var key = String.Format("{0},{1}", parts[10], parts[13]);

                // NOTE(review): the output dates still use the current culture's default format,
                // as before; whatever consumes this value is outside this view.
                var value = String.Format("{0},{1}", start.ToString(), end.ToString());
                context.EmitKeyValue(key, value);
            }
            catch (Exception ex)
            {
                // Stay best-effort (the row is dropped), but leave a trace of what went wrong.
                context.Log(String.Format("Map skipped a row after an unexpected error: {0}", ex.Message));
            }
        }
Exemplo n.º 16
0
        /// <summary>
        /// Parses the line as an integer and emits it under the key "even" or "odd".
        /// </summary>
        /// <param name="inputLine">Text holding a single integer.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            int number = int.Parse(inputLine);

            // Classify by the remainder of division by two.
            string parity;
            if (number % 2 == 0)
            {
                parity = "even";
            }
            else
            {
                parity = "odd";
            }

            context.EmitKeyValue(parity, number.ToString());
        }
Exemplo n.º 17
0
 /// <summary>
 /// Reads the incoming line as an integer and emits it keyed by its parity.
 /// </summary>
 /// <param name="inputLine">Text holding a single integer.</param>
 /// <param name="context">Mapper context that receives the emitted pair.</param>
 public override void Map(string inputLine, MapperContext context)
 {
     int number = int.Parse(inputLine);

     // A number is even when dividing it by two leaves no remainder.
     bool isEven = number % 2 == 0;

     context.EmitKeyValue(isEven ? "even" : "odd", number.ToString());
 }
Exemplo n.º 18
0
            };                                                                                    //ascii 58--64 + misc.

            /// <summary>
            /// Splits the trimmed line on the configured punctuation characters and, for every
            /// resulting token, increments the "mapInputs" counter, logs the pair and emits the
            /// token with the value "1".
            /// </summary>
            /// <param name="inputLine">Line of text to tokenise.</param>
            /// <param name="context">Mapper context used for counters, logging and output.</param>
            public override void Map(string inputLine, MapperContext context)
            {
                string[] tokens = inputLine.Trim().Split(this._punctuationChars);

                for (int index = 0; index < tokens.Length; index++)
                {
                    string token = tokens[index];

                    context.IncrementCounter("mapInputs");
                    context.Log(string.Format("Map::  {0},{1}", token, "1"));
                    context.EmitKeyValue(token, "1");
                }
            }
        /// <summary>
        /// Emits column 0 of a tab-separated row as key and column 2 as value.
        /// </summary>
        /// <param name="inputLine">Tab-separated row with at least three columns.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            string[] columns = inputLine.Split('\t');

            // Per the original comment, the odometer value sits at index 2.
            context.EmitKeyValue(columns[0], columns[2]);
        }
Exemplo n.º 20
0
        /// <summary>
        /// Splits the line on spaces, newlines and tabs and emits every token with the text form
        /// of the number one as value.
        /// </summary>
        /// <param name="inputLine">Line of text to tokenise.</param>
        /// <param name="context">Mapper context that receives the emitted pairs.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            // The value is the same for every token, so format it once.
            string one = 1.ToString();

            string[] tokens = inputLine.Split(' ', '\n', '\t');
            for (int index = 0; index < tokens.Length; index++)
            {
                context.EmitKeyValue(tokens[index], one);
            }
        }
        /// <summary>
        /// Splits a tab-separated row and emits its first column keyed with its third column.
        /// </summary>
        /// <param name="inputLine">Tab-separated row with at least three columns.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            string[] cells = inputLine.Split('\t');

            string recordId = cells[0];

            // The original comment places the odometer reading at index 2.
            string odometer = cells[2];

            context.EmitKeyValue(recordId, odometer);
        }
Exemplo n.º 22
0
            /// <summary>
            /// Parses the line as an integer and emits it together with its square root.
            /// </summary>
            /// <remarks>
            /// Both numbers are parsed and formatted with the invariant culture so that the
            /// emitted text (notably the decimal separator of the square root) is the same on
            /// every machine, regardless of its regional settings.
            /// </remarks>
            /// <param name="inputLine">Text holding a single integer.</param>
            /// <param name="context">Mapper context that receives the emitted pair.</param>
            public override void Map(string inputLine, MapperContext context)
            {
                var invariant = System.Globalization.CultureInfo.InvariantCulture;

                int inputValue = int.Parse(inputLine, invariant);

                //Perform the work
                double sqtr = Math.Sqrt((double)inputValue);

                //write Output Data
                context.EmitKeyValue(inputValue.ToString(invariant), sqtr.ToString(invariant));
            }
Exemplo n.º 23
0
        /// <summary>
        /// Parses the line as an integer and emits it together with its square root.
        /// </summary>
        /// <remarks>
        /// Parsing and formatting use the invariant culture, so the emitted text does not change
        /// with the regional settings of the machine running the job (for example the decimal
        /// separator of the square root).
        /// </remarks>
        /// <param name="inputLine">Text holding a single integer.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Input
            int inputValue = int.Parse(inputLine, invariant);

            // Work
            var sqrt = Math.Sqrt(inputValue);

            // Write the output
            context.EmitKeyValue(inputValue.ToString(invariant), sqrt.ToString(invariant));
        }
Exemplo n.º 24
0
        /// <summary>
        /// Parses the line as an integer and emits it together with its square root.
        /// </summary>
        /// <remarks>
        /// The invariant culture is used for parsing and formatting; with the current culture
        /// the square root could be written with a decimal comma on some machines.
        /// </remarks>
        /// <param name="inputLine">Text holding a single integer.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            int input = int.Parse(inputLine, invariant);

            // Find the square root.
            double root = Math.Sqrt((double)input);

            // Write output.
            context.EmitKeyValue(input.ToString(invariant), root.ToString(invariant));
        }
Exemplo n.º 25
0
        /// <summary>
        /// Emits column 3 of a six-column tab-separated row as key with the value "1".
        /// </summary>
        /// <param name="inputLine">Tab-separated input row.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            string[] columns = inputLine.Split('\t');

            // Sanity check against data quality issues: only six-column rows are used.
            if (columns.Length == 6)
            {
                // Column 3 is the country part, per the original comment.
                context.EmitKeyValue(columns[3], "1");
            }
        }
Exemplo n.º 26
0
        /// <summary>
        /// Parses the line as an integer and emits it, formatted with the invariant culture,
        /// under the key "even" or "odd".
        /// </summary>
        /// <param name="inputLine">Text holding a single integer.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            int number = int.Parse(inputLine);

            // Pick the bucket from the remainder of division by two.
            string bucket = "odd";
            if (number % 2 == 0)
            {
                bucket = "even";
            }

            string text = number.ToString(CultureInfo.InvariantCulture);
            context.EmitKeyValue(bucket, text);
        }
 /// <summary>
 /// For comma-separated rows whose second column (trimmed) is "10001", resolves the first
 /// column through <c>IpAddressResolver.Resolve</c> and emits the result as key with the third
 /// column as value.
 /// </summary>
 /// <param name="inputLine">Comma-separated input row.</param>
 /// <param name="context">Mapper context that receives the emitted pair.</param>
 public override void Map(string inputLine, MapperContext context)
 {
     string[] columns = inputLine.Split(',');

     // Only responses linked to question id 10001 are of interest.
     if (columns[1].Trim() != "10001")
     {
         return;
     }

     string country = IpAddressResolver.Resolve(columns[0].Trim());
     context.EmitKeyValue(country, columns[2]);
 }
Exemplo n.º 28
0
        /// <summary>
        /// Splits the trimmed line into words on a fixed set of punctuation characters (empty
        /// entries removed) and emits every pair of adjacent words: the earlier word as key, the
        /// following word as value.
        /// </summary>
        /// <param name="inputLine">Line of text to tokenise.</param>
        /// <param name="context">Mapper context that receives the emitted pairs.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            char[] separators = { ' ', '.', '?', ',', ':', '!', '"', ')', '(', '-', '*', '$', '\'', ';', '\\', '/' };
            string[] tokens = inputLine.Trim().Split(separators, StringSplitOptions.RemoveEmptyEntries);

            // Walk the words starting at the second one, pairing each with its predecessor.
            for (int next = 1; next < tokens.Length; next++)
            {
                context.EmitKeyValue(tokens[next - 1], tokens[next]);
            }
        }
        /// <summary>
        /// For four-column tab-separated rows, emits column 1 as key and the tabbed string built
        /// from columns 3 and 2 (in that order) as value.
        /// </summary>
        /// <param name="inputLine">Tab-separated input row.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            string[] columns = inputLine.Split('\t');
            if (columns.Length != 4)
            {
                return;
            }

            // The original names these columns user id (1), rating (2) and movie id (3).
            string[] movieThenRating = { columns[3], columns[2] };

            context.EmitKeyValue(columns[1], StringUtility.GetAsTabbedString(movieThenRating));
        }
Exemplo n.º 30
0
        /// <summary>
        /// Finds every <c>using ...;</c> directive in the input line and emits the matched text
        /// as key with the value "1".
        /// </summary>
        /// <remarks>
        /// The character class is <c>A-Za-z</c>. The former range <c>A-z</c> also covered the
        /// punctuation characters that sit between 'Z' and 'a' in ASCII ('[', '\', ']', '^' and
        /// the backtick), so text such as <c>using a[b];</c> was reported as a namespace.
        /// </remarks>
        /// <param name="inputLine">One line of C# source text.</param>
        /// <param name="context">Mapper context that receives the emitted pairs.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            //Extract the namespace declarations in the Csharp files
            var matches = Regex.Matches(inputLine, @"(using)\s[A-Za-z0-9_\.]*\;");

            foreach (Match match in matches)
            {
                //Just emit the namespaces.
                context.EmitKeyValue(match.Value, "1");
            }
        }
Exemplo n.º 31
0
        /// <summary>
        /// Emits, for rows with exactly four tab-separated columns, the second column as key and
        /// a tabbed string of the fourth and third columns as value.
        /// </summary>
        /// <param name="inputLine">Tab-separated input row.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            string[] cells = inputLine.Split('\t');
            bool hasExpectedShape = cells.Length == 4;

            if (hasExpectedShape)
            {
                // Column meaning follows the original variable names.
                string user = cells[1];
                string score = cells[2];
                string movie = cells[3];

                string payload = StringUtility.GetAsTabbedString(new[] { movie, score });
                context.EmitKeyValue(user, payload);
            }
        }
Exemplo n.º 32
0
        /// <summary>
        /// Reads horse, jockey and course from the first three comma-separated columns and emits
        /// the horse keyed with the statistics returned by
        /// <c>GetStatsForHorseJockeyCourse</c>.
        /// </summary>
        /// <param name="inputLine">Comma-separated row with at least three columns.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            string[] columns = inputLine.Split(',');

            string horseName = columns[0];
            string jockeyName = columns[1];
            string courseName = columns[2];

            context.EmitKeyValue(horseName, GetStatsForHorseJockeyCourse(horseName, jockeyName, courseName));
        }
Exemplo n.º 33
0
        /// <summary>
        /// Emits the horse (column 0) as key and, as value, the statistics computed by
        /// <c>GetStatsForHorseJockeyCourse</c> for the horse, jockey (column 1) and course
        /// (column 2) of a comma-separated row.
        /// </summary>
        /// <param name="inputLine">Comma-separated row with at least three columns.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            var cells = inputLine.Split(',');

            // Column order: horse, jockey, course.
            var horse = cells[0];
            var rider = cells[1];
            var track = cells[2];

            var summary = GetStatsForHorseJockeyCourse(horse, rider, track);
            context.EmitKeyValue(horse, summary);
        }
        /// <summary>
        /// Handles a comma-separated line as either a practice row (8 columns) or a prescription
        /// row (9 columns) and emits one key/value pair when the required fields are present.
        /// </summary>
        /// <remarks>
        /// Per the original author, the column count identifies the data set because
        /// MapperContext.InputFileName was always empty. Lines with any other column count
        /// produce no output.
        /// </remarks>
        /// <param name="inputLine">Raw comma-separated input line.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            string[] columns = inputLine.Trim().Split(new[] { ',' });

            switch (columns.Length)
            {
                case 8:
                {
                    Practices practice = practiceLinereader.ExtractPracticesFromCsvLineFormat(inputLine);

                    // Reference id, name and post code are all mandatory for a practice.
                    if (String.IsNullOrWhiteSpace(practice.ReferenceId))
                    {
                        return;
                    }
                    if (String.IsNullOrWhiteSpace(practice.Name))
                    {
                        return;
                    }
                    if (String.IsNullOrWhiteSpace(practice.PostCode))
                    {
                        return;
                    }

                    context.EmitKeyValue(practice.ReferenceId, Convert.ToString(practice.PostCode));
                    break;
                }

                case 9:
                {
                    Prescription prescription = prescriptionLinereader.ExtractPrescriptionsFromCsvLineFormat(inputLine);

                    // A prescription without a practice id is ignored.
                    if (String.IsNullOrWhiteSpace(prescription.PracticeId))
                    {
                        return;
                    }

                    // Practice id with the amount, two decimals.
                    context.EmitKeyValue(prescription.PracticeId, prescription.ActualCost.ToString("0.00"));
                    break;
                }
            }
        }
        /// <summary>
        /// Extracts the name from a line of the form "Hello, NAME" and emits it with the value
        /// "1"; lines in any other format are logged and counted as a recoverable error.
        /// </summary>
        /// <remarks>
        /// The prefix and suffix checks are ordinal. The culture-sensitive overloads may match
        /// text whose length differs from the literal (for example when it contains ignorable
        /// characters), which does not fit the fixed-length <c>Substring</c> that follows.
        /// </remarks>
        /// <param name="inputLine">Input line, e.g. "Hello, Andy".</param>
        /// <param name="context">Mapper context used for output, logging and counters.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            const string greeting = "Hello, ";

            //example input: Hello, Andy
            if (!inputLine.StartsWith(greeting, System.StringComparison.Ordinal))
            {
                context.Log(string.Format("The inputLine {0} is not in the correct format", inputLine));
                context.IncrementCounter("RecoverableError", "InputFormatIncorrect", 1);
                return;
            }

            var key = inputLine.Substring(greeting.Length);

            // When the name ends with a period, periods are stripped from both ends (as before).
            if (key.EndsWith(".", System.StringComparison.Ordinal))
            {
                key = key.Trim('.');
            }

            context.EmitKeyValue(key, "1");//we are going to count instances, the value is irrelevant
        }
Exemplo n.º 36
0
        /// <summary>
        /// Extracts the name from a line of the form "Hello, NAME" and emits it with the value
        /// "1"; lines that do not start with "Hello, " are ignored.
        /// </summary>
        /// <remarks>
        /// Prefix and suffix are compared ordinally so that the match always covers exactly the
        /// characters of the literal; the culture-sensitive overloads can match text of a
        /// different length, which would misalign the fixed-length <c>Substring</c> below.
        /// </remarks>
        /// <param name="inputLine">Input line, e.g. "Hello, Andy".</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            const string greeting = "Hello, ";

            //example input: Hello, Andy
            if (!inputLine.StartsWith(greeting, System.StringComparison.Ordinal))
            {
                return;
            }

            var key = inputLine.Substring(greeting.Length);

            // When the name ends with a period, periods are stripped from both ends (as before).
            if (key.EndsWith(".", System.StringComparison.Ordinal))
            {
                key = key.Trim('.');
            }

            context.EmitKeyValue(key, "1");//we are going to count instances, the value is irrelevant
        }
Exemplo n.º 37
0
        /// <summary>
        /// For data rows that contain a value for the year 2007, emits the indicator code as key
        /// and a tab-separated value made of country code, country name, the trend returned by
        /// <c>ComputeTrend</c> and the 2007 value.
        /// </summary>
        /// <remarks>
        /// The header row (country name "Country Name") and rows without a 2007 entry are skipped.
        /// </remarks>
        /// <param name="inputLine">Raw input row, decoded by <c>ExtractValueFromLine</c>.</param>
        /// <param name="context">Mapper context that receives the emitted pair.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            var processedYear = 2007;
            var row = ExtractValueFromLine(inputLine);

            // The header check runs first; the year lookup only happens for data rows.
            if (row.CountryName == "Country Name" || !row.YearValues.ContainsKey(processedYear))
            {
                return;
            }

            var trend = ComputeTrend(row.YearValues, processedYear);

            var indicator = row.IndicatorCode;
            var payload = row.CountryCode + '\t' + row.CountryName + '\t' + trend + '\t' + row.YearValues[processedYear];

            context.EmitKeyValue(indicator, payload);
        }
Exemplo n.º 38
0
            /// <summary>
            /// Removes every "- " from the line, tokenises it, and emits the token at index 3 as
            /// key with the cleaned line as value.
            /// </summary>
            /// <param name="inputLine">Raw log line; must yield at least four tokens.</param>
            /// <param name="context">Mapper context that receives the emitted pair.</param>
            public override void Map(string inputLine, MapperContext context)
            {
                // Drop '- ' so that fields are separated by white space only.
                string cleaned = inputLine.Replace("- ", "");

                // Tokens are quoted strings, bracketed strings, or runs of non-space characters.
                var tokens = System.Text.RegularExpressions.Regex.Matches(cleaned, @"[\""].+?[\""]|[[].+?[]]|[^ ]+")
                             .Cast<System.Text.RegularExpressions.Match>()
                             .Select(m => m.Value)
                             .ToList();

                context.EmitKeyValue(tokens[3], cleaned);
            }
        /// <summary>
        /// Emits country and visitor type for every purchase row and tracks hits and misses in
        /// the "HitMissProgress" counter group.
        /// </summary>
        /// <remarks>
        /// Per the original notes, each row of the 'clickstearm' data file has six tab-separated
        /// columns: DateTime of Hit, ProductId, VisitorType, Country of Request, Referrer, Action.
        /// The output feeds the count of purchases per country and the share made by new users.
        /// "PurchaseNotFound" is incremented only for rows whose action is not "Purchase";
        /// previously it was also incremented for purchases, so it counted every six-column row.
        /// </remarks>
        /// <param name="inputLine">Tab-separated input row.</param>
        /// <param name="context">Mapper context used for output and counters.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            var dataRow = inputLine.Split('\t');
            if (dataRow.Length != 6)
            {
                return;
            }

            var visitorType = dataRow[2];
            var country = dataRow[3];
            var action = dataRow[5];

            if (action == "Purchase")
            {
                context.IncrementCounter("HitMissProgress", "PurchaseFound", 1);
                context.EmitKeyValue(country, visitorType);
            }
            else
            {
                context.IncrementCounter("HitMissProgress", "PurchaseNotFound", 1);
            }
        }
Exemplo n.º 40
0
        /// <summary>
        /// Emits country and visitor type for every purchase row and records, in the
        /// "HitMissProgress" counter group, whether the row was a purchase.
        /// </summary>
        /// <remarks>
        /// Per the original notes, each row of the 'clickstearm' data file has six tab-separated
        /// columns: DateTime of Hit, ProductId, VisitorType, Country of Request, Referrer, Action.
        /// The "PurchaseNotFound" counter now sits in the else branch; before, it was incremented
        /// after the purchase branch as well and therefore counted purchases too.
        /// </remarks>
        /// <param name="inputLine">Tab-separated input row.</param>
        /// <param name="context">Mapper context used for output and counters.</param>
        public override void Map(string inputLine, MapperContext context)
        {
            var dataRow = inputLine.Split('\t');

            if (dataRow.Length == 6)
            {
                var visitorType = dataRow[2];
                var country     = dataRow[3];
                var action      = dataRow[5];

                if (action == "Purchase")
                {
                    context.IncrementCounter("HitMissProgress", "PurchaseFound", 1);
                    context.EmitKeyValue(country, visitorType);
                }
                else
                {
                    context.IncrementCounter("HitMissProgress", "PurchaseNotFound", 1);
                }
            }
        }
Exemplo n.º 41
0
            // Emits (hour of the request, first space-separated field of the line).
            // The timestamp is read from the fourth space-separated field: its first character
            // is dropped and the rest is expected to look like "dd/xxx/yyyy:HH:mm:ss".
            // Lines that do not have that shape are skipped instead of throwing.
            public override void Map(string inputLine, MapperContext context)
            {
                string[] words = inputLine.Split(' ');

                // words[3] is read below, so at least four fields are required, and the
                // field must be non-empty for Remove(0, 1) to succeed.
                if (words.Length < 4 || words[3].Length == 0)
                {
                    return;
                }

                //Get the datetimevalue value in the inputLine (drop the leading character)
                string datetimevalue = words[3].Remove(0, 1);

                //Convert the datetimevalue into the DateTime format
                string[] value = datetimevalue.Split(':');
                if (value.Length < 4)
                {
                    return;
                }

                string[] date = value[0].Split('/');
                if (date.Length < 3)
                {
                    return;
                }

                // NOTE(review): the month is hard-coded to "07" and date[1] is ignored;
                // presumably the input only contains July data - confirm.
                string datetime = date[2] + "-07-" + date[0] + " " + value[1] + ":" + value[2] + ":" + value[3];

                DateTime dateTime;
                if (!DateTime.TryParse(datetime, out dateTime))
                {
                    return;
                }

                //Emit the Hour value and Ipaddress/domain values
                context.EmitKeyValue(dateTime.Hour.ToString(), words[0]);
            }
Exemplo n.º 42
0
        // Emits (truncated "latitude, longitude" pair, value) for each row whose sixth
        // match of commaSplit holds a coordinate pair. Rows that do not produce exactly
        // six matches, the header row, and rows whose location is not a two-part
        // comma-separated value are skipped.
        public override void Map(string inputLine, MapperContext context)
        {
            var matches = commaSplit.Matches(inputLine);

            //Hadoop is semi-structured, meaning that we do not need every row to match
            //our expected structure.  This is a simplistic check, but given a more polyglot
            //data set, we could filter out unexpected rows or perform more complex filtering.
            if (matches == null || matches.Count != 6)
            {
                //If this were a production data set, I would think about logging bad rows
                //but because this is a demo, I just want to return without passing anything
                //to be reduced.
                return;
            }

            //Trim off any leading comma or surrounding quotation marks and parentheses.
            string key = matches[5].Value.TrimStart(',').Trim('\"').Trim('(').Trim(')');

            //We don't want to return the header row.
            if (key == "LOCATION")
            {
                return;
            }

            //At this point, we have coordinates which look like
            //35.776238687249744, -78.6246378053371
            //We want to solve to 3 places after the decimal point to
            //get a better idea of "neighborhood"/area-level crime.
            var keyvals = key.Split(',');

            if (keyvals.Length != 2)
            {
                return;
            }

            for (int i = 0; i < keyvals.Length; i++)
            {
                //Trim first so the decimal point index refers to the string being cut.
                string part = keyvals[i].Trim(' ');
                int dot = part.IndexOf('.');

                if (dot >= 0)
                {
                    //Keep at most 3 decimals; clamp so short values do not throw.
                    int length = dot + 4 < part.Length ? dot + 4 : part.Length;
                    part = part.Substring(0, length);
                }

                keyvals[i] = part;
            }

            key = keyvals[0] + ", " + keyvals[1];

            //The Map function returns a set of key-value pairs.
            //The key in this case is the truncated coordinate pair; 'value' is defined
            //outside this method (the original comment describes it as "1", used to get a count).
            context.EmitKeyValue(key, value);
        }
        // Emits (name, "1") for every line of the form "Hello, <name>", with any trailing
        // '.' characters removed from the name. Lines that do not start with "Hello, " are
        // logged, counted under RecoverableError/InputFormatIncorrect, and skipped.
        public override void Map(string inputLine, MapperContext context)
        {
            //example input: Hello, Andy
            if (!inputLine.StartsWith("Hello, "))
            {
                context.Log(string.Format("The inputLine {0} is not in the correct format", inputLine));
                context.IncrementCounter("RecoverableError", "InputFormatIncorrect", 1);
                return;
            }

            //7 == "Hello, ".Length; only trailing dots are punctuation, so use TrimEnd
            //rather than Trim, which would also strip dots at the start of the name.
            var key = inputLine.Substring(7).TrimEnd('.');

            context.EmitKeyValue(key, "1");//we are going to count instances, the value is irrelevant
        }
Exemplo n.º 44
0
        // Emits (country, sentiment polarity) for tweets whose timestamp falls in 2014 or 2015.
        // Tweets from any other year are skipped before the sentiment and country lookups
        // are performed, so no work is done for rows that would be discarded anyway.
        public override void Map(String line, MapperContext context)
        {
            Tweet atweet = Tweet.Deserializer(line);

            int year = atweet.Time.Year;
            if (year != 2015 && year != 2014)
            {
                return;
            }

            string polarity = SentimentAnalyser.findSentiment(atweet.Text);

            // NOTE(review): the helper is named ...ByLongitudeLattitude but is called with
            // (lattitude, longitude) - confirm the parameter order against its declaration.
            string country = Country.getCountryByLongitudeLattitude(atweet.lattitude, atweet.longitude);

            context.EmitKeyValue(country, polarity);
        }
Exemplo n.º 45
0
        // Deserializes the line into an OffertResult and, for every offer that has both a
        // non-empty category value and a non-empty province value, emits
        // ("<category>;<province>;<hour>:00:00" + newline, "1").
        public override void Map(string inputLine, MapperContext context)
        {
            var result = ServiceHelper.Deserialize<OffertResult>(inputLine);

            // Category is checked before province, exactly as in the nested form.
            var usableOffers = result.offers.Where(o =>
                o.category != null && !String.IsNullOrEmpty(o.category.value) &&
                o.province != null && !String.IsNullOrEmpty(o.province.value));

            foreach (var offer in usableOffers)
            {
                // Only the hour of the publication time is kept.
                var hourBucket = String.Format("{0}:00:00", Convert.ToDateTime(offer.published).Hour);
                var province = Province.DepurarProvincia(offer.province.value);

                var outputKey = String.Format("{0};{1};{2}" + Environment.NewLine,
                    offer.category.value,
                    province,
                    hourBucket);

                context.EmitKeyValue(outputKey, "1");
            }
        }
Exemplo n.º 46
0
        // Handles ';'-separated lines with at least three fields and emits
        // ("<field 0>;<cleaned field 2>;<hour of field 1>:00:00" + newline, "1").
        // Field 1 is parsed as a date/time using the "es-es" culture. Other lines are ignored.
        public override void Map(string inputLine, MapperContext context)
        {
            if (!inputLine.Contains(";"))
            {
                return;
            }

            var fields = inputLine.Split(';');
            if (fields.Length < 3)
            {
                return;
            }

            var province = Province.DepurarProvincia(fields[2]);

            // Only the hour of the timestamp is kept.
            var published = Convert.ToDateTime(fields[1], new CultureInfo("es-es"));
            var hourBucket = String.Format("{0}:00:00", published.Hour);

            var outputKey = String.Format("{0};{1};{2}" + Environment.NewLine, fields[0], province, hourBucket);

            context.EmitKeyValue(outputKey, "1");
        }
Exemplo n.º 47
0
        // Emits (truncated "latitude, longitude" pair, value) for each row whose sixth
        // match of commaSplit holds a coordinate pair. Rows that do not produce exactly
        // six matches, the header row, and rows whose location is not a two-part
        // comma-separated value are skipped.
        public override void Map(string inputLine, MapperContext context)
        {
            var matches = commaSplit.Matches(inputLine);

            //Hadoop is semi-structured, meaning that we do not need every row to match
            //our expected structure.  This is a simplistic check, but given a more polyglot
            //data set, we could filter out unexpected rows or perform more complex filtering.
            if (matches == null || matches.Count != 6)
            {
                //If this were a production data set, I would think about logging bad rows
                //but because this is a demo, I just want to return without passing anything
                //to be reduced.
                return;
            }

            //Trim off any leading comma or surrounding quotation marks and parentheses.
            string key = matches[5].Value.TrimStart(',').Trim('\"').Trim('(').Trim(')');

            //We don't want to return the header row.
            if (key == "LOCATION")
            {
                return;
            }

            //At this point, we have coordinates which look like
            //35.776238687249744, -78.6246378053371
            //We want to solve to 3 places after the decimal point to
            //get a better idea of "neighborhood"/area-level crime.
            var keyvals = key.Split(',');
            if (keyvals.Length != 2)
            {
                return;
            }

            for (int i = 0; i < keyvals.Length; i++)
            {
                //Trim first so the decimal point index refers to the string being cut.
                string part = keyvals[i].Trim(' ');
                int dot = part.IndexOf('.');

                if (dot >= 0)
                {
                    //Keep at most 3 decimals; clamp so short values do not throw.
                    int length = dot + 4 < part.Length ? dot + 4 : part.Length;
                    part = part.Substring(0, length);
                }

                keyvals[i] = part;
            }

            key = keyvals[0] + ", " + keyvals[1];

            //The Map function returns a set of key-value pairs.
            //The key in this case is the truncated coordinate pair; 'value' is defined
            //outside this method (the original comment describes it as "1", used to get a count).
            context.EmitKeyValue(key, value);
        }
Exemplo n.º 48
0
        // Deserializes the line as a JSON Roo object and emits (prepTime, name), using
        // "Un" when prepTime is missing and "nul" when name is missing.
        // Lines that cannot be deserialized (or deserialize to null) are emitted as
        // ("error", "roor") so that bad input is visible in the output instead of
        // aborting the task.
        public override void Map(string inputLine, MapperContext context)
        {
            try
            {
                var s = JsonConvert.DeserializeObject<Roo>(inputLine);

                // DeserializeObject yields null for empty or literal "null" input.
                if (s == null)
                {
                    context.EmitKeyValue("error", "roor");
                    return;
                }

                // Substitute placeholders for missing fields.
                string prepTime = string.IsNullOrEmpty(s.prepTime) ? "Un" : s.prepTime;
                string name = string.IsNullOrEmpty(s.name) ? "nul" : s.name;

                //send output
                context.EmitKeyValue(prepTime, name);
            }
            catch (JsonException)
            {
                // Json.NET reports malformed or mismatched JSON with JsonException subclasses.
                context.EmitKeyValue("error", "roor");
            }
            catch (InvalidCastException)
            {
                // Kept for backward compatibility with the original handler.
                context.EmitKeyValue("error", "roor");
            }
        }
Exemplo n.º 49
0
        // Emits, keyed by indicator code, a tab-separated record of
        // country code, country name, computed trend and the value for the processed year (2007).
        // The header row and rows without a value for that year produce no output.
        public override void Map(string inputLine, MapperContext context)
        {
            const int processedYear = 2007;

            var row = ExtractValueFromLine(inputLine);

            // Skip the header row.
            if (row.CountryName == "Country Name")
            {
                return;
            }

            // Nothing to report when the processed year has no value.
            if (!row.YearValues.ContainsKey(processedYear))
            {
                return;
            }

            var trend = ComputeTrend(row.YearValues, processedYear);
            var record = row.CountryCode + '\t' + row.CountryName + '\t' + trend + '\t' + row.YearValues[processedYear];

            context.EmitKeyValue(row.IndicatorCode, record);
        }
Exemplo n.º 50
0
        // Re-keys a tab-separated line: the first field becomes the key and fields 2-5
        // (re-joined with tabs) become the value. Lines with fewer than five fields are
        // skipped instead of throwing IndexOutOfRangeException.
        public override void Map(string inputLine, MapperContext context)
        {
            var data = inputLine.Split('\t');

            // data[0]..data[4] are read below.
            if (data.Length < 5)
            {
                return;
            }

            //countrycode --- name,value,originalValue
            // NOTE(review): the field description above is the original author's; the
            // actual column meaning is not visible from this method - confirm.
            context.EmitKeyValue(data[0], data[1] + '\t' + data[2] + '\t' + data[3] + '\t' + data[4]);
        }
Exemplo n.º 51
0
 // Emits (negated numeric value of the second tab-separated field, whole input line).
 // Negating the value is the original author's way of ordering by value descending.
 // Lines without a second field, or whose second field is not a number, are skipped
 // instead of throwing.
 public override void Map(string inputLine, MapperContext context)
 {
     var data = inputLine.Split('\t');
     if (data.Length < 2)
     {
         return;
     }

     // TryParse(string, out double) uses the same current-culture rules as Double.Parse(string).
     double parsed;
     if (!Double.TryParse(data[1], out parsed))
     {
         return;
     }

     context.EmitKeyValue((-parsed).ToString(), inputLine);//order by value descending
 }