1 package com.day07
 2 
 3 import org.apache.spark.{SparkConf, SparkContext}
 4 
 5 object Leijia {
 6   def main(args: Array[String]): Unit = {
 7     //配置spark
 8     var conf = new SparkConf().setAppName("wc").setMaster("local")
 9 
10     //获取spark上下文对象
11     var sc = new SparkContext(conf)
12 
13     //读取数据文件
14     var dataRdd = sc.textFile("D:\\IDEA_Maven\\day07\\src\\main\\resources\\aa.txt")
15 
16     //默认情况下,是没办法修改i的值的
17     //    var i = 0
18     //定义累加器
19     var i = sc.accumulator(0)
20 
21     dataRdd.foreach(s => {
22       i += 1
23       println(s + i)
24     })
25 
26     println(i)
27   }
28 
29 
30 }
 1 package com.day07
 2 
 3 import org.apache.spark.{SparkConf, SparkContext}
 4 
 5 object TestGB {
 6   def main(args: Array[String]): Unit = {
 7     val sparkConf = new SparkConf().setAppName("TestGB").setMaster("local")
 8     val sc = new SparkContext(sparkConf)
 9 
10     var dataRdd = sc.textFile("D:\\IDEA_Maven\\day07\\src\\main\\resources\\aa.txt")
11 
12     var list = sc.broadcast(List("hello world"))
13 
14     dataRdd.foreach(s => {
15       //使用.value进行获取数据
16       if (list.value.contains(s)) {
17 
18         println(s)
19       }
20     })
21   }
22 
23 }
  1 package com.day07
  2 
  3 import java.sql.{Connection, DriverManager, PreparedStatement}
  4 
  5 import org.apache.spark.broadcast.Broadcast
  6 import org.apache.spark.rdd.RDD
  7 import org.apache.spark.{SparkConf, SparkContext}
  8 
  9 object IPLocaltion_Test {
 10   def main(args: Array[String]): Unit = {
 11     //todo:创建sparkconf 设置参数
 12     //local  1
 13     //local[n]
 14     //local[*]
 15 
 16     //1.本地运行  开发测试
 17     //2.yarn
 18     //3.standalone
 19     val sparkConf: SparkConf = new SparkConf().setAppName("IPLocaltion_Test").setMaster("local")
 20 
 21     //todo:创建SparkContext
 22     val sc = new SparkContext(sparkConf)
 23 
 24     //todo:读取基站数据
 25     val data: RDD[String] = sc.textFile("D:\\IDEA_Maven\\day07\\src\\main\\resources\\ip.txt")
 26 
 27     //todo:对基站数据进行切分 ,获取需要的字段 (ipStart,ipEnd,城市位置,经度,纬度)
 28     val jizhanRDD: RDD[(String, String, String, String, String)] = data.map(_.split("\\|")).map(
 29       x => (x(2), x(3), x(4) + "-" + x(5) + "-" + x(6) + "-" + x(7) + "-" + x(8), x(13), x(14)))
 30 
 31     //todo:获取RDD的数据
 32     val jizhanData: Array[(String, String, String, String, String)] = jizhanRDD.collect()
 33 
 34     //todo:广播变量,一个只读的数据区,所有的task都能读到的地方  广播变量
 35     val jizhanBroadcast: Broadcast[Array[(String, String, String, String, String)]] = sc.broadcast(jizhanData)
 36 
 37 
 38     //todo:读取目标数据
 39     val destData: RDD[String] = sc.textFile("D:\\IDEA_Maven\\day07\\src\\main\\resources\\20090121000132.394251.http.format")
 40 
 41     //todo:获取数据中的ip地址字段
 42     val ipData: RDD[String] = destData.map(_.split("\\|")).map(x => x(1))
 43 
 44     //todo:把IP地址转化为long类型,然后通过二分法去基站数据中查找,找到的维度做wordCount
 45     //map 和mapPartition 的区别
 46     val result = ipData.mapPartitions(iter => {
 47       //获取广播变量中的值
 48       val valueArr: Array[(String, String, String, String, String)] = jizhanBroadcast.value
 49 
 50       //todo:操作分区中的itertator
 51       iter.map(ip => {
 52         //将ip转化为数字long  自己的转换规则
 53         val ipNum: Long = ipToLong(ip)
 54 
 55         //拿这个数字long去基站数据中通过二分法查找,返回ip在valueArr中的下标(目标数据 , 规则)
 56         val index: Int = binarySearch(ipNum, valueArr)
 57 
 58         //根据下标获取对一个的经纬度
 59         val tuple = valueArr(index)
 60         //返回结果 ((经度,维度),1)
 61         ((tuple._4, tuple._5), 1)
 62       })
 63 
 64     })
 65 
 66     //todo:分组聚合
 67     val resultFinal: RDD[((String, String), Int)] = result.reduceByKey(_ + _)
 68 
 69     //todo:打印输出
 70     resultFinal.foreach(println)
 71 
 72     //todo:将结果保存到mysql表中
 73 
 74     resultFinal.map(x => (x._1._1, x._1._2, x._2)).foreachPartition(data2Mysql)
 75     sc.stop()
 76 
 77   }
 78 
 79   //todo:ip转为long类型
 80   def ipToLong(ip: String): Long = {
 81     //todo:切分ip地址。
 82     val ipArray: Array[String] = ip.split("\\.")
 83     var ipNum = 0L
 84     // |:按位或运算符   相同位上只要有1的都是1
 85     // <<:按位进行左移位运算
 86     for (i <- ipArray) {
 87       ipNum = i.toLong | ipNum << 8L
 88     }
 89     ipNum
 90   }
 91 
 92   //todo:通过二分查找法,获取ip在广播变量中的下标
 93   def binarySearch(ipNum: Long, valueArr: Array[(String, String, String, String, String)]): Int = {
 94 
 95     //开始下标
 96     var start = 0
 97     //结束下标
 98     var end = valueArr.length - 1
 99 
100     while (start <= end) {
101       //去中间
102       val middle = (start + end) / 2
103       //判断我们的ip 是否在这个数组的范围内
104       if (ipNum >= valueArr(middle)._1.toLong && ipNum <= valueArr(middle)._2.toLong) {
105         return middle
106       }
107 
108       if (ipNum > valueArr(middle)._2.toLong) {
109         start = middle
110       }
111 
112       if (ipNum < valueArr(middle)._1.toLong) {
113         end = middle
114       }
115     }
116 
117     -1
118   }
119 
120   //todo:数据保存到mysql表中
121   def data2Mysql(iterator: Iterator[(String, String, Int)]): Unit = {
122     //todo:创建数据库连接Connection
123     var conn: Connection = null
124     //todo:创建PreparedStatement对象
125     var ps: PreparedStatement = null
126     //todo:采用拼占位符问号的方式写sql语句。
127     var sql = "insert into iplocation(longitude,latitude,total_count) values(?,?,?)"
128     println(sql)
129     //todo:获取数据连接
130     conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/spark?serverTimezone=UTC", "root", "123")
131 
132 
133     //todo:  选中想被try/catch包围的语句 ctrl+alt+t 快捷键选中try/catch/finally
134     try {
135       iterator.foreach(line => {
136         println("---------------")
137         //todo:预编译sql语句
138         ps = conn.prepareStatement(sql)
139 
140         //todo:对占位符设置值,占位符顺序从1开始,第一个参数是占位符的位置,第二个参数是占位符的值。
141         ps.setString(1, line._1)
142         ps.setString(2, line._2)
143         ps.setLong(3, line._3)
144         //todo:执行
145         ps.execute()
146       })
147     } catch {
148       case e: Exception => println(e)
149     } finally {
150       if (ps != null) {
151         ps.close()
152       }
153       if (conn != null) {
154         conn.close()
155       }
156     }
157 
158   }
159 
160 }