09 Spark连接MySQL数据库
[root@localhost ~]# netstat -tunlp 3306
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 192.168.122.1:53 0.0.0.0:* LISTEN 1876/dnsmasq
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 1097/sshd
tcp 0 0 127.0.0.1:631 0.0.0.0:* LISTEN 1095/cupsd
tcp 0 0 0.0.0.0:111 0.0.0.0:* LISTEN 1/systemd
tcp6 0 0 :::22 :::* LISTEN 1097/sshd
tcp6 0 0 ::1:631 :::* LISTEN 1095/cupsd
tcp6 0 0 :::33060 :::* LISTEN 1191/mysqld
tcp6 0 0 :::3306 :::* LISTEN 1191/mysqld
tcp6 0 0 :::111 :::* LISTEN 1/systemd
udp 0 0 0.0.0.0:55768 0.0.0.0:* 938/avahi-daemon: r
udp 0 0 192.168.122.1:53 0.0.0.0:* 1876/dnsmasq
udp 0 0 0.0.0.0:67 0.0.0.0:* 1876/dnsmasq
udp 0 0 0.0.0.0:111 0.0.0.0:* 1/systemd
udp 0 0 0.0.0.0:5353 0.0.0.0:* 938/avahi-daemon: r
udp6 0 0 :::50577 :::* 938/avahi-daemon: r
udp6 0 0 :::111 :::* 1/systemd
udp6 0 0 :::5353 :::* 938/avahi-daemon: r
^Z
[1]+ 已停止 netstat -tunlp 3306
[root@localhost ~]# mysql -uroot -p
Enter password:
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 8
Server version: 8.0.21 Source distribution
Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> show database;
ERROR 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'database' at line 1
mysql> show databases;
+--------------------+
| Database |
+--------------------+
| information_schema |
| mysql |
| performance_schema |
| spark |
| sys |
+--------------------+
5 rows in set (0.12 sec)
mysql> use spark
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
Database changed
mysql> select * from studentl
->
->
->
->
-> ;
ERROR 1146 (42S02): Table 'spark.studentl' doesn't exist
mysql> select * from student;
+------+-----------+--------+------+
| id | name | gender | age |
+------+-----------+--------+------+
| 1 | yuhaoming | F | 23 |
+------+-----------+--------+------+
1 row in set (0.00 sec)
mysql>
[2]+ 已停止 mysql -uroot -p
[root@localhost ~]# pyspark
Python 3.8.3 (default, May 10 2021, 17:05:21)
[GCC 8.3.1 20191121 (Red Hat 8.3.1-5)] on linux
Type "help", "copyright", "credits" or "license" for more information.
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
/__ / .__/\_,_/_/ /_/\_\ version 3.1.1
/_/
Using Python version 3.8.3 (default, May 10 2021 17:05:21)
Spark context Web UI available at http://192.168.206.134:4040
Spark context available as 'sc' (master = local[*], app id = local-1622467256339).
SparkSession available as 'spark'.
>>> import org.apache.spark.sql.SQLContext
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ModuleNotFoundError: No module named 'org'
>>> spark.read.format('jdbc').option('driver',"com.mysql.jdbc.Driver").option('url','jdbc:myql://localhost:3306/spark?useSSL=false').option('dbtable','student').option('user','root').option('password','990707').load().show()
Loading class `com.mysql.jdbc.Driver'. This is deprecated. The new driver class is `com.mysql.cj.jdbc.Driver'. The driver is automatically registered via the SPI and manual loading of the driver class is generally unnecessary.
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/spark/spark-3.1.1-bin-hadoop2.7/python/pyspark/sql/readwriter.py", line 210, in load
return self._df(self._jreader.load())
File "/usr/local/spark/spark-3.1.1-bin-hadoop2.7/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1304, in __call__
File "/usr/local/spark/spark-3.1.1-bin-hadoop2.7/python/pyspark/sql/utils.py", line 117, in deco
raise converted from None
pyspark.sql.utils.IllegalArgumentException: requirement failed: The driver could not open a JDBC connection. Check the URL: jdbc:myql://localhost:3306/spark?useSSL=false
>>> spark.read.format('jdbc').option('driver',"com.mysql.cj.jdbc.Driver").option('url','jdbc:myql://localhost:3306/spark?useSSL=false').option('dbtable','student').option('user','root').option('password','990707').load().show()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/spark/spark-3.1.1-bin-hadoop2.7/python/pyspark/sql/readwriter.py", line 210, in load
return self._df(self._jreader.load())
File "/usr/local/spark/spark-3.1.1-bin-hadoop2.7/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1304, in __call__
File "/usr/local/spark/spark-3.1.1-bin-hadoop2.7/python/pyspark/sql/utils.py", line 117, in deco
raise converted from None
pyspark.sql.utils.IllegalArgumentException: requirement failed: The driver could not open a JDBC connection. Check the URL: jdbc:myql://localhost:3306/spark?useSSL=false
>>> spark.read.format('jdbc').option('driver',"com.mysql.cj.jdbc.Driver").option('url','jdbc:mysql://localhost:3306/spark?useSSL=false').option('dbtable','student').option('user','root').option('password','990707').load().show()
+---+---------+------+---+
| id| name|gender|age|
+---+---------+------+---+
| 1|yuhaoming| F| 23|
+---+---------+------+---+
>>> from pyspark.sql.types import Row,StructType,StructField,StringType,IntegerType
>>> studentRDD=spark.sparkContext.parallelize(["3 zhao M 26","4 wang M 27"]). map(1ambda line:line.split(’ '))rowRDD=studentRDD.map(lambda p;Row(int(p[o].strip()),p[1].strip(),p[2].strip(),int(p[3].strip())))
File "<stdin>", line 1
studentRDD=spark.sparkContext.parallelize(["3 zhao M 26","4 wang M 27"]). map(1ambda line:line.split(’ '))rowRDD=studentRDD.map(lambda p;Row(int(p[o].strip()),p[1].strip(),p[2].strip(),int(p[3].strip())))
^
SyntaxError: invalid syntax
>>> studentRDD=spark.sparkContext.parallelize(["3 zhao M 26","4 wang M 27"]). map(1ambda line:line.split(’ '))
File "<stdin>", line 1
studentRDD=spark.sparkContext.parallelize(["3 zhao M 26","4 wang M 27"]). map(1ambda line:line.split(’ '))
^
SyntaxError: invalid syntax
>>> studentRDD=spark.sparkContext.parallelize(["3 zhao M 26","4 wang M 27"]).map(1ambda line:line.split(’ '))
File "<stdin>", line 1
studentRDD=spark.sparkContext.parallelize(["3 zhao M 26","4 wang M 27"]).map(1ambda line:line.split(’ '))
^
SyntaxError: invalid syntax
>>> studentRDD=spark.sparkContext.parallelize(["3 zhao M 26","4 wang M 27"]).map(1ambda line:line.split(' '))
File "<stdin>", line 1
studentRDD=spark.sparkContext.parallelize(["3 zhao M 26","4 wang M 27"]).map(1ambda line:line.split(' '))
^
SyntaxError: invalid syntax
>>> studentRDD=spark.sparkContext.parallelize(["3 zhao M 26","4 wang M 27"]).map(lambda line:line.split(' '))
>>> studentRDD=spark.sparkContext.parallelize(["3 zhao M 26","4 wang M 27"]). map(1ambda line:line.split(’ '))rowRDD=studentRDD.map(lambda p;Row(int(p[o].strip()),p[1].strip(),p[2].strip(),int(p[3].strip())))
File "<stdin>", line 1
studentRDD=spark.sparkContext.parallelize(["3 zhao M 26","4 wang M 27"]). map(1ambda line:line.split(’ '))rowRDD=studentRDD.map(lambda p;Row(int(p[o].strip()),p[1].strip(),p[2].strip(),int(p[3].strip())))
^
SyntaxError: invalid syntax
>>> schema = StructType([StructField("id", IntegerType () , True),
... StructField("name" ,StringType() , True),StructField("gender" ,StringType () , True),StructField("age" ,IntegerType() , True)])
File "<stdin>", line 2
StructField("name" ,StringType() , True),StructField("gender" ,StringType () , True),StructField("age" ,IntegerType() , True)])
^
SyntaxError: invalid character in identifier
>>> rowRDD=studentRDD.map(lambda p:Row(int(p[0].strip()),p[1].strip(),p[2].strip(),int(p[3].strip())))
>>> schema = StructType([StructField("id", IntegerType () , True),
... StructField("name" ,StringType() , True),StructField("gender" ,StringType () , True),StructField("age" ,IntegerType() , True)])
File "<stdin>", line 2
StructField("name" ,StringType() , True),StructField("gender" ,StringType () , True),StructField("age" ,IntegerType() , True)])
^
SyntaxError: invalid character in identifier
>>> schema = StructType([StructField("id",IntegerType () , True),StructField("name" ,StringType() , True),StructField("gender" ,StringType () , True),StructField("age",IntegerType(),True)])
>>> studentDF=spark.createDataFrame( rowRDD,schema)
>>> prop={'user':'root','password':'990707','driver':'com.mysql.cj.jdbc.Driver'}
>>> studentDF.write.jdbc('jdbc:mysql://localhost:3306/spark?useSSL=false','student','append',prop)
>>> spark.read.format('jdbc').option('driver',"com.mysql.cj.jdbc.Driver").option('url','jdbc:mysql://localhost:3306/spark?useSSL=false').option('dbtable','student').option('user','root').option('password','990707').load().show()
+---+---------+------+---+
| id| name|gender|age|
+---+---------+------+---+
| 1|yuhaoming| F| 23|
+---+---------+------+---+
>>>

浙公网安备 33010602011771号