2024.7.27 (Week 3 of Hadoop study)
This week I continued with the later parts of the Heima Programmer (黑马程序员) Hadoop video course and learned the definitions and usage of internal, external, partitioned, and bucketed tables in Hive. I also learned that every Hive table is in fact backed by a directory, and how to load data into a table through that directory; a sketch of the four table types follows below.
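For reference, here is a minimal HiveQL sketch of the four table types, written in the style of the create statement at the end of this post (the demo database and all table, column, and path names are made up for illustration):

-- internal (managed) table: Hive owns the table directory; drop table deletes the data too
create table demo.managed_users (id int, name string)
row format delimited fields terminated by ',';

-- external table: Hive only records the location; drop table keeps the files
create external table demo.external_users (id int, name string)
row format delimited fields terminated by ','
location '/data/external_users';

-- partitioned table: each partition value becomes a subdirectory, e.g. .../dt=2024-07-27/
create table demo.user_logs (id int, action string)
partitioned by (dt string)
row format delimited fields terminated by ',';

-- bucketed table: rows are hashed on id into 4 files inside the table directory
create table demo.user_buckets (id int, name string)
clustered by (id) into 4 buckets
row format delimited fields terminated by ',';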
After learning that data can be loaded into a table through a file, I had an idea for the problem my teacher raised at last week's check-in on the 20th, namely that my data insertion took too long: could the data submitted from the web form be written by Java code into a local text file on Windows, that file uploaded to the Linux server, and its contents then loaded into the Hive table on Linux?
After reading documentation and studying sample code, I got this optimization working. Loading through a file is much faster than inserting row by row, since each INSERT through HiveServer2 launches a job while LOAD DATA essentially just moves the file into the table's directory. The relevant code follows:
<%--
  Created by IntelliJ IDEA.
  User: 贾贾鱼
  Date: 2024/7/25
  Time: 18:07
  To change this template use File | Settings | File Templates.
--%>
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<html>
<head>
    <title>Insert data via a file (speeds up insertion)</title>
</head>
<body>
<form action="transfileServlet" method="post">
    <textarea name="data" rows="10" cols="30" placeholder="Enter data here"></textarea>
    <br>
    <button type="submit">Submit</button>
</form>
</body>
</html>
package com.example.hive;

import com.jcraft.jsch.Channel;
import com.jcraft.jsch.ChannelSftp;
import com.jcraft.jsch.JSch;
import com.jcraft.jsch.Session;
import jakarta.servlet.ServletException;
import jakarta.servlet.annotation.WebServlet;
import jakarta.servlet.http.HttpServlet;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.servlet.http.HttpServletResponse;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.sql.*;

@WebServlet("/transfileServlet")
public class transfileServlet extends HttpServlet {

    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        request.setCharacterEncoding("UTF-8");
        response.setContentType("text/plain;charset=UTF-8");

        String data = request.getParameter("data"); // read the form data

        // Write the data to a local file
        File file = new File("C:\\Users\\jjc18\\Desktop\\data.txt"); // local file path
        try (FileOutputStream fos = new FileOutputStream(file)) {
            fos.write(data.getBytes(StandardCharsets.UTF_8));
        }

        // Upload the file to the remote Linux server
        String remoteHost = "192.168.88.101";
        String username = "hadoop";
        String password = "123456";
        String remoteFilePath = "/home/hadoop/temp_test.txt";
        try {
            uploadToLinux(file.getAbsolutePath(), remoteFilePath, username, remoteHost, password);
            response.getWriter().println("File uploaded to the Linux server!");
        } catch (Exception e) {
            e.printStackTrace();
            response.getWriter().println("File upload failed: " + e.getMessage());
        }

        // Load the file into the Hive table, then query it back as a check
        try {
            inserttext();
            response.getWriter().println("Data inserted successfully!");
            select(response);
        } catch (Exception e) {
            e.printStackTrace();
            response.getWriter().println("Data insert failed: " + e.getMessage());
        }
    }

    // Upload a local file to the remote host over SFTP
    private void uploadToLinux(String localFilePath, String remoteFilePath,
                               String username, String host, String password) throws Exception {
        JSch jsch = new JSch();
        Session session = jsch.getSession(username, host, 22);
        session.setPassword(password);
        session.setConfig("StrictHostKeyChecking", "no"); // skip host-key verification
        session.connect();
        Channel channel = session.openChannel("sftp");
        channel.connect();
        ChannelSftp sftpChannel = (ChannelSftp) channel;
        sftpChannel.put(localFilePath, remoteFilePath);
        sftpChannel.exit();
        session.disconnect();
    }

    // Load the uploaded file into the Hive table via HiveServer2
    private void inserttext() {
        // To overwrite the existing data, add OVERWRITE before INTO
        String sql = "LOAD DATA LOCAL INPATH '/home/hadoop/temp_test.txt' INTO TABLE myhive.file_test";
        try {
            Class.forName("org.apache.hive.jdbc.HiveDriver"); // load the Hive JDBC driver
            String url = "jdbc:hive2://node1:10000/myhive";
            String user = "hadoop";
            String password = "";
            Connection conn = DriverManager.getConnection(url, user, password);
            Statement statement = conn.createStatement();
            statement.execute(sql); // run the LOAD DATA command
            System.out.println("Data loaded into the table.");
        } catch (SQLException | ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    }

    // Query the table and echo the rows back to the browser
    private void select(HttpServletResponse response) throws IOException {
        String sql = "select * from myhive.file_test";
        try {
            Class.forName("org.apache.hive.jdbc.HiveDriver"); // load the Hive JDBC driver
            String url = "jdbc:hive2://node1:10000/myhive";
            String user = "hadoop";
            String password = "";
            Connection conn = DriverManager.getConnection(url, user, password);
            Statement statement = conn.createStatement();
            ResultSet resultSet = statement.executeQuery(sql);
            while (resultSet.next()) {
                response.getWriter().println(resultSet.getString("username"));
                response.getWriter().println(resultSet.getString("password"));
            }
        } catch (SQLException | ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    }
}
The SQL statement used to create the table:
create table myhive.file_test(
    username string comment 'username',
    password string comment 'password'
) row format delimited fields terminated by ',';
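To tie the pieces together: the table is comma-delimited with two string columns, so each line of the uploaded temp_test.txt must have the form username,password. A hypothetical example (the sample rows here are invented):

-- contents of /home/hadoop/temp_test.txt:
--   alice,pw123
--   bob,pw456

-- after the servlet runs LOAD DATA, verify from beeline:
select * from myhive.file_test;
-- alice   pw123
-- bob     pw456

Note that when LOAD DATA LOCAL INPATH is executed over JDBC, LOCAL means local to the HiveServer2 process, so the upload target 192.168.88.101 must be the same machine as node1 for the file to be found.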