2024.7.27 (Hadoop study, week 3)

This week I continued with the later chapters of the 黑马程序员 Hadoop video course, learning the definitions and usage of internal (managed) tables, external tables, partitioned tables, and bucketed tables. I also learned that every Hive table is really just a directory in HDFS, and that data can be loaded into a table by placing files into that directory.
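
To keep the four definitions straight, here is a minimal HiveQL sketch of each table type (the table names and columns are made up for illustration, not taken from the course):

-- Managed (internal) table: Hive owns the directory; DROP TABLE deletes the data too
create table myhive.t_managed (id int, name string)
row format delimited fields terminated by ',';

-- External table: Hive only references the directory; DROP TABLE keeps the files
create external table myhive.t_external (id int, name string)
row format delimited fields terminated by ','
location '/user/hadoop/t_external';

-- Partitioned table: each partition value becomes a subdirectory of the table directory
create table myhive.t_partitioned (id int, name string)
partitioned by (dt string)
row format delimited fields terminated by ',';

-- Bucketed table: rows are hashed on the bucket column into a fixed number of files
create table myhive.t_bucketed (id int, name string)
clustered by (id) into 4 buckets
row format delimited fields terminated by ',';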

After learning that data can be loaded into a table from a file, I had an idea about the optimization problem my teacher raised at the July 20 review, where inserting data took too long: could the data submitted from the web form be written by Java code into a local text file on Windows, that file then uploaded to the Linux machine, and its contents finally loaded into the Hive table there?
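
This also explains why the original approach was slow: when going through HiveServer2, every single-row INSERT INTO ... VALUES launches a full MapReduce job, while LOAD DATA is essentially just a file move into the table's directory. A sketch of the contrast (sample values are made up; the table and path come from the code below):

-- Slow: each statement spins up a job just to write one row
insert into myhive.file_test values ('tom', '123456');

-- Fast: the file is moved/copied into the table's directory; no job is launched
load data local inpath '/home/hadoop/temp_test.txt' into table myhive.file_test;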

After reading up and studying some code, I got the optimization working: loading the data via a file is much faster than inserting it row by row. The relevant code follows; first, the JSP page with the submission form:

<%--
  Created by IntelliJ IDEA.
  User: 贾贾鱼
  Date: 2024/7/25
  Time: 18:07
  To change this template use File | Settings | File Templates.
--%>
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<html>
<head>
    <title>Insert data via a file (to speed up inserts)</title>
</head>
<body>
<form action="transfileServlet" method="post">
    <textarea name="data" rows="10" cols="30" placeholder="Enter data here"></textarea>
    <br>
    <button type="submit">Submit</button>
</form>
</body>
</html>
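
Since the file contents are loaded into a comma-delimited table (see the CREATE TABLE statement at the end), each line typed into the textarea must be a username,password pair, for example (made-up sample):

tom,123456
jerry,abcdef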

Next, the servlet: it writes the submitted text to a local file, uploads that file to the Linux server over SFTP (JSch), and then loads it into the Hive table through JDBC:

package com.example.hive;

import com.jcraft.jsch.Channel;
import com.jcraft.jsch.ChannelSftp;
import com.jcraft.jsch.JSch;
import com.jcraft.jsch.Session;
import jakarta.servlet.ServletException;
import jakarta.servlet.annotation.MultipartConfig;
import jakarta.servlet.annotation.WebServlet;
import jakarta.servlet.http.HttpServlet;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.servlet.http.HttpServletResponse;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.sql.*;

@WebServlet("/transfileServlet")
@MultipartConfig
public class transfileServlet extends HttpServlet {
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        request.setCharacterEncoding("UTF-8");              // decode the POSTed form data as UTF-8
        response.setContentType("text/html;charset=UTF-8"); // so the response text displays correctly
        String data = request.getParameter("data");         // read the textarea contents

        // Write the data to a local file on Windows
        File file = new File("C:\\Users\\jjc18\\Desktop\\data.txt"); // local file path
        try (FileOutputStream fos = new FileOutputStream(file)) {
            fos.write(data.getBytes(StandardCharsets.UTF_8)); // write as UTF-8 so the text survives intact
        }

        // Upload the file to the remote Linux server
        String remoteHost = "192.168.88.101";
        String username = "hadoop";
        String password = "123456";
        String remoteFilePath = "/home/hadoop/temp_test.txt";

        try {
            uploadToLinux(file.getAbsolutePath(), remoteFilePath, username, remoteHost, password);
            response.getWriter().println("File uploaded to the Linux server successfully.");
        } catch (Exception e) {
            e.printStackTrace();
            response.getWriter().println("File upload failed: " + e.getMessage());
        }

        try {
            inserttext();
            response.getWriter().println("Data loaded into the Hive table.");

            // Query the table back so the page shows what was just loaded
            String sql = "select * from myhive.file_test";

            // Load the Hive JDBC driver and connect to HiveServer2 (empty password in this setup)
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            String url = "jdbc:hive2://node1:10000/myhive";
            try (Connection conn = DriverManager.getConnection(url, "hadoop", "");
                 Statement statement = conn.createStatement();
                 ResultSet resultSet = statement.executeQuery(sql)) {
                while (resultSet.next()) {
                    response.getWriter().println(resultSet.getString("username"));
                    response.getWriter().println(resultSet.getString("password"));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            response.getWriter().println("Data load failed: " + e.getMessage());
        }
    }

    private void uploadToLinux(String localFilePath, String remoteFilePath, String username, String host, String password) throws Exception {
        JSch jsch = new JSch();
        Session session = jsch.getSession(username, host, 22);
        session.setPassword(password);
        session.setConfig("StrictHostKeyChecking", "no"); // skip host-key verification (fine on a lab VM, not in production)
        session.connect();

        // Open an SFTP channel and copy the local file to the remote path
        Channel channel = session.openChannel("sftp");
        channel.connect();

        ChannelSftp sftpChannel = (ChannelSftp) channel;
        sftpChannel.put(localFilePath, remoteFilePath);

        sftpChannel.exit();
        session.disconnect();
    }

    private void inserttext() {
        // Add OVERWRITE before INTO to replace the table's existing data instead of appending
        String sql = "LOAD DATA LOCAL INPATH '/home/hadoop/temp_test.txt' INTO TABLE myhive.file_test";

        try {
            // Load the Hive JDBC driver
            Class.forName("org.apache.hive.jdbc.HiveDriver");

            // Connect to HiveServer2 and run the LOAD DATA statement
            String url = "jdbc:hive2://node1:10000/myhive";
            try (Connection conn = DriverManager.getConnection(url, "hadoop", "");
                 Statement statement = conn.createStatement()) {
                statement.execute(sql);
            }
            System.out.println("Data loaded into the table.");
        } catch (SQLException | ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    }

    // Standalone query helper (not called from doPost; kept for testing from the console)
    private void select() {
        String sql = "select * from myhive.file_test";

        try {
            Class.forName("org.apache.hive.jdbc.HiveDriver");

            String url = "jdbc:hive2://node1:10000/myhive";
            try (Connection conn = DriverManager.getConnection(url, "hadoop", "");
                 Statement statement = conn.createStatement();
                 ResultSet resultSet = statement.executeQuery(sql)) {
                while (resultSet.next()) {
                    System.out.println(resultSet.getString("username") + "," + resultSet.getString("password"));
                }
            }
        } catch (SQLException | ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    }
}
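
One caveat I noted: with HiveServer2, LOAD DATA LOCAL INPATH reads from the local filesystem of the machine running HiveServer2, so the SFTP upload must land on that same machine (here I assume 192.168.88.101 is node1, as in the course VMs).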

The SQL statement for creating the table:

create table myhive.file_test(
    username string comment 'username',
    password string comment 'password'
) row format delimited fields terminated by ',';
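
As the comment in the servlet code says, putting OVERWRITE before INTO makes the load replace the table's existing data instead of appending to it:

load data local inpath '/home/hadoop/temp_test.txt' overwrite into table myhive.file_test;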


posted @ 2024-07-27 21:13  贾贾鱼