北京市政百姓信件可视化展示

这是原网站:https://www.beijing.gov.cn/hudong/hdjl/com.web.search.mailList.flow

点击查询后我们可以看到其中的相应信息,{page: {pageNo:'1', totalCount:'809', totalPages:'135', pageSize:'6'}, result: .....}

这里可以看到一共有809封信件,并且根据不同的分类,信件的url地址的不一样的

我们可以根据id遍历得到所有信件的详细内容

https://www.beijing.gov.cn/hudong/hdjl/com.web.consult.consultDetail.flow?originalId=AH23081610397

https://www.beijing.gov.cn/hudong/hdjl/com.web.suggest.suggesDetail.flow?originalId=AH23070511063

在爬取完网页数据之后保存每个页面

打开虚拟机和hadoop集群将网页数据上传到hdfs当中

编写mapreduce程序对信件进行数据清洗

map

package org.example;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class LetterMapper extends Mapper<Text,Text,Text,Text> {

    private Text outK=new Text();
    private List<Text> outV=new ArrayList<>();
    private String page;
    private Document doc;
    @Override
    protected void map(Text key, Text value, Mapper<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        page=value.toString();
        doc= Jsoup.parse(page);
        String url=doc.getElementsByTag("url").first().text();
        String letterType=null;
        String letterTitle=null;
        String replyOrganization=null;
        if(url.contains("consult")) letterType="咨询";
        else letterType="建议";
        String id=url.substring(url.lastIndexOf("=") + 1);
        Elements strong = doc.getElementsByTag("strong");
        letterTitle=strong.get(0).text();
        replyOrganization=strong.get(1).text();
        String letterRecipient=doc.getElementsByClass("col-xs-10 col-lg-3 col-sm-3 col-md-4 text-muted").first().text().substring(4);
        String letterDate=doc.getElementsByClass("col-xs-5 col-lg-3 col-sm-3 col-md-3 text-muted").first().text().substring(3);
        String letterContent=doc.getElementsByClass("col-xs-12 col-md-12 column p-2 text-muted mx-2").first().text();
        String replyDate=doc.getElementsByClass("col-xs-12 col-sm-3 col-md-3 my-2").first().text().substring(5);
        String replyContent=doc.getElementsByClass("col-xs-12 col-md-12 column p-4 text-muted my-3").first().text();
        outK.set(id);
        outV.add(new Text(replyContent));
        outV.add(new Text(replyDate));
        outV.add(new Text(replyOrganization));
        outV.add(new Text(letterContent));
        outV.add(new Text(letterDate));
        outV.add(new Text(letterRecipient));
        outV.add(new Text(letterTitle));
        outV.add(new Text(letterType));
        outV.add(new Text(id));
        for(Text i:outV){
            context.write(outK,i);
        }
    }
}

reduce

package org.example;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class LetterReducer extends Reducer<Text,Text, NullWritable,Text> {
    private Text outV=new Text();

    @Override
    protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, NullWritable, Text>.Context context) throws IOException, InterruptedException {
        StringBuilder s=new StringBuilder();
        for(Text i:values){
            s.append(i.toString()+"\t");
        }
        outV.set(s.toString());
        context.write(NullWritable.get(),outV);
    }
}

自定义文件切片,按一个文件的形式进行切片

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;
import java.util.List;

public class LetterFileInputFormat extends FileInputFormat<Text, Text> {
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        // 返回一个自定义的InputSplit列表,这里将整个文件作为一个切片
        List<InputSplit> splits = super.getSplits(job);
        return splits;
    }

    @Override
    public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        // 返回一个自定义的RecordReader,这里将整个文件的内容作为一个Value
        LetterFileRecordReader reader = new LetterFileRecordReader();
        reader.initialize(split, context);
        return reader;
    }

    // 自定义RecordReader,将整个文件的内容作为一个Value
    public static class LetterFileRecordReader extends RecordReader<Text, Text> {
        private Path file;
        private Configuration conf;
        private boolean processed = false;
        private Text key;
        private Text value;

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            file = ((FileSplit) split).getPath();
            conf = context.getConfiguration();
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (!processed) {
                byte[] contents = new byte[(int) file.getFileSystem(conf).getFileStatus(file).getLen()];
                file.getFileSystem(conf).open(file).readFully(0, contents);
                String contentString = new String(contents);
                key = new Text(file.getName());
                value = new Text(contentString);
                processed = true;
                return true;
            }
            return false;
        }

        @Override
        public Text getCurrentKey() throws IOException, InterruptedException {
            return key;
        }

        @Override
        public Text getCurrentValue() throws IOException, InterruptedException {
            return value;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return processed ? 1.0f : 0.0f;
        }

        @Override
        public void close() throws IOException {
            //不进行操作
        }
    }
}

导入打包工具

<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.8.1</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>3.3.0</version>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>
                        jar-with-dependencies
                    </descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>

然后进行打包,点击clean->package

然后将包上传到虚拟机当中,我改名为LP.jar

启动命令hadoop jar LP.jar org.example.Main /user/bill/letter_htmls/www.beijing.gov.cn/  /user/bill/letter_res

得到清洗后的结果将结果导入到hive表中,并查询得到想要的信息

利用sqoop将得到的数据导出到mysql中

编写页面进行数据展示

几个注意事项

1.jdbc:mysql://billsaifu:3306/school?useSSL=false&characterEncoding=UTF-8&allowMultiQueries=true

allowMultiQueries=true这个设置,在mybatis中就可以同时执行多条sql语句

2.导入hive-jdbc包的时候需要额外添加一个配置

<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>3.1.3</version>
    <exclusions>
        <exclusion>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-runner</artifactId>
        </exclusion>
    </exclusions>
</dependency>

下面给出源码

爬取

package com.example.letterproject.utils;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

public class LetterCrawler implements PageProcessor {
    private Site site=new Site();
    private static int num=0;
    @Override
    public void process(Page page) {
        System.out.println(++num);
        page.putField("html",page.getRawText());
    }

    @Override
    public Site getSite() {
        site.setCharset("UTF-8");
        site.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.188");
        return site;
    }
}
package com.example.letterproject.utils;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import lombok.SneakyThrows;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import us.codecraft.webmagic.Spider;

import java.net.URL;
import java.util.ArrayList;
import java.util.List;

public class LetterUtils {


    private static String get_json(String s){
        s=s.replace("page:","\"page\":");
        s=s.replace("pageNo:","\"pageNo\":");
        s=s.replace("totalCount:","\"totalCount\":");
        s=s.replace("totalPages:","\"totalPages\":");
        s=s.replace("pageSize:","\"pageSize\":");
        s=s.replace("result:","\"result\":");
        s=s.replace("originalId:","\"originalId\":");
        s=s.replace("letterType:","\"letterType\":");
        s=s.replace("letterTypeName:","\"letterTypeName\":");
        s=s.replace("letterTitle:","\"letterTitle\":");
        s=s.replace("showLetterTitle:","\"showLetterTitle\":");
        s=s.replace("writeDate:","\"writeDate\":");
        s=s.replace("orgNames:","\"orgNames\":");
        s=s.replace("showOrgNames:","\"showOrgNames\":");
        s=s.replace("'","\"");
        return s;
    }
    @SneakyThrows

    public void loadLetters(){
        Document start_page = Jsoup.parse(new URL("https://www.beijing.gov.cn/hudong/hdjl/sindex/bjah-index-hdjl!letterListJson.action?keyword=&startDate=&endDate=&letterType=0&page.pageNo=1&page.pageSize=0&orgtitleLength=26"), 30000);
        String json_start=start_page.text();
        json_start=get_json(json_start);
        ObjectMapper objectMapper=new ObjectMapper();
        JsonNode jsonNode=objectMapper.readTree( json_start);
        String num= String.valueOf(jsonNode.get("page").get("totalCount")).replace("\"","");
        Document end_page=Jsoup.parse(new URL("https://www.beijing.gov.cn/hudong/hdjl/sindex/bjah-index-hdjl!letterListJson.action?keyword=&startDate=&endDate=&letterType=0&page.pageNo=1&page.pageSize="+num+"&orgtitleLength=26"),3000);
        String json_end=end_page.text();
        json_end=get_json(json_end);
        jsonNode=objectMapper.readTree(json_end);
        ArrayNode arrayNode= (ArrayNode) jsonNode.get("result");
        List<String> url_list=new ArrayList<>();
        for(JsonNode i:arrayNode){
            String id=i.get("originalId").toString().replace("\"","");
            String kind=i.get("letterTypeName").toString().replace("\"","");
            if(kind.equals("咨询")) {
                url_list.add("https://www.beijing.gov.cn/hudong/hdjl/com.web.consult.consultDetail.flow?originalId="+id);
            }
            else {
                url_list.add("https://www.beijing.gov.cn/hudong/hdjl/com.web.suggest.suggesDetail.flow?originalId="+id);
            }
        }
        String[] urls=url_list.toArray(new String[0]);
        Spider.create(new LetterCrawler())
                .thread(100)
                .addUrl(urls)
                .addPipeline(new LetterFilePipeline("LetterProject/src/main/resources/static/data/letter_htmls/"))
                .run();

    }

}
 
hdfs操作
package com.example.letterproject.utils;
import lombok.SneakyThrows;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

import java.net.URI;

public class LetterHDFS {
    private FileSystem fs = null;

    @SneakyThrows
    public void init(){
        Configuration conf = new Configuration();
        conf.set("dfs.client.use.datanode.hostname", "true");
        conf.set("fs.defaultFS", "hdfs://billsaifu:9000");
        URI uri=new URI("hdfs://billsaifu:9000");
        String username="bill";
        fs=FileSystem.get(uri,conf,username);
    }
    @SneakyThrows
    public void Mkdir(String filepath){
        fs.mkdirs(new Path(filepath));
    }
    @SneakyThrows
    public void CpFile(String localPath,String filePath){
        fs.copyFromLocalFile(false,true,new Path(localPath),new Path(filePath));
    }
    @SneakyThrows
    public void GetFile(String filePath,String localPath){
        fs.copyToLocalFile(false,new Path(filePath),new Path(localPath),true);
    }
    @SneakyThrows
    public void DelFile(String filePath){
        fs.delete(new Path(filePath));
    }
    @SneakyThrows
    public void MvFile(String OldFilePath,String NewFilePath){
        fs.rename(new Path(OldFilePath),new Path(NewFilePath));
    }
    @SneakyThrows
    public void FileDetail(String filePath){
        RemoteIterator<LocatedFileStatus> listFiles=fs.listFiles(new Path(filePath),true);
        while (listFiles.hasNext()){
            LocatedFileStatus one=listFiles.next();
            System.out.println("文件路径: "+one.getPath());
            System.out.println("文件名: "+one.getPath().getName());
            System.out.println("文件所有者: "+one.getOwner());
            System.out.println("文件所有组: "+one.getGroup());
            System.out.println("文件大小: "+one.getLen());
        }
    }
    @SneakyThrows
    public boolean isFile(String filePath){
        return fs.exists(new Path(filePath));
    }
    @SneakyThrows
    public void closeFS(){
        if(fs!=null) fs.close();
    }
}

hive操作

package com.example.letterproject.utils;

import com.example.letterproject.pojo.Letter;
import lombok.SneakyThrows;

import java.sql.*;
import java.util.ArrayList;
import java.util.List;

public class LetterHive {
    private  String hiveUrl="jdbc:hive2://billsaifu:10000/school";
    private  String username="bill";
    private   Connection connection;
    private  PreparedStatement preparedStatement;
    @SneakyThrows
    public void getConnection(){
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        connection= DriverManager.getConnection(hiveUrl,username,null);
    }
    @SneakyThrows
    public void createTable(){
        getConnection();
        String sql="create table if not exists letters (  " +
                "originalId STRING," +
                "letterType STRING," +
                "letterTitle STRING," +
                "letterRecipient STRING," +
                "letterDate STRING," +
                "letterContent STRING," +
                "replyOrganization STRING," +
                "replyDate STRING," +
                "replyContent STRING)" +
                "ROW FORMAT DELIMITED " +
                "FIELDS TERMINATED BY '\t'" +
                "STORED AS TEXTFILE";
        preparedStatement=connection.prepareStatement(sql);
        preparedStatement.executeUpdate();
    }
    @SneakyThrows
    public Boolean updateTable(String sql,String... p){
        getConnection();
        preparedStatement=connection.prepareStatement(sql,ResultSet.TYPE_SCROLL_INSENSITIVE,ResultSet.CONCUR_UPDATABLE);
        if(p!=null)
        for(int i=0;i<p.length;i++){
            preparedStatement.setString(i+1,p[i]);
        }
        boolean row=preparedStatement.execute();
        return row;
    }
    @SneakyThrows
    public ResultSet queryTable(String sql,String... p){
        getConnection();
        List<Letter> letterList=new ArrayList<>();
        preparedStatement=connection.prepareStatement(sql,ResultSet.TYPE_SCROLL_INSENSITIVE,ResultSet.CONCUR_UPDATABLE);
        if(p!=null)
            for(int i=0;i<p.length;i++){
                preparedStatement.setString(i+1,p[i]);
            }
        ResultSet resultSet=preparedStatement.executeQuery();
        return resultSet;
    }
    @SneakyThrows
    public void closeConnection(){
        if(preparedStatement!=null) preparedStatement.close();
        if(connection!=null) connection.close();
    }

}
package com.example.letterproject.dao;

import com.example.letterproject.utils.LetterHive;

import java.sql.ResultSet;

public class LetterHiveDao{
    private LetterHive letterHive;

    public LetterHiveDao() {
        this.letterHive = new LetterHive();
    }


    public void createTable() {
        letterHive.createTable();
        letterHive.closeConnection();
    }


    public ResultSet selectLetter(String sql, String... p) {
        return letterHive.queryTable(sql,p);
    }


    public Boolean updateLetter(String sql,String... p) {
        return letterHive.updateTable(sql,p);
    }
    public void close(){
        letterHive.closeConnection();
    }
}
package com.example.letterproject.service;

import com.example.letterproject.dao.LetterHiveDao;

public class LetterHiveService {
    private LetterHiveDao letterHiveDao;

    public LetterHiveService() {
        this.letterHiveDao = new LetterHiveDao();
    }
    public void createLetterTable(){
        letterHiveDao.createTable();
    }

    public void loadLetter(String inpath) {
        String sql="load data inpath ? overwrite into table letters";
        letterHiveDao.updateLetter(sql,inpath);
        System.out.println("导入成功");
    }
    public void selectLetterByStartTime(){
        letterHiveDao.updateLetter("drop table letter_start_num");
        String sql="create table if not exists letter_start_num row format delimited fields terminated by '\t' stored as textfile as " +
                "select year(letterDate) as year,month(letterDate) as month,count(*) as letter_num from letters group by year(letterDate),month(letterDate) order by year(letterDate),month(letterDate)";
        letterHiveDao.updateLetter(sql);
        System.out.println("查询成功");
    }
    public void selectLetterByEndTime(){
        letterHiveDao.updateLetter("drop table letter_end_num");
        String sql= "create table if not exists letter_end_num row format delimited fields terminated by '\t' stored as textfile as " +
                "select year(replyDate) as year,month(replyDate) as month,count(*) as letter_num from letters group by year(replyDate),month(replyDate) order by year(replyDate),month(replyDate)";
        letterHiveDao.updateLetter(sql);
        System.out.println("查询成功");
    }
    public void selectLetterByToad(){
        letterHiveDao.updateLetter("drop table letter_toad");
        String sql= "create table if not exists letter_toad row format delimited fields terminated by '\t' stored as textfile as " +
                "select datediff(replyDate,letterDate) as toad,count(*) as toad_num from letters group by datediff(replyDate,letterDate) order by datediff(replyDate,letterDate)";
        letterHiveDao.updateLetter(sql);
        System.out.println("查询成功");
    }
    public void selectLetterByOrganization(){
        letterHiveDao.updateLetter("drop table letter_organization");
        String sql= "create table if not exists letter_organization row format delimited fields terminated by '\t' stored as textfile as " +
                "select replyOrganization as organization,count(*) as letter_num from letters group by replyOrganization order by replyOrganization";
        letterHiveDao.updateLetter(sql);
        System.out.println("查询成功");
    }
    public void selectLetterByType(){
        letterHiveDao.updateLetter("drop table letter_type");
        String sql= "create table if not exists letter_type row format delimited fields terminated by '\t' stored as textfile as " +
                "select letterType as type,count(*) as letter_num from letters group by letterType order by letterType";
        letterHiveDao.updateLetter(sql);
        System.out.println("查询成功");
    }

}

数据初始化

package com.example.letterproject.utils;

import com.example.letterproject.service.LetterDateService;
import com.example.letterproject.service.LetterHiveService;
import com.jcraft.jsch.ChannelExec;
import com.jcraft.jsch.JSch;
import com.jcraft.jsch.Session;
import lombok.SneakyThrows;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;


import java.io.InputStream;

@Component
public class LetterData implements CommandLineRunner {

    @Autowired
    private LetterDateService letterDateService;


    @SneakyThrows
    private  boolean sshHost(String commond){
        JSch jsch = new JSch();
        Session session = jsch.getSession("bill","billsaifu", 22);
        session.setPassword("732338737q");
        session.setConfig("StrictHostKeyChecking", "no");
        session.connect();

        ChannelExec channelExec = (ChannelExec) session.openChannel("exec");
        channelExec.setCommand(commond);

        InputStream in = channelExec.getInputStream();
        channelExec.connect();
        boolean flag=false;

        byte[] tmp = new byte[1024];
        while (true) {
            while (in.available() > 0) {
                int i = in.read(tmp, 0, 1024);
                if (i < 0) break;
                System.out.print(new String(tmp, 0, i));
            }
            if (channelExec.isClosed()) {
                if (in.available() > 0) continue;
                flag=channelExec.getExitStatus()==0;
                System.out.println("Exit status: " + channelExec.getExitStatus());
                break;
            }
        }
        channelExec.disconnect();
        session.disconnect();
        return flag;
    }
    @SneakyThrows
    public  void  initData() {
        boolean flag=false;
        System.out.println("==================开始爬取信件数据==================");
        LetterUtils letterUtils=new LetterUtils();
        letterUtils.loadLetters();
        System.out.println("==================爬取完成==================");
        System.out.println("本地上传数据到HDFS");
        LetterHDFS letterHDFS=new LetterHDFS();
        letterHDFS.init();
        letterHDFS.DelFile("/user/bill/letter_res");
        letterHDFS.CpFile("LetterProject/src/main/resources/static/data/letter_htmls","/user/bill/");
        System.out.println("==================网页数据上传完成==================");
        System.out.println("==================开始mapreduce清洗数据");
        flag=sshHost("cd test;hadoop jar LP.jar org.example.Main /user/bill/letter_htmls/www.beijing.gov.cn/  /user/bill/letter_res");
        if(flag) System.out.println("==================mapreduce成功==================");
        else {
            System.out.println("==================mapreduce失败==================");
            return;
        }
        LetterHiveService letterHiveService=new LetterHiveService();
        letterHiveService.createLetterTable();
        System.out.println("==================hive表格创建成功==================");

        if(letterHDFS.isFile("/user/bill/letter_res/part-r-00000"))
        letterHiveService.loadLetter("/user/bill/letter_res/part-r-00000");
        else {
            System.out.println("==================part-r-00000不存在==================");
            return;
        }
        letterHDFS.closeFS();
        letterHiveService.selectLetterByStartTime();
        letterHiveService.selectLetterByEndTime();
        letterHiveService.selectLetterByType();
        letterHiveService.selectLetterByToad();
        letterHiveService.selectLetterByOrganization();
        System.out.println("==================hive表格数据上传成功==================");
        letterDateService.init();
        flag=sshHost("sqoop export  " +
                "  --connect jdbc:mysql://billsaifu:3306/school?characterEncoding=utf8 " +
                "  --username root " +
                "  --password 123456 " +
                "  --table letter_start_num " +
                "  --export-dir /user/hive/warehouse/school.db/letter_start_num " +
                "  --input-fields-terminated-by \"\\t\" ");
        if (flag){
            System.out.println("==================letter_start_num导出成功==================");
        }
        else {
            System.out.println("==================letter_start_num导出失败==================");
            return;
        }
        flag=sshHost("sqoop export  " +
                "  --connect jdbc:mysql://billsaifu:3306/school?characterEncoding=utf8 " +
                "  --username root " +
                "  --password 123456 " +
                "  --table letter_end_num " +
                "  --export-dir /user/hive/warehouse/school.db/letter_end_num " +
                "  --input-fields-terminated-by \"\\t\" ");
        if (flag){
            System.out.println("==================letter_end_num导出成功==================");
        }
        else {
            System.out.println("==================letter_end_num导出失败==================");
            return;
        }
        flag=sshHost("sqoop export  " +
                "  --connect jdbc:mysql://billsaifu:3306/school?characterEncoding=utf8 " +
                "  --username root " +
                "  --password 123456 " +
                "  --table letter_type " +
                "  --export-dir /user/hive/warehouse/school.db/letter_type " +
                "  --input-fields-terminated-by \"\\t\" ");
        if (flag){
            System.out.println("==================letter_type导出成功==================");
        }
        else {
            System.out.println("==================letter_type导出失败==================");
            return;
        }

        flag=sshHost("sqoop export  " +
                "  --connect jdbc:mysql://billsaifu:3306/school?characterEncoding=utf8 " +
                "  --username root " +
                "  --password 123456 " +
                "  --table letter_toad " +
                "  --export-dir /user/hive/warehouse/school.db/letter_toad " +
                "  --input-fields-terminated-by \"\\t\" ");
        if (flag){
            System.out.println("==================letter_toad导出成功==================");
        }
        else {
            System.out.println("==================letter_toad导出失败==================");
            return;
        }

        flag=sshHost("sqoop export  " +
                "  --connect jdbc:mysql://billsaifu:3306/school?characterEncoding=utf8 " +
                "  --username root " +
                "  --password 123456 " +
                "  --table letter_organization " +
                "  --export-dir /user/hive/warehouse/school.db/letter_organization " +
                "  --input-fields-terminated-by \"\\t\" ");
        if (flag){
            System.out.println("==================letter_organization导出成功==================");
        }
        else {
            System.out.println("==================letter_organization导出失败==================");
            return;
        }
    }

    @Override
    public void run(String... args) throws Exception {
        System.out.println("==================初始化获取百姓信件数据==================");
        initData();
        System.out.println("==================信件数据初始化成功==================");
    }
}

上面是部分源码

posted @ 2023-08-31 22:06  突破铁皮  阅读(41)  评论(0)    收藏  举报