24.10.29

实验3
熟悉常用的HBase操作

1.实验目的
(1)理解HBase在Hadoop体系结构中的角色;
(2)熟练使用HBase操作常用的Shell命令;
(3)熟悉HBase操作常用的Java API。
2.实验平台
(1)操作系统:Linux(建议Ubuntu16.04或Ubuntu18.04);
(2)Hadoop版本:2.7.3;
(3)HBase版本:2.7.3;
(4)JDK版本:1.8;
(5)Java IDE:IDEA。
3. 实验步骤
(一)编程实现以下指定功能,并用Hadoop提供的HBase Shell命令完成相同任务:
(1) 列出HBase所有的表的相关信息,例如表名;
list
(2) 在终端打印出指定的表的所有记录数据;
scan '表名'

(3) 向已经创建好的表添加和删除指定的列族或列;
disable '表名'
alter '表名', NAME => '新列族'
enable '表名'
(4) 清空指定的表的所有记录数据;
truncate '表名'

(5) 统计表的行数。
count '表名'

和hdfs操作一样,先将虚机上所有库下载下来导入添加为库
(6) package org.example.hbase;

import org.apache.hadoop.hbase.client.;
import org.apache.hadoop.hbase.
;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class HBaseOperations {
private static Connection connection;
private static Admin admin;

static {
    try {
        connection = ConnectionFactory.createConnection(HBaseConfig.getConfig());
        admin = connection.getAdmin();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

// (1) 列出所有表信息
public static void listTables() throws IOException {
    HTableDescriptor[] tableDescriptors = admin.listTables();
    System.out.println("HBase Tables:");
    for (HTableDescriptor table : tableDescriptors) {
        System.out.println(" - " + table.getTableName());
    }
}

// (2) 打印指定表的所有记录
public static void printTable(String tableName) throws IOException {
    Table table = connection.getTable(TableName.valueOf(tableName));
    Scan scan = new Scan();
    ResultScanner scanner = table.getScanner(scan);

    for (Result result : scanner) {
        System.out.println("Row: " + Bytes.toString(result.getRow()));
        for (Cell cell : result.rawCells()) {
            System.out.println(" - Column: " + Bytes.toString(CellUtil.cloneFamily(cell)) + ":" +
                    Bytes.toString(CellUtil.cloneQualifier(cell)) +
                    ", Value: " + Bytes.toString(CellUtil.cloneValue(cell)));
        }
    }
    table.close();
}

// (3) 添加和删除列族
public static void modifyColumnFamily(String tableName, String columnFamily, boolean add) throws IOException {
    TableName table = TableName.valueOf(tableName);
    if (!admin.tableExists(table)) {
        System.out.println("Table does not exist.");
        return;
    }

    if (add) {
        HColumnDescriptor columnDescriptor = new HColumnDescriptor(columnFamily);
        admin.addColumn(table, columnDescriptor);
        System.out.println("Added column family: " + columnFamily);
    } else {

        admin.deleteColumn(table, Bytes.toBytes(columnFamily));
        System.out.println("Deleted column family: " + columnFamily);
    }
}

// (4) 清空表中的所有记录
public static void truncateTable(String tableName) throws IOException {
    TableName table = TableName.valueOf(tableName);
    if (admin.tableExists(table)) {
        admin.disableTable(table);
        admin.truncateTable(table, true);
        System.out.println("Table " + tableName + " truncated.");
    } else {
        System.out.println("Table does not exist.");
    }
}

// (5) 统计表的行数
public static void countRows(String tableName) throws IOException {
    Table table = connection.getTable(TableName.valueOf(tableName));
    Scan scan = new Scan();
    ResultScanner scanner = table.getScanner(scan);

    int rowCount = 0;
    for (Result ignored : scanner) {
        rowCount++;
    }
    System.out.println("Total rows in table " + tableName + ": " + rowCount);
    table.close();
}

// 关闭连接
public static void closeConnection() {
    try {
        if (admin != null) admin.close();
        if (connection != null) connection.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

public static void main(String[] args) {
    try {
        // 测试功能
        String tableName = "shujuku";
        listTables();
        printTable(tableName);
        modifyColumnFamily(tableName, "NewColumnFamily", true);
        modifyColumnFamily(tableName, "NewColumnFamily2", true);
        modifyColumnFamily(tableName, "NewColumnFamily", false);
        truncateTable(tableName);
        countRows(tableName);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        closeConnection();
    }
}

}

(二)HBase数据库操作

  1. 现有以下关系型数据库中的表和数据(见表14-3到表14-5),要求将其转换为适合于HBase存储的表并插入数据:
    表14-3 学生表(Student)
    学号(S_No) 姓名(S_Name) 性别(S_Sex) 年龄(S_Age)
    2015001 Zhangsan male 23
    2015002 Mary female 22
    2015003 Lisi male 24

表14-4 课程表(Course)
课程号(C_No) 课程名(C_Name) 学分(C_Credit)
123001 Math 2.0
123002 Computer Science 5.0
123003 English 3.0

表14-5 选课表(SC)
学号(SC_Sno) 课程号(SC_Cno) 成绩(SC_Score)
2015001 123001 86
2015001 123003 69
2015002 123002 77
2015002 123003 99
2015003 123001 98
2015003 123002 95

package org.example.hbase;

import org.apache.hadoop.hbase.client.;
import org.apache.hadoop.hbase.
;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class HBaseOperations1 {
private static Connection connection;
private static Admin admin;

static {
    try {
        connection = ConnectionFactory.createConnection(HBaseConfig.getConfig());
        admin = connection.getAdmin();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

// 创建表
public static void createTable(String tableName, String[] columnFamilies) throws IOException {
    TableName table = TableName.valueOf(tableName);
    if (admin.tableExists(table)) {
        System.out.println("Table " + tableName + " exists, deleting...");
        admin.disableTable(table);
        admin.deleteTable(table);
    }
    HTableDescriptor tableDescriptor = new HTableDescriptor(table);
    for (String cf : columnFamilies) {
        tableDescriptor.addFamily(new HColumnDescriptor(cf));
    }
    admin.createTable(tableDescriptor);
    System.out.println("Table " + tableName + " created successfully.");
}

// 插入数据
public static void addRecord(String tableName, String rowKey, String columnFamily, String column, String value) throws IOException {
    Table table = connection.getTable(TableName.valueOf(tableName));
    Put put = new Put(Bytes.toBytes(rowKey));
    put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
    table.put(put);
    table.close();
    System.out.println("Inserted record into table " + tableName + ": RowKey=" + rowKey + ", " + columnFamily + ":" + column + "=" + value);
}

// 关闭连接
public static void closeConnection() {
    try {
        if (admin != null) admin.close();
        if (connection != null) connection.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
public static void main(String[] args) {
    try {
        // 1. 创建学生表
        String studentTable = "Student";
        String[] studentCF = {"info"};
        createTable(studentTable, studentCF);

        // 插入学生表数据
        addRecord(studentTable, "2015001", "info", "name", "Zhangsan");
        addRecord(studentTable, "2015001", "info", "sex", "male");
        addRecord(studentTable, "2015001", "info", "age", "23");

        addRecord(studentTable, "2015002", "info", "name", "Mary");
        addRecord(studentTable, "2015002", "info", "sex", "female");
        addRecord(studentTable, "2015002", "info", "age", "22");

        addRecord(studentTable, "2015003", "info", "name", "Lisi");
        addRecord(studentTable, "2015003", "info", "sex", "male");
        addRecord(studentTable, "2015003", "info", "age", "24");

        // 2. 创建课程表
        String courseTable = "Course";
        String[] courseCF = {"details"};
        createTable(courseTable, courseCF);

        // 插入课程表数据
        addRecord(courseTable, "123001", "details", "name", "Math");
        addRecord(courseTable, "123001", "details", "credit", "2.0");

        addRecord(courseTable, "123002", "details", "name", "Computer Science");
        addRecord(courseTable, "123002", "details", "credit", "5.0");

        addRecord(courseTable, "123003", "details", "name", "English");
        addRecord(courseTable, "123003", "details", "credit", "3.0");

        // 3. 创建选课表
        String scTable = "SC";
        String[] scCF = {"score"};
        createTable(scTable, scCF);

        // 插入选课表数据
        addRecord(scTable, "2015001:123001", "score", "score", "86");
        addRecord(scTable, "2015001:123003", "score", "score", "69");
        addRecord(scTable, "2015002:123002", "score", "score", "77");
        addRecord(scTable, "2015002:123003", "score", "score", "99");
        addRecord(scTable, "2015003:123001", "score", "score", "98");
        addRecord(scTable, "2015003:123002", "score", "score", "95");

    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        closeConnection();
    }
}

}

  1. 请编程实现以下功能:
    (1)createTable(String tableName, String[] fields)
    创建表,参数tableName为表的名称,字符串数组fields为存储记录各个字段名称的数组。要求当HBase已经存在名为tableName的表的时候,先删除原有的表,然后再创建新的表。
    (2)addRecord(String tableName, String row, String[] fields, String[] values)
    向表tableName、行row(用S_Name表示)和字符串数组fields指定的单元格中添加对应的数据values。其中,fields中每个元素如果对应的列族下还有相应的列限定符的话,用“columnFamily:column”表示。例如,同时向“Math”、“Computer Science”、“English”三列添加成绩时,字符串数组fields为{“Score:Math”, ”Score:Computer Science”, ”Score:English”},数组values存储这三门课的成绩。
    (3)scanColumn(String tableName, String column)
    浏览表tableName某一列的数据,如果某一行记录中该列数据不存在,则返回null。要求当参数column为某一列族名称时,如果底下有若干个列限定符,则要列出每个列限定符代表的列的数据;当参数column为某一列具体名称(例如“Score:Math”)时,只需要列出该列的数据。
    (4)modifyData(String tableName, String row, String column)
    修改表tableName,行row(可以用学生姓名S_Name表示),列column指定的单元格的数据。
    (5)deleteRow(String tableName, String row)
    删除表tableName中row指定的行的记录。

package org.example.hbase;

import org.apache.hadoop.hbase.client.;
import org.apache.hadoop.hbase.
;
import org.apache.hadoop.hbase.util.Bytes;
import org.example.hbase.HBaseConfig;

import java.io.IOException;

public class HBaseOperations2 {
private static Connection connection;
private static Admin admin;

static {
    try {
        connection = ConnectionFactory.createConnection(HBaseConfig.getConfig());
        admin = connection.getAdmin();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

// 创建表
// 创建表
public static void createTable(String tableName, String[] fields) throws IOException {
    TableName table = TableName.valueOf(tableName);

    // 获取 Admin 对象
    Admin admin = connection.getAdmin();

    // 如果表存在,先删除表
    if (admin.tableExists(table)) {
        System.out.println("Table " + tableName + " already exists. Deleting it...");
        admin.disableTable(table);
        admin.deleteTable(table);
        System.out.println("Deleted table " + tableName);
    }

    // 创建表描述符
    HTableDescriptor tableDescriptor = new HTableDescriptor(table);

    // 根据 fields 添加列族
    for (String field : fields) {
        String[] parts = field.split(":");
        String columnFamily = parts[0];
        if (!tableDescriptor.hasFamily(Bytes.toBytes(columnFamily))) {
            tableDescriptor.addFamily(new HColumnDescriptor(columnFamily));
        }
    }

    // 创建表
    admin.createTable(tableDescriptor);
    System.out.println("Table " + tableName + " created successfully.");

    admin.close();
}


// 添加记录
// 添加记录
public static void addRecord(String tableName, String row, String[] fields, String[] values) throws IOException {
    if (fields.length != values.length) {
        throw new IllegalArgumentException("Fields and values arrays must have the same length.");
    }

    Table table = connection.getTable(TableName.valueOf(tableName));
    Put put = new Put(Bytes.toBytes(row));

    for (int i = 0; i < fields.length; i++) {
        String field = fields[i];
        String[] parts = field.split(":");
        if (parts.length != 2) {
            throw new IllegalArgumentException("Invalid field format. Expected 'columnFamily:column', got: " + field);
        }

        String columnFamily = parts[0];
        String column = parts[1];
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(values[i]));
    }

    table.put(put);
    table.close();
    System.out.println("Record added to table " + tableName + " with row key: " + row);
}


// 浏览列数据
public static void scanColumn(String tableName, String column) throws IOException {
    Table table = connection.getTable(TableName.valueOf(tableName));
    Scan scan = new Scan();
    ResultScanner scanner = table.getScanner(scan);
    for (Result result : scanner) {
        if (column.contains(":")) {
            String[] parts = column.split(":");
            String columnFamily = parts[0];
            String qualifier = parts[1];
            byte[] value = result.getValue(Bytes.toBytes(columnFamily), Bytes.toBytes(qualifier));
            System.out.println("Row: " + Bytes.toString(result.getRow()) + ", Column: " + column + ", Value: " + Bytes.toString(value));
        } else {
            for (Cell cell : result.rawCells()) {
                if (Bytes.toString(CellUtil.cloneFamily(cell)).equals(column)) {
                    System.out.println("Row: " + Bytes.toString(result.getRow()) + ", Column: " + Bytes.toString(CellUtil.cloneQualifier(cell)) + ", Value: " + Bytes.toString(CellUtil.cloneValue(cell)));
                }
            }
        }
    }
    table.close();
}

// 修改数据
public static void modifyData(String tableName, String row, String column, String newValue) throws IOException {
    Table table = connection.getTable(TableName.valueOf(tableName));
    String[] parts = column.split(":");
    String columnFamily = parts[0];
    String qualifier = parts.length > 1 ? parts[1] : "";
    Put put = new Put(Bytes.toBytes(row));
    put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(qualifier), Bytes.toBytes(newValue));
    table.put(put);
    table.close();
    System.out.println("Data modified in table " + tableName);
}

// 删除行
public static void deleteRow(String tableName, String row) throws IOException {
    Table table = connection.getTable(TableName.valueOf(tableName));
    Delete delete = new Delete(Bytes.toBytes(row));
    table.delete(delete);
    table.close();
    System.out.println("Row " + row + " deleted from table " + tableName);
}

// 关闭连接
public static void closeConnection() {
    try {
        if (admin != null) admin.close();
        if (connection != null) connection.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

public static void main(String[] args) {
    try {
        String tableName = "Students";
        String[] fields = {"Score:Math", "Score:Computer Science", "Score:English"};
        String[] values = {"85", "90", "88"};
        String row = "John";

        // 测试功能
        createTable(tableName, fields);
        addRecord(tableName, row, fields, values);
        scanColumn(tableName, "Score:Math");
        modifyData(tableName, row, "Score:Math", "95");
        scanColumn(tableName, "Score:Math");
        deleteRow(tableName, row);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        closeConnection();
    }
}

}

posted @ 2024-10-29 20:05  起名字真难_qmz  阅读(12)  评论(0)    收藏  举报