24.10.29
实验3
熟悉常用的HBase操作
1.实验目的
(1)理解HBase在Hadoop体系结构中的角色;
(2)熟练使用HBase操作常用的Shell命令;
(3)熟悉HBase操作常用的Java API。
2.实验平台
(1)操作系统:Linux(建议Ubuntu16.04或Ubuntu18.04);
(2)Hadoop版本:2.7.3;
(3)HBase版本:2.7.3;
(4)JDK版本:1.8;
(5)Java IDE:IDEA。
3. 实验步骤
(一)编程实现以下指定功能,并用Hadoop提供的HBase Shell命令完成相同任务:
(1) 列出HBase所有的表的相关信息,例如表名;
list
(2) 在终端打印出指定的表的所有记录数据;
scan '表名'
(3) 向已经创建好的表添加和删除指定的列族或列;
disable '表名'
alter '表名', NAME => '新列族'
alter '表名', NAME => '待删除的列族', METHOD => 'delete'
enable '表名'
(4) 清空指定的表的所有记录数据;
truncate '表名'
(5) 统计表的行数。
count '表名'
与HDFS实验的做法一样,先将虚拟机上HBase的所有依赖库(jar包)下载到本地,再在IDE中导入并添加为项目的库。
(6) Java API编程实现:
package org.example.hbase;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class HBaseOperations {
private static Connection connection;
private static Admin admin;
static {
try {
connection = ConnectionFactory.createConnection(HBaseConfig.getConfig());
admin = connection.getAdmin();
} catch (IOException e) {
e.printStackTrace();
}
}
// (1) 列出所有表信息
public static void listTables() throws IOException {
HTableDescriptor[] tableDescriptors = admin.listTables();
System.out.println("HBase Tables:");
for (HTableDescriptor table : tableDescriptors) {
System.out.println(" - " + table.getTableName());
}
}
// (2) 打印指定表的所有记录
public static void printTable(String tableName) throws IOException {
Table table = connection.getTable(TableName.valueOf(tableName));
Scan scan = new Scan();
ResultScanner scanner = table.getScanner(scan);
for (Result result : scanner) {
System.out.println("Row: " + Bytes.toString(result.getRow()));
for (Cell cell : result.rawCells()) {
System.out.println(" - Column: " + Bytes.toString(CellUtil.cloneFamily(cell)) + ":" +
Bytes.toString(CellUtil.cloneQualifier(cell)) +
", Value: " + Bytes.toString(CellUtil.cloneValue(cell)));
}
}
table.close();
}
// (3) 添加和删除列族
public static void modifyColumnFamily(String tableName, String columnFamily, boolean add) throws IOException {
TableName table = TableName.valueOf(tableName);
if (!admin.tableExists(table)) {
System.out.println("Table does not exist.");
return;
}
if (add) {
HColumnDescriptor columnDescriptor = new HColumnDescriptor(columnFamily);
admin.addColumn(table, columnDescriptor);
System.out.println("Added column family: " + columnFamily);
} else {
admin.deleteColumn(table, Bytes.toBytes(columnFamily));
System.out.println("Deleted column family: " + columnFamily);
}
}
// (4) 清空表中的所有记录
public static void truncateTable(String tableName) throws IOException {
TableName table = TableName.valueOf(tableName);
if (admin.tableExists(table)) {
admin.disableTable(table);
admin.truncateTable(table, true);
System.out.println("Table " + tableName + " truncated.");
} else {
System.out.println("Table does not exist.");
}
}
// (5) 统计表的行数
public static void countRows(String tableName) throws IOException {
Table table = connection.getTable(TableName.valueOf(tableName));
Scan scan = new Scan();
ResultScanner scanner = table.getScanner(scan);
int rowCount = 0;
for (Result ignored : scanner) {
rowCount++;
}
System.out.println("Total rows in table " + tableName + ": " + rowCount);
table.close();
}
// 关闭连接
public static void closeConnection() {
try {
if (admin != null) admin.close();
if (connection != null) connection.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
try {
// 测试功能
String tableName = "shujuku";
listTables();
printTable(tableName);
modifyColumnFamily(tableName, "NewColumnFamily", true);
modifyColumnFamily(tableName, "NewColumnFamily2", true);
modifyColumnFamily(tableName, "NewColumnFamily", false);
truncateTable(tableName);
countRows(tableName);
} catch (IOException e) {
e.printStackTrace();
} finally {
closeConnection();
}
}
}
(二)HBase数据库操作
- 现有以下关系型数据库中的表和数据(见表14-3到表14-5),要求将其转换为适合于HBase存储的表并插入数据:
表14-3 学生表(Student)
学号(S_No) 姓名(S_Name) 性别(S_Sex) 年龄(S_Age)
2015001 Zhangsan male 23
2015002 Mary female 22
2015003 Lisi male 24
表14-4 课程表(Course)
课程号(C_No) 课程名(C_Name) 学分(C_Credit)
123001 Math 2.0
123002 Computer Science 5.0
123003 English 3.0
表14-5 选课表(SC)
学号(SC_Sno) 课程号(SC_Cno) 成绩(SC_Score)
2015001 123001 86
2015001 123003 69
2015002 123002 77
2015002 123003 99
2015003 123001 98
2015003 123002 95
package org.example.hbase;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Converts the relational Student / Course / SC tables into HBase tables and loads
 * their sample data.
 *
 * <p>All methods share one {@link Connection} and one {@link Admin} that are opened when the
 * class is loaded; call {@link #closeConnection()} once when finished.
 */
public class HBaseOperations1 {
    private static Connection connection;
    private static Admin admin;

    static {
        try {
            connection = ConnectionFactory.createConnection(HBaseConfig.getConfig());
            admin = connection.getAdmin();
        } catch (IOException e) {
            // Fail fast with the real cause instead of leaving the fields null and
            // surfacing an unrelated NullPointerException on the first call.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Creates a table with the given column families, replacing any existing table of the
     * same name.
     *
     * @param tableName      name of the table to (re)create
     * @param columnFamilies names of the column families; must not be empty
     * @throws IllegalArgumentException if {@code columnFamilies} is empty
     * @throws IOException              if dropping or creating the table fails
     */
    public static void createTable(String tableName, String[] columnFamilies) throws IOException {
        if (columnFamilies.length == 0) {
            throw new IllegalArgumentException("At least one column family is required for table " + tableName);
        }
        TableName table = TableName.valueOf(tableName);

        // Build the new schema BEFORE touching the old table, so an invalid family
        // name cannot leave us with the old table deleted and nothing created.
        HTableDescriptor tableDescriptor = new HTableDescriptor(table);
        for (String cf : columnFamilies) {
            tableDescriptor.addFamily(new HColumnDescriptor(cf));
        }

        if (admin.tableExists(table)) {
            System.out.println("Table " + tableName + " exists, deleting...");
            // Disabling an already-disabled table is rejected by HBase.
            if (admin.isTableEnabled(table)) {
                admin.disableTable(table);
            }
            admin.deleteTable(table);
        }
        admin.createTable(tableDescriptor);
        System.out.println("Table " + tableName + " created successfully.");
    }

    /**
     * Writes a single cell.
     *
     * @param tableName    table to write to
     * @param rowKey       row key of the cell
     * @param columnFamily column family of the cell
     * @param column       column qualifier of the cell
     * @param value        value to store
     * @throws IOException if the table cannot be opened or the write fails
     */
    public static void addRecord(String tableName, String rowKey, String columnFamily, String column, String value) throws IOException {
        Put put = new Put(Bytes.toBytes(rowKey));
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
        // try-with-resources releases the table handle even when the put fails.
        try (Table table = connection.getTable(TableName.valueOf(tableName))) {
            table.put(put);
        }
        System.out.println("Inserted record into table " + tableName + ": RowKey=" + rowKey + ", " + columnFamily + ":" + column + "=" + value);
    }

    /**
     * Closes the shared admin handle and connection. A failure to close one of them is
     * reported and does not prevent the other from being closed.
     */
    public static void closeConnection() {
        closeAndReport(admin);
        closeAndReport(connection);
    }

    /** Closes a resource if present, printing (not propagating) any close failure. */
    private static void closeAndReport(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Recreates the Student, Course and SC tables and loads the sample rows. */
    public static void main(String[] args) {
        try {
            // 1. Student table: row key = student number, one "info" family.
            String studentTable = "Student";
            String[] studentCF = {"info"};
            createTable(studentTable, studentCF);
            addRecord(studentTable, "2015001", "info", "name", "Zhangsan");
            addRecord(studentTable, "2015001", "info", "sex", "male");
            addRecord(studentTable, "2015001", "info", "age", "23");
            addRecord(studentTable, "2015002", "info", "name", "Mary");
            addRecord(studentTable, "2015002", "info", "sex", "female");
            addRecord(studentTable, "2015002", "info", "age", "22");
            addRecord(studentTable, "2015003", "info", "name", "Lisi");
            addRecord(studentTable, "2015003", "info", "sex", "male");
            addRecord(studentTable, "2015003", "info", "age", "24");

            // 2. Course table: row key = course number, one "details" family.
            String courseTable = "Course";
            String[] courseCF = {"details"};
            createTable(courseTable, courseCF);
            addRecord(courseTable, "123001", "details", "name", "Math");
            addRecord(courseTable, "123001", "details", "credit", "2.0");
            addRecord(courseTable, "123002", "details", "name", "Computer Science");
            addRecord(courseTable, "123002", "details", "credit", "5.0");
            addRecord(courseTable, "123003", "details", "name", "English");
            addRecord(courseTable, "123003", "details", "credit", "3.0");

            // 3. SC table: row key = "<student number>:<course number>", one "score" family.
            String scTable = "SC";
            String[] scCF = {"score"};
            createTable(scTable, scCF);
            addRecord(scTable, "2015001:123001", "score", "score", "86");
            addRecord(scTable, "2015001:123003", "score", "score", "69");
            addRecord(scTable, "2015002:123002", "score", "score", "77");
            addRecord(scTable, "2015002:123003", "score", "score", "99");
            addRecord(scTable, "2015003:123001", "score", "score", "98");
            addRecord(scTable, "2015003:123002", "score", "score", "95");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeConnection();
        }
    }
}
- 请编程实现以下功能:
(1)createTable(String tableName, String[] fields)
创建表,参数tableName为表的名称,字符串数组fields为存储记录各个字段名称的数组。要求当HBase已经存在名为tableName的表的时候,先删除原有的表,然后再创建新的表。
(2)addRecord(String tableName, String row, String[] fields, String[] values)
向表tableName、行row(用S_Name表示)和字符串数组fields指定的单元格中添加对应的数据values。其中,fields中每个元素如果对应的列族下还有相应的列限定符的话,用“columnFamily:column”表示。例如,同时向“Math”、“Computer Science”、“English”三列添加成绩时,字符串数组fields为{"Score:Math", "Score:Computer Science", "Score:English"},数组values存储这三门课的成绩。
(3)scanColumn(String tableName, String column)
浏览表tableName某一列的数据,如果某一行记录中该列数据不存在,则返回null。要求当参数column为某一列族名称时,如果底下有若干个列限定符,则要列出每个列限定符代表的列的数据;当参数column为某一列具体名称(例如“Score:Math”)时,只需要列出该列的数据。
(4)modifyData(String tableName, String row, String column)
修改表tableName,行row(可以用学生姓名S_Name表示),列column指定的单元格的数据。
(5)deleteRow(String tableName, String row)
删除表tableName中row指定的行的记录。
package org.example.hbase;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.example.hbase.HBaseConfig;
import java.io.IOException;
/**
 * Generic HBase helpers: create a table, add a record, scan a column, modify a cell and
 * delete a row.
 *
 * <p>Columns are addressed as {@code "family"} or {@code "family:qualifier"}. Only the first
 * {@code ':'} separates family from qualifier, so a qualifier may itself contain {@code ':'}.
 *
 * <p>All methods share one {@link Connection} and one {@link Admin} that are opened when the
 * class is loaded; call {@link #closeConnection()} once when finished.
 */
public class HBaseOperations2 {
    private static Connection connection;
    private static Admin admin;

    static {
        try {
            connection = ConnectionFactory.createConnection(HBaseConfig.getConfig());
            admin = connection.getAdmin();
        } catch (IOException e) {
            // Fail fast with the real cause instead of leaving the fields null and
            // surfacing an unrelated NullPointerException on the first call.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Splits {@code "family[:qualifier]"} at the first {@code ':'}.
     *
     * @return a one-element array {@code {family}} when there is no {@code ':'}, otherwise
     *         {@code {family, qualifier}} (the qualifier may be empty)
     * @throws IllegalArgumentException if the family part is empty
     */
    private static String[] splitColumn(String column) {
        // limit = 2 keeps a trailing empty qualifier ("cf:") and any further ':' in the qualifier.
        String[] parts = column.split(":", 2);
        if (parts[0].isEmpty()) {
            throw new IllegalArgumentException("Column family must not be empty, got: " + column);
        }
        return parts;
    }

    /**
     * Creates a table whose column families are derived from {@code fields}, replacing any
     * existing table of the same name.
     *
     * @param tableName name of the table to (re)create
     * @param fields    column names in {@code "family"} or {@code "family:qualifier"} form;
     *                  each distinct family becomes a column family; must not be empty
     * @throws IllegalArgumentException if {@code fields} is empty or a family part is empty
     * @throws IOException              if dropping or creating the table fails
     */
    public static void createTable(String tableName, String[] fields) throws IOException {
        if (fields.length == 0) {
            throw new IllegalArgumentException("At least one field is required for table " + tableName);
        }
        TableName table = TableName.valueOf(tableName);

        // Build the new schema BEFORE touching the old table, so an invalid field
        // cannot leave us with the old table deleted and nothing created.
        HTableDescriptor tableDescriptor = new HTableDescriptor(table);
        for (String field : fields) {
            String columnFamily = splitColumn(field)[0];
            if (!tableDescriptor.hasFamily(Bytes.toBytes(columnFamily))) {
                tableDescriptor.addFamily(new HColumnDescriptor(columnFamily));
            }
        }

        // Uses the shared admin handle (released by closeConnection) rather than a
        // second, shadowing handle that leaked whenever an operation threw.
        if (admin.tableExists(table)) {
            System.out.println("Table " + tableName + " already exists. Deleting it...");
            // Disabling an already-disabled table is rejected by HBase.
            if (admin.isTableEnabled(table)) {
                admin.disableTable(table);
            }
            admin.deleteTable(table);
            System.out.println("Deleted table " + tableName);
        }
        admin.createTable(tableDescriptor);
        System.out.println("Table " + tableName + " created successfully.");
    }

    /**
     * Writes several cells of one row in a single put.
     *
     * @param tableName table to write to
     * @param row       row key
     * @param fields    target columns in {@code "family"} or {@code "family:qualifier"} form;
     *                  a bare family is written under the empty qualifier, as in
     *                  {@link #modifyData}
     * @param values    values to store, parallel to {@code fields}
     * @throws IllegalArgumentException if the arrays differ in length or a family part is empty
     * @throws IOException              if the table cannot be opened or the write fails
     */
    public static void addRecord(String tableName, String row, String[] fields, String[] values) throws IOException {
        if (fields.length != values.length) {
            throw new IllegalArgumentException("Fields and values arrays must have the same length.");
        }
        // Validate and assemble the put before opening the table.
        Put put = new Put(Bytes.toBytes(row));
        for (int i = 0; i < fields.length; i++) {
            String[] parts = splitColumn(fields[i]);
            String qualifier = parts.length > 1 ? parts[1] : "";
            put.addColumn(Bytes.toBytes(parts[0]), Bytes.toBytes(qualifier), Bytes.toBytes(values[i]));
        }
        try (Table table = connection.getTable(TableName.valueOf(tableName))) {
            table.put(put);
        }
        System.out.println("Record added to table " + tableName + " with row key: " + row);
    }

    /**
     * Prints one column, or every column of one family, for all rows of a table.
     *
     * <p>For a {@code "family:qualifier"} argument one line is printed per row, with value
     * {@code null} when the row has no such cell. For a bare family name one line is printed
     * per cell found in that family.
     *
     * @param tableName table to scan
     * @param column    {@code "family"} or {@code "family:qualifier"}
     * @throws IllegalArgumentException if the family part is empty
     * @throws IOException              if the table cannot be opened or scanned
     */
    public static void scanColumn(String tableName, String column) throws IOException {
        // Parse once; the argument does not change between rows.
        String[] parts = splitColumn(column);
        boolean qualified = parts.length > 1;
        String columnFamily = parts[0];
        byte[] familyBytes = Bytes.toBytes(columnFamily);
        byte[] qualifierBytes = qualified ? Bytes.toBytes(parts[1]) : null;

        // The scan is deliberately not narrowed to the column: rows lacking it must
        // still be visited so they can be reported with a null value.
        try (Table table = connection.getTable(TableName.valueOf(tableName));
             ResultScanner scanner = table.getScanner(new Scan())) {
            for (Result result : scanner) {
                String rowKey = Bytes.toString(result.getRow());
                if (qualified) {
                    byte[] value = result.getValue(familyBytes, qualifierBytes);
                    System.out.println("Row: " + rowKey + ", Column: " + column + ", Value: " + Bytes.toString(value));
                } else {
                    for (Cell cell : result.rawCells()) {
                        if (Bytes.toString(CellUtil.cloneFamily(cell)).equals(columnFamily)) {
                            System.out.println("Row: " + rowKey + ", Column: " + Bytes.toString(CellUtil.cloneQualifier(cell)) + ", Value: " + Bytes.toString(CellUtil.cloneValue(cell)));
                        }
                    }
                }
            }
        }
    }

    /**
     * Overwrites one cell with a new value.
     *
     * @param tableName table to write to
     * @param row       row key
     * @param column    {@code "family"} or {@code "family:qualifier"}; a bare family addresses
     *                  the empty qualifier
     * @param newValue  value to store
     * @throws IllegalArgumentException if the family part is empty
     * @throws IOException              if the table cannot be opened or the write fails
     */
    public static void modifyData(String tableName, String row, String column, String newValue) throws IOException {
        String[] parts = splitColumn(column);
        String qualifier = parts.length > 1 ? parts[1] : "";
        Put put = new Put(Bytes.toBytes(row));
        put.addColumn(Bytes.toBytes(parts[0]), Bytes.toBytes(qualifier), Bytes.toBytes(newValue));
        try (Table table = connection.getTable(TableName.valueOf(tableName))) {
            table.put(put);
        }
        System.out.println("Data modified in table " + tableName);
    }

    /**
     * Deletes an entire row.
     *
     * @param tableName table to delete from
     * @param row       row key of the row to delete
     * @throws IOException if the table cannot be opened or the delete fails
     */
    public static void deleteRow(String tableName, String row) throws IOException {
        try (Table table = connection.getTable(TableName.valueOf(tableName))) {
            table.delete(new Delete(Bytes.toBytes(row)));
        }
        System.out.println("Row " + row + " deleted from table " + tableName);
    }

    /**
     * Closes the shared admin handle and connection. A failure to close one of them is
     * reported and does not prevent the other from being closed.
     */
    public static void closeConnection() {
        closeAndReport(admin);
        closeAndReport(connection);
    }

    /** Closes a resource if present, printing (not propagating) any close failure. */
    private static void closeAndReport(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Demo driver: recreates the {@code Students} table and exercises every helper on it. */
    public static void main(String[] args) {
        try {
            String tableName = "Students";
            String[] fields = {"Score:Math", "Score:Computer Science", "Score:English"};
            String[] values = {"85", "90", "88"};
            String row = "John";
            // Exercise each operation in turn.
            createTable(tableName, fields);
            addRecord(tableName, row, fields, values);
            scanColumn(tableName, "Score:Math");
            modifyData(tableName, row, "Score:Math", "95");
            scanColumn(tableName, "Score:Math");
            deleteRow(tableName, row);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeConnection();
        }
    }
}

浙公网安备 33010602011771号