import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.StructType;

import java.util.Arrays;
/**
 * Demonstrates reading multiple columns from each {@link Row} of a Spark
 * DataFrame, both by ordinal position and by column name.
 */
public class GetMultipleColumnsExample {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("GetMultipleColumnsExample")
                .master("local[*]")
                .getOrCreate();
        // Sample DataFrame - replace with your real data source.
        Dataset<Row> df = spark.createDataFrame(
                Arrays.asList(
                        RowFactory.create(1, "Alice", 30),
                        RowFactory.create(2, "Bob", 25)
                ),
                StructType.fromDDL("id INT, name STRING, age INT")
        );
        // Iterate over every row. The explicit ForeachFunction cast is required:
        // a bare lambda is ambiguous between the Java foreach(ForeachFunction)
        // and the Scala foreach(Function1) overloads and does not compile.
        df.foreach((ForeachFunction<Row>) row -> {
            // Access by ordinal position (0-based).
            int id = row.getInt(0);
            String name = row.getString(1);
            int age = row.getInt(2);
            // Access by column name; getAs is generic, so a cast/unboxing is needed.
            int idByName = (int) row.getAs("id");
            String nameByName = (String) row.getAs("name");
            int ageByName = (int) row.getAs("age");
            // Print both access styles for comparison.
            System.out.println("By Index: id=" + id + ", name=" + name + ", age=" + age);
            System.out.println("ByName: id=" + idByName + ", name=" + nameByName + ", age=" + ageByName);
        });
        spark.stop();
    }
}
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.Arrays;
public class SparkUpdateMySQLWithForeach {
public static void main(String[] args) {
SparkSession spark = SparkSession.builder()
.appName("SparkUpdateMySQLWithForeach")
.master("local[*]")
.getOrCreate();
// 示例DataFrame - 替换为你的数据源
Dataset<Row> df = spark.createDataFrame(
Arrays.asList(
RowFactory.create(1, "Alice"),
RowFactory.create(2, "Bob")
),
StructType.fromDDL("id INT, name STRING")
);
// MySQL配置
String jdbcUrl = "jdbc:mysql://localhost:3306/test_db";
String username = "root";
String password = "password";
String tableName = "test_table";
// 更新SQL
String updateQuery = "UPDATE " + tableName + " SET name = ? WHERE id = ?";
// 使用foreach遍历DataFrame并执行更新
df.foreach(row -> {
try (Connection conn = DriverManager.getConnection(jdbcUrl, username, password)) {
try (PreparedStatement pstmt = conn.prepareStatement(updateQuery)) {
// 设置参数
pstmt.setString(1, row.getString(1)); // 设置name
pstmt.setInt(2, row.getInt(0)); // 设置id
// 执行更新
pstmt.executeUpdate();
}
} catch (Exception e) {
e.printStackTrace();
// 处理异常
}
});
spark.stop();
}
}