// Spark examples: reading multiple columns from each Row, and updating MySQL from a DataFrame

import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.ForeachPartitionFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.StructType;

import java.util.Arrays;

/**
 * Shows two ways of reading several column values out of each {@link Row}
 * of a DataFrame: by positional index and by column name.
 */
public class GetMultipleColumnsExample {
    /**
     * Builds a small in-memory DataFrame with columns {@code id}, {@code name}
     * and {@code age}, then prints every row twice: once read by index and once
     * read by column name.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("GetMultipleColumnsExample")
                .master("local[*]")
                .getOrCreate();

        try {
            // Sample DataFrame - replace with your own data source.
            Dataset<Row> df = spark.createDataFrame(
                    Arrays.asList(
                            RowFactory.create(1, "Alice", 30),
                            RowFactory.create(2, "Bob", 25)
                    ),
                    StructType.fromDDL("id INT, name STRING, age INT")
            );

            // Visit every row. The explicit ForeachFunction cast is required from
            // Java: Dataset.foreach is overloaded (Scala Function1 vs. ForeachFunction)
            // and a bare lambda is ambiguous between the two on Scala 2.12+ builds.
            df.foreach((ForeachFunction<Row>) row -> {
                // Read by positional index.
                int id = row.getInt(0);
                String name = row.getString(1);
                int age = row.getInt(2);

                // Read by column name; the type witness replaces an unchecked cast.
                int idByName = row.<Integer>getAs("id");
                String nameByName = row.<String>getAs("name");
                int ageByName = row.<Integer>getAs("age");

                // Print both views of the row.
                System.out.println("By Index: id=" + id + ", name=" + name + ", age=" + age);
                System.out.println("ByName: id=" + idByName + ", name=" + nameByName + ", age=" + ageByName);
            });
        } finally {
            // Always release the session, even if the job above fails.
            spark.stop();
        }
    }
}
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.Arrays;

/**
 * Updates rows of a MySQL table from the contents of a Spark DataFrame.
 *
 * <p>Each DataFrame row supplies an {@code id} (column 0) and a {@code name}
 * (column 1); the matching table row gets its {@code name} overwritten.
 */
public class SparkUpdateMySQLWithForeach {
    /** Number of UPDATE statements accumulated before they are sent to MySQL. */
    private static final int BATCH_SIZE = 1000;

    /**
     * Builds a small in-memory DataFrame and applies it to MySQL with one JDBC
     * connection per partition and batched UPDATE statements.
     *
     * <p>JDBC failures are not swallowed: they propagate out of the partition
     * function so the Spark task fails instead of silently losing updates.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("SparkUpdateMySQLWithForeach")
                .master("local[*]")
                .getOrCreate();

        try {
            // Sample DataFrame - replace with your own data source.
            Dataset<Row> df = spark.createDataFrame(
                    Arrays.asList(
                            RowFactory.create(1, "Alice"),
                            RowFactory.create(2, "Bob")
                    ),
                    StructType.fromDDL("id INT, name STRING")
            );

            // MySQL settings. Placeholder values for the example; do not hard-code
            // real credentials in source.
            String jdbcUrl = "jdbc:mysql://localhost:3306/test_db";
            String username = "root";
            String password = "password";
            String tableName = "test_table";

            // Parameterized UPDATE; only the (trusted, local) table name is concatenated.
            String updateQuery = "UPDATE " + tableName + " SET name = ? WHERE id = ?";

            // Process one partition at a time so a single connection and prepared
            // statement are reused for all of its rows, instead of reconnecting per row.
            // The explicit cast resolves the Java overload ambiguity between the
            // Scala Function1 and ForeachPartitionFunction variants.
            df.foreachPartition((ForeachPartitionFunction<Row>) rows -> {
                if (!rows.hasNext()) {
                    // Empty partition: avoid opening a connection for nothing.
                    return;
                }
                try (Connection conn = DriverManager.getConnection(jdbcUrl, username, password);
                     PreparedStatement pstmt = conn.prepareStatement(updateQuery)) {
                    int pending = 0;
                    while (rows.hasNext()) {
                        Row row = rows.next();
                        pstmt.setString(1, row.getString(1)); // name
                        pstmt.setInt(2, row.getInt(0));       // id
                        pstmt.addBatch();

                        // Flush periodically so the batch does not grow without bound.
                        if (++pending == BATCH_SIZE) {
                            pstmt.executeBatch();
                            pending = 0;
                        }
                    }
                    // Send whatever is left over from the last partial batch.
                    if (pending > 0) {
                        pstmt.executeBatch();
                    }
                }
            });
        } finally {
            // Always release the session, even if the job above fails.
            spark.stop();
        }
    }
}

 

posted @ 2025-05-22 22:39  ---江北  阅读(23)  评论(0)    收藏  举报
TOP