实验2 Scala编程初级实践 实验4 RDD编程初级实践

image

import scala.io.StdIn

/** Exercise 2.1: accumulate the series Sn = 2/1 + 3/2 + ... + (n+1)/n until Sn >= q.
  *
  * The computation lives in the pure helper [[computeSn]], so the interactive run and
  * every sample check start from an empty sum instead of sharing mutable state.
  */
object Exercise2_1 {

  /** Returns the first partial sum Sn = sum of (k+1)/k for k = 1..n that is not less than `q`.
    *
    * Every call starts from an empty sum, so results never depend on earlier calls.
    *
    * @param q threshold the partial sum has to reach
    * @return the first partial sum that is >= q; 0.0 when `q` is <= 0 or NaN (no term is
    *         added); positive infinity when `q` is positive infinity (unreachable threshold)
    */
  def computeSn(q: Double): Double = {
    @scala.annotation.tailrec
    def loop(sum: Double, k: Long): Double =
      if (sum < q) loop(sum + (k + 1.0) / k, k + 1) else sum

    // A finite sum can never reach +Infinity, so answer directly instead of looping.
    if (q.isPosInfinity) Double.PositiveInfinity else loop(0.0, 1L)
  }

  /** Reads `q` from standard input, prints Sn, then prints the three sample cases. */
  def main(args: Array[String]): Unit = {
    println("请输入q的值(大于0的整数):")
    val q = StdIn.readDouble()

    val sn = computeSn(q)
    println(f"Sn=$sn%.6f")

    // Sample cases from the assignment (expected values shown with ~6 significant decimals).
    println("\n测试样例验证:")
    testCase(1)  // expected output: 2
    testCase(30) // expected output: 30.891459
    testCase(50) // expected output: 50.416695
  }

  /** Prints Sn for a single threshold `q`, computed independently of any previous run. */
  private def testCase(q: Double): Unit = {
    val sn = computeSn(q)
    println(f"q=$q 时,Sn=$sn%.6f")
  }
}

image

/** Mixin for objects that can render themselves on standard output. */
trait Drawable {
  /** Prints this object's `toString` representation on its own line. */
  def draw(): Unit = println(toString)
}

/** A mutable 2-D point that can be drawn.
  *
  * @param x horizontal coordinate
  * @param y vertical coordinate
  */
case class Point(var x: Double, var y: Double) extends Drawable {
  /** Translates this point in place by the given offsets. */
  def shift(deltaX: Double, deltaY: Double): Unit = {
    x = x + deltaX
    y = y + deltaY
  }

  /** Renders as `Point(x,y)` with no space after the comma. */
  override def toString: String = List(x, y).mkString("Point(", ",", ")")
}

/** Base class for shapes anchored at a reference point.
  *
  * @param location the shape's current anchor; reassigned by [[moveTo]]
  */
abstract class Shape(var location: Point) {
  /** Re-anchors the shape by making `newPoint` its location. */
  def moveTo(newPoint: Point): Unit = {
    this.location = newPoint
  }

  /** Scales the shape by `factor`; the exact meaning is defined by each subclass. */
  def zoom(factor: Double): Unit
}

/** A line segment whose first endpoint is the inherited `location` and whose second is `end`.
  *
  * Both [[moveTo]] and [[zoom]] replace the endpoints with fresh `Point` values, so `Point`
  * instances handed in by the caller are never mutated behind the caller's back.
  *
  * @param start first endpoint (stored as `location`)
  * @param end   second endpoint
  */
class Line(start: Point, var end: Point) extends Shape(start) with Drawable {
  /** Translates the whole segment so that its first endpoint lands on `newPoint`.
    *
    * The second endpoint is moved by the same offset, keeping length and direction.
    */
  override def moveTo(newPoint: Point): Unit = {
    val deltaX = newPoint.x - location.x
    val deltaY = newPoint.y - location.y
    // Build new points rather than shifting in place: the existing ones may be shared
    // with the caller, with another shape, or even be the same object for both ends.
    location = Point(location.x + deltaX, location.y + deltaY)
    end = Point(end.x + deltaX, end.y + deltaY)
  }

  /** Scales the segment's length by `factor` while keeping its midpoint fixed. */
  override def zoom(factor: Double): Unit = {
    // The midpoint must be captured before either endpoint is replaced.
    val centerX = (location.x + end.x) / 2
    val centerY = (location.y + end.y) / 2

    location = Point(
      centerX + (location.x - centerX) * factor,
      centerY + (location.y - centerY) * factor
    )
    end = Point(
      centerX + (end.x - centerX) * factor,
      centerY + (end.y - centerY) * factor
    )
  }

  /** Single source for the textual form used by both `draw` and `toString`. */
  private def describe: String =
    s"Line:(${location.x},${location.y})--(${end.x},${end.y})"

  /** Prints the segment as `Line:(x1,y1)--(x2,y2)`. */
  override def draw(): Unit = println(describe)

  override def toString: String = describe
}

/** A circle anchored at its center (the inherited `location`).
  *
  * @param center initial center point
  * @param radius current radius; rescaled by [[zoom]]
  */
class Circle(center: Point, var radius: Double) extends Shape(center) with Drawable {
  /** Multiplies the radius by `factor`; the center is left untouched. */
  override def zoom(factor: Double): Unit = {
    radius *= factor
  }

  /** Textual form shared by `draw` and `toString`. */
  private def describe: String =
    s"Circle center:(${location.x},${location.y}),R=$radius"

  /** Prints the circle's center and radius on one line. */
  override def draw(): Unit = println(describe)

  override def toString: String = describe
}

/** Demo entry point: draws a point, then a line and a circle before and after each change. */
object MyDraw {
  def main(args: Array[String]): Unit = {
    // Draws the shape once, then again after every transformation, in the given order.
    def drawAfterEach[S <: Shape with Drawable](shape: S)(steps: (S => Unit)*): Unit = {
      shape.draw()
      steps.foreach { step =>
        step(shape)
        shape.draw()
      }
    }

    Point(10, 30).draw()

    drawAfterEach(new Line(Point(0, 0), Point(20, 20)))(
      (segment: Line) => segment.moveTo(Point(5, 5)), // move to a new start point
      (segment: Line) => segment.zoom(2)              // double the length
    )

    drawAfterEach(new Circle(Point(10, 10), 5))(
      (disc: Circle) => disc.moveTo(Point(30, 20)),
      (disc: Circle) => disc.zoom(0.5)
    )
  }
}

/** Manual smoke test that prints a point, a line and a circle before and after a change. */
object TestDrawable {
  /** Draws `shape`, zooms it by `factor`, prints `caption`, then draws it again. */
  private def zoomAndReport(shape: Shape with Drawable, factor: Double, caption: String): Unit = {
    shape.draw()
    shape.zoom(factor)
    println(caption)
    shape.draw()
  }

  /** Runs the whole demonstration, writing every result to standard output. */
  def test(): Unit = {
    println("=== 测试图形绘制系统 ===")

    // Point: show it, shift it in place, show it again.
    val pt = Point(5, 10)
    println("原始点:")
    pt.draw()
    pt.shift(3, 4)
    println("移动后的点:")
    pt.draw()

    // Line segment scaled around its midpoint.
    println("\n测试线段:")
    zoomAndReport(new Line(Point(0, 0), Point(10, 10)), 2, "缩放2倍后:")

    // Circle scaled around its center.
    println("\n测试圆:")
    zoomAndReport(new Circle(Point(0, 0), 5), 3, "缩放3倍后:")
  }
}

image

/** Per-course grade statistics (average / min / max) for all, male and female students.
  *
  * Input is a whitespace-separated table: a header row `Id gender <course>...` followed by
  * one row per student.
  */
object GradeStatistics {

  private case class Student(id: String, gender: String, scores: Map[String, Double])

  /** (average, minimum, maximum) of one course within one group of students. */
  private type Summary = (Double, Double, Double)

  /** Parses the raw table into students plus the course names taken from the header.
    *
    * Blank lines are ignored. Returns `(Nil, Nil)` when there is no header or no data row.
    * A malformed data row (fewer than two columns, or a non-numeric score) still fails
    * with the underlying exception rather than being skipped silently.
    */
  private def parseData(data: String): (List[Student], List[String]) = {
    val lines = data.linesIterator.map(_.trim).filter(_.nonEmpty).toList

    lines match {
      case headerLine :: rows if rows.nonEmpty =>
        // The first two header columns are Id and gender; the rest are course names.
        val courses = headerLine.split("\\s+").toList.drop(2)
        val students = rows.map { row =>
          val fields = row.split("\\s+")
          val scores = courses.zip(fields.drop(2).map(_.toDouble)).toMap
          Student(fields(0), fields(1), scores)
        }
        (students, courses)

      case _ => (Nil, Nil)
    }
  }

  /** Computes the summary of every course for one group of students.
    *
    * The result keeps the order of `courses`. A course with no score in the group
    * (including the case of an empty group) is omitted instead of failing on `min`/`max`.
    */
  private def calculateStats(students: List[Student], courses: List[String]): List[(String, Summary)] =
    for {
      course <- courses
      scores = students.flatMap(_.scores.get(course))
      if scores.nonEmpty
    } yield course -> ((scores.sum / scores.size, scores.min, scores.max))

  /** Prints one titled statistics table, or a "no data" line when the group is empty. */
  private def printStats(title: String, stats: List[(String, Summary)]): Unit = {
    println(title)
    if (stats.isEmpty) {
      println("没有学生数据")
    } else {
      println("course    average   min   max")
      stats.foreach { case (course, (avg, min, max)) =>
        println(f"$course:     $avg%5.2f   $min%5.2f   $max%5.2f")
      }
    }
    println()
  }

  /** Parses `data` and prints statistics for all students, then male, then female students.
    *
    * @param data the whitespace-separated grade table, header row first
    */
  def analyze(data: String): Unit = {
    val (students, courses) = parseData(data)

    if (students.isEmpty) {
      println("没有学生数据")
    } else {
      println(s"共读取 ${students.size} 名学生数据")
      println(s"课程:${courses.mkString(", ")}")
      println("-" * 40)

      val groups = List(
        "所有学生成绩统计:" -> students,
        "男生成绩统计:" -> students.filter(_.gender == "male"),
        "女生成绩统计:" -> students.filter(_.gender == "female")
      )
      groups.foreach { case (title, group) =>
        printStats(title, calculateStats(group, courses))
      }
    }
  }
}

/** Driver that feeds two sample grade tables to `GradeStatistics.analyze`. */
object Test {
  def main(args: Array[String]): Unit = {
    println("=== 学生成绩统计分析 ===\n")

    // Sample 1: three courses.
    val data1 =
      """Id  gender  Math    English   Physics
        |301610    male   80      64        78
        |301611  female   65      87        58
        |301612  female   44      71        77
        |301613  female   66      71        91
        |301614  female   70      71       100
        |301615    male   72      77        72
        |301616  female   73      81        75
        |301617  female   69      77        75
        |301618    male   73      61        65
        |301619    male   74      69        68
        |301620    male   76      62        76
        |301621    male   73      69        91
        |301622    male   55      69        61
        |301623    male   50      58        75
        |301624  female   63      83        93
        |301625    male   72      54       100
        |301626    male   76      66        73
        |301627    male   82      87        79
        |301628  female   62      80        54
        |301629    male   89      77        72""".stripMargin

    // Sample 2: four courses.
    val data2 =
      """Id  gender  Math    English   Physics  Science
        |301610    male   72   39     74   93
        |301611    male   75   85     93   26
        |301612  female   85   79     91   57
        |301613  female   63   89     61   62
        |301614    male   72   63     58   64
        |301615    male   99   82     70   31
        |301616  female  100   81     63   72
        |301617    male   74  100     81   59
        |301618  female   68   72     63  100
        |301619    male   63   39     59   87
        |301620  female   84   88     48   48
        |301621    male   71   88     92   46
        |301622    male   82   49     66   78
        |301623    male   63   80     83   88
        |301624  female   86   80     56   69
        |301625    male   76   69     86   49
        |301626    male   91   59     93   51
        |301627  female   92   76     79  100
        |301628    male   79   89     78   57
        |301629    male   85   74     78   80""".stripMargin

    val samples = List("测试样例1:" -> data1, "测试样例2:" -> data2)

    samples.zipWithIndex.foreach { case ((label, table), index) =>
      // A separator rule goes between consecutive samples, not before the first one.
      if (index > 0) println("=" * 60)
      println(label)
      GradeStatistics.analyze(table)
    }
  }
}

image

# 1. 进入Spark安装目录
cd /usr/local/spark  # 或者你的Spark安装路径

# 2. 启动spark-shell
./bin/spark-shell

// 在Spark Shell中执行时,可以将每个实验的代码分别执行

// ==================== 实验1: 数据加载和预览 ====================
:paste
// Load the score file as an RDD of raw text lines and print the first ten as a preview.
val dataRDD = sc.textFile("file:///export/data/chapter5-data1.txt")
println("数据预览:")
for (line <- dataRDD.take(10)) println(line)
println()
// Ctrl+D结束粘贴

// ==================== 实验2: 统计学生人数 ====================
:paste
// Number of distinct values in the first comma-separated column (the student name).
val studentCount = dataRDD.map(_.split(",")(0)).distinct().count()
println(s"该系总共有 $studentCount 名学生")
// Ctrl+D结束粘贴

// ==================== 实验3: 统计课程数量 ====================
:paste
// Number of distinct values in the second comma-separated column (the course name).
val courseCount = dataRDD.map(_.split(",")(1)).distinct().count()
println(s"该系共开设了 $courseCount 门课程")
// Ctrl+D结束粘贴

// ==================== 实验4: Tom的平均分 ====================
:paste
// Tom's scores: split each record once, keep rows whose first column is "Tom",
// and read the third column as an integer score.
val tomScores = dataRDD
  .map(_.split(","))
  .filter(fields => fields(0) == "Tom")
  .map(fields => fields(2).toInt)

// isEmpty() can stop at the first matching record, unlike count() which scans everything.
val tomExists = !tomScores.isEmpty()

if (tomExists) {
  val tomAvg = tomScores.mean()
  println(f"Tom同学的总成绩平均分是: $tomAvg%.2f")
} else {
  println("数据中没有Tom同学的成绩")

  // Fall back to listing the five students with the most records.
  println("数据中前5名学生及其课程数:")
  val topStudents = dataRDD
    .map(line => (line.split(",")(0), 1))
    .reduceByKey(_ + _)
    .sortBy(-_._2)
    .take(5)
  topStudents.foreach { case (student, count) =>
    println(s"$student: $count 门课程")
  }
}
// Ctrl+D结束粘贴

// ==================== 实验5: 学生选修课程门数 ====================
:paste
// Records per student (one record per course taken), largest count first.
val studentCourseCount = dataRDD
  .map { line => (line.split(",")(0), 1) }
  .reduceByKey((left, right) => left + right)
  .sortBy { case (_, taken) => -taken }

println("学生选修课程门数统计(前10名):")
for ((student, count) <- studentCourseCount.take(10)) {
  println(f"$student%-10s: $count 门")
}
// Ctrl+D结束粘贴

// ==================== 实验6: DataBase选修人数 ====================
:paste
// Distinct students (column 0) among the records whose course (column 1) is "DataBase".
val databaseStudentCount = dataRDD
  .map(_.split(","))
  .filter(fields => fields(1) == "DataBase")
  .map(fields => fields(0))
  .distinct()
  .count()

println(s"DataBase课程共有 $databaseStudentCount 人选修")
// Ctrl+D结束粘贴

// ==================== 实验7: 各课程平均分 ====================
:paste
// Average score per course: sum the scores and the record count per course, then divide.
val courseAvgScore = dataRDD
  .map(_.split(","))
  .map(fields => (fields(1), (fields(2).toInt, 1)))
  .reduceByKey { case ((sumA, cntA), (sumB, cntB)) => (sumA + sumB, cntA + cntB) }
  .mapValues { case (total, count) => total.toDouble / count }

println("各门课程的平均分:")
// Sort on the driver by course name so the listing is alphabetical.
for ((course, avg) <- courseAvgScore.collect().sortBy(_._1)) {
  println(f"$course: $avg%.2f")
}
// Ctrl+D结束粘贴

// ==================== 实验8: 使用累加器 ====================
:paste
// Same count as the DataBase experiment, but tallied through a named long accumulator.
val databaseAccumulator = sc.longAccumulator("Database选修人数")

// Distinct students enrolled in the "DataBase" course.
val databaseStudents = dataRDD
  .map(_.split(","))
  .filter(fields => fields(1) == "DataBase")
  .map(fields => fields(0))
  .distinct()

// Add one to the accumulator for every distinct student.
databaseStudents.foreach(_ => databaseAccumulator.add(1))

println(s"使用累加器计算,DataBase课程共有 ${databaseAccumulator.value} 人选修")
// Ctrl+D结束粘贴

 

posted @ 2026-01-14 15:01  雨花阁  阅读(2)  评论(0)    收藏  举报