Flink2.1.1-WordCount示例
安装
Java代码示例
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.example.flink</groupId>
<artifactId>flink-wordcount</artifactId>
<version>1.0.0</version>
<name>flink-wordcount</name>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<flink.version>2.1.1</flink.version>
<scala.binary.version>2.12</scala.binary.version>
<log4j.version>2.24.3</log4j.version>
<commons-math3.version>3.6.1</commons-math3.version>
<lombok.version>1.18.26</lombok.version>
</properties>
<dependencies>
<!-- Flink Streaming 核心依赖 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- Flink 客户端依赖,用于本地执行 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>${commons-math3.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Java 编译插件 -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.4.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.example.flink.WordCount</mainClass>
</transformer>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
代码
package com.example.flink;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
* Flink WordCount 运行示例
*/
public class WordCount {
public static void main(String[] args) throws Exception {
// 1. 创建本地执行环境
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// 2. 创建一个静态的数据源
DataStream<String> text = env.fromElements(
"To be, or not to be, that is the question:",
"Whether 'tis nobler in the mind to suffer",
"The slings and arrows of outrageous fortune,",
"Or to take arms against a sea of troubles",
"And by opposing end them."
);
// 3. 执行转换和计算
DataStream<Tuple2<String, Integer>> counts =
// 将每行文本分割成单词
text.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
@Override
public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
String[] words = value.toLowerCase().split("\\s+");
for (String word : words) {
// 过滤掉空字符串和标点符号
if (word.length() > 0) {
out.collect(new Tuple2<>(word.replaceAll("[^a-z]", ""), 1));
}
}
}
})
// 按单词进行分组
.keyBy(value -> value.f0)
// 对每个单词的计数进行求和
.sum(1);
// 4. 输出结果到控制台
counts.print();
// 5. 执行作业
env.execute("Socket WordCount");
}
}
运行示例
上传WordCount的jar包到flink

WordCount任务

WordCount任务日志


浙公网安备 33010602011771号