大数据学习之Flink

142 阅读1分钟

目前 Flink 版本更新很快,官网文档也随之快速更新,但新文档并不兼容旧版本:哪怕只是小版本升级,一些类也会被直接移除,导致照着其他教程操作时一运行就报错,还找不到对应版本的原始文档地址,这就尴尬了。

1 maven依赖参考如下:

<project xmlns="http://maven.apache.org/POM/4.0.0"  
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"  
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">  
<modelVersion>4.0.0</modelVersion>  
  
<groupId>org.example</groupId>  
<artifactId>ph-flink117</artifactId>  
<version>1.0-SNAPSHOT</version>  
  
<properties>  
<maven.compiler.source>11</maven.compiler.source>  
<maven.compiler.target>11</maven.compiler.target>  
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>  
<flink.version>1.17.2</flink.version>  
</properties>  
  
<dependencies>
<!-- Flink artifacts below share ${flink.version} unless noted otherwise.
The commented-out "provided" scopes are kept on purpose: they are meant to be
enabled only when packaging for a cluster and left disabled for local runs. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>

<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>

<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>

<!-- Declared exactly once: a second declaration of the same groupId:artifactId
makes Maven warn that dependency declarations must be unique. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-files</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>

<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka</artifactId>
<version>${flink.version}</version>
</dependency>

<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-datagen</artifactId>
<version>${flink.version}</version>
</dependency>

<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.27</version>
</dependency>

<!-- The JDBC connector was not yet available from Maven Central when this was
written, so a snapshot build is used for now. It is resolved through the
apache-snapshots repository declared in this POM. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc</artifactId>
<version>1.17-SNAPSHOT</version>
</dependency>

<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-statebackend-rocksdb</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.3.4</version>
<!-- <scope>provided</scope>-->
</dependency>

<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-statebackend-changelog</artifactId>
<version>${flink.version}</version>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
<version>1.3.9</version>
</dependency>

<!-- Table API / SQL -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge</artifactId>
<version>${flink.version}</version>
</dependency>

<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-loader</artifactId>
<version>${flink.version}</version>
</dependency>

<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-runtime</artifactId>
<version>${flink.version}</version>
</dependency>

<!-- Walkthrough common package: without it the hello-world demo
(Transaction, Alert, TransactionSource, AlertSink) cannot be used.
NOTE(review): pinned to 1.14.6 instead of ${flink.version}; confirm whether a
release matching the Flink version in use is available before changing it. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-walkthrough-common_2.12</artifactId>
<version>1.14.6</version>
</dependency>

</dependencies>
  
<repositories>  
<repository>  
<id>apache-snapshots</id>  
<name>apache snapshots</name>  
<url>https://repository.apache.org/content/repositories/snapshots/</url>  
<!--<url>https://maven.aliyun.com/repository/apache-snapshots</url>-->  
</repository>  
</repositories>  
  
  
<build>  
<plugins>  
<!-- Runs the shade goal during the package phase to produce the job jar. -->
<plugin>  
<groupId>org.apache.maven.plugins</groupId>  
<artifactId>maven-shade-plugin</artifactId>  
<version>3.2.4</version>  
<executions>  
<execution>  
<phase>package</phase>  
<goals>  
<goal>shade</goal>  
</goals>  
<configuration>  
<!-- Artifacts matching these groupId:artifactId patterns are left out of the shaded jar. -->
<artifactSet>  
<excludes>  
<exclude>com.google.code.findbugs:jsr305</exclude>  
<exclude>org.slf4j:*</exclude>  
<exclude>log4j:*</exclude>  
<exclude>org.apache.hadoop:*</exclude>  
</excludes>  
</artifactSet>  
<filters>  
<filter>  
<!-- Do not copy the signatures in the META-INF folder.  
Otherwise, this might cause SecurityExceptions when using the JAR. -->  
<artifact>*:*</artifact>  
<excludes>  
<exclude>META-INF/*.SF</exclude>  
<exclude>META-INF/*.DSA</exclude>  
<exclude>META-INF/*.RSA</exclude>  
</excludes>  
</filter>  
</filters>  
<!-- combine.children="append" adds this transformer to any inherited list instead of replacing it. -->
<transformers combine.children="append">  
<!-- NOTE(review): per the shade plugin docs this transformer merges META-INF/services
entries from the bundled jars; confirm against the plugin version in use. -->
<transformer  
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer">  
</transformer>  
</transformers>  
</configuration>  
</execution>  
</executions>  
</plugin>  
</plugins>  
</build>  
  
</project>

2 运行demo

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.walkthrough.common.entity.Alert;
import org.apache.flink.walkthrough.common.entity.Transaction;
import org.apache.flink.walkthrough.common.sink.AlertSink;
import org.apache.flink.walkthrough.common.source.TransactionSource;

/**
 * Entry point of the fraud-detection demo.
 *
 * <p>Reads transactions from a {@code TransactionSource}, keys them by account id, runs them
 * through a {@code FraudDetector}, and hands every resulting alert to an {@code AlertSink}.
 */
public class FraudDetectionJob {

    /**
     * Assembles the three-stage pipeline and submits it under the job name "Fraud Detection".
     *
     * @param args command-line arguments; not read by this method
     * @throws Exception if building or executing the job fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment =
                StreamExecutionEnvironment.getExecutionEnvironment();

        // Stage 1: the transaction source.
        final DataStream<Transaction> transactionStream =
                environment.addSource(new TransactionSource()).name("transactions");

        // Stage 2: partition by account id so each FraudDetector call is keyed per account.
        final DataStream<Alert> alertStream =
                transactionStream
                        .keyBy(Transaction::getAccountId)
                        .process(new FraudDetector())
                        .name("fraud-detector");

        // Stage 3: deliver the alerts to the sink.
        alertStream.addSink(new AlertSink()).name("send-alerts");

        environment.execute("Fraud Detection");
    }
}



import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.walkthrough.common.entity.Alert;
import org.apache.flink.walkthrough.common.entity.Transaction;

/**
 * Keyed process function that turns transactions into alerts.
 *
 * <p>In its current form it applies no filtering: every incoming {@code Transaction} produces
 * exactly one {@code Alert} whose id is the transaction's account id.
 */
public class FraudDetector extends KeyedProcessFunction<Long, Transaction, Alert> {

    private static final long serialVersionUID = 1L;

    // Not referenced by processElement below; presumably reserved for detection rules
    // (small/large amount thresholds and a one-minute window in milliseconds) - TODO confirm.
    private static final double SMALL_AMOUNT = 1.0;
    private static final double LARGE_AMOUNT = 500.0;
    private static final long ONE_MINUTE = 60_000L;

    /**
     * Emits one alert for the given transaction.
     *
     * @param transaction the incoming element; only its account id is read
     * @param context the per-element context; unused here
     * @param collector receives the generated alert
     * @throws Exception declared by the overridden method; nothing is thrown explicitly here
     */
    @Override
    public void processElement(
            Transaction transaction,
            Context context,
            Collector<Alert> collector) throws Exception {

        final Alert outgoing = new Alert();
        outgoing.setId(transaction.getAccountId());
        collector.collect(outgoing);
    }
}

3 坑B点

1 程序必须引入

<dependency>  
<groupId>org.apache.flink</groupId>  
<artifactId>flink-streaming-java</artifactId>  
<version>${flink.version}</version>  
<!-- <scope>provided</scope>-->  
</dependency>  

<dependency>  
<groupId>org.apache.flink</groupId>  
<artifactId>flink-walkthrough-common_2.12</artifactId>  
<version>1.14.6</version>  
</dependency>  

2 坑B2

`<scope>provided</scope>` 一般用于解决部署时依赖版本重复冲突的问题:打包时需要加上;但是直接运行 main 方法时要注释掉,不然程序会找不到类。