Flink - DataStream State

Flink 包含了自动的状态管理

例如进行单词的计数：下面的例子持续地从 socket 读取输入的单词，每次的计数都会与该单词的历史计数进行相加。

package com.learn.flink.source;

import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

import java.util.Arrays;

/**
 * Streaming word count over a socket source (DataStream - source - socket).
 *
 * <p>Reads lines of text from a socket, splits each line into words and emits a
 * running count per word using the keyed {@code sum} aggregation.
 */
public class SourceDemo_Socket {

    /**
     * Builds and runs the word-count job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        // 0: env
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        // 1: source
        // NOTE(review): host "node01" and port 999 are hard-coded; confirm the port is the intended one.
        DataStream<String> ds = env.socketTextStream("node01", 999);

        // 2: transformation
        // Split each line on runs of whitespace and drop empty tokens, so that
        // blank lines or repeated/leading separators are not counted as a word.
        DataStream<String> words = ds.flatMap((String value, Collector<String> out) -> {
            Arrays.stream(value.split("\\s+"))
                    .filter(word -> !word.isEmpty())
                    .forEach(out::collect);
        }).returns(Types.STRING);

        // Pair every word with an initial count of 1. The explicit returns(...) is
        // needed because the lambda's generic result type is erased.
        DataStream<Tuple2<String, Integer>> wordAndOne = words.map(value -> Tuple2.of(value, 1))
                .returns(Types.TUPLE(Types.STRING, Types.INT));

        // Group by the word itself (field f0).
        final KeyedStream<Tuple2<String, Integer>, String> grouped = wordAndOne.keyBy(value -> value.f0);

        // Aggregate: sum the count field (position 1) per key.
        final SingleOutputStreamOperator<Tuple2<String, Integer>> sum = grouped.sum(1);

        // 3: sink
        sum.print();

        // 4: execute
        env.execute();
    }
}

这个结果其实就是维护了一个历史数据的状态，这是 Flink 进行自动管理的。

那么在一些其他需要的场景下，状态也可以进行手动的维护。

无状态计算和有状态计算

无状态的计算：只要输入的数据相同、进行的计算相同，结果就是一样的，例如 map、filter 等。无状态的计算是简单的。
有状态的计算又可以分为 2 种：

输入的数据包含状态
算子本身包含状态

有状态的计算是复杂的！

一般的使用场景有：

访问的历史数据，需要与昨日进行对比
窗口计算

State 的分类

Managed State:flink 进行自动管理和优化，支持多种数据结构,大多数情况下均可以使用

Keyed State：只能在分组之后（即使用了 keyBy 之后）的 KeyedStream 上使用
Operator State：可以在所有算子上使用，一般用在 source 上

Raw State：完全由用户自己管理，只支持 byte[]，在自定义 operator 时可以使用，但是很少自定义。

详细图解：

示例

KeyedState：

package com.learn.flink.state;

import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * Computes a per-key running maximum with a keyed {@link ValueState},
 * mimicking what {@code maxBy} does. In real development simply use {@code maxBy}.
 */
public class StateDemo_KeyedState {

    /**
     * Builds and runs the demo job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        // 0: env
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        // 1: source
        DataStream<Tuple2<String, Long>> ds = env.fromElements(
                Tuple2.of("北京", 1L),
                Tuple2.of("北京", 4L),
                Tuple2.of("上海", 8L),
                Tuple2.of("北京", 3L),
                Tuple2.of("上海", 2L)
        );

        // 2: transformation
        // Requirement: find the largest sales amount per city.
        // Built-in way, kept for comparison; it is not attached to a sink here.
        final SingleOutputStreamOperator<Tuple2<String, Long>> result = ds.keyBy(t -> t.f0).maxBy(1);

        // Hand-written equivalent: keep the maximum seen so far in keyed ValueState.
        // Output is (city, current amount, maximum so far for that city).
        final SingleOutputStreamOperator<Tuple3<String, Long, Long>> result2 = ds.keyBy(t -> t.f0).map(new RichMapFunction<Tuple2<String, Long>, Tuple3<String, Long, Long>>() {
            // 1: state holding the maximum value
            private ValueState<Long> maxValueState;

            // 2: initialise the state
            @Override
            public void open(Configuration parameters) throws Exception {
                // 2-1: create the state descriptor (diamond instead of a raw type,
                // so there is no unchecked conversion)
                final ValueStateDescriptor<Long> maxValueStateDesc =
                        new ValueStateDescriptor<>("maxValueState", Long.class);
                // 2-2: obtain the state handle described by the descriptor
                maxValueState = getRuntimeContext().getState(maxValueStateDesc);
            }

            // 3: use the state
            @Override
            public Tuple3<String, Long, Long> map(Tuple2<String, Long> value) throws Exception {
                final Long currentValue = value.f1;
                // A null here is treated as "no maximum stored yet".
                final Long historyMaxValue = maxValueState.value();
                if (historyMaxValue != null && historyMaxValue >= currentValue) {
                    // Stored maximum still stands; no state write needed.
                    return Tuple3.of(value.f0, currentValue, historyMaxValue);
                }
                // First value seen, or a new maximum: persist it and report it
                // without reading the state back a second time.
                maxValueState.update(currentValue);
                return Tuple3.of(value.f0, currentValue, currentValue);
            }
        });

        // 3: sink
        result2.print();

        // 4: execute
        env.execute();
    }
}