Simple Demo:
/**
 * Minimal demo whose stream pipeline is traced through the JDK source in the notes below.
 */
public class stream {
    /**
     * Builds a one-element stack and runs a map -> distinct -> count pipeline over it.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Parameterized instead of a raw Stack so the pipeline is type-checked.
        Stack<Integer> s = new Stack<>();
        s.add(10);
        // The result of count() is discarded on purpose: only the call chain is of interest.
        s.stream().map(p->p).distinct().count();
    }
}
java.util.stream 源码解析
迭代器+Spliterator+function+泛型
责任链&访问者模式
重要的类
java.util.stream.Stream(1.8): 是个接口, 定义了 map, forEach, count, anyMatch 等方法
AbstractPipeline(管道 双向链表结构): stream实现类 stream函数式编程中的每个函数都是一个管道(管道中包含prev next head), 最终组成一个双向链表。数据就像水流一样从管道链(类似于tomcat拦截链)中经过并被管道里的sink处理
Sink : 数据只是从管道中流过, sink用于处理从管道中流过的数据
上述三种类组成的调用链就是我们俗称的stream
TerminalOp:输入stream产生最终结果或者抛出异常
Spliterator(1.8):
1. 遍历
// Applies the given action to every remaining element: tryAdvance(action) is called
// repeatedly until it returns false. The loop body is intentionally empty because
// all the work happens inside the loop condition.
default void forEachRemaining(Consumer<? super T> action) {
do { } while (tryAdvance(action));
}
2. 数据源的特征(SORTED(已排序), DISTINCT(去重), SIZED(大小有限) ...), 统一数据格式, 提供统一的stream入口(访问者模式)
int characteristics();
3. 切分数据源的方法(把一部分元素拆分给一个新的Spliterator, 用于并行处理).
Spliterator<T> trySplit();
4. 数据源类型
数据源包括数组, 集合, IO通道, java.util.function. (宽泛来讲IO通道 是byte[], 注意function这个点)
java.util.function:Stream调用的方法都是function + 泛型实现的
源码分析
//回到 simple demo
s.stream().map(p->p).distinct().count();
stream每一个函数都对应一个AbstractPipeline(管道)对象,组成一条管道链(类似于tomcat拦截器)
整个过程分成三个阶段
初始化阶段, 中间阶段, 结算阶段
stream() //初始化阶段
map(p->p).distinct() //中间阶段(也叫中间操作)
count() //结算阶段(结算操作)
初始化阶段: 初始化方法链条 new ReferencePipeline.head
中间阶段: 新建AbstractPipeline 拼接到管道链上, 中间阶段又分为有状态操作(.distinct)和无状态操作(.map)两种, 对应内部类 StatefulOp extends ReferencePipeline, StatelessOp extends ReferencePipeline.
结算阶段: 通过TerminalOp(不继承Pipeline)沿着管道链创建一条方法(Sink)调用链,并通过Spliterator调用方法链返回结果
阶段的划分, 是注释中真实存在的
-------------------- 有状态,无状态 --------------------
StatefulOp 有状态: 处理当前元素时需要依赖之前见过的元素(如 distinct 要记住已出现的元素, sorted 要先缓存全部元素); 实现中还可以根据StreamOpFlag的不同走不同的分支
StatelessOp 无状态: 每个元素独立处理, 不需要保留之前元素的任何状态(如 map, filter)
-------------------- StreamShape --------------------
enum StreamShape: REFERENCE|INT_VALUE|LONG_VALUE|DOUBLE_VALUE
根据流的类型 将BaseStream, AbstractPipeline划分为
Stream,IntStream,LongStream,DoubleStream
ReferencePipeline extends AbstractPipeline implements Stream
IntPipeline extends AbstractPipeline implements IntStream
LongPipeline extends AbstractPipeline implements LongStream
DoublePipeline extends AbstractPipeline implements DoubleStream
-------------------- TerminalOp 终结操作 --------------------
TerminalOp根据行为分类
FindOp 搜索
ForEachOp 循环
MatchOp 匹配
ReduceOp 归并操作
调用归并操作的stream方法
.collect
.reduce
各类操作的工具类 FindOps ForEachOps MatchOps ReduceOps
初始化阶段
s.stream() 访问者模式
------------------- 先解析stream -----------------
stream 分成构造stream 中间状态 和 结束状态
构造stream 主要分成两步
1.将集合转化成一个迭代器
2.创建一个管道(是一个链表, 类似于拦截器),
创建方法 new ReferencePipeline.Head<>(spliterator(), 流特征值)
-------------------------------------------------
/**
 * Abridged excerpt of the Collection interface showing how a collection
 * exposes itself as a stream source.
 */
public interface Collection<E> extends Iterable<E> {
// Creates a stream over this collection by handing its spliterator to StreamSupport.
default Stream<E> stream() {
// spliterator() wraps this collection as the stream's data source;
// in the JDK the second argument is the "parallel" flag, so false requests a sequential stream
return StreamSupport.stream(spliterator(), false);
}
// Builds the spliterator that describes and traverses this collection.
@Override
default Spliterator<E> spliterator() {
// 0 means the collection declares no extra characteristics of its own here
return Spliterators.spliterator(this, 0);
}
}
------------------------- Spliterators.spliterator -------------------------
// 1.8新出的类 结合迭代器 应对stream并行模式
public final class Spliterators {
public static <T> Spliterator<T> spliterator
(Collection<? extends T> c, int characteristics) {
return new IteratorSpliterator<>(c,characteristics);
}
//characteristics ? 特征 ? 什么的特征
static class IteratorSpliterator<T> implements Spliterator<T> {
public IteratorSpliterator(Collection collection, int characteristics) {
this.collection = collection;
this.it = null;
this.characteristics = (characteristics & Spliterator.CONCURRENT) == 0
? characteristics | Spliterator.SIZED | Spliterator.SUBSIZED
: characteristics;
}
/**
* @see Collection
* @since 1.8
*/
// 用于遍历和切分数据源, 数据源可以是数组、集合、IO通道或Function。
// 对集合数组的特征进行抽象并抽象出遍历他们的方法, stream就是使用Spliterator中的tryAdvance和forEachRemaining
public interface Spliterator<T> {\
// 在stream函数链中会用到, 对集合当前元素执行给定的函数[function.apply(element)]
// 并判断迭代器中是否还有其他元素, 有返回true 没返回false
boolean tryAdvance(Consumer<? super T> action);
// 轮询迭代器中的元素 调用tryAdvance
default void forEachRemaining(Consumer<? super T> action) {
do { } while (tryAdvance(action));
}
// 返回迭代器中剩余元素的预估数量。之所以是预估值: 并非所有数据源都能事先知道确切大小(如IO通道、无限流), 未知/无限/计算代价过高时返回Long.MAX_VALUE; 只有带SIZED特征且尚未遍历或切分时才保证是精确值
long estimateSize();
//返回特征值
int characteristics();
default boolean hasCharacteristics(int characteristics) {
return (characteristics() & characteristics) == characteristics;
}
//返回排序器 集合特征SORTED 可调用此方法
default Comparator<? super T> getComparator() {
throw new IllegalStateException();
}
/** 16进制 -> 二进制 集合特征值**/
// 顺序 (list) forEachRemaining(迭代时受此特征影响)
public static final int ORDERED = 0x00000010; 10000
// 不重复 (set)
public static final int DISTINCT = 0x00000001; 1
// 已排序 (SortedSet): 元素按既定的排序规则排列, 前置特征ORDERED
public static final int SORTED = 0x00000004; 100
// 大小有限(数组 ArrayList)
public static final int SIZED = 0x00000040; 100 0000
// 元素不为null: 数据源保证遍历到的元素不会是null
public static final int NONNULL = 0x00000100; 1 0000 0000
// 不可变: 数据源的结构不能被修改(不能增加、替换或删除元素), 所以遍历过程中数据不会发生变化
public static final int IMMUTABLE = 0x00000400; 100 0000 0000
--------------------- StreamSupport.stream ----------------------
public final class StreamSupport {
public static <T> Stream<T> stream(Spliterator<T> spliterator) {
return new ReferencePipeline.Head<>(spliterator,
// 把集合特征 转换成 流操作信号量
// StreamOpFlag 类似于我们的 sys_config
// 流操作信号量还挺复杂的看不太懂, 但是还挺重要的
StreamOpFlag.fromCharacteristics(spliterator));
}
abstract class ReferencePipeline<P_IN, P_OUT>
extends AbstractPipeline<P_IN, P_OUT, Stream<P_OUT>>
implements Stream<P_OUT> {
ReferencePipeline(Spliterator<?> source, int sourceFlags) {
super(source, sourceFlags);
}
static class Head<E_IN, E_OUT> extends ReferencePipeline<E_IN, E_OUT> {
Head(Spliterator<?> source, int sourceFlags) {
super(source, sourceFlags);
}
abstract class AbstractPipeline<E_IN, E_OUT, S extends BaseStream<E_OUT, S>>
extends PipelineHelper<E_OUT> implements BaseStream<E_OUT, S> {
// 任何一个管道都会存储的 管道链头部, head为自身
private final AbstractPipeline sourceStage;
// 上游管道, 源阶段为 null
private final AbstractPipeline previousStage;
// 管道中间阶段的操作标志
protected final int sourceOrOpFlags;
// 下一个阶段, final 阶段为 null
private AbstractPipeline nextStage;
// pipeline调用链的深度
private int depth;
AbstractPipeline(Spliterator<?> source, int sourceFlags) {
this.previousStage = null;
this.sourceSpliterator = source;
this.sourceStage = this;
this.sourceOrOpFlags = sourceFlags & StreamOpFlag.STREAM_MASK;
this.depth = 0;
}
----------------------- 初始化结束 -----------------------
中间阶段
.map(p->p) 无状态中间阶段
.distinct() 有状态中间阶段
abstract class ReferencePipeline<P_IN, P_OUT>
extends AbstractPipeline<P_IN, P_OUT, Stream<P_OUT>>
implements Stream<P_OUT> {
@Override
public final <R> Stream<R> map(Function<? super P_OUT, ? extends R> mapper) {
// StatelessOp 名字很直接 无状态操作
// StreamShape 迭代器里的元素类型: REFERENCE,对象 | INT_VALUE,int | LONG_VALUE,long | DOUBLE_VALUE,double 类型分的蛮奇怪的
return new StatelessOp<P_OUT, R>(this, StreamShape.REFERENCE,
StreamOpFlag.NOT_SORTED | StreamOpFlag.NOT_DISTINCT) {
@Override
// 结束阶段会顺着管道链调用此方法 生成方法调用链,所有管道都會實現這個方法
Sink<P_OUT> opWrapSink(int flags, Sink<R> sink) {
return new Sink.ChainedReference<P_OUT, R>(sink) {
@Override
public void accept(P_OUT u) {
// mapper.apply(u) 函数式接口调用
// downstream.accept(?) 将结果传给下一个Sink
downstream.accept(mapper.apply(u));
abstract static class StatelessOp<E_IN, E_OUT> extends ReferencePipeline<E_IN, E_OUT> {
StatelessOp(AbstractPipeline<?, E_IN, ?> upstream, StreamShape inputShape, int opFlags) {
super(upstream, opFlags);
//这里很重要 追一下
@Override
final boolean opIsStateful() {
//return false 无状态, return true 有状态
return false;
#super
ReferencePipeline(AbstractPipeline<?, P_IN, ?> upstream, int opFlags) {
super(upstream, opFlags);
abstract class AbstractPipeline<E_IN, E_OUT, S extends BaseStream<E_OUT, S>>
extends PipelineHelper<E_OUT> implements BaseStream<E_OUT, S> {
AbstractPipeline(AbstractPipeline<?, E_IN, ?> previousStage, int opFlags) {
previousStage.nextStage = this;
this.previousStage = previousStage;
this.sourceStage = previousStage.sourceStage;
if (opIsStateful())
sourceStage.sourceAnyStateful = true;
this.depth = previousStage.depth + 1;
}
interface Sink<T> extends Consumer<T> {
static abstract class ChainedReference<T, E_OUT> implements Sink<T> {
protected final Sink<? super E_OUT> downstream;
---------------------------- sink -------------------------
interface Sink<T> extends Consumer<T>
.distinct()
abstract class ReferencePipeline<P_IN, P_OUT>
extends AbstractPipeline<P_IN, P_OUT, Stream<P_OUT>>
implements Stream<P_OUT> {
@Override
public final Stream<P_OUT> distinct() {
return DistinctOps.makeRef(this);
}
abstract static class StatefulOp<E_IN, E_OUT> extends ReferencePipeline<E_IN, E_OUT> {
// 添加有状态的中间操作
(AbstractPipeline<?, E_IN, ?> upstream, StreamShape inputShape, int opFlags) {
super(upstream, opFlags);
//元素形状不能改变
assert upstream.getOutputShape() == inputShape;
// 有状态中间操作
final class DistinctOps {
static <T> ReferencePipeline<T,T> makeRef(AbstractPipeline<?,T,?> upstream) {
// 增加 "distinct" operation 到提供的stream 并返回新的stream
return new ReferencePipeline.StatefulOp<T, T>( upstream, StreamShape.REFERENCE,
StreamOpFlag.IS_DISTINCT | StreamOpFlag.NOT_SIZED) {
@Override
Sink<T> opWrapSink(int flags, Sink<T> sink) {
// 这里根据StreamOpFlag做了优化: 上游已知去重时直接返回sink; 否则用seen集合记住出现过的元素 —— 这份跨元素保留的状态才是"有状态"的含义
if (StreamOpFlag.DISTINCT.isKnown(flags)) {
return sink;
} else {
return new Sink.ChainedReference<T, T>(sink) {
Set<T> seen;
@Override
public void accept(T t) {
if (!seen.contains(t)) {
seen.add(t);
downstream.accept(t);
结算阶段
.count();
abstract class ReferencePipeline<P_IN, P_OUT>
extends AbstractPipeline<P_IN, P_OUT, Stream<P_OUT>>
implements Stream<P_OUT> {
@Override
public final long count() {
return mapToLong(e -> 1L).sum();
}
@Override
public final LongStream mapToLong(ToLongFunction<? super P_OUT> mapper) {
//LongPipeline 长整形通道
return new LongPipeline.StatelessOp<P_OUT>(this, StreamShape.REFERENCE,
StreamOpFlag.NOT_SORTED | StreamOpFlag.NOT_DISTINCT) {
@Override
Sink<P_OUT> opWrapSink(int flags, Sink<Long> sink) {
return new Sink.ChainedReference<P_OUT, Long>(sink) {
@Override
public void accept(P_OUT u) {
//计数
downstream.accept(mapper.applyAsLong(u));
abstract class LongPipeline<E_IN>
extends AbstractPipeline<E_IN, Long, LongStream> implements LongStream {
@Override
#sum()
public final long sum() {
return reduce(0, Long::sum);
@Override
#reduce()
public final long reduce(long identity, LongBinaryOperator op) {
//op = Long::sum
return evaluate(ReduceOps.makeLong(identity, op));
#evaluate()
final <R> R evaluate(TerminalOp<E_OUT, R> terminalOp) {
return terminalOp.evaluateSequential(this,
//获取管道头
sourceSpliterator(terminalOp.getOpFlags()));
final class ReduceOps {
#ReduceOps.makeLong
// TerminalOp 终止操作(结算阶段构建)
public static TerminalOp<Long, Long>
makeLong(long identity, LongBinaryOperator operator) {
class ReducingSink implements AccumulatingSink<Long, Long, ReducingSink>, Sink.OfLong {
private long state;
@Override
public void accept(long t) {
//operator = Long::sum
state = operator.applyAsLong(state, t);
}
}
return new ReduceOp<Long, Long, ReducingSink>(StreamShape.LONG_VALUE) {
@Override
#makeSink()
public ReducingSink makeSink() {
return new ReducingSink();
private static abstract class ReduceOp<T, R, S extends AccumulatingSink<T, R, S>> implements TerminalOp<T, R> {
ReduceOp(StreamShape shape) {
inputShape = shape;
}
public abstract S makeSink();
@Override
#terminalOp.evaluateSequential
public <P_IN> R evaluateSequential(PipelineHelper<T> helper, Spliterator<P_IN> spliterator) {
//wrapAndCopyInto 构建方法链(#makeSink())并调用
return helper.wrapAndCopyInto(makeSink(), spliterator).get();
}
abstract class AbstractPipeline<E_IN, E_OUT, S extends BaseStream<E_OUT, S>>
extends PipelineHelper<E_OUT> implements BaseStream<E_OUT, S> {
@Override
final <P_IN, S extends Sink<E_OUT>> S wrapAndCopyInto(S sink, Spliterator<P_IN> spliterator) {
//wrapSink(sink) 创建方法链
//spliterator 源数据
copyInto(wrapSink(sink), spliterator);
return sink;
}
@Override
final <P_IN> Sink<P_IN> wrapSink(Sink<E_OUT> sink) {
//sink(ReducingSink) 由上文的makesink 方法构建
//AbstractPipeline.this 也就是调用.count时创建的Pipeline
for (AbstractPipeline p=AbstractPipeline.this; p.depth > 0; p=p.previousStage) {
//创建方法链
sink = p.opWrapSink(p.previousStage.combinedFlags, sink);
}
return (Sink<P_IN>) sink;
}
@Override
#copyInto
final <P_IN> void copyInto(Sink<P_IN> wrappedSink, Spliterator<P_IN> spliterator) {
copyIntoWithCancel(wrappedSink, spliterator);
}
@Override
@SuppressWarnings("unchecked")
final <P_IN> void copyIntoWithCancel(Sink<P_IN> wrappedSink, Spliterator<P_IN> spliterator) {
@SuppressWarnings({"rawtypes","unchecked"})
AbstractPipeline p = AbstractPipeline.this;
while (p.depth > 0) {
p = p.previousStage;
}
p.forEachWithCancel(spliterator, wrappedSink);
}
abstract class LongPipeline<E_IN>
extends AbstractPipeline<E_IN, Long, LongStream>
implements LongStream {
@Override
final void forEachWithCancel(Spliterator<Long> spliterator, Sink<Long> sink) {
// 初始化阶段的Spliterator? 对集合进行了高度的抽象
Spliterator.OfLong spl = adapt(spliterator);
LongConsumer adaptedSink = adapt(sink);
// spl.tryAdvance(adaptedSink) 调用方法链
do { } while (spl.tryAdvance(adaptedSink));
}
private static Spliterator.OfLong adapt(Spliterator<Long> s) {
return (Spliterator.OfLong) s;
private static LongConsumer adapt(Sink<Long> sink) {
return (LongConsumer) sink;
class StreamSpliterators {
static final class LongWrappingSpliterator<P_IN> implements Spliterator.OfLong {
@Override
#spl.tryAdvance(adaptedSink)
public boolean tryAdvance(LongConsumer consumer) {
boolean hasNext = doAdvance();
if (hasNext)
//触发方法链, 由此处可见 元素是逐个依次通过整条方法链 而不是所有元素在一个方法执行完毕再传递给下一个, 这样写效率会高很多
consumer.accept(buffer.get(nextToConsume));
return hasNext;
}