Flink - Asynchronous I/O

https://docs.google.com/document/d/1Lr9UYXEz6s6R_3PWg3bZQLF3upGaNEkc0rQCFSzaYDI/edit

 

// create the original stream
// (the `...` is a placeholder in this example; any DataStream<String> source goes here)
DataStream<String> stream = ...;

// apply the async I/O transformation
// Arguments, in order: the input stream, the user's async function, the timeout value (1000),
// the unit of that timeout (milliseconds), and the capacity of the operator's element queue (100).
// unorderedWait selects the UNORDERED output mode; orderedWait is the ORDERED counterpart.
DataStream<Tuple2<String, String>> resultStream =
    AsyncDataStream.unorderedWait(stream, new AsyncDatabaseRequest(), 1000, TimeUnit.MILLISECONDS, 100);

 

AsyncDataStream

有一组接口,

unorderedWait
orderedWait

 

最终都是调用到,

addOperator(in, func, timeUnit.toMillis(timeout), capacity, OutputMode.ORDERED)

是否是ordered,只是最后一个参数不同

    /**
     * Wraps the given async function in an {@code AsyncWaitOperator} and appends that operator
     * to the input stream as a transformation named "async wait operator".
     *
     * @param in input stream the operator is attached to
     * @param func user async function; it is passed through the execution environment's
     *             {@code clean} before being handed to the operator
     * @param timeout timeout forwarded to the operator unchanged
     * @param bufSize buffer size forwarded to the operator unchanged
     * @param mode output mode forwarded to the operator unchanged
     * @param <IN> element type of the input stream
     * @param <OUT> element type of the resulting stream
     * @return the stream produced by the transformation
     */
    private static <IN, OUT> SingleOutputStreamOperator<OUT> addOperator(
            DataStream<IN> in,
            AsyncFunction<IN, OUT> func,
            long timeout,
            int bufSize,
            OutputMode mode) {

        // Derive the output type information from the async function and the input type.
        final TypeInformation<OUT> resultType = TypeExtractor.getUnaryOperatorReturnType(
            func,
            AsyncFunction.class,
            false,
            true,
            in.getType(),
            Utils.getCallLocationName(),
            true);

        // Build the operator around the cleaned user function.
        final AsyncWaitOperator<IN, OUT> asyncOperator =
            new AsyncWaitOperator<>(in.getExecutionEnvironment().clean(func), timeout, bufSize, mode);

        return in.transform("async wait operator", resultType, asyncOperator);
    }

 

AsyncWaitOperator

setup主要是初始化任务队列

    /**
     * Sets up the operator: delegates to the superclass, creates the single-threaded executor
     * and instantiates the stream element queue that matches the configured output mode.
     *
     * @throws IllegalStateException if the output mode is neither ORDERED nor UNORDERED
     */
    @Override
    public void setup(StreamTask<?, ?> containingTask, StreamConfig config, Output<StreamRecord<OUT>> output) {
        super.setup(containingTask, config, output);

        // One single-threaded executor; it is handed to the queue created below.
        executor = Executors.newSingleThreadExecutor();

        // The queue implementation is the only thing that differs between the output modes.
        switch (outputMode) {
            case ORDERED:
                queue = new OrderedStreamElementQueue(capacity, executor, this);
                break;
            case UNORDERED:
                queue = new UnorderedStreamElementQueue(capacity, executor, this);
                break;
            default:
                throw new IllegalStateException("Unknown async mode: " + outputMode + '.');
        }
    }

 

看下,OrderedStreamElementQueue

/**
 * Excerpt of the ordered stream element queue: entries are appended at the tail and are only
 * handed out from the head once the head entry is done.
 *
 * <p>NOTE(review): this is an abridged listing. The declarations of {@code lock},
 * {@code headIsCompleted}, {@code notFull} and {@code executor}, the constructor, and the
 * catch clause inside {@code addEntry} are not shown here.
 */
public class OrderedStreamElementQueue implements StreamElementQueue {


    /** Queue for the inserted StreamElementQueueEntries. */
    private final ArrayDeque<StreamElementQueueEntry<?>> queue; // holds every element; entries are appended at the tail


    /**
     * Blocks until the head entry of the queue is done and returns it without removing it.
     *
     * @return the completed head entry (still in the queue afterwards)
     * @throws InterruptedException if interrupted while acquiring the lock or while waiting
     */
    @Override
    public AsyncResult peekBlockingly() throws InterruptedException { // blocking peek of the completed head
        lock.lockInterruptibly();

        try {
            while (queue.isEmpty() || !queue.peek().isDone()) { // keep waiting while the queue is empty or its head is not done
                headIsCompleted.await(); // wait on headIsCompleted until the head entry completes
            }

            return queue.peek(); // head is done: return it; peek does not remove the element, so a separate poll is needed
        } finally {
            lock.unlock();
        }
    }


    /**
     * Blocks until the head entry of the queue is done, then removes and returns it.
     *
     * @return the completed head entry, removed from the queue
     * @throws InterruptedException if interrupted while acquiring the lock or while waiting
     */
    @Override
    public AsyncResult poll() throws InterruptedException { // removal is a separate step from peeking
        lock.lockInterruptibly();

        try {
            while (queue.isEmpty() || !queue.peek().isDone()) { // wait while the head entry is not done
                headIsCompleted.await();
            }

            notFull.signalAll(); // an element is about to be removed, so the queue cannot be full: signal notFull

            return queue.poll();
        } finally {
            lock.unlock();
        }
    }


    /**
     * Appends the entry to the tail of the queue and registers a completion callback on it.
     * According to the original note, put and tryPut both end up calling this method
     * (those callers are not part of this excerpt).
     */
    private <T> void addEntry(StreamElementQueueEntry<T> streamElementQueueEntry) { // shared insertion path

        queue.addLast(streamElementQueueEntry); // append to the tail of the queue

        streamElementQueueEntry.onComplete(new AcceptFunction<StreamElementQueueEntry<T>>() { // register a completion callback that invokes onCompleteHandler
            @Override
            public void accept(StreamElementQueueEntry<T> value) {
                try {
                    onCompleteHandler(value);
                } 
            }
        }, executor);
    }

    /**
     * Completion callback: if the head of the queue is done, signals everything waiting on
     * {@code headIsCompleted}. The passed entry itself is not inspected; only the head is.
     *
     * @throws InterruptedException if interrupted while acquiring the lock
     */
    private void onCompleteHandler(StreamElementQueueEntry<?> streamElementQueueEntry) throws InterruptedException {
        lock.lockInterruptibly();

        try {
            if (!queue.isEmpty() && queue.peek().isDone()) {
                headIsCompleted.signalAll(); // wake up the waiters: the head entry is complete
            }
        } finally {
            lock.unlock();
        }
    }
}

对于queue主要就是,读取操作

这里取是分两步,先peek,再poll

 

open,主要是处理从snapshot中恢复的数据

并启动emitter

    /**
     * Opens the operator: first re-processes any stream elements recovered from state, then
     * creates the emitter and starts it on a dedicated daemon thread.
     *
     * @throws IllegalStateException if a recovered element is neither a record, a watermark
     *                               nor a latency marker
     */
    @Override
    public void open() throws Exception {
        super.open();

        // The emitter thread starts right after the recovered elements have been put back into
        // the queue, so the operators of the chain have to be opened from the tail operator
        // towards the head operator.
        if (recoveredStreamElements != null) {
            for (StreamElement recovered : recoveredStreamElements.get()) {
                if (recovered.isRecord()) {
                    processElement(recovered.<IN>asRecord());
                    continue;
                }
                if (recovered.isWatermark()) {
                    processWatermark(recovered.asWatermark());
                    continue;
                }
                if (recovered.isLatencyMarker()) {
                    processLatencyMarker(recovered.asLatencyMarker());
                    continue;
                }
                throw new IllegalStateException("Unknown record type " + recovered.getClass() +
                    " encountered while opening the operator.");
            }
            // The recovered elements have been replayed; drop the reference.
            recoveredStreamElements = null;
        }

        // Build the emitter that will consume the queue.
        emitter = new Emitter<>(checkpointingLock, output, queue, this);

        // Run the emitter on its own daemon thread.
        final String emitterThreadName = "AsyncIO-Emitter-Thread (" + getOperatorName() + ')';
        emitterThread = new Thread(emitter, emitterThreadName);
        emitterThread.setDaemon(true);
        emitterThread.start();
    }

 

Emitter

    @Override
    public void run() {
        try {
            while (running) {
                LOG.debug("Wait for next completed async stream element result.");
                AsyncResult streamElementEntry = streamElementQueue.peekBlockingly();

                output(streamElementEntry);
            }

从queue中peek数据,对于上面OrderedStreamElementQueue,只有完成的数据会被peek到

    /**
     * Emits the result of one async stream element and then removes that element from the
     * queue.
     *
     * <p>NOTE(review): abridged listing. The watermark branch and the catch clause of the
     * try block are elided here.
     *
     * @param asyncResult the element handed over by the emitter loop
     * @throws InterruptedException declared by the signature; {@code streamElementQueue.poll()}
     *                              is one call here that can throw it
     */
    private void output(AsyncResult asyncResult) throws InterruptedException {
        if (asyncResult.isWatermark()) {
            // ...... (watermark handling elided in this excerpt)
        } else {
            AsyncCollectionResult<OUT> streamRecordResult = asyncResult.asResultCollection();

            synchronized (checkpointLock) { // collecting the data is done while holding the checkpoint lock
                LOG.debug("Output async stream element collection result.");

                try {
                    Collection<OUT> resultCollection = streamRecordResult.get();

                    if (resultCollection != null) {
                        for (OUT result : resultCollection) {
                            timestampedCollector.collect(result); // this is where the data is actually emitted
                        }
                    }
                } 
                
                // remove the peeked element from the async collector buffer so that it is no longer
                // checkpointed
                streamElementQueue.poll(); // emitting is finished, so the element can be deleted from the queue

                // notify the main thread that there is again space left in the async collector
                // buffer
                checkpointLock.notifyAll();
            }
        }
    }

可以看到当数据被emit后,才会从queue删除掉

 

processElement

    /**
     * Handles one incoming record: wraps it in a queue entry, optionally arms a timeout timer
     * for it, adds the entry to the queue and finally invokes the user's async function.
     *
     * @param element the incoming stream record
     * @throws Exception propagated from the calls made here
     */
    @Override
    public void processElement(StreamRecord<IN> element) throws Exception {
        // Wrap the record in a queue entry; the entry is also what the user function receives.
        final StreamRecordQueueEntry<OUT> pendingEntry = new StreamRecordQueueEntry<>(element);

        if (timeout > 0L) {
            // Deadline for this entry, relative to the current processing time.
            final long deadline = timeout + getProcessingTimeService().getCurrentProcessingTime();

            // When the timer fires, the entry is completed with a timeout exception.
            final ScheduledFuture<?> timeoutTimer = getProcessingTimeService().registerTimer(
                deadline,
                new ProcessingTimeCallback() {
                    @Override
                    public void onProcessingTime(long timestamp) throws Exception {
                        pendingEntry.collect(
                            new TimeoutException("Async function call has timed out."));
                    }
                });

            // Once the entry completes, cancel its timer, so that only entries that are still
            // pending can hit the timeout.
            pendingEntry.onComplete(new AcceptFunction<StreamElementQueueEntry<Collection<OUT>>>() {
                @Override
                public void accept(StreamElementQueueEntry<Collection<OUT>> completedEntry) {
                    timeoutTimer.cancel(true);
                }
            }, executor);
        }

        // Put the entry into the queue before handing it to user code.
        addAsyncBufferEntry(pendingEntry);

        // Invoke the user-defined asyncInvoke with the record value and the entry.
        userFunction.asyncInvoke(element.getValue(), pendingEntry);
    }

 

StreamRecordQueueEntry

/**
 * Queue entry for a stream record. It acts both as the collector that receives the outcome of
 * the async operation and as the holder of that outcome, backed by a single
 * {@link CompletableFuture}.
 *
 * <p>NOTE(review): abridged listing. The constructor that initialises {@code resultFuture}
 * and the remaining members are not shown here.
 */
public class StreamRecordQueueEntry<OUT> extends StreamElementQueueEntry<Collection<OUT>>
    implements AsyncCollectionResult<OUT>, AsyncCollector<OUT> {

    /** Future containing the collection result. */
    private final CompletableFuture<Collection<OUT>> resultFuture;

    /**
     * Completes the result future normally with the given collection.
     *
     * @param result the result collection of the async operation
     */
    @Override
    public void collect(Collection<OUT> result) {
        resultFuture.complete(result);
    }

    /**
     * Completes the result future exceptionally with the given error.
     *
     * @param error the failure of the async operation
     */
    @Override
    public void collect(Throwable error) {
        resultFuture.completeExceptionally(error);
    }
}

前面在emitter里面判断,entry是否做完就看,resultFuture是否isDone

可以看到resultFuture只有在collect的时候才会被complete

当resultFuture.complete时,onComplete callback会被触发,

这个callback在OrderedStreamElementQueue.addEntry被注册上来,做的事也就是告诉大家headIsCompleted;这样随后Emitter可以把结果数据emit出去

 

最终调用到用户定义的,

userFunction.asyncInvoke

/**
 * Sends the query for {@code str} through the client and, once the reply is available,
 * forwards a single {@code (str, reply)} tuple to the collector.
 *
 * @param str the input value; it is used as the query and as the first tuple field
 * @param asyncCollector collector that receives the result collection
 */
@Override
    public void asyncInvoke(final String str, final AsyncCollector<Tuple2<String, String>> asyncCollector) throws Exception {

        // Fire the request; the client hands back a future instead of blocking.
        Future<String> pendingReply = client.query(str);

        // When the reply arrives, pass it on to the collector as a one-element collection.
        pendingReply.thenAccept((String reply) ->
            asyncCollector.collect(Collections.singleton(new Tuple2<>(str, reply))));
    }
}

 

首先client必须是异步的,如果不是,没法返回Future,那需要自己用连接池实现

主要逻辑就是在resultFuture完成后,调用asyncCollector.collect把结果返回给element

posted on 2017-06-15 17:55  fxjwind  阅读(1321)  评论(0)  编辑  收藏  举报