How asyncio handles IO events

Time:2020-5-22

How does asyncio use event loops to monitor and handle IO events? See the source code:

# asyncio.streams.py
async def open_connection(host=None, port=None, *,
                          loop=None, limit=_DEFAULT_LIMIT, **kwds):
    """Open a TCP connection and return a ``(reader, writer)`` pair.

    A wrapper around ``loop.create_connection()``.

    *loop* falls back to ``events.get_event_loop()`` when it is None.
    *limit* is handed to the StreamReader.  All remaining keyword
    arguments are forwarded unchanged to ``loop.create_connection()``.
    """
    if loop is None:
        loop = events.get_event_loop()
    reader = StreamReader(limit=limit, loop=loop)
    # The protocol instance wraps the reader; it provides the methods that
    # set the reader's read channel (transport), etc.
    protocol = StreamReaderProtocol(reader, loop=loop)
    # Create the TCP connection; the returned transport is the write channel.
    transport, _ = await loop.create_connection(
        lambda: protocol, host, port, **kwds)
    # Create the writer instance on top of the transport.
    writer = StreamWriter(transport, protocol, reader, loop)
    return reader, writer

The `loop.create_connection` method:

# asyncio.base_events.py
async def create_connection(
        self, protocol_factory, host=None, port=None,
        *, ssl=None, family=0,
        proto=0, flags=0, sock=None,
        local_addr=None, server_hostname=None,
        ssl_handshake_timeout=None):
    """Connect to a TCP server.

    Create a streaming transport connection to a given Internet host and
    port: socket family AF_INET or socket.AF_INET6 depending on host (or
    family if specified), socket type SOCK_STREAM. protocol_factory must be
    a callable returning a protocol instance.

    This method is a coroutine which will try to establish the connection
    in the background. When successful, the coroutine returns a
    (transport, protocol) pair.

    Raises ValueError for inconsistent argument combinations (see the
    checks below) and OSError when address resolution returns nothing or
    every connection attempt fails.
    """
    if server_hostname is not None and not ssl:
        raise ValueError('server_hostname is only meaningful with ssl')

    if server_hostname is None and ssl:
        # Use host as default for server_hostname. It is an error
        # if host is empty or not set, e.g. when an
        # already-connected socket was passed or when only a port
        # is given. To avoid this error, you can pass
        # server_hostname='' -- this will bypass the hostname
        # check. (This also means that if host is a numeric
        # IP/IPv6 address, we will attempt to verify that exact
        # address; this will probably fail, but it is possible to
        # create a certificate for a specific IP address, so we
        # don't judge it here.)
        if not host:
            raise ValueError('You must set server_hostname '
                             'when using ssl without a host')
        server_hostname = host

    # The timeout parameter can only be used in SSL mode.
    if ssl_handshake_timeout is not None and not ssl:
        raise ValueError(
            'ssl_handshake_timeout is only meaningful with ssl')

    if host is not None or port is not None:
        # host/port cannot be passed at the same time as a socket.
        if sock is not None:
            raise ValueError(
                'host/port and sock can not be specified at the same time')

        # Resolve (host, port) into a list of address infos; each entry is
        # unpacked below as (family, type, proto, cname, address).
        infos = await self._ensure_resolved(
            (host, port), family=family,
            type=socket.SOCK_STREAM, proto=proto, flags=flags, loop=self)
        if not infos:
            raise OSError('getaddrinfo() returned empty list')

        # If a local address (IP, port) was passed, resolve it as well.
        if local_addr is not None:
            laddr_infos = await self._ensure_resolved(
                local_addr, family=family,
                type=socket.SOCK_STREAM, proto=proto,
                flags=flags, loop=self)
            if not laddr_infos:
                raise OSError('getaddrinfo() returned empty list')

        # Try each resolved address in turn; the first successful connect
        # wins, and the errors of failed attempts are collected.
        exceptions = []
        for family, type, proto, cname, address in infos:
            try:
                sock = socket.socket(family=family, type=type, proto=proto)
                sock.setblocking(False)  # Use a non-blocking socket.
                # If a local IP and port were passed, bind the socket to
                # the first local address that accepts the bind.
                if local_addr is not None:
                    for _, _, _, _, laddr in laddr_infos:
                        try:
                            sock.bind(laddr)
                            break
                        except OSError as exc:
                            msg = (
                                f'error while attempting to bind on '
                                f'address {laddr!r}: '
                                f'{exc.strerror.lower()}'
                            )
                            exc = OSError(exc.errno, msg)
                            exceptions.append(exc)
                    else:
                        # No local address could be bound: give up on this
                        # remote address and try the next one.
                        sock.close()
                        sock = None
                        continue
                if self._debug:
                    logger.debug("connect %r to %r", sock, address)
                # Initiate the TCP connection.
                await self.sock_connect(sock, address)
            except OSError as exc:
                if sock is not None:
                    sock.close()
                exceptions.append(exc)
            except:
                # Deliberately bare: close the socket on any other error
                # before re-raising it unchanged.
                if sock is not None:
                    sock.close()
                raise
            else:
                break
        else:
            # Every address failed.
            if len(exceptions) == 1:
                raise exceptions[0]
            else:
                # If they all have the same str(), raise one.
                model = str(exceptions[0])
                if all(str(exc) == model for exc in exceptions):
                    raise exceptions[0]
                # Raise a combined exception so the user can see all
                # the various error messages.
                raise OSError('Multiple exceptions: {}'.format(
                    ', '.join(str(exc) for exc in exceptions)))

    else:
        if sock is None:
            raise ValueError(
                'host and port was not specified and no sock specified')
        if sock.type != socket.SOCK_STREAM:
            # We allow AF_INET, AF_INET6, AF_UNIX as long as they
            # are SOCK_STREAM.
            # We support passing AF_UNIX sockets even though we have
            # a dedicated API for that: create_unix_connection.
            # Disallowing AF_UNIX in this method, breaks backwards
            # compatibility.
            raise ValueError(
                f'A Stream Socket was expected, got {sock!r}')

    # Create the connection transport; it serves as the write channel.
    # Article note (not visible in this excerpt): _SelectorSocketTransport
    # schedules self._loop.call_soon(self._protocol.connection_made, self),
    # which is how the transport is bound to the protocol; self._protocol
    # is the protocol produced by protocol_factory.
    transport, protocol = await self._create_connection_transport(
        sock, protocol_factory, ssl, server_hostname,
        ssl_handshake_timeout=ssl_handshake_timeout)
    if self._debug:
        # Get the socket from the transport because SSL transport closes
        # the old socket and creates a new SSL socket
        sock = transport.get_extra_info('socket')
        logger.debug("%r connected to %s:%r: (%r, %r)",
                     sock, host, port, transport, protocol)
    # Return the write channel (transport) and the protocol object.
    return transport, protocol

Write events are handled asynchronously

Next, take a quick look at the `StreamWriter` class:

class StreamWriter:
    """Excerpt of the stream writer: forwards writes to its transport.

    Only the methods discussed in the article are shown; the rest of the
    class (including the constructor) is elided.
    """
    ...
    def write(self, data):
        # Call the transport's write(), which either stores the data in the
        # transport's buffer or sends it directly through the socket.
        self._transport.write(data)

    async def wait_closed(self):
        """Wait on the protocol's ``_closed`` awaitable."""
        await self._protocol._closed

    async def drain(self):
        """Flush the write buffer.

        The intended use is to write

          w.write(data)
          await w.drain()

        Re-raises the exception stored on the reader, if there is one.
        """
        if self._reader is not None:
            exc = self._reader.exception()
            if exc is not None:
                raise exc
        if self._transport.is_closing():
            # Yield to the event loop once before checking the protocol.
            await sleep(0, loop=self._loop)
        # Per the article: while writing is paused this creates a future and
        # waits until the pause is lifted.
        await self._protocol._drain_helper()

The writer has only two coroutine methods: `wait_closed` and `drain`.

# asyncio.selector_events.BaseSelectorEventLoop
    def _add_writer(self, fd, callback, *args):
        """Register *callback* as the write handler for *fd*.

        The selector entry's data is a ``(reader_handle, writer_handle)``
        pair.  A new entry is created when *fd* is not registered yet;
        otherwise EVENT_WRITE is added to the existing mask, the reader
        handle is kept, and the previous writer handle (if any) is
        cancelled.
        """
        self._check_closed()
        new_handle = events.Handle(callback, args, self, None)
        selector = self._selector
        try:
            registered = selector.get_key(fd)
        except KeyError:
            # First registration for this FD: there is no read handle yet.
            selector.register(fd, selectors.EVENT_WRITE, (None, new_handle))
            return

        current_mask = registered.events
        read_handle, stale_writer = registered.data
        selector.modify(fd, current_mask | selectors.EVENT_WRITE,
                        (read_handle, new_handle))
        if stale_writer is not None:
            stale_writer.cancel()

# asyncio.selector_events._SelectorSocketTransport
   def write(self, data):
        """Send *data* now if possible; buffer whatever cannot be sent.

        If the buffer is empty, a direct ``send()`` on the socket is tried
        first.  Data that could not be sent is appended to the buffer, and
        ``_write_ready`` is registered as the write handler for the FD.
        """
        # (Argument checks are omitted from this excerpt.)
        if not self._buffer:
            # The buffer is empty, so try to send directly via the socket.
            # Optimization: try to send now.
            try:
                n = self._sock.send(data)
            except (BlockingIOError, InterruptedError):
                pass
            except Exception as exc:
                self._fatal_error(exc, 'Fatal write error on socket transport')
                return
            else:
                data = data[n:]
                if not data:
                    # Everything was sent directly through the socket; done.
                    return
            # Not all was written; register write handler.
            # This creates a write handle and registers the FD with the
            # selector (or adds write interest to an existing registration).
            self._loop._add_writer(self._sock_fd, self._write_ready)

        # Add it to the buffer.
        self._buffer.extend(data)
        # Pause the protocol's writing if the buffer has grown too large
        # (the article says 64M by default; CPython's default high-water
        # mark is believed to be 64 KiB -- verify).
        self._maybe_pause_protocol()

    def _write_ready(self):
        """Callback run when the FD is writable: send buffered data.

        Sends as much of the buffer as the socket accepts.  Once the buffer
        is empty the write handler is removed, a pending ``_empty_waiter``
        is resolved, and a pending close or EOF is carried out.
        """
        assert self._buffer, 'Data should not be empty'

        if self._conn_lost:
            return
        try:
            # Send the data held in the buffer.
            n = self._sock.send(self._buffer)
        except (BlockingIOError, InterruptedError):
            pass
        except Exception as exc:
            self._loop._remove_writer(self._sock_fd)
            self._buffer.clear()
            self._fatal_error(exc, 'Fatal write error on socket transport')
            if self._empty_waiter is not None:
                self._empty_waiter.set_exception(exc)
        else:
            if n:
                del self._buffer[:n]
            self._maybe_resume_protocol()  # May append to buffer.
            if not self._buffer:
                self._loop._remove_writer(self._sock_fd)
                if self._empty_waiter is not None:
                    self._empty_waiter.set_result(None)
                if self._closing:
                    self._call_connection_lost(None)
                elif self._eof:
                    self._sock.shutdown(socket.SHUT_WR)

When `StreamWriter.write` is called (which calls the transport's `write`):
The transport first tries to send the data directly through the socket.
If not all of the data can be sent, the remainder is put into the buffer, and a write handler is created and registered with the selector for the FD (or the FD's existing registration is updated); when the selector finds that the FD is writable, the handler is added to the event loop. If the buffer grows past its high-water mark (the original note says > 64M; CPython's default is 64 KiB), `FlowControlMixin.pause_writing()` is called to enter the paused state. While paused, calling the writer's `drain` method creates a future and waits for it to complete. Once the buffer has been flushed, `FlowControlMixin.resume_writing()` releases the paused state and sets the future's result; data can then be written to the buffer again.

Asynchronous processing of read events

Then take a look at the `StreamReader` class:

class StreamReader():
    """Excerpt of the stream reader: buffers incoming data for readers.

    Only the methods discussed in the article are shown; the constructor
    and other methods (e.g. ``_wakeup_waiter``) are elided.
    """
    ...
    def set_transport(self, transport):
        """Set the read channel (transport); it may only be set once."""
        assert self._transport is None, 'Transport already set'
        self._transport = transport

    def feed_data(self, data):
        """Append *data* to the buffer and wake up a waiting reader.

        Per the article, the transport calls ``protocol.data_received()``,
        which passes the socket data on to this method.
        """
        assert not self._eof, 'feed_data after feed_eof'

        if not data:
            return

        self._buffer.extend(data)  # Store the data in the buffer.
        self._wakeup_waiter()  # Wake up the waiting coroutine, if any.

        # If the buffer holds more than twice the limit, ask the transport
        # to pause reading.
        if (self._transport is not None and
                not self._paused and
                len(self._buffer) > 2 * self._limit):
            try:
                self._transport.pause_reading()
            except NotImplementedError:
                # The transport can't be paused.
                # We'll just have to buffer all data.
                # Forget the transport so we don't keep trying.
                self._transport = None
            else:
                self._paused = True

    async def _wait_for_data(self, func_name):
        """Wait until feed_data() or feed_eof() is called.

        If stream was paused, automatically resume it.

        Resuming calls the transport's resume_reading(); the coroutine then
        waits on a new future stored in ``self._waiter``.
        """
        if self._waiter is not None:
            raise RuntimeError(
                f'{func_name}() called while another coroutine is '
                f'already waiting for incoming data')

        assert not self._eof, '_wait_for_data after EOF'

        # Waiting for data while paused will make deadlock, so prevent it.
        # This is essential for readexactly(n) for case when n > self._limit.
        if self._paused:
            # Currently paused: clear the flag and resume reading.
            self._paused = False
            # resume_reading() restores the transport's reading state and
            # then registers the FD with the selector (or modifies the
            # existing registration) for read events.
            self._transport.resume_reading()

        self._waiter = self._loop.create_future()
        try:
            await self._waiter
        finally:
            self._waiter = None

    async def read(self, n=-1):
        ...
        # If the buffer is empty and EOF was not reached, wait for data.
        if not self._buffer and not self._eof:
            await self._wait_for_data('read')
        ...
# asyncio.selector_events.BaseSelectorEventLoop
    def _add_reader(self, fd, callback, *args):
        """Register *callback* as the read handler for *fd*.

        The selector entry's data is a ``(reader_handle, writer_handle)``
        pair.  A new entry is created when *fd* is not registered yet;
        otherwise EVENT_READ is added to the existing mask, the writer
        handle is kept, and the previous reader handle (if any) is
        cancelled.
        """
        self._check_closed()
        # Create the event handle.  Per the article, for socket transports
        # the callback reads data from the socket and stores it in the
        # reader's buffer.
        handle = events.Handle(callback, args, self, None)
        try:
            key = self._selector.get_key(fd)
        except KeyError:
            # Not registered yet: register the FD for read events and attach
            # the new handle; there is no write handle.
            self._selector.register(fd, selectors.EVENT_READ,
                                    (handle, None))
        else:
            # Already registered: add read interest, replace the read handle
            # and keep the existing write handle.
            mask, (reader, writer) = key.events, key.data
            self._selector.modify(fd, mask | selectors.EVENT_READ,
                                  (handle, writer))
            if reader is not None:
                reader.cancel()

# selector_events._SelectorSocketTransport
    def resume_reading(self):
        """Resume reading after a pause.

        Does nothing when the transport is closing or is not paused.
        """
        if self._closing or not self._paused:
            return
        self._paused = False  # Leave the paused state.
        # Register the FD for read events (or modify its registration).
        self._add_reader(self._sock_fd, self._read_ready)
        if self._loop.get_debug():
            logger.debug("%r resumes reading", self)

Take `read(10)` as an example, which reads up to 10 bytes of content. If the buffer is currently empty and EOF has not been reached, `await self._wait_for_data('read')` creates a future and waits on it. If reading is currently paused, it first clears the read-pause state and calls the transport's `resume_reading` method (`_SelectorSocketTransport.resume_reading`). That method creates an event handler (two callback functions are involved: `_SelectorSocketTransport._read_ready` and `_read_ready__data_received`, which receive the data, store it in the reader's buffer, and wake up the future created by `_wait_for_data`) and registers (or updates) the FD monitored by the selector. When the selector detects that the FD is readable, the read event handler bound to that FD is added to the current event loop.