linux下使用 epoll优化

可以参考的tcp框架

linux 下使用 epoll优化,你必须知道的概念

level trigger, 和 edge trigger

名字 原理
水平触发(LT) 当进行socket通信的时候,保证了数据的完整输出:进行IO操作的时候,只要缓冲区里还有数据没有处理完,就会一直通知你。
边缘触发(ET) 只有在状态发生变化时(例如有新数据到达),内核才会通知一次,大大减少了内核资源的浪费,提高效率 【如果这一次没有把数据全部取出,就不会再收到通知,所以不能保证数据的完整,也不能保证及时取出所有的数据;通常要配合非阻塞IO循环读取,直到返回 EAGAIN】

关于读事件,如果业务可以保证每次都可以读完,那就可以使用ET,否则使用LT。

对于写事件,如果一次性可以写完那就可以使用LT,写完删除写事件就可以了;但是如果写的数据很大也不在意延迟,那么就可以使用ET,因为ET只有在发送缓冲区从不可写(已满)变为可写时才会再次通知(而LT则是只要发送缓冲区还有空闲空间就会通知就绪,这样每次触发就只能写一点点数据,内核切换开销以及内存拷贝开销过大)

需要注意的细节

  • ET要避免"short read"的问题,比如用户收到100个字节,触发了一次边沿通知,读了50个字节,剩下50个字节没读,但是内核不会再次通知,除非又有新的数据到达。
  • LT: 由于只要还有数据,内核就会不停地通知,程序也就不停地在内核空间和用户空间之间切换,所以占用了大量内核资源。试想一下当有大量数据到来的时候,每次只读取一个字节,这样就会不停的进行切换。内核资源的浪费严重,效率也很低。

学习视频

视频里面有 reactor模式的写法

epoll核心概念

image-20220319200533509

http://static.open-open.com/lib/uploadImg/20140911/20140911103834_133.jpg
名字 概念
所有io,就绪io 所有连接,活跃连接
epoll_wait(epfd,events,len,-1) epfd 表示哪一个epoll,events表示就绪队列,len 表示events最多能容纳的事件数,-1 表示一直阻塞直到有事件就绪
事件 类型
epollin 读事件
epollout 写事件
epollerr 异常,对应的文件描述符有error;
epollpri 紧急数据事件;
epollet 将事件设置为边沿触发方式(默认为水平触发方式)。

c原生api

1
2
3
4
5
#include <sys/epoll.h>

/* Wait for events on the epoll instance epfd.  Up to maxevents ready
   events are stored in the events buffer; timeout is the maximum wait in
   milliseconds (-1 means wait forever).  Returns the number of events
   stored, or -1 with errno set on error.  */
int epoll_wait(int epfd, struct epoll_event *events,
               int maxevents, int timeout);
/* Same as epoll_wait, but the thread's signal mask is temporarily and
   atomically replaced with sigmask for the duration of the call.  */
int epoll_pwait(int epfd, struct epoll_event *events, int maxevents, int timeout, const sigset_t *sigmask);
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
/* Copyright (C) 2002-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
#ifndef        _SYS_EPOLL_H
#define        _SYS_EPOLL_H        1
#include <stdint.h>
#include <sys/types.h>
#include <bits/types/sigset_t.h>
/* Get the platform-dependent flags.  */
#include <bits/epoll.h>
#ifndef __EPOLL_PACKED
# define __EPOLL_PACKED
#endif
/* Event bits stored in the "events" field of struct epoll_event.
   Every enumerator is also defined as a macro with the same name
   (presumably so that availability can be tested with #ifdef -- confirm).
   Only the flags explained in the surrounding article carry a note here;
   for the remaining ones consult epoll_ctl(2).  */
enum EPOLL_EVENTS
  {
    EPOLLIN = 0x001,        /* Read event.  */
#define EPOLLIN EPOLLIN
    EPOLLPRI = 0x002,        /* Urgent data event.  */
#define EPOLLPRI EPOLLPRI
    EPOLLOUT = 0x004,        /* Write event.  */
#define EPOLLOUT EPOLLOUT
    EPOLLRDNORM = 0x040,
#define EPOLLRDNORM EPOLLRDNORM
    EPOLLRDBAND = 0x080,
#define EPOLLRDBAND EPOLLRDBAND
    EPOLLWRNORM = 0x100,
#define EPOLLWRNORM EPOLLWRNORM
    EPOLLWRBAND = 0x200,
#define EPOLLWRBAND EPOLLWRBAND
    EPOLLMSG = 0x400,
#define EPOLLMSG EPOLLMSG
    EPOLLERR = 0x008,        /* The file descriptor has an error.  */
#define EPOLLERR EPOLLERR
    EPOLLHUP = 0x010,
#define EPOLLHUP EPOLLHUP
    EPOLLRDHUP = 0x2000,
#define EPOLLRDHUP EPOLLRDHUP
    /* The four flags below occupy the top bits (28-31) of the mask.  */
    EPOLLEXCLUSIVE = 1u << 28,
#define EPOLLEXCLUSIVE EPOLLEXCLUSIVE
    EPOLLWAKEUP = 1u << 29,
#define EPOLLWAKEUP EPOLLWAKEUP
    EPOLLONESHOT = 1u << 30,
#define EPOLLONESHOT EPOLLONESHOT
    EPOLLET = 1u << 31        /* Edge-triggered delivery; level-triggered is the default.  */
#define EPOLLET EPOLLET
  };
/* Valid opcodes ( "op" parameter ) to issue to epoll_ctl().  */
#define EPOLL_CTL_ADD 1        /* Add a file descriptor to the interface.  */
#define EPOLL_CTL_DEL 2        /* Remove a file descriptor from the interface.  */
#define EPOLL_CTL_MOD 3        /* Change file descriptor epoll_event structure.  */
/* User data carried in struct epoll_event.  This is a union: all members
   share the same storage, so only the member that was last written holds
   a meaningful value.  */
typedef union epoll_data
{
  void *ptr;        /* Arbitrary pointer.  */
  int fd;        /* File descriptor number.  */
  uint32_t u32;        /* 32-bit unsigned value.  */
  uint64_t u64;        /* 64-bit unsigned value.  */
} epoll_data_t;
/* One epoll event record: passed to epoll_ctl() to describe the events
   the caller is interested in plus associated user data, and filled in
   by epoll_wait()/epoll_pwait() for each triggered event.
   __EPOLL_PACKED expands to nothing unless <bits/epoll.h> defined it.  */
struct epoll_event
{
  uint32_t events;        /* Epoll events (bit mask) */
  epoll_data_t data;        /* User data variable */
} __EPOLL_PACKED;
__BEGIN_DECLS
/* Creates an epoll instance.  Returns an fd for the new instance.
   The "size" parameter is a hint specifying the number of file
   descriptors to be associated with the new instance.  The fd
   returned by epoll_create() should be closed with close().  */
extern int epoll_create (int __size) __THROW;
/* Same as epoll_create but with a FLAGS parameter.  The unused SIZE
   parameter has been dropped.  */
extern int epoll_create1 (int __flags) __THROW;
/* Manipulate an epoll instance "epfd".  Returns 0 in case of success,
   -1 in case of error (the "errno" variable will contain the
   specific error code).  The "op" parameter is one of the EPOLL_CTL_*
   constants (EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD).  The "fd"
   parameter is the target of the operation.  The "event" parameter
   describes which events the caller is interested in and any
   associated user data.  */
extern int epoll_ctl (int __epfd, int __op, int __fd,
                      struct epoll_event *__event) __THROW;
/* Wait for events on an epoll instance "epfd".  Returns the number of
   triggered events returned in the "events" buffer, or -1 in case of
   error with the "errno" variable set to the specific error code.  The
   "events" parameter is a buffer that will contain triggered
   events.  The "maxevents" is the maximum number of events to be
   returned (usually the size of "events").  The "timeout" parameter
   specifies the maximum wait time in milliseconds (-1 == infinite).
   This function is a cancellation point and therefore not marked with
   __THROW.  */
extern int epoll_wait (int __epfd, struct epoll_event *__events,
                       int __maxevents, int __timeout);
/* Same as epoll_wait, but the thread's signal mask is temporarily
   and atomically replaced with the one provided as parameter "ss".
   This function is a cancellation point and therefore not marked with
   __THROW.  */
extern int epoll_pwait (int __epfd, struct epoll_event *__events,
                        int __maxevents, int __timeout,
                        const __sigset_t *__ss);
__END_DECLS
#endif /* sys/epoll.h */

golang下的完整示例

参考代码

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
package main

import (
	"fmt"
	"golang.org/x/sys/unix"
	"log"
	"net"
	"os"
	"syscall"
)

const (
	// Disabled: EPOLLET = syscall.EPOLLPRI | syscall.EPOLLIN // only read events are monitored here
	// MaxEpollEvents is the length of the event array that main passes to
	// syscall.EpollWait.
	MaxEpollEvents = 32
)

var buf [8]byte

func echo(epollfd, fd int) {
	//defer syscall.Close(fd)
	//for {
	nbytes, e := syscall.Read(fd, buf[:])
	if nbytes > 0 {
		fmt.Printf(">>> %s", buf)
		_, e := syscall.Write(fd, buf[:nbytes])
		if e != nil {
			log.Printf("error write [%+v]", e)
		}
		fmt.Printf("<<< %s", buf)
	}
	if e != nil {
		log.Printf("error %v\n", e)
	}
	if nbytes == 0 {
		//如果无法读取,说明客户端被关闭了,要移除连接
		if err := syscall.EpollCtl(epollfd, syscall.EPOLL_CTL_DEL, fd, nil); err != nil {
			fmt.Println("epoll_ctl: ", err)
			//os.Exit(1)
		}
		log.Printf("close epoll fd %v\n", fd)

	}
	//}
}
// main runs a single-threaded echo server on 0.0.0.0:50000 driven by a
// level-triggered epoll loop.
//
// The listening socket and every accepted connection are registered with one
// epoll instance. A ready listening socket means there is a connection to
// accept; a ready connection is handed to echo. A failure while setting up
// the listener or the epoll instance terminates the process with status 1;
// a failure on a single connection only drops that connection.
func main() {
	var events [MaxEpollEvents]syscall.EpollEvent

	// SOCK_NONBLOCK is the socket(2) type flag for a non-blocking socket;
	// O_NONBLOCK belongs to open(2)/fcntl(2).
	fd, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM|syscall.SOCK_NONBLOCK, 0)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer syscall.Close(fd)

	addr := syscall.SockaddrInet4{Port: 50000}
	copy(addr.Addr[:], net.ParseIP("0.0.0.0").To4())
	if err = syscall.Bind(fd, &addr); err != nil { // bind address and port
		fmt.Println("bind: ", err)
		os.Exit(1)
	}
	if err = syscall.Listen(fd, 10); err != nil { // start listening
		fmt.Println("listen: ", err)
		os.Exit(1)
	}

	epfd, err := syscall.EpollCreate1(0)
	if err != nil {
		fmt.Println("epoll_create1: ", err)
		os.Exit(1)
	}
	defer syscall.Close(epfd)

	// Register the listening socket for read events.
	listenEvent := syscall.EpollEvent{Events: syscall.EPOLLIN, Fd: int32(fd)}
	if err = syscall.EpollCtl(epfd, syscall.EPOLL_CTL_ADD, fd, &listenEvent); err != nil {
		fmt.Println("epoll_ctl: ", err)
		os.Exit(1)
	}
	log.Printf("ok=>enter\n")

	for {
		nevents, err := syscall.EpollWait(epfd, events[:], -1) // fetch ready events
		if err != nil {
			if err == syscall.EINTR {
				continue
			}
			fmt.Printf("epoll_wait: %+v\n", err)
			continue
		}
		fmt.Printf("nevent cnt = %d\n", nevents)

		for _, ev := range events[:nevents] {
			if int(ev.Fd) == fd {
				// The listening socket is ready: accept one connection.
				connFd, _, err := syscall.Accept(fd)
				if err != nil {
					fmt.Println("accept: ", err)
					continue
				}
				// The accepted connection, not the listening socket, is
				// the descriptor that must become non-blocking.
				if err := syscall.SetNonblock(connFd, true); err != nil {
					fmt.Println("setnonblock2: ", err)
					syscall.Close(connFd)
					continue
				}
				// Watch for read events; EPOLLET is not set, so the
				// connection is level-triggered.
				connEvent := syscall.EpollEvent{
					Events: syscall.EPOLLIN | syscall.EPOLLPRI,
					Fd:     int32(connFd),
				}
				if err := syscall.EpollCtl(epfd, syscall.EPOLL_CTL_ADD, connFd, &connEvent); err != nil {
					// Drop only this connection instead of stopping the server.
					fmt.Println("epoll_ctl: ", connFd, err)
					syscall.Close(connFd)
				}
				continue
			}

			connFd := int(ev.Fd)
			mask := ev.Events
			log.Printf("events.fd=%d\n", ev.Fd)
			log.Printf("events %#v", ev)

			if mask&unix.EPOLLERR != 0 || (mask&unix.EPOLLHUP != 0 && mask&unix.EPOLLIN == 0) {
				// Error, or hang-up with nothing left to read. The event
				// would be reported again on every wait, so the
				// connection is dropped here.
				log.Printf("event err\n")
				if err := syscall.EpollCtl(epfd, syscall.EPOLL_CTL_DEL, connFd, nil); err != nil {
					fmt.Println("epoll_ctl: ", err)
				}
				syscall.Close(connFd)
				log.Printf("close fd %d after error or hang-up\n", connFd)
				continue
			}
			if mask&unix.EPOLLOUT != 0 {
				log.Printf("write event\n")
			}
			if mask&(unix.EPOLLIN|unix.EPOLLPRI|unix.EPOLLRDHUP) != 0 {
				log.Printf("read event")
			}
			echo(epfd, connFd)
		}
	}
}