Open gh-liu opened 6 months ago
Nocopy Buffer 基于链表数组实现:将 []byte 数组抽象为 block,以链表拼接的形式将 block 组合为 Nocopy Buffer,同时引入了引用计数、对象池、nocopy API。
// nocopy_linkbuffer.go
// linkBufferNode is one node of the linked list behind a LinkBuffer.
// Each node wraps a single []byte block.
type linkBufferNode struct {
buf []byte // backing bytes; the unread data is buf[off:len(buf)]
off int // read offset into buf
malloc int // write offset; buf[len(buf):malloc] has been handed out by Malloc but not yet submitted
refer int32 // reference count, accessed through sync/atomic
mode uint8 // bit flags for boolean state (the original notes mention readonlyMask and nocopyReadMask)
origin *linkBufferNode // root node this node was derived from by Refer; nil on a root node
next *linkBufferNode // next node of the linked buffer
}
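// Layout of a node's buf:
//
//	buf: |<-- already read -->|<-- readable -->|<-- malloc'ed, not yet flushed -->|
//	     0                   off            len(buf)                          malloc
//
// off is the read offset and malloc is the write offset.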
// linkedPool recycles linkBufferNode values so that nodes can be reused
// instead of allocated anew. A node built by New starts with a reference
// count of 1.
var linkedPool = sync.Pool{
	New: func() interface{} {
		node := new(linkBufferNode)
		node.refer = 1
		return node
	},
}
// newLinkBufferNode takes a node from linkedPool, resets its bookkeeping and
// returns it.
//
// When size <= 0 the node is flagged read-only and no buffer is allocated for
// it, meaning its buf is not owned by this allocator. Otherwise a zero-length
// buffer is allocated through malloc; sizes below LinkBufferCap are rounded up
// to LinkBufferCap.
func newLinkBufferNode(size int) *linkBufferNode {
	node := linkedPool.Get().(*linkBufferNode)

	// A pooled node may still carry state from its previous use: reset the
	// read/write offsets, the reference count and the mode bits.
	node.off = 0
	node.malloc = 0
	node.refer = 1
	node.mode = defaultLinkBufferMode

	switch {
	case size <= 0:
		node.setMode(readonlyMask, true)
	case size < LinkBufferCap:
		node.buf = malloc(0, LinkBufferCap)
	default:
		node.buf = malloc(0, size)
	}
	return node
}
// nocopy.go
//
// malloc returns a byte slice of the given length and capacity.
//
// Requests whose capacity does not exceed mallocMax are served by
// mcache.Malloc; larger ones bypass the cache and go to dirtmake.Bytes.
func malloc(size, capacity int) []byte {
	if capacity <= mallocMax {
		return mcache.Malloc(size, capacity)
	}
	return dirtmake.Bytes(size, capacity)
}
// Methods provided by linkBufferNode.

// Len returns the number of bytes in the node that have not been read yet.
func (node *linkBufferNode) Len() (l int) {
	l = len(node.buf) - node.off
	return l
}
// IsEmpty reports whether the read offset has reached the end of buf, i.e.
// whether there is nothing left to read from this node.
func (node *linkBufferNode) IsEmpty() (ok bool) {
	ok = len(node.buf) == node.off
	return ok
}
// Reset rewinds the node so its buffer can be filled again: buf is truncated
// to length zero and both offsets return to 0.
//
// It is a no-op when the node is derived from another node (origin is set) or
// when its reference count is not exactly 1.
func (node *linkBufferNode) Reset() {
	if node.origin == nil && atomic.LoadInt32(&node.refer) == 1 {
		node.buf = node.buf[:0]
		node.off, node.malloc = 0, 0
	}
}
// Next returns the next n unread bytes and moves the read offset past them.
//
// It does no bounds checking of its own: the caller is expected to compare n
// against Len beforehand.
func (node *linkBufferNode) Next(n int) (p []byte) {
	start := node.off
	end := start + n
	node.off = end
	return node.buf[start:end]
}
// Peek returns the next n unread bytes without moving the read offset.
func (node *linkBufferNode) Peek(n int) (p []byte) {
	start := node.off
	p = node.buf[start : start+n]
	return p
}
// Malloc hands out the next n bytes of buf for the caller to write into and
// moves the write offset past them.
func (node *linkBufferNode) Malloc(n int) (buf []byte) {
	begin := node.malloc
	end := begin + n
	node.malloc = end
	return node.buf[begin:end]
}
// Refer consumes the next n bytes like Next, but returns them wrapped in a new
// read-only node instead of a bare slice.
//
// The returned node records the root of the chain as its origin: the origin
// of the receiver when the receiver is itself derived, otherwise the receiver.
// The root's reference count is incremented.
func (node *linkBufferNode) Refer(n int) (p *linkBufferNode) {
	p = newLinkBufferNode(0)
	p.buf = node.Next(n)

	root := node.origin
	if root == nil {
		root = node
	}
	p.origin = root
	atomic.AddInt32(&root.refer, 1)
	return p
}
// Release drops one reference to the node.
//
// It works in two steps:
//  1. If the node is derived from another node, release that origin first,
//     then decrement the node's own reference count.
//  2. When the count reaches 0, recycle the node: free buf if the node is
//     reusable, clear its pointers and put it back into linkedPool.
//
// The returned error is always nil.
func (node *linkBufferNode) Release() (err error) {
	if root := node.origin; root != nil {
		root.Release() // always returns nil, so the result is not checked
	}

	if atomic.AddInt32(&node.refer, -1) != 0 {
		return nil // still referenced elsewhere
	}

	// Only reusable nodes hand their buffer back to the allocator.
	if node.reusable() {
		free(node.buf)
	}
	node.buf, node.origin, node.next = nil, nil, nil
	linkedPool.Put(node)
	return nil
}
// nocopy_linkbuffer_norace.go
// nocopy_linkbuffer_race.go
// LinkBuffer is an alias of UnsafeLinkBuffer.
type LinkBuffer = UnsafeLinkBuffer
// nocopy_linkbuffer.go
// UnsafeLinkBuffer implements ReadWriter.
type UnsafeLinkBuffer struct {
length int64 // number of readable bytes
mallocSize int // bytes handed out by Malloc since the last Flush
head *linkBufferNode // release head: first node that has not been released yet
read *linkBufferNode // read head: node currently being read from
flush *linkBufferNode // malloc head: node at which the last Flush stopped
write *linkBufferNode // malloc tail: node currently being written to
caches [][]byte // buffers allocated by Next for reads that span several nodes; freed by Release
}
// NewLinkBuffer creates a LinkBuffer backed by a single node.
//
// The optional first element of size is the initial capacity passed to
// newLinkBufferNode; further elements are ignored and a missing size means 0.
// The new buffer contains no readable data.
func NewLinkBuffer(size ...int) *LinkBuffer {
	capacity := 0
	if len(size) > 0 {
		capacity = size[0]
	}
	node := newLinkBufferNode(capacity)
	// All four cursors start on the same, only node.
	return &LinkBuffer{head: node, read: node, flush: node, write: node}
}
// Compile-time checks that *LinkBuffer (i.e. *UnsafeLinkBuffer) implements
// both the Reader and the Writer interface.
var (
	_ Reader = (*LinkBuffer)(nil)
	_ Writer = (*LinkBuffer)(nil)
)
先看一下 Reader / Writer 两个接口是如何定义的:
// nocopy.go
// Reader is a collection of operations for nocopy reads.
//
// For ease of use, it is recommended to implement Reader as a blocking interface,
// rather than simply fetching the buffer.
// For example, the return of calling Next(n) should be blocked if there are fewer than n bytes, unless timeout.
// The return value is guaranteed to meet the requirements or an error will be returned.
type Reader interface { // a set of read operations that avoid copying memory
// Next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
//
// If there are fewer than n bytes in the buffer, Next returns will be blocked
// until data enough or an error occurs (such as a wait timeout).
//
// The slice p is only valid until the next call to the Release method.
// Next is not globally optimal, and Skip, ReadString, ReadBinary methods
// are recommended for specific scenarios.
//
// Return: len(p) must be n or 0, and p and error cannot be nil at the same time.
Next(n int) (p []byte, err error) // read n bytes and advance the read pointer
// Peek returns the next n bytes without advancing the reader.
// Other behavior is the same as Next.
Peek(n int) (buf []byte, err error) // read n bytes without advancing the read pointer
// Skip the next n bytes and advance the reader, which is
// a faster implementation of Next when the next data is not used.
Skip(n int) (err error) // skip n bytes, advancing the read pointer
// Until reads until the first occurrence of delim in the input,
// returning a slice stops with delim in the input buffer.
// If Until encounters an error before finding a delimiter,
// it returns all the data in the buffer and the error itself (often ErrEOF or ErrConnClosed).
// Until returns err != nil only if line does not end in delim.
Until(delim byte) (line []byte, err error) // read up to the given delimiter byte, advancing the read pointer
// ReadString is a faster implementation of Next when a string needs to be returned.
// It replaces:
//
//	var p, err = Next(n)
//	return string(p), err
//
ReadString(n int) (s string, err error) // read n bytes and return them as a string
// ReadBinary is a faster implementation of Next when it needs to
// return a copy of the slice that is not shared with the underlying layer.
// It replaces:
//
//	var p, err = Next(n)
//	var b = make([]byte, n)
//	copy(b, p)
//	return b, err
//
ReadBinary(n int) (p []byte, err error) // read n bytes and return a copy of them
// ReadByte is a faster implementation of Next when a byte needs to be returned.
// It replaces:
//
//	var p, err = Next(1)
//	return p[0], err
//
ReadByte() (b byte, err error) // read a single byte
// Slice returns a new Reader containing the Next n bytes from this Reader.
//
// If you want to make a new Reader using the []byte returned by Next, Slice already does that,
// and the operation is zero-copy. Besides, Slice would also Release this Reader.
// The logic pseudocode is similar:
//
//	var p, err = this.Next(n)
//	var reader = new Reader(p) // pseudocode
//	this.Release()
//	return reader, err
//
Slice(n int) (r Reader, err error) // read n bytes zero-copy and return them as a Reader
// Release the memory space occupied by all read slices. This method needs to be executed actively to
// recycle the memory after confirming that the previously read data is no longer in use.
// After invoking Release, the slices obtained by the method such as Next, Peek, Skip will
// become an invalid address and cannot be used anymore.
Release() (err error) // free the slices that were read; make sure they are no longer in use first
// Len returns the total length of the readable data in the reader.
Len() (length int) // length of the readable data
}
// Writer is a collection of operations for nocopy writes.
//
// The usage of the design is a two-step operation, first apply for a section of memory,
// fill it and then submit. E.g:
//
//	var buf, _ = Malloc(n)
//	buf = append(buf[:0], ...)
//	Flush()
//
// Note that it is not recommended to submit self-managed buffers to Writer.
// Since the writer is processed asynchronously, if the self-managed buffer is used and recycled after submission,
// it may cause inconsistent life cycle problems. Of course this is not within the scope of the design.
type Writer interface { // a set of write operations that avoid copying memory
// Malloc returns a slice containing the next n bytes from the buffer,
// which will be written after submission(e.g. Flush).
//
// The slice p is only valid until the next submit(e.g. Flush).
// Therefore, please make sure that all data has been written into the slice before submission.
Malloc(n int) (buf []byte, err error) // allocate n writable bytes
// WriteString is a faster implementation of Malloc when a string needs to be written.
// It replaces:
//
//	var buf, err = Malloc(len(s))
//	n = copy(buf, s)
//	return n, err
//
// The argument string s will be referenced based on the original address and will not be copied,
// so make sure that the string s will not be changed.
WriteString(s string) (n int, err error) // write a string
// WriteBinary is a faster implementation of Malloc when a slice needs to be written.
// It replaces:
//
//	var buf, err = Malloc(len(b))
//	n = copy(buf, b)
//	return n, err
//
// The argument slice b will be referenced based on the original address and will not be copied,
// so make sure that the slice b will not be changed.
WriteBinary(b []byte) (n int, err error) // write a byte slice
// WriteByte is a faster implementation of Malloc when a byte needs to be written.
// It replaces:
//
//	var buf, _ = Malloc(1)
//	buf[0] = b
//
WriteByte(b byte) (err error) // write a single byte
// WriteDirect is used to insert an additional slice of data on the current write stream.
// For example, if you plan to execute:
//
//	var bufA, _ = Malloc(nA)
//	WriteBinary(b)
//	var bufB, _ = Malloc(nB)
//
// It can be replaced by:
//
//	var buf, _ = Malloc(nA+nB)
//	WriteDirect(b, nB)
//
// where buf[:nA] = bufA, buf[nA:nA+nB] = bufB.
WriteDirect(p []byte, remainCap int) error // write p and additionally reserve remainCap bytes
// MallocAck will keep the first n malloc bytes and discard the rest.
// The following behavior:
//
//	var buf, _ = Malloc(8)
//	buf = buf[:5]
//	MallocAck(5)
//
// equivalent as
//
//	var buf, _ = Malloc(5)
//
MallocAck(n int) (err error) // discard the bytes that were allocated in excess
// Append the argument writer to the tail of this writer and set the argument writer to nil,
// the operation is zero-copy, similar to p = append(p, w.p).
Append(w Writer) (err error) // TODO: not analysed further in these notes
// Flush will submit all malloc data and must confirm that the allocated bytes have been correctly assigned.
// Its behavior is equivalent to the io.Writer that already has parameters(slice b).
Flush() (err error) // submit all data written so far
// MallocLen returns the total length of the writable data that has not yet been submitted in the writer.
MallocLen() (length int) // length of the allocated data not yet submitted
}
着重分析下:
// Reader, abridged to the methods examined in detail below; every "// ..."
// line stands for methods left out of this excerpt.
type Reader interface {
// ...
Next(n int) (p []byte, err error) // read n bytes and advance the read pointer
// ...
Release() (err error) // free the slices that were read; make sure they are no longer in use first
// ...
Slice(n int) (r Reader, err error) // read n bytes zero-copy and return them as a Reader
// ...
}
// Writer, abridged to the methods examined in detail below; every "// ..."
// line stands for methods left out of this excerpt.
type Writer interface {
// ...
Malloc(n int) (buf []byte, err error) // allocate n writable bytes
// ...
Flush() (err error) // submit all data written so far
// ...
}
// Next implements Reader.
//
// Two cases are handled:
//  1. The n bytes are served by the current read node alone: the node's own
//     slice is returned, so nothing is copied.
//  2. The n bytes span several nodes: a fresh n-byte slice is allocated and the
//     data is copied into it node by node, so this path is not zero-copy.
func (b *UnsafeLinkBuffer) Next(n int) (p []byte, err error) {
	// ... (code elided in this excerpt)
	b.recalLen(-n) // the readable length shrinks by n

	// Single node: read straight from the current node.
	if b.isSingleNode(n) {
		return b.read.Next(n), nil
	}

	// Multiple nodes: allocate the destination slice first. Slices obtained
	// from malloc are remembered in caches so that Release can free them.
	if n <= block1k || n > mallocMax {
		p = dirtmake.Bytes(n, n)
	} else {
		p = malloc(n, n)
		b.caches = append(b.caches, p)
	}

	copied := 0
	for remaining := n; remaining > 0; {
		avail := b.read.Len()
		if avail >= remaining {
			// The current node holds everything that is still missing.
			copy(p[copied:], b.read.Next(remaining))
			break
		}
		if avail > 0 {
			// Drain the current node completely.
			copied += copy(p[copied:], b.read.Next(avail))
		}
		b.read = b.read.next
		remaining -= avail
	}
	return p, nil
}
// Release the nodes that have been read.
// b.flush == nil indicates that this LinkBuffer is created by LinkBuffer.Slice
//
// It advances the read head over fully consumed nodes (never past flush),
// releases every node from the release head up to the read head, and frees
// the buffers recorded in caches. The returned error is always nil.
func (b *UnsafeLinkBuffer) Release() (err error) {
	// Skip nodes with nothing left to read.
	for {
		if b.read == b.flush || b.read.Len() != 0 {
			break
		}
		b.read = b.read.next
	}

	// Release everything that lies before the read head.
	for b.head != b.read {
		done := b.head
		b.head = done.next // step forward before the node is released
		done.Release()
	}

	// Free the copies made by reads that crossed node boundaries.
	for i, cached := range b.caches {
		free(cached)
		b.caches[i] = nil
	}
	b.caches = b.caches[:0] // keep the capacity, drop the entries
	return nil
}
// Slice returns a new LinkBuffer, which is a zero-copy slice of this LinkBuffer,
// and only holds the ability of Reader.
//
// The next n bytes are consumed from b and exposed through nodes created with
// Refer, so no data is copied. For n <= 0 an empty buffer is returned; when b
// holds fewer than n readable bytes an error is returned and r is nil.
//
// The multi-node path finishes by calling b.Release.
// NOTE(review): the single-node path returns without calling b.Release.
func (b *UnsafeLinkBuffer) Slice(n int) (r Reader, err error) {
	if n <= 0 {
		return NewLinkBuffer(0), nil
	}
	// Make sure enough data is available.
	if b.Len() < n {
		return r, fmt.Errorf("link buffer readv[%d] not enough", n)
	}
	b.recalLen(-n) // the sliced bytes no longer count as readable in b

	view := new(LinkBuffer)
	view.length = int64(n)
	defer func() {
		// Turn the view read-only: flush and write both move to the node after
		// the last referenced one.
		view.flush = view.flush.next
		view.write = view.flush
	}()

	// Single node: one Refer covers the whole request.
	if b.isSingleNode(n) {
		ref := b.read.Refer(n)
		view.head, view.read, view.flush = ref, ref, ref
		return view, nil
	}

	// Multiple nodes: the remainder of the current read node becomes the head
	// of the view.
	avail := b.read.Len()
	first := b.read.Refer(avail)
	b.read = b.read.next
	view.head, view.read, view.flush = first, first, first

	// Chain references to the following nodes until n bytes are covered;
	// view.flush always points at the last node appended so far.
	for remaining := n - avail; remaining > 0; remaining -= avail {
		avail = b.read.Len()
		if avail >= remaining {
			view.flush.next = b.read.Refer(remaining)
			view.flush = view.flush.next
			break
		}
		if avail > 0 {
			view.flush.next = b.read.Refer(avail)
			view.flush = view.flush.next
		}
		b.read = b.read.next
	}
	return view, b.Release()
}
// Malloc pre-allocates memory, which is not readable, and becomes readable data after submission(e.g. Flush).
//
// It adds n to mallocSize, calls growth(n) and then takes the n bytes from the
// current write node. The returned error is always nil in the code shown here.
func (b *UnsafeLinkBuffer) Malloc(n int) (buf []byte, err error) {
	// ... (code elided in this excerpt)
	b.mallocSize += n

	// growth is defined elsewhere; per the original notes it moves to, or
	// creates, a write node with enough room for n bytes.
	b.growth(n)

	buf = b.write.Malloc(n)
	return buf, nil
}
// Flush will submit all malloc data and must confirm that the allocated bytes have been correctly assigned.
//
// It resets mallocSize, extends buf of every node between the previous flush
// position and the write node up to that node's write offset, and adds the
// number of newly submitted bytes to the readable length. The returned error
// is always nil.
func (b *UnsafeLinkBuffer) Flush() (err error) {
	b.mallocSize = 0 // nothing is pending once everything is submitted

	// FIXME: The tail node must not be larger than 8KB to prevent Out Of Memory.
	// When the write node's buffer is bigger than pagesize, a new node created
	// with size 0 is appended and becomes the write node.
	if cap(b.write.buf) > pagesize {
		tail := newLinkBufferNode(0)
		b.write.next = tail
		b.write = tail
	}

	// Walk from the last flush position through the write node (inclusive).
	submitted := 0
	stop := b.write.next
	for node := b.flush; node != stop; node = node.next {
		if pending := node.malloc - len(node.buf); pending > 0 {
			submitted += pending
			node.buf = node.buf[:node.malloc] // expose the written bytes to readers
		}
	}
	b.flush = b.write // the next Flush starts from here

	b.recalLen(submitted) // the readable length grows by the submitted bytes
	return nil
}
netpoll

1. 监听 net.Listener 的 fd

EventLoop:EventLoop 接口的 eventLoop 结构体的 Serve 方法:

newServer 新建的 server,和 (*server).Run 方法:

FDOperator 的 Control 方法,参数为 PollReadable:

Poll 的具体实现 defaultPoll 的 Control 方法,参数为 FDOperator 和 PollReadable:

2. poll 运行:轮询是否有就绪事件

新建 poll:

运行 poll:

3. 处理就绪事件

重新看一下 FDOperator 结构体:

- OnRead
- OnWrite
- Inputs, InputAck; Outputs, OutputAck