记录一下最近这学期学习的sylar服务器框架项目,输出整理一下项目的结构,用到的知识和自己的体会
项目仓库地址
https://github.com/sylar-yin/sylar/
整理博客过程中参考的大佬资料链接:
============================================
基础介绍
ByteArray模块提供了一个字节容器数组类,提供序列化和反序列化功能,并且同时支持大小端,用于节省内存
压缩算法使用ZigZag,底层使用数据块链表形式储存
(ByteArray模块在sylar基础模块中并没有被使用,主要是在sylar后续为支持分布式而编写的Rock RPC模块中使用)
该模块包含一个类ByteArray,提供了各种类型的读/写方法,成员变量存储了数据链表的信息和字节序
// Byte-array container that provides serialization and deserialization.
// Data is kept in a singly linked list of Node blocks (m_root / m_cur), and
// multi-byte values are written in the byte order selected by m_endian.
class ByteArray {
public:
typedef std::shared_ptr
<ByteArray> ptr;
// Linked-list node that stores one block of data
struct Node {
Node(size_t s);
Node();
~Node();
char* ptr; // points to the block's data
Node* next; // points to the next Node
size_t size; // size of this Node
};
ByteArray(size_t base_size = 4096); // constructor; base_size is the size of each memory block
~ByteArray(); // destructor
void writeFint8 (int8_t value); // write int8 (int8_t type)
void writeFuint8 (uint8_t value); // write uint8 (uint8_t type)
void writeFint16 (int16_t value); // write int16 (int16_t type)
void writeFuint16 (uint16_t value); // write uint16 (uint16_t type)
void writeFint32 (int32_t value); // write int32 (int32_t type)
void writeFuint32 (uint32_t value); // write uint32 (uint32_t type)
void writeFint64 (int64_t value); // write int64 (int64_t type)
void writeFuint64 (uint64_t value); // write uint64 (uint64_t type)
void writeInt32 (int32_t value); // write int32 (Varint32 type)
void writeUint32 (uint32_t value); // write uint32 (unsigned Varint32 type)
void writeInt64 (int64_t value); // write int64 (Varint64 type)
void writeUint64 (uint64_t value); // write uint64 (unsigned Varint64 type)
void writeFloat (float& value); // write float
void writeDouble (double& value); // write double
void writeStringF16(const std::string& value); // write std::string, using uint16_t as the length type
void writeStringF32(const std::string& value); // write std::string, using uint32_t as the length type
void writeStringF64(const std::string& value); // write std::string, using uint64_t as the length type
void writeStringVint(const std::string& value); // write std::string, using unsigned Varint64 as the length type
void writeStringWithoutLength(const std::string& value); // write std::string without a length prefix
int8_t readFint8(); // read int8 (int8_t type)
uint8_t readFuint8(); // read uint8 (uint8_t type)
int16_t readFint16(); // read int16 (int16_t type)
uint16_t readFuint16(); // read uint16 (uint16_t type)
int32_t readFint32(); // read int32 (int32_t type)
uint32_t readFuint32(); // read uint32 (uint32_t type)
int64_t readFint64(); // read int64 (int64_t type)
uint64_t readFuint64(); // read uint64 (uint64_t type)
int32_t readInt32(); // read int32 (Varint32 type)
uint32_t readUint32(); // read uint32 (unsigned Varint32 type)
int64_t readInt64(); // read int64 (Varint64 type)
uint64_t readUint64(); // read uint64 (unsigned Varint64 type)
float readFloat(); // read float
double readDouble(); // read double
std::string readStringF16(); // read std::string, using uint16_t as the length type
std::string readStringF32(); // read std::string, using uint32_t as the length type
std::string readStringF64(); // read std::string, using uint64_t as the length type
std::string readStringVint(); // read std::string, using unsigned Varint64 as the length type
void clear(); // clear the ByteArray
void write(const void* buf, size_t size); // write operation (changes position)
void read(void* buf, size_t size); // read operation (changes position)
void read(void* buf, size_t size, size_t position) const; // read operation (does not change position)
size_t getPosition() const { return m_position; } // current operating position
void setPosition(size_t v);
bool writeToFile(const std::string& name) const; // write to a file
bool readFromFile(const std::string& name); // read from a file
size_t getBaseSize() const { return m_baseSize; } // size of one memory block
size_t getReadSize() const { return m_size - m_position; } // bytes between the current position and the end of the data
bool isLittleEndian() const; // returns whether the byte order is little-endian
void setIsLitteEndian(bool val); // sets whether the byte order is little-endian
std::string toString() const; // convert to std::string
std::string toHexString() const; // convert to hexadecimal output
// Collect readable buffers as an iovec array; does not modify position
uint64_t getReadBuffer(std::vector
<iovec>& buffers, uint64_t len = ~0ull) const; // starts at the current position
uint64_t getReadBuffer(std::vector
<iovec>& buffers, uint64_t len, uint64_t position) const; // starts at the given position
// Collect writable buffers as an iovec array; does not modify position
uint64_t getWriteBuffer(std::vector
<iovec>& buffers, uint64_t len);
size_t getSize() const { return m_size; } // returns the length of the data
private:
void addCapacity(size_t size); // increase capacity
size_t getCapacity() const { return m_capacity - m_position; } // remaining capacity after the current position
private:
size_t m_baseSize; // size of each memory block
size_t m_position; // current operating position
size_t m_capacity; // current total capacity
size_t m_size; // current size of the data
int8_t m_endian; // byte order; the constructor defaults it to big-endian
Node* m_root; // pointer to the first memory block
Node* m_cur; // pointer to the memory block currently being operated on
};
Varint类型和ZigZag算法
Varint是一种使用一个或多个字节序列化整数的方法,会把整数编码为变长字节。对于32位整型数据,经过Varint编码后需要1~5个字节:小的数字使用1个byte,大的数字使用5个bytes。64位整型数据编码后占用1~10个字节。在实际场景中小数字的使用率远远多于大数字,因此通过Varint编码对于大部分场景都可以起到很好的压缩效果。
ZigZag算法把有符号整数映射为无符号整数(0→0,-1→1,1→2,-2→3,……),使绝对值小的负数也变成小的正数。如果不做这一步,负数的补码高位几乎全为1,直接进行Varint编码时总会占用最多的字节数,无法节省空间。 链接:https://juejin.cn/post/7241846256287694904
ZigZag的压缩/解压可以使用简单的一行代码完成
// Encode a signed 32-bit integer in ZigZag form (0, -1, 1, -2, ... map to
// 0, 1, 2, 3, ...), so that small magnitudes become small unsigned values.
//
// The computation is done entirely on uint32_t: negating or doubling the
// value as a signed int overflows (undefined behavior) for INT32_MIN and for
// magnitudes >= 2^30. The produced encoding is unchanged for all other inputs.
//
// @param v value to encode
// @return the ZigZag-encoded value
static uint32_t EncodeZigzag32(const int32_t& v) {
    const uint32_t bits = static_cast<uint32_t>(v);
    // All ones for negative inputs, zero otherwise (same role as v >> 31).
    const uint32_t sign_mask = (v < 0) ? ~static_cast<uint32_t>(0)
                                       : static_cast<uint32_t>(0);
    return (bits << 1) ^ sign_mask;
}
// Encode a signed 64-bit integer in ZigZag form (0, -1, 1, -2, ... map to
// 0, 1, 2, 3, ...), so that small magnitudes become small unsigned values.
//
// The computation is done entirely on uint64_t: negating or doubling the
// value as a signed integer overflows (undefined behavior) for INT64_MIN and
// for magnitudes >= 2^62. The produced encoding is unchanged for all other
// inputs.
//
// @param v value to encode
// @return the ZigZag-encoded value
static uint64_t EncodeZigzag64(const int64_t& v) {
    const uint64_t bits = static_cast<uint64_t>(v);
    // All ones for negative inputs, zero otherwise (same role as v >> 63).
    const uint64_t sign_mask = (v < 0) ? ~static_cast<uint64_t>(0)
                                       : static_cast<uint64_t>(0);
    return (bits << 1) ^ sign_mask;
}
// Decode a 32-bit ZigZag value back to the signed integer it represents.
// The low bit carries the sign and the remaining bits carry the magnitude.
static int32_t DecodeZigzag32(const uint32_t& v) {
    const uint32_t magnitude = v >> 1;
    // Unsigned negation: 0 stays 0, 1 becomes all ones.
    const uint32_t sign_mask = -(v & 1);
    return magnitude ^ sign_mask;
}
// Decode a 64-bit ZigZag value back to the signed integer it represents.
// The low bit carries the sign and the remaining bits carry the magnitude.
static int64_t DecodeZigzag64(const uint64_t& v) {
    const uint64_t magnitude = v >> 1;
    // Unsigned negation: 0 stays 0, 1 becomes all ones.
    const uint64_t sign_mask = -(v & 1);
    return magnitude ^ sign_mask;
}
构造/析构函数
对数据链表进行初始化
// Create an empty ByteArray: position and size start at 0, the byte order is
// big-endian, and storage begins with one block of base_size bytes that is
// both the list head (m_root) and the current block (m_cur).
ByteArray::ByteArray(size_t base_size)
    : m_baseSize(base_size),
      m_position(0),
      m_capacity(base_size),
      m_size(0),
      m_endian(SYLAR_BIG_ENDIAN),
      m_root(new Node(base_size)),
      m_cur(m_root) {}
// Release every block of the linked list, starting from m_root.
ByteArray::~ByteArray() {
    for(Node* next = m_root; next != nullptr; ) {
        // Remember the node being freed in m_cur, then step forward before
        // deleting so the next pointer is read while the node is still alive.
        m_cur = next;
        next = next->next;
        delete m_cur;
    }
}
链表相关
主要是对数据链表的操作方法,例如移动指针,添加容量等
// Create a detached node that owns a freshly allocated buffer of s bytes.
ByteArray::Node::Node(size_t s)
    : ptr(new char[s]),
      next(nullptr),
      size(s) {}
// Create an empty node: no buffer, no successor, size 0.
ByteArray::Node::Node()
    : ptr(nullptr),
      next(nullptr),
      size(0) {}
// Free the node's buffer. delete[] on a null pointer is a no-op, so an
// empty node needs no separate check.
ByteArray::Node::~Node() {
    delete[] ptr;
}
// Move the current position to v and re-point m_cur at the matching block.
// If v lies beyond the current data size, the data size grows to v.
//
// @param v new position, at most the current capacity
// @throws std::out_of_range when v exceeds the capacity
void ByteArray::setPosition(size_t v) {
    if(v > m_capacity) {
        throw std::out_of_range("set_position out of range");
    }

    m_position = v;
    m_size = (m_position > m_size) ? m_position : m_size;

    // Walk from the head, consuming whole blocks until the offset fits.
    size_t remaining = v;
    for(m_cur = m_root; remaining > m_cur->size; m_cur = m_cur->next) {
        remaining -= m_cur->size;
    }
    // Landing exactly on a block's end means the next block is the current one.
    if(remaining == m_cur->size) {
        m_cur = m_cur->next;
    }
}
void ByteArray::addCapacity(size_t size) {
if(size == 0) {
return;
}
size_t old_cap = getCapacity();
if(old_cap >= size) {
return;
}
size = size - old_cap;
size_t count = (size / m_baseSize) + (((size % m_baseSize) > old_cap) ? 1 : 0);
Node* tmp = m_root;
while(tmp->next) {
tmp = tmp->next;
}
Node* first = NULL;
for(size_t i = 0; i < count; i++) {
tmp->next = new Node(m_baseSize);
if(first == NULL) {
first = tmp->next;
}
tmp = tmp->next;
m_capacity += m_baseSize;
}
if(old_cap == 0) {
m_cur = first;
}
}
uint64_t ByteArray::getReadBuffer(std::vector
<iovec>& buffers, uint64_t len) const {
len = len > getReadSize() ? getReadSize() : len;
if(len == 0) {
return 0;
}
uint64_t size = len;
size_t npos = m_position % m_baseSize;
size_t ncap = m_cur->size - npos;
struct iovec iov;
Node* cur = m_cur;
while(len > 0) {
if(ncap >= len) {
iov.iov_base = cur->ptr + npos;
iov.iov_len = len;
len = 0;
} else {
iov.iov_base = cur->ptr + npos;
iov.iov_len = ncap;
len -= ncap;
cur = cur->next;
ncap = cur->size;
npos = 0;
}
buffers.push_back(iov);
}
return size;
}
// ...
write
提供了写int8,uint8,int16,uint16,int32,uint32,int64,uint64,float,double,std::string的方法
// Copy `size` bytes from `buf` into the array at the current position,
// growing the capacity first and advancing the position (and the data size,
// if the write extends past it).
//
// @param buf  source bytes
// @param size number of bytes to copy; 0 is a no-op
void ByteArray::write(const void* buf, size_t size) {
    if(size == 0) {
        return;
    }
    addCapacity(size);

    const char* src = (const char*)buf;
    size_t offset = m_position % m_baseSize;
    size_t room = m_cur->size - offset;

    while(size > 0) {
        // Copy as much as fits into the current block.
        const size_t chunk = (room >= size) ? size : room;
        memcpy(m_cur->ptr + offset, src, chunk);
        src += chunk;
        m_position += chunk;
        size -= chunk;

        if(chunk == room) {
            // The current block is now full: move on to the next one.
            m_cur = m_cur->next;
            if(size > 0) {
                room = m_cur->size;
                offset = 0;
            }
        }
    }

    if(m_position > m_size) {
        m_size = m_position;
    }
}
// Append one signed byte; a single byte needs no byte-order handling.
void ByteArray::writeFint8 (int8_t value) {
    write(&value, sizeof(int8_t));
}
// Append one unsigned byte; a single byte needs no byte-order handling.
void ByteArray::writeFuint8 (uint8_t value) {
    write(&value, sizeof(uint8_t));
}
// Append a 16-bit signed integer in the array's configured byte order,
// swapping bytes when that order differs from SYLAR_BYTE_ORDER.
void ByteArray::writeFint16 (int16_t value) {
    const int16_t encoded = (m_endian == SYLAR_BYTE_ORDER) ? value : byteswap(value);
    write(&encoded, sizeof(encoded));
}
// Append a 16-bit unsigned integer in the array's configured byte order,
// swapping bytes when that order differs from SYLAR_BYTE_ORDER.
void ByteArray::writeFuint16 (uint16_t value) {
    const uint16_t encoded = (m_endian == SYLAR_BYTE_ORDER) ? value : byteswap(value);
    write(&encoded, sizeof(encoded));
}
// Append a 32-bit signed integer in the array's configured byte order,
// swapping bytes when that order differs from SYLAR_BYTE_ORDER.
void ByteArray::writeFint32 (int32_t value) {
    const int32_t encoded = (m_endian == SYLAR_BYTE_ORDER) ? value : byteswap(value);
    write(&encoded, sizeof(encoded));
}
// Append a 32-bit unsigned integer in the array's configured byte order,
// swapping bytes when that order differs from SYLAR_BYTE_ORDER.
void ByteArray::writeFuint32 (uint32_t value) {
    const uint32_t encoded = (m_endian == SYLAR_BYTE_ORDER) ? value : byteswap(value);
    write(&encoded, sizeof(encoded));
}
// ...
// Append a float by copying its object representation into a uint32_t and
// writing that with writeFuint32.
void ByteArray::writeFloat (float& value) {
    uint32_t raw_bits;
    memcpy(&raw_bits, &value, sizeof(float));
    writeFuint32(raw_bits);
}
// Append a double by copying its object representation into a uint64_t and
// writing that with writeFuint64.
void ByteArray::writeDouble (double& value) {
    uint64_t raw_bits;
    memcpy(&raw_bits, &value, sizeof(double));
    writeFuint64(raw_bits);
}
// Append a string as a uint16_t length prefix followed by its bytes.
// NOTE(review): the length is narrowed to uint16_t while the full payload is
// still written, so strings longer than 65535 bytes produce a mismatching
// prefix.
void ByteArray::writeStringF16(const std::string& value) {
    const size_t length = value.size();
    writeFuint16(length);
    write(value.c_str(), length);
}
// Append a string as a uint32_t length prefix followed by its bytes.
// NOTE(review): the length is narrowed to uint32_t while the full payload is
// still written, so strings longer than UINT32_MAX bytes produce a
// mismatching prefix.
void ByteArray::writeStringF32(const std::string& value) {
    const size_t length = value.size();
    writeFuint32(length);
    write(value.c_str(), length);
}
// ...
read
提供了读int8,uint8,int16,uint16,int32,uint32,int64,uint64,float,double,std::string的方法
// Copy `size` bytes from the current position into `buf` and advance the
// position (and the current block pointer) past them.
//
// @param buf  destination buffer with room for `size` bytes
// @param size number of bytes to read
// @throws std::out_of_range when fewer than `size` bytes are readable
void ByteArray::read(void* buf, size_t size) {
    if(size > getReadSize()) {
        throw std::out_of_range("not enough len");
    }

    char* dst = (char*)buf;
    size_t offset = m_position % m_baseSize;
    size_t avail = m_cur->size - offset;

    while(size > 0) {
        // Take as much as the current block still holds.
        const size_t chunk = (avail >= size) ? size : avail;
        memcpy(dst, m_cur->ptr + offset, chunk);
        dst += chunk;
        m_position += chunk;
        size -= chunk;

        if(chunk == avail) {
            // The current block is exhausted: move on to the next one.
            m_cur = m_cur->next;
            if(size > 0) {
                avail = m_cur->size;
                offset = 0;
            }
        }
    }
}
void ByteArray::read(void* buf, size_t size, size_t position) const {
if(size > getReadSize()) {
throw std::out_of_range("not enough len");
}
size_t npos = position % m_baseSize;
size_t ncap = m_cur->size - npos;
size_t bpos = 0;
Node* cur = m_cur;
while(size > 0) {
if(ncap >= size) {
memcpy((char*)buf + bpos, cur->ptr + npos, size);
if(cur->size == (npos + size)) {
cur = cur->next;
}
position += size;
bpos += size;
size = 0;
} else {
memcpy((char*)buf + bpos, cur->ptr + npos, ncap);
position += ncap;
bpos += ncap;
size -= ncap;
cur = cur->next;
ncap = cur->size;
npos = 0;
}
}
}
// Read one signed byte from the current position and advance past it.
int8_t ByteArray::readFint8() {
    int8_t result;
    read(&result, sizeof(int8_t));
    return result;
}
// Read one unsigned byte from the current position and advance past it.
uint8_t ByteArray::readFuint8() {
    uint8_t result;
    read(&result, sizeof(uint8_t));
    return result;
}
// Shared body for the fixed-width readers below: reads sizeof(type) bytes
// into a local of that type, then returns it unchanged when the array's byte
// order equals SYLAR_BYTE_ORDER, or byte-swapped otherwise.
#define XX(type) \
type v; \
read(&v, sizeof(v)); \
if(m_endian == SYLAR_BYTE_ORDER) { \
return v; \
} else { \
return byteswap(v); \
}
// Read a 16-bit signed integer and advance the position.
int16_t ByteArray::readFint16() {
XX(int16_t);
}
// Read a 16-bit unsigned integer and advance the position.
uint16_t ByteArray::readFuint16() {
XX(uint16_t);
}
// Read a 32-bit signed integer and advance the position.
int32_t ByteArray::readFint32() {
XX(int32_t);
}
// Read a 32-bit unsigned integer and advance the position.
uint32_t ByteArray::readFuint32() {
XX(uint32_t);
}
// ...
// Read a float: fetch 32 bits with readFuint32 and copy them into a float.
float ByteArray::readFloat() {
    const uint32_t raw_bits = readFuint32();
    float result;
    memcpy(&result, &raw_bits, sizeof(float));
    return result;
}
// Read a double: fetch 64 bits with readFuint64 and copy them into a double.
double ByteArray::readDouble() {
    const uint64_t raw_bits = readFuint64();
    double result;
    memcpy(&result, &raw_bits, sizeof(double));
    return result;
}
// Read a string stored as a uint16_t length prefix followed by that many bytes.
std::string ByteArray::readStringF16() {
    const uint16_t length = readFuint16();
    std::string text(length, '\0');
    read(&text[0], length);
    return text;
}
// Read a string stored as a uint32_t length prefix followed by that many bytes.
std::string ByteArray::readStringF32() {
    const uint32_t length = readFuint32();
    std::string text(length, '\0');
    read(&text[0], length);
    return text;
}
// ...
读写文件
// Write all readable data (from the current position to the end of the data)
// to the file `name`, truncating any existing content. The array's position
// is not modified.
//
// Fix: the per-block length is now min(remaining bytes, bytes left in the
// block). The previous expression subtracted the in-block offset from
// min(remaining, block size), which became negative or too short whenever the
// position was inside a block and no more than one block of data remained.
//
// @param name path of the file to write
// @return false when the file cannot be opened, true otherwise
bool ByteArray::writeToFile(const std::string& name) const {
    std::ofstream ofs;
    ofs.open(name, std::ios::trunc | std::ios::binary);
    if(!ofs) {
        SYLAR_LOG_ERROR(g_logger) << "writeToFile name=" << name
            << "error, errno=" << errno << " errstr=" << strerror(errno);
        return false;
    }

    int64_t read_size = getReadSize();
    int64_t pos = m_position;
    Node* cur = m_cur;

    while(read_size > 0) {
        // Offset of `pos` inside the current block and bytes left after it.
        const int64_t diff = pos % (int64_t)m_baseSize;
        const int64_t block_left = (int64_t)m_baseSize - diff;
        const int64_t len = (read_size < block_left) ? read_size : block_left;

        ofs.write(cur->ptr + diff, len);
        cur = cur->next;
        pos += len;
        read_size -= len;
    }
    return true;
}
// Append the whole content of the file `name` to the array at the current
// position, reading it in chunks of m_baseSize bytes.
//
// Fixes:
//  - the loop is driven by the stream state rather than eof() alone, so a
//    read error that never sets eofbit can no longer spin forever;
//  - an unrecoverable stream error (badbit) is reported by returning false;
//  - the scratch buffer is owned by std::unique_ptr<char[]> instead of a
//    shared_ptr with a hand-written array deleter.
//
// @param name path of the file to read
// @return false when the file cannot be opened or a read error occurs
bool ByteArray::readFromFile(const std::string& name) {
    std::ifstream ifs;
    ifs.open(name, std::ios::binary);
    if(!ifs) {
        SYLAR_LOG_ERROR(g_logger) << "readFromFile name=" << name
            << "error, errno=" << errno << " errstr=" << strerror(errno);
        return false;
    }

    std::unique_ptr<char[]> buff(new char[m_baseSize]);
    while(ifs) {
        ifs.read(buff.get(), m_baseSize);
        // A short final read still delivers gcount() bytes; write() ignores 0.
        write(buff.get(), ifs.gcount());
    }
    return !ifs.bad();
}
测试
对每种int类型进行测试,序列化后写入文件并读取打印,底层链表块大小设置为1byte,模拟极端情况
#include "sylar/bytearray.h"
#include "sylar/sylar.h"
// File-local logger, initialised from SYLAR_LOG_ROOT(); used by the log statements in test().
static sylar::Logger::ptr g_logger = SYLAR_LOG_ROOT();
void test() {
#define XX(type, len, write_fun, read_fun, base_len) { \
std::vector
<type> vec; \
for(int i = 0; i < len; i++) { \
vec.push_back(rand()); \
} \
sylar::ByteArray::ptr ba(new sylar::ByteArray(base_len)); \
for(auto& i : vec) { \
ba->write_fun(i); \
} \
ba->setPosition(0); \
for(size_t i = 0; i < vec.size(); i++) { \
type v = ba->read_fun(); \
SYLAR_ASSERT(v == vec[i]); \
} \
SYLAR_ASSERT(ba->getReadSize() == 0); \
SYLAR_LOG_INFO(g_logger) << #write_fun "/" #read_fun \
" ( " #type " ) len=" << #len \
<< " base_len=" << #base_len \
<< " size=" << ba->getSize(); \
}
XX(int8_t, 100, writeFint8, readFint8, 1);
XX(uint8_t, 100, writeFuint8, readFuint8, 1);
XX(int16_t, 100, writeFint16, readFint16, 1);
XX(uint16_t, 100, writeFuint16, readFuint16, 1);
XX(int32_t, 100, writeFint32, readFint32, 1);
XX(uint32_t, 100, writeFuint32, readFuint32, 1);
XX(int64_t, 100, writeFint64, readFint64, 1);
XX(uint64_t, 100, writeFuint64, readFuint64, 1);
XX(int32_t, 100, writeInt32, readInt32, 1);
XX(uint32_t, 100, writeUint32, readUint32, 1);
XX(int64_t, 100, writeInt64, readInt64, 1);
XX(uint64_t, 100, writeUint64, readUint64, 1);
#undef XX
#define XX(type, len, write_fun, read_fun, base_len) { \
std::vector
<type> vec; \
for(int i = 0; i < len; i++) { \
vec.push_back(rand()); \
} \
sylar::ByteArray::ptr ba(new sylar::ByteArray(base_len)); \
for(auto& i : vec) { \
ba->write_fun(i); \
} \
ba->setPosition(0); \
for(size_t i = 0; i < vec.size(); i++) { \
type v = ba->read_fun(); \
SYLAR_ASSERT(v == vec[i]); \
} \
SYLAR_ASSERT(ba->getReadSize() == 0); \
SYLAR_LOG_INFO(g_logger) << #write_fun "/" #read_fun \
" ( " #type " ) len=" << #len \
<< " base_len=" << #base_len \
<< " size=" << ba->getSize(); \
ba->setPosition(0); \
SYLAR_ASSERT(ba->writeToFile("/tmp/" #type "_" #len "_" #write_fun ".dat")); \
sylar::ByteArray::ptr ba2(new sylar::ByteArray(base_len * 2)); \
SYLAR_ASSERT(ba2->readFromFile("/tmp/" #type "_" #len "_" #write_fun ".dat")); \
ba2->setPosition(0); \
SYLAR_ASSERT(ba->toString() == ba2->toString()); \
SYLAR_ASSERT(ba->getPosition() == 0); \
SYLAR_ASSERT(ba->getPosition() == 0); \
}
XX(int8_t, 100, writeFint8, readFint8, 1);
XX(uint8_t, 100, writeFuint8, readFuint8, 1);
XX(int16_t, 100, writeFint16, readFint16, 1);
XX(uint16_t, 100, writeFuint16, readFuint16, 1);
XX(int32_t, 100, writeFint32, readFint32, 1);
XX(uint32_t, 100, writeFuint32, readFuint32, 1);
XX(int64_t, 100, writeFint64, readFint64, 1);
XX(uint64_t, 100, writeFuint64, readFuint64, 1);
XX(int32_t, 100, writeInt32, readInt32, 1);
XX(uint32_t, 100, writeUint32, readUint32, 1);
XX(int64_t, 100, writeInt64, readInt64, 1);
XX(uint64_t, 100, writeUint64, readUint64, 1);
#undef XX
}
// Program entry point: runs test() and exits with status 0.
// The command-line arguments are not used.
int main(int argc, char** argv) {
test();
return 0;
}
可以看到成功向文件中写入并读取了序列化数据


总结
提供了常用类型的序列化/反序列化方法




严肃阅读中……