AOF
思想
AOF(append only file):以独立日志的方式记录每次写命令,重启时再重新执行AOF文件中的命令,以达到恢复数据的目的。AOF的主要作用是解决了数据持久化的实时性问题,目前已经是Redis持久化的主流方式。
特点
- 被写入到AOF文件的所有命令都是以redis命令请求协议格式保存的,redis请求协议是纯文本的,具有很高的可读性,且可以直接修改。因为是纯文本协议,因此我们可以使用记事本这类工具打开,在centos上,使用vim打开。
12345678910111213141516171819
|
^M -----> 对应的就是 \r\n(以下每行开头的数字是vim显示的行号)
   $6^M
 4 SELECT^M
 5 $1^M
 6 0^M
 7 *3^M
 8 $3^M
 9 set^M
10 $1^M
11 a^M
12 $1^M
13 a^M
14 *5^M
15 $4^M
16 mset^M
17 $1^M
18 a^M
19 $1^M
20 a^M
|
- 只保存写命令(pubsub除外)
- 支持aof重写
- 支持RDB+AOF混合存储
配置文件

123456789101112131415161718192021222324252627282930313233343536373839404142
|
appendonly noappendfilename "appendonly.aof"# The fsync() call tells the Operating System to actually write data on disk# instead of waiting for more data in the output buffer. Some OS will really flush# data on disk, some other OS will just try to do it ASAP.## Redis supports three different modes:## no: don
|
AOF写入流程
数据结构
redis中aof,
是先保存在aof缓冲区中的
,数据结构见redisServer:
持久化参数解析
redis中aof刷盘有三种策略,由参数appendfsync控制:
-
always:每次写入都要同步AOF文件,在一般的SATA硬盘上,Redis只能支持大约几百TPS写入,显然跟Redis高性能特性背道而驰,不建议配置。
-
no:由于操作系统每次同步AOF文件的周期不可控(其实差不多也就1s),而且会加大每次同步硬盘的数据量,虽然提升了性能,但数据安全性无法保证。
-
everysec,是建议的同步策略,也是默认配置,做到兼顾性能和数据安全性。理论上只有在系统突然宕机的情况下丢失1秒的数据。需要注意的是使用everysec选项时,
是利用异步线程来处理的(还记得我们之前讨论的三类子线程吗)
。
123456789101112131415161718192021222324252627
|
/* Define redis_fsync to fdatasync() in Linux and fsync() for all the rest */#ifdef __linux__#define redis_fsync fdatasync#else#define redis_fsync fsync#endifvoid flushAppendOnlyFile(int force) { ... //这个是条件 else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC && server.unixtime > server.aof_last_fsync)) { // 每秒刷新缓存到磁盘一次。 if (!sync_in_progress) { // 添加任务到后台线程。 aof_background_fsync(server.aof_fd); server.aof_fsync_offset = server.aof_current_size; } server.aof_last_fsync = server.unixtime; } ...}// 添加异步任务void aof_background_fsync(int fd) { bioCreateBackgroundJob(BIO_AOF_FSYNC,(void*)(long)fd,NULL,NULL);}
|
关于更多IO,推荐看看这篇文章lrita.github.io/2019/03/13/…
具体流程

- feedAppendOnlyFile
开启aof持久化时,会先将命令保存到aof_buf缓冲区中,
然后在定时任务中定期地将数据持久化到磁盘。
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
|
/*
 * Serialize an executed command and append it to the AOF buffers.
 *
 * cmd    - command descriptor; cmd->proc selects how the command is encoded.
 * dictid - id of the database the command ran against.
 * argv   - command arguments (argv[0] is the command name).
 * argc   - number of entries in argv.
 *
 * The encoded text is appended to server.aof_buf when server.aof_state is
 * AOF_ON, and is also handed to aofRewriteBufferAppend() when
 * server.aof_child_pid is set (presumably a running AOF rewrite child).
 */
void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
    sds payload = sdsempty();

    /* Emit a SELECT first when the target DB is not the one the AOF last
     * selected, and remember the new selection. */
    if (server.aof_selected_db != dictid) {
        char dbnum[64];

        snprintf(dbnum,sizeof(dbnum),"%d",dictid);
        payload = sdscatprintf(payload,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
                               (unsigned long)strlen(dbnum),dbnum);
        server.aof_selected_db = dictid;
    }

    int is_expire_family = cmd->proc == expireCommand ||
                           cmd->proc == pexpireCommand ||
                           cmd->proc == expireatCommand;
    int is_setex_family = cmd->proc == setexCommand ||
                          cmd->proc == psetexCommand;

    if (is_expire_family) {
        /* EXPIRE / PEXPIRE / EXPIREAT go through the dedicated expire encoder. */
        payload = catAppendOnlyExpireAtCommand(payload,cmd,argv[1],argv[2]);
    } else if (is_setex_family) {
        /* SETEX / PSETEX are split into a plain SET followed by an expire
         * entry. The temporary "SET" name object is released right after use. */
        robj *set_argv[3];

        set_argv[0] = createStringObject("SET",3);
        set_argv[1] = argv[1];
        set_argv[2] = argv[3];
        payload = catAppendOnlyGenericCommand(payload,3,set_argv);
        decrRefCount(set_argv[0]);
        payload = catAppendOnlyExpireAtCommand(payload,cmd,argv[1],argv[2]);
    } else if (cmd->proc == setCommand && argc > 3) {
        /* SET with options: write only "SET key value", then add an expire
         * entry for an EX or PX option if one is present. */
        robj *ex_value = NULL;
        robj *px_value = NULL;
        int idx = 3;

        payload = catAppendOnlyGenericCommand(payload,3,argv);
        while (idx < argc) {
            if (strcasecmp(argv[idx]->ptr, "ex") == 0) ex_value = argv[idx+1];
            if (strcasecmp(argv[idx]->ptr, "px") == 0) px_value = argv[idx+1];
            idx++;
        }
        /* EX and PX must not both be present. */
        serverAssert(!(ex_value && px_value));
        if (ex_value != NULL) {
            payload = catAppendOnlyExpireAtCommand(payload,server.expireCommand,
                                                   argv[1],ex_value);
        }
        if (px_value != NULL) {
            payload = catAppendOnlyExpireAtCommand(payload,server.pexpireCommand,
                                                   argv[1],px_value);
        }
    } else {
        /* Every other command is written as-is. */
        payload = catAppendOnlyGenericCommand(payload,argc,argv);
    }

    /* Queue the text in the AOF buffer when AOF is on. */
    if (server.aof_state == AOF_ON) {
        server.aof_buf = sdscatlen(server.aof_buf,payload,sdslen(payload));
    }

    /* A child pid is recorded: also accumulate the text in the rewrite buffer. */
    if (server.aof_child_pid != -1) {
        aofRewriteBufferAppend((unsigned char*)payload,sdslen(payload));
    }

    sdsfree(payload);
}
|
- flushAppendOnlyFile
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
|
#define AOF_WRITE_LOG_ERROR_RATE 30 void flushAppendOnlyFile(int force) { ssize_t nwritten; int sync_in_progress = 0; mstime_t latency; if (sdslen(server.aof_buf) == 0) return; //如果是AOF_FSYNC_EVERYSEC模式,因为aof写入操作是在单独线程完成的,所以要看是否有处于正在 //同步中 if (server.aof_fsync == AOF_FSYNC_EVERYSEC) sync_in_progress = bioPendingJobsOfType(BIO_AOF_FSYNC) != 0; if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) { if (sync_in_progress) { //我们可以推迟1,2s,但是也不能一直推迟 // 前面没有推迟过 write 操作,这里将推迟写操作的起始时间记录下来 // 然后就返回,不执行 write 或者 fsync if (server.aof_flush_postponed_start == 0) { server.aof_flush_postponed_start = server.unixtime; return; } else if (server.unixtime - server.aof_flush_postponed_start < 2) { return; } server.aof_delayed_fsync++; serverLog(LL_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis."); } } //计算延时,开启monitor下,才会启用 latencyStartMonitor(latency); //具体刷盘操作 nwritten = aofWrite(server.aof_fd,server.aof_buf,sdslen(server.aof_buf)); latencyEndMonitor(latency); //不同情况输出不同的内容引起的超时 if (sync_in_progress) { latencyAddSampleIfNeeded("aof-write-pending-fsync",latency); } else if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) { latencyAddSampleIfNeeded("aof-write-active-child",latency); } else { latencyAddSampleIfNeeded("aof-write-alone",latency); } latencyAddSampleIfNeeded("aof-write",latency); //清零延迟 write 的时间记录 server.aof_flush_postponed_start = 0; //写入的文件出错 if (nwritten != (ssize_t)sdslen(server.aof_buf)) { static time_t last_write_error_log = 0; int can_log = 0; if ((server.unixtime - last_write_error_log) > AOF_WRITE_LOG_ERROR_RATE) { can_log = 1; last_write_error_log = server.unixtime; } if (nwritten == -1) { if (can_log) { serverLog(LL_WARNING,"Error writing to the AOF file: %s", strerror(errno)); server.aof_last_write_errno = errno; } } else { if (can_log) { serverLog(LL_WARNING,"Short write while writing to " "the AOF file: (nwritten=%lld, " "expected=%lld)", 
(long long)nwritten, (long long)sdslen(server.aof_buf)); } //如果写入的文件问题,我们将其移除,有点类似回滚机制 if (ftruncate(server.aof_fd, server.aof_current_size) == -1) { if (can_log) { serverLog(LL_WARNING, "Could not remove short write " "from the append-only file. Redis may refuse " "to load the AOF the next time it starts. " "ftruncate: %s", strerror(errno)); } } else { nwritten = -1; } server.aof_last_write_errno = ENOSPC; } //如果出问题了,是alway那么就直接退出,因为他无法回滚,已经写入到磁盘了 if (server.aof_fsync == AOF_FSYNC_ALWAYS) { serverLog(LL_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting..."); exit(1); } else { server.aof_last_write_status = C_ERR; if (nwritten > 0) { server.aof_current_size += nwritten; sdsrange(server.aof_buf,nwritten,-1); } return; } } else { if (server.aof_last_write_status == C_ERR) { serverLog(LL_WARNING, "AOF write error looks solved, Redis can write again."); server.aof_last_write_status = C_OK; } } server.aof_current_size += nwritten; //重复使用aof_buf,小于4K的话只是清空,如果大于4K直接释放再进行分配 if ((sdslen(server.aof_buf)+sdsavail(server.aof_buf)) < 4000) { sdsclear(server.aof_buf); } else { sdsfree(server.aof_buf); server.aof_buf = sdsempty(); } // no-appendfsync-on-rewrite参数设置了,表示在rewrite截断不能进行fsync if (server.aof_no_fsync_on_rewrite && (server.aof_child_pid != -1 || server.rdb_child_pid != -1)) return; //如果是always,那么执行redis_fsync,linux下是fdatasync if (server.aof_fsync == AOF_FSYNC_ALWAYS) { latencyStartMonitor(latency); redis_fsync(server.aof_fd); latencyEndMonitor(latency); latencyAddSampleIfNeeded("aof-fsync-always",latency); server.aof_last_fsync = server.unixtime; } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC && server.unixtime > server.aof_last_fsync)) { if (!sync_in_progress) aof_background_fsync(server.aof_fd); server.aof_last_fsync = server.unixtime; }}
|
1234567891011121314151617181920
|
if (server.aof_fsync == AOF_FSYNC_EVERYSEC) sync_in_progress = bioPendingJobsOfType(BIO_AOF_FSYNC) != 0; if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) { if (sync_in_progress) { if (server.aof_flush_postponed_start == 0) { server.aof_flush_postponed_start = server.unixtime; return; } else if (server.unixtime - server.aof_flush_postponed_start < 2) { return; } server.aof_delayed_fsync++; serverLog(LL_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis."); } }
|
- 啥时候结束推迟呢?如果从第一次推迟(aof_flush_postponed_start记录的时间)到现在已经达到2s,就不再等待后台fsync完成,而是强制执行写入。
12345
|
else if (server.unixtime - server.aof_flush_postponed_start < 2) { return; }
|
12345678910111213141516171819
|
if (server.aof_no_fsync_on_rewrite && (server.aof_child_pid != -1 || server.rdb_child_pid != -1)) return;//那么下面这些就不会进行 */ if (server.aof_fsync == AOF_FSYNC_ALWAYS) { /* redis_fsync is defined as fdatasync() for Linux in order to avoid * flushing metadata. */ latencyStartMonitor(latency); redis_fsync(server.aof_fd); /* Let
|
123456789101112131415161718192021
|
#define run_with_period(_ms_) \ if ((_ms_ <= 1000 / server.hz) || \ !(cronloops % ((_ms_) / (1000 / server.hz))))int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { ...
|
AOF读取
Redis启动时加载aof文件
12345678910111213141516171819202122232425262728293031323334353637383940414243
|
int main(int argc, char **argv) { ... loadDataFromDisk(); ...}void loadDataFromDisk(void) { ... long long start = ustime(); if (server.aof_state == AOF_ON) { if (loadAppendOnlyFile(server.aof_filename) == C_OK) serverLog(LL_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000); } else { rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; } ...}int loadAppendOnlyFile(char *filename) { ...
|
debug loadaof加载aof
debug loadaof会先清空数据库,然后再加载aof文件。
1234567891011121314
|
else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) { if (server.aof_state != AOF_OFF) flushAppendOnlyFile(1); emptyDb(-1,EMPTYDB_NO_FLAGS,NULL); protectClient(c); int ret = loadAppendOnlyFile(server.aof_filename); unprotectClient(c); if (ret != C_OK) { addReply(c,shared.err); return; } server.dirty = 0; serverLog(LL_WARNING,"Append Only File loaded by DEBUG LOADAOF"); addReply(c,shared.ok); }
|
AOF文件结构
AOF文件真没啥可以说的,直接就是resp格式的命令,在最开始也有截图。需要注意的是,因为redis支持RDB+AOF混合式存储,因此在读取aof文件时,会先读取前5个字节,判断是否是"REDIS":如果是,则为混合存储;否则就是纯AOF文件。
12345678910111213141516171819
|
char sig[5]; if (fread(sig,1,5,fp) != 5 || memcmp(sig,"REDIS",5) != 0) { if (fseek(fp,0,SEEK_SET) == -1) goto readerr;} else { rio rdb; serverLog(LL_NOTICE,"Reading RDB preamble from AOF file..."); if (fseek(fp,0,SEEK_SET) == -1) goto readerr; rioInitWithFile(&rdb,fp); if (rdbLoadRio(&rdb,NULL,1) != C_OK) { serverLog(LL_WARNING,"Error reading the RDB preamble of the AOF file, AOF loading aborted"); goto readerr; } else { serverLog(LL_NOTICE,"Reading the remaining AOF tail..."); }}
|
还记得上一节RDB持久化时,说到的RDB中表示EOF结束的宏吗?就是RDB_OPCODE_EOF:
1234
|
else if (type == RDB_OPCODE_EOF) { break; }
|
AOF一些注意点
AOF对过期键
- 过期键被惰性删除或者定期删除后,会追加一条del指令至aof文件,并向客户端返回空;而AOF重写时,会直接去掉已过期的键。
- AOF+RDB混合加载时,针对过期键不会特殊处理,会全部加载
|
if (server.masterhost == NULL && !loading_aof && expiretime != -1 && expiretime < now) { decrRefCount(key); decrRefCount(val); } else { dbAdd(db,key,val); if (expiretime != -1) setExpire(NULL,db,key,expiretime); objectSetLRUOrLFU(val,lfu_freq,lru_idle,lru_clock); decrRefCount(key); }
|
优先加载AOF
123456789101112
|
void loadDataFromDisk(void) { ... long long start = ustime(); if (server.aof_state == AOF_ON) { if (loadAppendOnlyFile(server.aof_filename) == C_OK) serverLog(LL_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000); } else { rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; } ...}
|
AOF优缺点
- 优点:
- 提供更灵活的策略,来平衡性能和可靠性。
- 追加模式,容错性强,写到一半宕机或者错误,可以快速恢复
- 优先使用AOF
- 缺点:
- 对于相同数量的数据集而言,AOF文件通常要大于RDB文件
- 恢复速度慢于rdb
