先放架构图
如果执行器服务(例如 example)宕机了,admin 是如何感知的呢?解决方案就是心跳检测。
心跳维护流程
这个流程大概就是
client 定时发送注册信息到 admin,admin 将注册信息维护到 xxl_job_registry 表,然后 admin 定时将 xxl_job_registry 表内有效的地址维护到 xxl_job_group 表。
上代码
client
package registry
import (
"log"
"time"
"xin-job/client/cbiz"
"xin-job/client/sconf"
"xin-job/core/biz"
)
// ExecutorRegistry is the executor-side registrar: it holds the client used
// to talk to the admin and the channel used to request shutdown.
type ExecutorRegistry struct {
	// Client issues the Registry / RegistryRemove calls made by Start.
	Client *cbiz.BizClient
	// Stop is the shutdown signal; the goroutine launched by Start receives
	// from it and then asks the admin to remove this executor's registration.
	Stop chan struct{}
}

// instance is the package-wide ExecutorRegistry, built once at package
// initialisation with the shared biz client and a ready-to-use Stop channel.
var instance = &ExecutorRegistry{
	Client: cbiz.GetBizClient(),
	Stop:   make(chan struct{}),
}

// GetExecutorRegistry returns the package-wide ExecutorRegistry.
func GetExecutorRegistry() *ExecutorRegistry {
	return instance
}
func (r *ExecutorRegistry) Start(appName, address string) {
if appName == "" {
log.Panicln(">>>>>>>>>>> xxl-job, executor registry config fail, appname is null.")
return
}
go func() {
for {
registryParam := biz.RegistryParam{
RegistryGroup: "EXECUTOR",
RegistryKey: appName,
RegistryValue: address,
}
select {
case <-r.Stop:
// 停机,取消注册
if len(sconf.ServerConf.AdminAddressList) > 0 {
for _, adminAddressUrl := range sconf.ServerConf.AdminAddressList {
r.Client.AdminAddressUrl = adminAddressUrl
// 向调度中心发起取消注册
registryResult := r.Client.RegistryRemove(registryParam)
if registryResult.Code == 200 {
// log
break
} else {
// log
}
}
}
default:
if len(sconf.ServerConf.AdminAddressList) > 0 {
for _, adminAddressUrl := range sconf.ServerConf.AdminAddressList {
r.Client.AdminAddressUrl = adminAddressUrl
// 向调度中心发起注册
registryResult := r.Client.Registry(registryParam)
if registryResult.Code == 200 {
// 向其中一个调度中心注册完成
// log
} else {
// log
}
}
// 睡眠 30 s
// 维持心跳
time.Sleep(30 * time.Second)
}
}
}()
}
代码很简单,就是一个协程里面,每 30s 向 admin 注册一次。
admin
// Registry records an executor heartbeat in the registry table.
//
// The database work runs in its own goroutine: the existing row is updated
// first, and a new row is saved only when the update reports fewer than one
// affected row. The method does not wait for that work, so it always returns
// Code 200 and database failures are not reported to the caller.
func (r *RegistryHelper) Registry(registryParam biz.RegistryParam) biz.Return[string] {
	go func(p biz.RegistryParam) {
		if affected := dao.RegistryUpdate(p.RegistryGroup, p.RegistryKey, p.RegistryValue); affected >= 1 {
			return
		}
		// Nothing was updated, so this executor has no row yet: insert one.
		dao.RegistrySave(p.RegistryGroup, p.RegistryKey, p.RegistryValue)
	}(registryParam)

	return biz.Return[string]{Code: 200}
}
// RegistryRemove deletes an executor's row from the registry table.
//
// The delete runs in its own goroutine and is not awaited, so the method
// always returns Code 200 regardless of the outcome of the delete.
func (r *RegistryHelper) RegistryRemove(registryParam biz.RegistryParam) biz.Return[string] {
	remove := func(p biz.RegistryParam) {
		dao.RegistryDel(p.RegistryGroup, p.RegistryKey, p.RegistryValue)
	}
	go remove(registryParam)

	return biz.Return[string]{Code: 200}
}
admin 接收到 client 请求后,将信息维护到 xxl_job_registry 表中。
接收 client 请求的方法定义在 main.go 中
package registry
import (
	"slices"
	"strings"
	"time"

	"xin-job/admin/dao"
	"xin-job/core/biz"
)
// RegistryHelper maintains executor registrations on the admin side.
type RegistryHelper struct {
	// Stop is the shutdown signal channel polled by the goroutine that Start
	// launches. It must be non-nil to be usable: closing a nil channel panics
	// and sending on one blocks forever.
	Stop chan struct{}
}

// instance is the package-wide RegistryHelper. It is built in the variable
// initialiser (rather than in init) with an allocated Stop channel, matching
// how the client-side ExecutorRegistry singleton is constructed.
var instance = &RegistryHelper{
	Stop: make(chan struct{}),
}

// GetRegistryHelper returns the package-wide RegistryHelper.
func GetRegistryHelper() *RegistryHelper {
	return instance
}
// Start launches the background goroutine that maintains executor heartbeats.
//
// Every 30 seconds it purges expired registry rows and rewrites the address
// list of each auto-registered group from the rows that are still alive. The
// goroutine exits as soon as r.Stop is closed or receives a value; a nil Stop
// channel simply never fires.
func (r *RegistryHelper) Start() {
	const beatInterval = 30 * time.Second

	go func() {
		for {
			// A pending stop request takes priority over another pass.
			select {
			case <-r.Stop:
				return
			default:
			}

			r.refreshGroupAddresses()

			// Wait for the next pass, but wake up immediately on shutdown.
			select {
			case <-r.Stop:
				return
			case <-time.After(beatInterval):
			}
		}
	}()
}

// refreshGroupAddresses runs one maintenance pass: it deletes dead registry
// rows and writes the live executor addresses into every auto-registered group.
func (r *RegistryHelper) refreshGroupAddresses() {
	// Registrations not renewed within this window count as dead
	// (unit presumed to be seconds — confirm against the dao queries).
	const deadTimeout = 90

	// Address type 0 selects the groups whose addresses are auto-registered.
	groups := dao.FindByAddressType(0)
	if len(groups) == 0 {
		return
	}

	// Use one reference time so the dead and alive queries agree on the cutoff.
	now := time.Now()

	// Drop expired heartbeats.
	if ids := dao.FindDead(deadTimeout, now); len(ids) > 0 {
		dao.RemoveDead(ids)
	}

	// Collect the distinct live addresses per app name.
	appAddressMap := make(map[string][]string)
	for _, item := range dao.FindAll(deadTimeout, now) {
		if item.RegistryGroup != "EXECUTOR" {
			continue
		}
		addresses := appAddressMap[item.RegistryKey]
		if !slices.Contains(addresses, item.RegistryValue) {
			appAddressMap[item.RegistryKey] = append(addresses, item.RegistryValue)
		}
	}

	// Every group is rewritten, including those with no live executor: their
	// address list becomes empty, so a dead executor's stale address does not
	// stay in the group.
	for _, item := range groups {
		item.AddressList = strings.Join(appAddressMap[item.AppName], ",")
		item.UpdateTime = time.Now()
		dao.UpdateGroup(&item)
	}
}
代码很简单,就是维护 xxl_job_registry 表与 xxl_job_group 表。
xxl_job_registry:删除过期的记录,即 90s 内都没有重新注册的执行器;xxl_job_group:将有效的执行器地址维护到 addressList 字段。
路由对接
package trigger
import (
"encoding/json"
"fmt"
"log"
"xin-job/admin/dao"
"xin-job/admin/router"
"xin-job/core/biz"
"xin-job/core/httpclient"
)
// AddTrigger asynchronously dispatches one run of a job to an executor.
//
// It spawns a goroutine that loads the job and its group through dao, asks the
// router for a target among the group's registered addresses, and POSTs a
// JSON-encoded biz.TriggerParam to "<address>/run". Failures are logged rather
// than returned, and a panic inside the goroutine is recovered and logged.
//
// Parameters:
//   - jobId: ID of the job to trigger.
//   - triggerType: kind of trigger.
//   - failRetryCount: number of retries on failure.
//   - executorShardingParam: sharding parameter.
//   - executorParam: parameter for the executor method.
//   - addressList: executor address list.
//
// NOTE(review): only jobId is read at present; the other parameters are
// accepted but not used by this implementation.
// NOTE(review): the router is selected from job.ExecutorBlockStrategy — this
// looks like it may have been meant to be a route strategy; confirm.
func AddTrigger(jobId int,
	triggerType string,
	failRetryCount int,
	executorShardingParam string,
	executorParam string,
	addressList string) {
	dispatch := func() {
		// Keep a failing trigger from taking the whole process down.
		defer func() {
			if cause := recover(); cause != nil {
				log.Printf("trigger err %v\n", cause)
			}
		}()

		jobInfo := dao.LoadJobInfoById(jobId)
		jobGroup := dao.LoadGroup(jobInfo.JobGroup)

		// Pick the executor address to call.
		strategy := router.GetRouter(jobInfo.ExecutorBlockStrategy)
		target, routeErr := strategy.Router(jobId, jobGroup.GetRegistryList())
		if routeErr != nil {
			log.Printf("调度失败: jobId=%d err:%s\n", jobId, routeErr.Error())
			return
		}

		payload, encodeErr := json.Marshal(biz.TriggerParam{
			JobId:           jobId,
			ExecutorHandler: jobInfo.ExecutorHandler,
		})
		if encodeErr != nil {
			log.Printf("trigger err %v\n", encodeErr)
			return
		}

		request := httpclient.GetHttpclient().Url(target + "/run").Body(payload)
		resp, postErr := request.Post()
		if postErr != nil {
			log.Printf("http post err %v\n", postErr)
			return
		}

		// TODO log update db
		fmt.Printf("resp code: %d Content:%v \n", resp.StatusCode, resp.Content)
	}

	go dispatch()
}
有效的执行器地址从 xxl_job_group 表中查出,再通过路由策略选出合适的执行器,触发定时任务。