前面几篇文章已经实现了任务调度、路由和心跳维护。但还有一个问题:任务虽然已经调度成功,但它是异步执行的(单开一个协程),调度中心并不知道任务的执行结果。
所以定时任务执行结束后,需要将任务执行结果回传到调度中心。
schedule -> trigger: 调度任务
trigger -> log: save job log
trigger -> client: 远程触发任务
client -> executor: 执行任务
executor -> trigger_callback: 添加回调到队列
trigger_callback -> admin: 异步取出队列信息,触发任务结果回调
逻辑很简单,直接上代码。
admin
// trigger.go
// 触发任务之前,创建 jobLog 并保存数据库,并将 logId 发送到 example
// AddTrigger fires one run of the job identified by jobId in a new goroutine.
//
// The goroutine saves a job log row, asks the router for an executor address,
// POSTs the trigger parameters to "<address>/run" and finally records the
// outcome through updateLog. Every failure path also goes through updateLog,
// with result.Code left at 500 unless the executor response overwrote it.
//
// triggerType is forwarded to updateLog. failRetryCount,
// executorShardingParam, executorParam and addressList are accepted but not
// read by this implementation (sharding and retry are still TODO).
//
// Panics inside the goroutine are recovered and logged.
func AddTrigger(jobId int,
	triggerType string,
	failRetryCount int,
	executorShardingParam string,
	executorParam string,
	addressList string) {
	go func() {
		defer func() {
			if r := recover(); r != nil {
				log.Printf("trigger err %v\n", r)
			}
		}()

		// Start from a failure code; a decoded executor response replaces it.
		result := biz.Return[string]{Code: 500}

		job := dao.LoadJobInfoById(jobId)
		group := dao.LoadGroup(job.JobGroup)

		// Persist the log first so its Id can be sent to the executor.
		jobLog := model.XxlJobLog{
			JobGroup:    group.Id,
			JobId:       job.Id,
			TriggerTime: sql.NullTime{Time: time.Now(), Valid: true},
		}
		dao.SaveLog(&jobLog)

		// NOTE(review): the router is selected by ExecutorBlockStrategy; confirm
		// this should not be a dedicated route-strategy field.
		route := router.GetRouter(job.ExecutorBlockStrategy)

		// finish writes the current result into the job log.
		finish := func() {
			updateLog(&jobLog, &result, triggerType, route.GetTitle(), job, group)
		}
		// fail records msg as the result message and writes the log.
		fail := func(msg string) {
			result.Msg = msg
			finish()
		}

		address, err := route.Router(jobId, group.GetRegistryList())
		if err != nil {
			fail("路由地址失败")
			return
		}

		body, err := json.Marshal(biz.TriggerParam{
			JobId:                 jobId,
			ExecutorHandler:       job.ExecutorHandler,
			ExecutorParams:        job.ExecutorParam,
			ExecutorBlockStrategy: job.ExecutorBlockStrategy,
			ExecutorTimeout:       job.ExecutorTimeout,
			LogId:                 jobLog.Id,
			LogDateTime:           jobLog.TriggerTime.Time.UnixMilli(),
			GlueType:              job.GlueType,
			GlueSource:            job.GlueSource,
			GlueUpdatetime:        job.GlueUpdatetime.UnixMilli(),
			BroadcastIndex:        1, // TODO sharding
			BroadcastTotal:        1, // TODO sharding
		})
		if err != nil {
			fail(fmt.Sprintf("param json marshal err, jobId=%d err=%s\n", jobId, err.Error()))
			return
		}

		resp, err := httpclient.GetHttpclient().Url(address + "/run").Body(body).Post()
		if err != nil {
			fail(fmt.Sprintf("http post err, jobId=%d err=%s\n", jobId, err.Error()))
			return
		}

		if err = json.Unmarshal(resp.Content, &result); err != nil {
			fail(fmt.Sprintf("resp json unmarshal err, jobId=%d err=%s\n", jobId, err.Error()))
			return
		}

		// Record where and how the job was dispatched.
		jobLog.ExecutorAddress = address
		jobLog.ExecutorHandler = job.ExecutorHandler
		jobLog.ExecutorParam = job.ExecutorParam
		jobLog.ExecutorShardingParam = "" // TODO sharding
		jobLog.ExecutorFailRetryCount = 0 // TODO retry
		finish()
	}()
}
// main.go
// 接收 example 任务执行完毕后,回调请求。
switch r.URL.Path {
case "/api/callback":
var param []*biz.HandleCallbackParam
err = json.Unmarshal(body, &param)
re := abiz.GetBizAdmin().Callback(param)
// callback.go
// 接收到 example 任务执行结束的回调信息后,更新 log 数据库。
// CallbackHelper handles the callbacks that executors send after a job
// finishes.
type CallbackHelper struct {
	// Stop is a signalling channel; nothing in the code shown here reads or
	// closes it yet.
	Stop chan struct{}
}
// Callback processes a batch of executor callbacks in a new goroutine and
// returns immediately. The entries are handled one by one, in order, through
// callback0. A panic while processing is recovered and logged, which also
// stops processing of the remaining entries in the batch.
func (c *CallbackHelper) Callback(params []*biz.HandleCallbackParam) {
	process := func() {
		defer func() {
			if r := recover(); r != nil {
				log.Printf("callback err %v\n", r)
			}
		}()
		for i := range params {
			c.callback0(params[i])
		}
	}
	go process()
}
// callback0 applies a single executor callback to its job log row.
//
// It does nothing (apart from logging) when the log row cannot be loaded or
// when the row already carries a positive HandleCode, i.e. it was called back
// before. Otherwise it stores the handle time, code and message and persists
// the row through dao.UpdateHandleInfoAndFinish. An existing HandleMsg is
// kept and separated from the new message by "<br>".
func (c *CallbackHelper) callback0(param *biz.HandleCallbackParam) {
	jobLog := dao.LoadLog(param.LogId)
	switch {
	case jobLog == nil:
		log.Printf("log item not found. %v\n", param.LogId)
		return
	case jobLog.HandleCode > 0:
		// This log has already been called back once.
		log.Printf("log repeate callback. %v\n", param.LogId)
		return
	}

	merged := param.HandleMsg
	if jobLog.HandleMsg != "" {
		merged = jobLog.HandleMsg + "<br>" + param.HandleMsg
	}

	jobLog.HandleMsg = merged
	jobLog.HandleCode = param.HandleCode
	jobLog.HandleTime = sql.NullTime{Time: time.Now(), Valid: true}
	dao.UpdateHandleInfoAndFinish(jobLog)
}
client
// trigger_callback.go
// 任务执行完成之后,将执行结果等信息放入 TriggerCallback 结构体内的队列,
// 然后由异步协程取出队列中的数据,回调到调度中心。
package callback
import (
	"encoding/gob"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"sync"
	"time"

	"xin-job/client/cbiz"
	"xin-job/client/sconf"
	"xin-job/core/biz"
)
// instance is the package-level TriggerCallback returned by GetInstance.
//
// stop is created here because a nil channel never becomes ready in a select
// and panics when closed, so the stop signal could otherwise never be
// delivered. size is left at its zero value.
var instance = &TriggerCallback{
	q:    make([]*biz.HandleCallbackParam, 0),
	stop: make(chan struct{}),
}
// GetInstance returns the shared package-level TriggerCallback. Every call
// yields the same pointer.
func GetInstance() *TriggerCallback {
	return instance
}
// TriggerCallback queues job results and delivers them to the admin side
// from background goroutines.
type TriggerCallback struct {
	// Mutex is locked by PushCallBack and drainTo while they touch q.
	sync.Mutex

	// q is a simple slice-backed queue of pending callbacks.
	q []*biz.HandleCallbackParam
	// size counts the entries pushed onto q.
	size int

	// stop is the channel the background loops select on.
	stop chan struct{}
}
// Start launches the two background goroutines: callback, which delivers
// queued results, and retryCallback, which re-sends results previously saved
// to disk. It returns immediately.
func (t *TriggerCallback) Start() {
	go t.callback()      // deliver queued callbacks
	go t.retryCallback() // retry callbacks that failed earlier
}
// callback is the delivery loop: it repeatedly drains the queue and sends
// whatever it finds through doCallback.
//
// When the queue is empty the loop waits briefly instead of spinning, so an
// idle executor does not burn a CPU core. Emptiness is judged from the
// drained slice, so the loop itself never reads the queue state without the
// lock. The loop returns once the stop channel is closed (or receives a
// value).
//
// A panic is recovered and logged; the loop is not restarted afterwards.
func (t *TriggerCallback) callback() {
	defer func() {
		if err := recover(); err != nil {
			log.Printf("trigger callback err %v\n", err)
		}
	}()

	// idleWait bounds the delivery latency of a freshly pushed callback.
	const idleWait = 100 * time.Millisecond

	for {
		data := t.drainTo()
		if len(data) > 0 {
			t.doCallback(data)
			// Look for more work right away, but still honour a stop signal.
			select {
			case <-t.stop:
				return
			default:
			}
			continue
		}

		// Nothing queued: sleep until the next poll or until stopped.
		select {
		case <-t.stop:
			return
		case <-time.After(idleWait):
		}
	}
}
// retryCallback is the retry loop: it calls retryFailCallbackFile right away
// and then again after every 30 second pause.
//
// The pause is a select rather than a plain sleep so that closing the stop
// channel ends the loop promptly. A nil stop channel simply never fires, in
// which case the loop runs forever as before.
//
// A panic is recovered and logged; the loop is not restarted afterwards.
func (t *TriggerCallback) retryCallback() {
	defer func() {
		if err := recover(); err != nil {
			log.Printf("trigger retry callback err %v\n", err)
		}
	}()

	const retryInterval = 30 * time.Second

	for {
		t.retryFailCallbackFile()

		select {
		case <-t.stop:
			return
		case <-time.After(retryInterval):
		}
	}
}
// PushCallBack appends param to the pending queue and increments size, both
// while holding the mutex.
func (t *TriggerCallback) PushCallBack(param *biz.HandleCallbackParam) {
	t.Lock()
	t.q = append(t.q, param)
	t.size++
	t.Unlock()
}
// drainTo removes and returns everything currently queued, or nil when the
// queue is empty.
//
// The emptiness check and the swap both happen under the mutex. size is reset
// to 0 together with the queue; without that reset it would stay positive
// forever after the first push and no longer reflect the queue.
func (t *TriggerCallback) drainTo() []*biz.HandleCallbackParam {
	t.Mutex.Lock()
	defer t.Mutex.Unlock()

	if len(t.q) == 0 {
		return nil
	}

	// Hand the old backing slice to the caller and start a fresh queue, so no
	// copy is needed and later pushes cannot alias the returned slice.
	drained := t.q
	t.q = make([]*biz.HandleCallbackParam, 0)
	t.size = 0
	return drained
}
// doCallback sends params to the admin addresses listed in
// sconf.ServerConf.AdminAddressList, one after another, and stops at the
// first address that answers with code 200 (the admin side may be a cluster,
// so one success is enough). If no address accepts the callback, params are
// written to a local file through appendFailCallbackFile. An empty params
// slice is ignored.
func (t *TriggerCallback) doCallback(params []*biz.HandleCallbackParam) {
	if len(params) == 0 {
		return
	}

	for _, adminAddr := range sconf.ServerConf.AdminAddressList {
		bizClient := cbiz.GetBizClient()
		bizClient.AdminAddressUrl = adminAddr
		if bizClient.Callback(params).Code == 200 {
			// Delivered; nothing left to do.
			return
		}
		// TODO write a local log entry for the failed attempt
	}

	// Every address failed: keep the data on disk for a later retry.
	t.appendFailCallbackFile(params)
}
// appendFailCallbackFile stores callbacks that could not be delivered in a
// new gob-encoded file named "xxl-job-callback-<unix millis>.log" inside the
// "callbacklog" directory under the current working directory.
//
// Errors are logged rather than returned. The function stops as soon as the
// directory or the file cannot be created, instead of continuing with a nil
// file handle.
//
// TODO: the directory is hard-coded for now and should come from config.
func (t *TriggerCallback) appendFailCallbackFile(params []*biz.HandleCallbackParam) {
	pwd, err := os.Getwd()
	if err != nil {
		log.Printf("appendFailCallbackFile getwd err=%s", err.Error())
		return
	}

	// MkdirAll succeeds when the directory already exists, so no Stat is
	// needed beforehand.
	logPath := filepath.Join(pwd, "callbacklog")
	if err := os.MkdirAll(logPath, 0775); err != nil {
		log.Printf("appendFailCallbackFile dir err=%s", err.Error())
		return
	}

	logFile := filepath.Join(logPath, fmt.Sprintf("xxl-job-callback-%d.log", time.Now().UnixMilli()))
	file, err := os.Create(logFile)
	if err != nil {
		log.Printf("appendFailCallbackFile err=%s", err.Error())
		return
	}
	defer file.Close()

	// gob serialization
	if err := gob.NewEncoder(file).Encode(params); err != nil {
		log.Printf("appendFailCallbackFile encode err=%s", err.Error())
	}
}
func (t *TriggerCallback) retryFailCallbackFile() {
pwd, _ := os.Getwd()
logPath := fmt.Sprintf("%s/callbacklog", pwd)
files, err := os.ReadDir(logPath)
if err != nil {
//log
}
for _, v := range files {
if !v.IsDir() {
file, err := os.Open(v.Name())
if err != nil {
continue
}
defer file.Close()
d := gob.NewDecoder(file)
var params []*biz.HandleCallbackParam
err = d.Decode(¶ms)
if err != nil {
//log
}
os.Remove(v.Name())
t.doCallback(params)
}
}
}