手写 xxl-job 05

117 阅读3分钟

1.jpg

前面几篇文章已经实现了任务调度、路由和心跳的维护。但是还有一个问题:任务虽然已经调度成功,但它是异步执行的(单开一个协程),调度中心并不知道任务最终的执行结果。

所以定时任务执行结束后,需要将任务的执行结果回传到调度中心。

schedule -> trigger: 调度任务
trigger -> log: save job log
trigger -> client: 远程触发任务
client -> executor: 执行任务
executor -> trigger_callback: 添加回调到队列
trigger_callback -> admin: 异步取出队列信息,触发任务结果回调

逻辑很简单,直接上代码。

admin

// trigger.go

// AddTrigger fires one run of the job identified by jobId on a new goroutine
// and returns immediately.
//
// Before the executor is contacted, a job log row is created and saved so that
// its Id can be sent along in the trigger request. Every exit path of the
// goroutine ends with a call to updateLog carrying the outcome.
//
// triggerType is forwarded to updateLog. failRetryCount, executorShardingParam,
// executorParam and addressList are accepted but not referenced by the body.
// A panic inside the goroutine is recovered and logged.
func AddTrigger(jobId int,
	triggerType string,
	failRetryCount int,
	executorShardingParam string,
	executorParam string,
	addressList string) {

	go func() {
		defer func() {
			if r := recover(); r != nil {
				log.Printf("trigger err %v\n", r)
			}
		}()

		// Starts out as a failure; only a successfully decoded executor
		// response overwrites it.
		ret := biz.Return[string]{Code: 500}

		info := dao.LoadJobInfoById(jobId)
		grp := dao.LoadGroup(info.JobGroup)

		// Save the log row up front; presumably SaveLog fills in entry.Id,
		// which is sent to the executor as LogId below — TODO confirm.
		entry := model.XxlJobLog{
			JobGroup:    grp.Id,
			JobId:       info.Id,
			TriggerTime: sql.NullTime{Time: time.Now(), Valid: true},
		}
		dao.SaveLog(&entry)

		// NOTE(review): the router is selected by ExecutorBlockStrategy;
		// confirm this is intended rather than a routing-strategy field.
		strategy := router.GetRouter(info.ExecutorBlockStrategy)

		// finish writes the current outcome to the log row; fail records msg first.
		finish := func() {
			updateLog(&entry, &ret, triggerType, strategy.GetTitle(), info, grp)
		}
		fail := func(msg string) {
			ret.Msg = msg
			finish()
		}

		addr, err := strategy.Router(jobId, grp.GetRegistryList())
		if err != nil {
			fail("路由地址失败")
			return
		}

		payload, err := json.Marshal(biz.TriggerParam{
			JobId:                 jobId,
			ExecutorHandler:       info.ExecutorHandler,
			ExecutorParams:        info.ExecutorParam,
			ExecutorBlockStrategy: info.ExecutorBlockStrategy,
			ExecutorTimeout:       info.ExecutorTimeout,
			LogId:                 entry.Id,
			LogDateTime:           entry.TriggerTime.Time.UnixMilli(),
			GlueType:              info.GlueType,
			GlueSource:            info.GlueSource,
			GlueUpdatetime:        info.GlueUpdatetime.UnixMilli(),
			BroadcastIndex:        1, // TODO sharding
			BroadcastTotal:        1, // TODO sharding
		})
		if err != nil {
			fail(fmt.Sprintf("param json marshal err, jobId=%d err=%s\n", jobId, err.Error()))
			return
		}

		resp, err := httpclient.GetHttpclient().Url(addr + "/run").Body(payload).Post()
		if err != nil {
			fail(fmt.Sprintf("http post err, jobId=%d err=%s\n", jobId, err.Error()))
			return
		}

		if err = json.Unmarshal(resp.Content, &ret); err != nil {
			fail(fmt.Sprintf("resp json unmarshal err, jobId=%d err=%s\n", jobId, err.Error()))
			return
		}

		// The executor answered: record where and how the job was dispatched.
		entry.ExecutorAddress = addr
		entry.ExecutorHandler = info.ExecutorHandler
		entry.ExecutorParam = info.ExecutorParam
		entry.ExecutorShardingParam = "" // TODO sharding
		entry.ExecutorFailRetryCount = 0 // TODO retry

		finish()
	}()
}
// main.go
// Receives the callback request that example sends after a task has finished executing.
switch r.URL.Path {
	case "/api/callback":
		// The body is decoded as a JSON array of callback parameters.
		var param []*biz.HandleCallbackParam
		// NOTE(review): err is assigned here but is not checked within this excerpt.
		err = json.Unmarshal(body, &param)
	
		re := abiz.GetBizAdmin().Callback(param)
// callback.go

// CallbackHelper updates the job log in the database after example reports
// that a task has finished executing.
type CallbackHelper struct {
	// Stop is not read by Callback or callback0; presumably reserved for
	// shutdown signalling — TODO confirm.
	Stop chan struct{}
}

// Callback hands params to a new goroutine, which applies callback0 to each
// entry in order, and returns without waiting. A panic raised while processing
// is recovered and logged; entries after the failing one are not processed.
func (c *CallbackHelper) Callback(params []*biz.HandleCallbackParam) {
	handleAll := func() {
		defer func() {
			if r := recover(); r != nil {
				log.Printf("callback err %v\n", r)
			}
		}()

		for i := range params {
			c.callback0(params[i])
		}
	}

	go handleAll()
}

// callback0 applies a single callback result to its job log row.
//
// The row is loaded by param.LogId. Nothing is written when param is nil, when
// the row does not exist, or when the row already carries a positive
// HandleCode (it has been called back before). Otherwise the handle time,
// code and message are set and persisted via dao.UpdateHandleInfoAndFinish.
func (c *CallbackHelper) callback0(param *biz.HandleCallbackParam) {
	// A JSON null inside the callback array decodes to a nil pointer; skip it
	// here so it cannot panic and abort the rest of the batch.
	if param == nil {
		log.Print("callback param is nil, skipped\n")
		return
	}

	jobLog := dao.LoadLog(param.LogId)
	if jobLog == nil {
		log.Printf("log item not found. %v\n", param.LogId)
		return
	}

	// The log has already been called back once.
	if jobLog.HandleCode > 0 {
		log.Printf("log repeate callback. %v\n", param.LogId)
		return
	}

	// Keep any message already stored on the row and append the new one after
	// an HTML line break.
	var msg string
	if jobLog.HandleMsg != "" {
		msg = jobLog.HandleMsg + "<br>"
	}
	if param.HandleMsg != "" {
		msg += param.HandleMsg
	}

	jobLog.HandleTime = sql.NullTime{
		Time:  time.Now(),
		Valid: true,
	}
	jobLog.HandleCode = param.HandleCode
	jobLog.HandleMsg = msg

	dao.UpdateHandleInfoAndFinish(jobLog)
}

client

// trigger_callback.go

// 任务执行完成之后,将执行结果等信息放入 TriggerCallback 结构体内的队列,
// 然后由异步协程从队列中取出,回调给调度中心。
package callback

import (
	"encoding/gob"
	"fmt"
	"log"
	"os"
	"sync"
	"time"
	"xin-job/client/cbiz"
	"xin-job/client/sconf"
	"xin-job/core/biz"
)

  

// instance is the package-level TriggerCallback handed out by GetInstance.
var instance = &TriggerCallback{
	q:    make([]*biz.HandleCallbackParam, 0),
	size: 0,
	// Allocate the stop channel: left nil, a receive on it would block forever
	// and close(stop) would panic, so shutdown could never be signalled.
	stop: make(chan struct{}),
}

  

// GetInstance returns the package-level TriggerCallback; every call yields the
// same pointer.
func GetInstance() *TriggerCallback {
	return instance
}

  

// TriggerCallback queues task execution results and reports them back to the
// admin side from background goroutines (see Start).
type TriggerCallback struct {
	q []*biz.HandleCallbackParam // a simple queue implemented with a slice
	size int // incremented by PushCallBack for every queued item
	sync.Mutex // held by PushCallBack and drainTo while they touch q
	stop chan struct{} // received from by the callback and retryCallback loops
}

  

// Start launches the callback loop and the retryCallback loop, each on its own
// goroutine, and returns immediately.
func (t *TriggerCallback) Start() {
	go t.callback()
	go t.retryCallback()
}

  

// callback is the dispatcher loop: it periodically drains the queue and sends
// whatever was queued to the admin side via doCallback. It returns when the
// stop channel is closed or receives a value. A panic is recovered and logged,
// after which the loop ends.
func (t *TriggerCallback) callback() {
	defer func() {
		if err := recover(); err != nil {
			log.Printf("trigger callback err %v\n", err)
		}
	}()

	// Wake up on a ticker instead of spinning in a select/default loop, which
	// would keep one CPU core busy even when the queue is empty.
	const pollInterval = 100 * time.Millisecond
	ticker := time.NewTicker(pollInterval)
	defer ticker.Stop()

	for {
		select {
		case <-t.stop:
			return
		case <-ticker.C:
			// Emptiness is decided from what drainTo returns rather than by
			// reading t.size here without holding the mutex.
			if data := t.drainTo(); len(data) > 0 {
				t.doCallback(data)
			}
		}
	}
}

  

// retryCallback re-sends callbacks that were previously written to disk: it
// calls retryFailCallbackFile once right away and then again 30 seconds after
// each pass finishes. It returns when the stop channel is closed or receives a
// value. A panic is recovered and logged, after which the loop ends.
func (t *TriggerCallback) retryCallback() {
	defer func() {
		if err := recover(); err != nil {
			log.Printf("trigger retry callback err %v\n", err)
		}
	}()

	const retryInterval = 30 * time.Second

	for {
		// Do not start another pass once stop has been signalled.
		select {
		case <-t.stop:
			return
		default:
		}

		t.retryFailCallbackFile()

		// Wait for the next pass, but let a stop signal cut the wait short
		// (a plain time.Sleep could not be interrupted).
		select {
		case <-t.stop:
			return
		case <-time.After(retryInterval):
		}
	}
}

  

// PushCallBack appends param to the tail of the queue and increments size,
// holding the mutex while both fields are updated.
func (t *TriggerCallback) PushCallBack(param *biz.HandleCallbackParam) {
	t.Lock()
	t.q = append(t.q, param)
	t.size++
	t.Unlock()
}

  

// drainTo removes every queued item and returns them in insertion order.
// It returns nil when the queue is empty.
func (t *TriggerCallback) drainTo() []*biz.HandleCallbackParam {
	// Take the lock before looking at any shared state; checking size first
	// without it would race with PushCallBack.
	t.Mutex.Lock()
	defer t.Mutex.Unlock()

	if len(t.q) == 0 {
		return nil
	}

	// Hand the old backing slice to the caller and start a fresh queue, so the
	// returned slice is never appended to by later pushes.
	drained := t.q
	t.q = make([]*biz.HandleCallbackParam, 0)
	// Reset the counter together with the queue; otherwise it only ever grows
	// and can no longer tell whether anything is waiting.
	t.size = 0

	return drained
}

  

// doCallback reports params to the admin side. The addresses in
// sconf.ServerConf.AdminAddressList are tried in order and the first response
// with Code 200 ends the attempt. If no address answers with 200 (including
// when the list is empty), params are written to a local file through
// appendFailCallbackFile. An empty params slice is a no-op.
func (t *TriggerCallback) doCallback(params []*biz.HandleCallbackParam) {
	if len(params) == 0 {
		return
	}

	for _, adminAddr := range sconf.ServerConf.AdminAddressList {
		bizClient := cbiz.GetBizClient()
		bizClient.AdminAddressUrl = adminAddr

		if bizClient.Callback(params).Code == 200 {
			// The admin side may be a cluster; one successful callback is enough.
			return
		}
		// TODO: record the failure in a local log.
	}

	// Every address failed: keep params in a local file.
	t.appendFailCallbackFile(params)
}

  

// appendFailCallbackFile stores callbacks that could not be delivered in a
// dedicated local file. The file is gob-encoded and created as
// <working dir>/callbacklog/xxl-job-callback-<unix millis>.log.
// Failures are logged; nothing is returned to the caller.
func (t *TriggerCallback) appendFailCallbackFile(params []*biz.HandleCallbackParam) {
	// TODO: written under the current directory for now; should come from configuration.
	pwd, err := os.Getwd()
	if err != nil {
		log.Printf("appendFailCallbackFile getwd err=%s", err.Error())
		return
	}

	// MkdirAll succeeds when the directory already exists, so no separate
	// existence check is needed.
	logPath := fmt.Sprintf("%s/callbacklog", pwd)
	if err := os.MkdirAll(logPath, 0775); err != nil {
		log.Printf("appendFailCallbackFile dir err=%s", err.Error())
		return
	}

	logFile := fmt.Sprintf("%s/xxl-job-callback-%d.log", logPath, time.Now().UnixMilli())
	file, err := os.Create(logFile)
	if err != nil {
		// Without a file there is nothing to encode into.
		log.Printf("appendFailCallbackFile err=%s", err.Error())
		return
	}

	// gob serialization
	encodeErr := gob.NewEncoder(file).Encode(params)
	closeErr := file.Close()

	if encodeErr != nil {
		log.Printf("appendFailCallbackFile encode err=%s", encodeErr.Error())
		// A partially written file cannot be decoded later; do not leave it behind.
		if err := os.Remove(logFile); err != nil {
			log.Printf("appendFailCallbackFile remove err=%s", err.Error())
		}
		return
	}
	if closeErr != nil {
		log.Printf("appendFailCallbackFile close err=%s", closeErr.Error())
	}
}

// retryFailCallbackFile re-sends callbacks saved under
// <working dir>/callbacklog. Each regular entry in that directory is
// gob-decoded, deleted, and its contents passed to doCallback. Files that
// cannot be opened or decoded are logged and left in place.
func (t *TriggerCallback) retryFailCallbackFile() {
	pwd, err := os.Getwd()
	if err != nil {
		log.Printf("retryFailCallbackFile getwd err=%s", err.Error())
		return
	}
	logPath := fmt.Sprintf("%s/callbacklog", pwd)

	entries, err := os.ReadDir(logPath)
	if err != nil {
		// A missing directory only means that nothing has failed so far.
		if !os.IsNotExist(err) {
			log.Printf("retryFailCallbackFile read dir err=%s", err.Error())
		}
		return
	}

	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}

		// entry.Name() is only the base name; it has to be joined with the
		// directory, otherwise it is resolved against the working directory.
		path := fmt.Sprintf("%s/%s", logPath, entry.Name())

		file, err := os.Open(path)
		if err != nil {
			log.Printf("retryFailCallbackFile open err=%s", err.Error())
			continue
		}

		var params []*biz.HandleCallbackParam
		decodeErr := gob.NewDecoder(file).Decode(&params)
		// Close right away rather than deferring inside the loop, so handles
		// are not held until the function returns and the file is closed
		// before it is removed.
		if err := file.Close(); err != nil {
			log.Printf("retryFailCallbackFile close err=%s", err.Error())
		}

		if decodeErr != nil {
			// The file may still be in the middle of being written; keep it
			// for a later pass instead of deleting data that was never sent.
			log.Printf("retryFailCallbackFile decode err=%s", decodeErr.Error())
			continue
		}

		// Remove before sending: if the callback fails again, doCallback
		// writes the data to a new file.
		if err := os.Remove(path); err != nil {
			log.Printf("retryFailCallbackFile remove err=%s", err.Error())
		}
		t.doCallback(params)
	}
}