1. Nacos集群状态下服务端心跳检查客户端状态

87 阅读2分钟
  1. 服务端对客户端心跳的检查是一个定时任务,在注册实例时开启。注册代码如下:
InstanceController源码:

/**
 * Handles an instance-registration request.
 *
 * <p>Reads the namespace id (falling back to {@code Constants.DEFAULT_NAMESPACE_ID}) and the
 * service name from the request, checks the service-name format, parses the instance out of
 * the request and passes everything to {@code serviceManager.registerInstance}.
 *
 * @param request the incoming HTTP request carrying the registration parameters
 * @return the literal string {@code "ok"} when registration returns normally
 * @throws Exception whatever parameter extraction, parsing or registration throws
 */
public String register(HttpServletRequest request) throws Exception {
    final String namespace = WebUtils.optional(request, CommonParams.NAMESPACE_ID,
            Constants.DEFAULT_NAMESPACE_ID);
    final String service = WebUtils.required(request, CommonParams.SERVICE_NAME);
    NamingUtils.checkServiceNameFormat(service);

    // Key step: service registration. The instance is parsed from the request right here.
    serviceManager.registerInstance(namespace, service, parseInstance(request));
    return "ok";
}
  2. ServiceManager类下:
/**
 * Registers an instance under the given namespace and service.
 *
 * <p>First makes sure the service exists (creating an empty one if needed), then adds the
 * instance to it.
 *
 * @param namespaceId namespace the service belongs to
 * @param serviceName name of the service
 * @param instance    instance to register
 * @throws NacosException with {@code NacosException.INVALID_PARAM} if the service still cannot
 *                        be found after the create step, or whatever the delegates throw
 */
public void registerInstance(String namespaceId, String serviceName, Instance instance) throws NacosException {
    // Key step: create an empty service if absent. Service creation is where the
    // client-beat check gets scheduled (createServiceIfAbsent -> putServiceAndInit -> init).
    createEmptyService(namespaceId, serviceName, instance.isEphemeral());

    // The service must be resolvable now; otherwise the request is rejected.
    if (getService(namespaceId, serviceName) == null) {
        throw new NacosException(NacosException.INVALID_PARAM,
                "service not found, namespace: " + namespaceId + ", service: " + serviceName);
    }

    addInstance(namespaceId, serviceName, instance.isEphemeral(), instance);
}
  3. ServiceManager类下:
/**
 * Creates the service if it does not exist yet, without supplying a cluster.
 *
 * @param namespaceId namespace the service belongs to
 * @param serviceName name of the service
 * @param local       forwarded unchanged to {@code createServiceIfAbsent}
 * @throws NacosException propagated from {@code createServiceIfAbsent}
 */
public void createEmptyService(String namespaceId, String serviceName, boolean local) throws NacosException {
    // An empty service is created with no cluster attached.
    final Cluster noCluster = null;
    createServiceIfAbsent(namespaceId, serviceName, local, noCluster);
}

4.ServiceManager类下:

/**
 * Creates and initializes a service unless one is already registered under the same
 * namespace and name.
 *
 * @param namespaceId namespace the service belongs to
 * @param serviceName name of the service
 * @param local       when {@code false}, {@code addOrReplaceService} is additionally called
 *                    for the new service after it has been stored and initialized
 * @param cluster     optional cluster to attach to the new service; may be {@code null}
 * @throws NacosException propagated from validation or from the store/init steps
 */
public void createServiceIfAbsent(String namespaceId, String serviceName, boolean local, Cluster cluster)
        throws NacosException {
    if (getService(namespaceId, serviceName) != null) {
        // Already present: nothing to create.
        return;
    }

    Loggers.SRV_LOG.info("creating empty service {}:{}", namespaceId, serviceName);

    final Service created = new Service();
    created.setName(serviceName);
    created.setNamespaceId(namespaceId);
    created.setGroupName(NamingUtils.getGroupName(serviceName));
    created.setLastModifiedMillis(System.currentTimeMillis());
    created.recalculateChecksum();

    if (cluster != null) {
        // Wire the cluster and the service to each other.
        cluster.setService(created);
        created.getClusterMap().put(cluster.getName(), cluster);
    }

    // Validate the service now; an exception is thrown if validation fails.
    created.validate();

    // Key step: store the service in memory and initialize its heartbeat check.
    putServiceAndInit(created);

    if (!local) {
        addOrReplaceService(created);
    }
}

5.ServiceManager类下:

/**
 * Stores the service, initializes it, and registers it as a listener on both of its
 * instance-list keys.
 *
 * @param service the service to store and initialize
 * @throws NacosException propagated from the calls made here
 */
private void putServiceAndInit(Service service) throws NacosException {
    putService(service);

    // Key step: init() is what schedules the service's client-beat check.
    service.init();

    // Listen on the instance-list key built with 'true' first, then the one built with 'false'
    // (presumably the ephemeral / persistent variants -- confirm against KeyBuilder).
    for (boolean keyFlag : new boolean[] {true, false}) {
        consistencyService.listen(
                KeyBuilder.buildInstanceListKey(service.getNamespaceId(), service.getName(), keyFlag),
                service);
    }

    Loggers.SRV_LOG.info("[NEW-SERVICE] {}", service.toJson());
}

6.Service类下

/**
 * Initializes this service: schedules its client-beat check task and then initializes
 * every cluster in {@code clusterMap}, pointing each cluster back at this service.
 */
public void init() {
    // Start the periodic heartbeat check for this service.
    HealthCheckReactor.scheduleCheck(clientBeatCheckTask);

    for (Cluster cluster : clusterMap.values()) {
        cluster.setService(this);
        cluster.init();
    }
}

7.HealthCheckReactor类下:

/**
 * Schedules a client-beat check task unless one is already registered under the same task key.
 *
 * <p>The scheduled work is {@code ClientBeatCheckTask.run()}. The task is submitted through
 * {@code GlobalExecutor.scheduleNamingHealth} with the arguments {@code 5000, 5000,
 * TimeUnit.MILLISECONDS} (presumably initial delay and period -- confirm against
 * {@code GlobalExecutor}).
 *
 * <p>The future is created lazily inside {@code computeIfAbsent}, so nothing is scheduled
 * when the key is already present. Passing the future directly to {@code putIfAbsent} would
 * schedule a duplicate task first and then drop the only handle able to cancel it.
 *
 * @param task the client-beat check task to schedule; its {@code taskKey()} identifies it
 */
public static void scheduleCheck(ClientBeatCheckTask task) {
    // NOTE(review): atomicity of this call relies on futureMap being a concurrent map -- confirm.
    futureMap.computeIfAbsent(task.taskKey(),
            key -> GlobalExecutor.scheduleNamingHealth(task, 5000, 5000, TimeUnit.MILLISECONDS));
}

8.ClientBeatCheckTask类下:

/**
 * One pass of the client-beat check for {@code service}.
 *
 * <p>The pass is skipped when this node is not responsible for the service or when health
 * checking is switched off. Otherwise it runs in two phases over the instances returned by
 * {@code service.allIPs(true)}:
 * <ol>
 *   <li>every unmarked, currently healthy instance whose last beat is older than its
 *       heartbeat timeout is set unhealthy, and the change is pushed and published;</li>
 *   <li>if instance expiry is enabled, every unmarked instance whose last beat is older than
 *       its delete timeout is removed via {@code deleteIp}.</li>
 * </ol>
 * Any exception is logged as a warning and not rethrown.
 */
@Override
public void run() {
    try {
        // In cluster mode, only the node responsible for this service runs the check.
        if (!getDistroMapper().responsible(service.getName())) {
            return;
        }
        // Health checking can be switched off globally.
        if (!getSwitchDomain().isHealthCheckEnabled()) {
            return;
        }

        List<Instance> instances = service.allIPs(true);

        // Phase 1: set the health status of timed-out instances.
        for (Instance instance : instances) {
            boolean beatTimedOut =
                    System.currentTimeMillis() - instance.getLastBeat() > instance.getInstanceHeartBeatTimeOut();
            if (!beatTimedOut || instance.isMarked() || !instance.isHealthy()) {
                continue;
            }

            instance.setHealthy(false);
            Loggers.EVT_LOG
                    .info("{POS} {IP-DISABLED} valid: {}:{}@{}@{}, region: {}, msg: client timeout after {}, last beat: {}",
                            instance.getIp(), instance.getPort(), instance.getClusterName(),
                            service.getName(), UtilsAndCommons.LOCALHOST_SITE,
                            instance.getInstanceHeartBeatTimeOut(), instance.getLastBeat());
            getPushService().serviceChanged(service);
            ApplicationUtils.publishEvent(new InstanceHeartbeatTimeoutEvent(this, instance));
        }

        if (!getGlobalConfig().isExpireInstance()) {
            return;
        }

        // Phase 2: remove obsolete instances.
        for (Instance instance : instances) {
            boolean expired = !instance.isMarked()
                    && System.currentTimeMillis() - instance.getLastBeat() > instance.getIpDeleteTimeout();
            if (expired) {
                Loggers.SRV_LOG.info("[AUTO-DELETE-IP] service: {}, ip: {}", service.getName(),
                        JacksonUtils.toJson(instance));
                deleteIp(instance);
            }
        }
    } catch (Exception e) {
        Loggers.SRV_LOG.warn("Exception while processing client beat time out.", e);
    }
}

9.DistroMapper类下:这里能够保证在集群的多台服务器中,每个服务只由一个节点负责检查其心跳。

/**
 * Decides whether the local node is responsible for the given service.
 *
 * <p>Returns {@code true} outright when distro is disabled or the server runs in standalone
 * mode, and also when the local address does not appear in the healthy server list. Returns
 * {@code false} while the healthy server list is empty. Otherwise the service name is hashed
 * onto a slot in the list ({@code distroHash(serviceName) % size}), and the node is
 * responsible when that slot lies between the first and last positions of the local address.
 *
 * @param serviceName name of the service to check
 * @return {@code true} if this node should handle the service
 */
public boolean responsible(String serviceName) {
    // Snapshot of the healthy servers in the cluster (e.g. 5 nodes).
    final List<String> servers = healthyList;

    // Distro disabled or standalone mode: this node handles everything.
    if (!switchDomain.isDistroEnabled() || EnvUtil.getStandaloneMode()) {
        return true;
    }

    if (CollectionUtils.isEmpty(servers)) {
        // means distro config is not ready yet
        return false;
    }

    final String localAddress = EnvUtil.getLocalAddress();
    // First and last positions of the local address in the list
    // (e.g. both are 1 if this node is the second entry).
    final int firstPos = servers.indexOf(localAddress);
    final int lastPos = servers.lastIndexOf(localAddress);

    // The local address is not in the list.
    if (firstPos < 0 || lastPos < 0) {
        return true;
    }

    // Slot for this service; intended to come out the same on every node
    // (assumes all nodes hold the same server list -- confirm).
    final int slot = distroHash(serviceName) % servers.size();

    // firstPos <= slot <= lastPos
    return firstPos <= slot && slot <= lastPos;
}