Eureka server源码解析(三)

198 阅读6分钟

1、服务剔除

1.1 自动配置

在EurekaServerAutoConfiguration中会导入EurekaServerInitializerConfiguration进行eureka server的一些初始化操作

初始化eureka server的配置

初始化eureka server context(集群同步注册信息、启动一些定时器(服务剔除、自我保护机制监听)、初始化自我保护机制的阈值)

服务剔除不会在集群节点之间进行同步,只会在本地节点执行剔除。

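如果想要自我保护机制正常运行,建议客户端的心跳续约间隔(eureka.instance.lease-renewal-interval-in-seconds)与服务端预期的客户端续约间隔(eureka.server.expected-client-renewal-interval-seconds)保持一致。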

EurekaServerAutoConfiguration自动配置类导入了EurekaServerInitializerConfiguration

public class EurekaServerInitializerConfiguration
		implements ServletContextAware, SmartLifecycle, Ordered {
		//实现了SmartLifecycle的生命周期方法 spring在启动过程中会进行回调
        public void start() {
           //创建一个新的线程来执行操作 并不会影响main线程
           new Thread(() -> {
              try {
                 // TODO: is this class even needed now?
                 //初始化eureka server上下文
                 eurekaServerBootstrap.contextInitialized(
                       EurekaServerInitializerConfiguration.this.servletContext);
                 log.info("Started Eureka Server");
								//发布事件
                 publish(new EurekaRegistryAvailableEvent(getEurekaServerConfig()));
                 EurekaServerInitializerConfiguration.this.running = true;
                 publish(new EurekaServerStartedEvent(getEurekaServerConfig()));
              }
              catch (Exception ex) {
                 // Help!
                 log.error("Could not initialize Eureka servlet context", ex);
              }
           }).start();
        }
        
       }
}

org.springframework.cloud.netflix.eureka.server.EurekaServerBootstrap#contextInitialized

/**
 * Bootstraps the Eureka server and exposes the resulting server context as a servlet
 * context attribute keyed by the {@code EurekaServerContext} class name.
 *
 * @param context servlet context that receives the server context attribute
 * @throws RuntimeException wrapping any failure raised while bootstrapping
 */
public void contextInitialized(ServletContext context) {
   try {
      // Read the configuration and set up the Eureka environment.
      initEurekaEnvironment();
      // Build the server context (peer registry sync and eviction timer setup happen in here).
      initEurekaServerContext();

      final String attributeName = EurekaServerContext.class.getName();
      context.setAttribute(attributeName, this.serverContext);
   }
   catch (Throwable failure) {
      final String message = "Cannot bootstrap eureka server :";
      log.error(message, failure);
      throw new RuntimeException(message, failure);
   }
}

org.springframework.cloud.netflix.eureka.server.EurekaServerBootstrap#initEurekaServerContext

/**
 * Initializes the Eureka server context. In order: registers the V1-aware converters on the
 * JSON and XML XStream instances, starts the AWS binder when {@code isAws(...)} is true,
 * hands the server context to {@code EurekaServerContextHolder}, copies the registry from
 * peer nodes, opens the registry for traffic and registers the monitoring statistics.
 *
 * @throws Exception declared by the signature; the excerpt does not show which call raises it
 */
protected void initEurekaServerContext() throws Exception {
   // For backward compatibility
   JsonXStream.getInstance().registerConverter(new V1AwareInstanceInfoConverter(),
         XStream.PRIORITY_VERY_HIGH);
   XmlXStream.getInstance().registerConverter(new V1AwareInstanceInfoConverter(),
         XStream.PRIORITY_VERY_HIGH);

   // The AWS binder is only created and started when this instance reports itself as AWS.
   if (isAws(this.applicationInfoManager.getInfo())) {
      this.awsBinder = new AwsBinderDelegate(this.eurekaServerConfig,
            this.eurekaClientConfig, this.registry, this.applicationInfoManager);
      this.awsBinder.start();
   }

   EurekaServerContextHolder.initialize(this.serverContext);

   log.info("Initialized server context");

   // Copy registry from neighboring eureka node
   // syncUp() pulls the registrations from the other Eureka server nodes and returns how many
   // instances were registered locally.
   int registryCount = this.registry.syncUp();
   // openForTraffic() recomputes the self-preservation threshold from that count and starts
   // the eviction timer task (via postInit()).
   this.registry.openForTraffic(this.applicationInfoManager, registryCount);

   // Register all monitoring statistics.
   EurekaMonitors.registerAllStats();
}

com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl#syncUp

/**
 * Copies the registry from the neighboring Eureka nodes into the local registry.
 *
 * <p>The copy is attempted up to {@code serverConfig.getRegistrySyncRetries()} times and stops
 * as soon as at least one instance has been registered. Between attempts the thread sleeps for
 * {@code serverConfig.getRegistrySyncRetryWaitMs()} milliseconds. If the sleep is interrupted,
 * the loop is abandoned and the thread's interrupt flag is restored.
 *
 * @return the number of instances that were registered locally
 */
public int syncUp() {
    // Copy entire entry from neighboring DS node
    int count = 0;

    // serverConfig.getRegistrySyncRetries(): configured number of attempts.
    for (int i = 0; ((i < serverConfig.getRegistrySyncRetries()) && (count == 0)); i++) {
        if (i > 0) {
            try {
                // Wait before retrying.
                Thread.sleep(serverConfig.getRegistrySyncRetryWaitMs());
            } catch (InterruptedException e) {
                logger.warn("Interrupted during registry transfer..");
                // NOTE: differs from the quoted upstream source, which only logs and breaks.
                // Restoring the flag lets code further up the stack observe the interruption.
                Thread.currentThread().interrupt();
                break;
            }
        }
        // Fetch the registrations through the embedded client: towards the other Eureka
        // server nodes, this server behaves like an ordinary Eureka client.
        Applications apps = eurekaClient.getApplications();
        for (Application app : apps.getRegisteredApplications()) {
            for (InstanceInfo instance : app.getInstances()) {
                try {
                    if (isRegisterable(instance)) {
                        // Register the fetched instance locally, reusing its own lease duration.
                        // The last argument is presumably the isReplication flag - confirm.
                        register(instance, instance.getLeaseInfo().getDurationInSecs(), true);
                        count++;
                    }
                } catch (Throwable t) {
                    // A single bad instance must not abort the whole copy.
                    logger.error("During DS init copy", t);
                }
            }
        }
    }
    return count;
}

com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl#openForTraffic()

这里省略了 org.springframework.cloud.netflix.eureka.server.InstanceRegistry#openForTraffic() 这一层调用:它最终委托给下面的父类方法,本身没有需要展开分析的逻辑。

/**
 * Prepares the registry to serve traffic after the initial peer sync.
 *
 * <p>Stores {@code count} as the expected number of clients sending renewals, recomputes the
 * renewal threshold, records the startup time, optionally primes the AWS replica connections,
 * marks this instance as UP and finally calls {@code super.postInit()}, which schedules the
 * eviction task.
 *
 * @param applicationInfoManager source of this instance's info and target of the status change
 * @param count number of instances obtained from the neighboring nodes
 */
public void openForTraffic(ApplicationInfoManager applicationInfoManager, int count) {
    // Renewals happen every 30 seconds and for a minute it should be a factor of 2.
    // The number of clients expected to renew is the number copied from the peers.
    this.expectedNumberOfClientsSendingRenews = count;
    // Recompute the threshold that triggers self-preservation.
    updateRenewsPerMinThreshold();
    logger.info("Got {} instances from neighboring DS node", count);
    logger.info("Renew threshold is: {}", numberOfRenewsPerMinThreshold);
    this.startupTime = System.currentTimeMillis();
    if (count > 0) {
        this.peerInstancesTransferEmptyOnStartup = false;
    }

    final boolean runningOnAws =
            Name.Amazon == applicationInfoManager.getInfo().getDataCenterInfo().getName();
    if (runningOnAws) {
        if (serverConfig.shouldPrimeAwsReplicaConnections()) {
            logger.info("Priming AWS connections for all replicas..");
            primeAwsReplicas(applicationInfoManager);
        }
    }

    logger.info("Changing status to UP");
    applicationInfoManager.setInstanceStatus(InstanceStatus.UP);
    // Schedules the eviction timer task.
    super.postInit();
}

com.netflix.eureka.registry.AbstractInstanceRegistry#postInit

/**
 * Starts {@code renewsLastMin} (described in this article as the count of heartbeats received
 * in the last minute) and (re)schedules the eviction task.
 */
protected void postInit() {
    renewsLastMin.start();
    // Cancel the previously stored eviction task, if there is one, before replacing it.
    if (evictionTaskRef.get() != null) {
        evictionTaskRef.get().cancel();
    }
    evictionTaskRef.set(new EvictionTask());
    // Schedule the new task. The configured eviction interval is passed twice; presumably
    // evictionTimer is a java.util.Timer, making these the initial delay and the period - confirm.
    evictionTimer.schedule(evictionTaskRef.get(),
            serverConfig.getEvictionIntervalTimerInMs(),
            serverConfig.getEvictionIntervalTimerInMs());
}

1.2 服务剔除任务类

定时清除长时间没有发送心跳续约的客户端,默认每 60s 执行一次。

/**
 * Timer task that runs one eviction pass each time it fires.
 *
 * <p>This is an excerpt: {@code getCompensationTimeMs()} is not shown here.
 */
class EvictionTask extends TimerTask {

    // Uppercase suffix: a lowercase 'l' is easily misread as the digit one.
    private final AtomicLong lastExecutionNanosRef = new AtomicLong(0L);

    /**
     * Computes the compensation time and runs {@code evict} with it. Every failure is logged
     * and swallowed here rather than propagated out of the task.
     */
    @Override
    public void run() {
        try {
            // Compensation time in milliseconds. Per the article it is computed as
            // current time - last execution time - eviction interval (helper not shown).
            long compensationTimeMs = getCompensationTimeMs();
            logger.info("Running the evict task with compensationTime {}ms", compensationTimeMs);
            // Run the eviction pass.
            evict(compensationTimeMs);
        } catch (Throwable e) {
            logger.error("Could not run the evict task", e);
        }
    }

}

com.netflix.eureka.registry.AbstractInstanceRegistry#evict(long)

/**
 * Runs one eviction pass: collects the expired leases and cancels a bounded, randomly chosen
 * subset of them.
 *
 * @param additionalLeaseMs extra grace time in milliseconds passed to {@code Lease.isExpired}
 */
public void evict(long additionalLeaseMs) {
    logger.debug("Running the evict task");
    // Skip the whole pass while lease expiration is disabled. isLeaseExpirationEnabled()
    // returns false when self-preservation is switched on and the renewals of the last
    // minute do not exceed the threshold.
    if (!isLeaseExpirationEnabled()) {
        logger.debug("DS: lease expiration is currently disabled.");
        return;
    }

    // Walk the whole registry and ask each lease whether it is expired.
    // Leases that are expired and still have a holder are collected in expiredLeases.
    List<Lease<InstanceInfo>> expiredLeases = new ArrayList<>();
    for (Entry<String, Map<String, Lease<InstanceInfo>>> groupEntry : registry.entrySet()) {
        Map<String, Lease<InstanceInfo>> leaseMap = groupEntry.getValue();
        if (leaseMap != null) {
            for (Entry<String, Lease<InstanceInfo>> leaseEntry : leaseMap.entrySet()) {
                Lease<InstanceInfo> lease = leaseEntry.getValue();
                if (lease.isExpired(additionalLeaseMs) && lease.getHolder() != null) {
                    expiredLeases.add(lease);
                }
            }
        }
    }

    // Bound the number of evictions per pass so that a single pass cannot remove more than
    // the share of the registry allowed by the renewal percent threshold.
    // NOTE(review): the article states the threshold is 85% and mentions a 15-minute window;
    // neither value is visible in this code - confirm against the server configuration.
    
    // Total number of instances in the local registry.
    int registrySize = (int) getLocalRegistrySize();
    
    int registrySizeThreshold = (int) (registrySize * serverConfig.getRenewalPercentThreshold());
    // Upper bound on evictions for this pass: registry size minus the thresholded size.
    int evictionLimit = registrySize - registrySizeThreshold;
    // Evict the smaller of "number expired" and "number allowed".
    int toEvict = Math.min(expiredLeases.size(), evictionLimit);
    if (toEvict > 0) {
        logger.info("Evicting {} items (expired={}, evictionLimit={})", toEvict, expiredLeases.size(), evictionLimit);

        Random random = new Random(System.currentTimeMillis());
        // Evict toEvict leases.
        for (int i = 0; i < toEvict; i++) {
            // Pick randomly: swap a random lease from the not-yet-processed range [i, size)
            // into position i, then evict the lease now at position i.
            int next = i + random.nextInt(expiredLeases.size() - i);
            Collections.swap(expiredLeases, i, next);
            Lease<InstanceInfo> lease = expiredLeases.get(i);

            String appName = lease.getHolder().getAppName();
            String id = lease.getHolder().getId();
            EXPIRED.increment();
            logger.warn("DS: Registry: expired lease for {}/{}", appName, id);
            // Remove the instance; isReplication is false for evictions.
            internalCancel(appName, id, false);
        }
    }
}

com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl#isLeaseExpirationEnabled

当开启了自我保护模式开关(eureka.enableSelfPreservation,默认为 true;在 Spring Cloud 中对应 eureka.server.enable-self-preservation),并且最近一分钟的心跳次数(renewsLastMin)不大于 numberOfRenewsPerMinThreshold 时,触发自我保护机制,不再自动过期租约。

/**
 * Tells whether expired leases may currently be evicted.
 *
 * @return {@code true} when self-preservation mode is disabled, or when the threshold is
 *         positive and the renewals received in the last minute exceed it; {@code false}
 *         otherwise
 */
public boolean isLeaseExpirationEnabled() {
    if (isSelfPreservationModeEnabled()) {
        // getNumOfRenewsInLastMin(): heartbeats received during the last minute.
        // numberOfRenewsPerMinThreshold: heartbeats per minute this server must receive to be
        // considered healthy; at or below it, self-preservation kicks in.
        return numberOfRenewsPerMinThreshold > 0 && getNumOfRenewsInLastMin() > numberOfRenewsPerMinThreshold;
    }
    // The self preservation mode is disabled, hence allowing the instances to expire.
    return true;
}

1.3 bug

com.netflix.eureka.lease.Lease#isExpired(long)

😈注意:在不考虑 additionalLeaseMs 参数的情况下,租约的实际过期时间比预期多了一个 duration。原因在于 renew() 方法错误地设置了 lastUpdateTimestamp = System.currentTimeMillis() + duration,而 isExpired() 在判断时又加了一次 duration;正确的设置应该是 lastUpdateTimestamp = System.currentTimeMillis()。

/**
 * Reports whether this lease is expired.
 *
 * <p>The lease counts as expired when it has an eviction timestamp, or when the current time
 * is past {@code lastUpdateTimestamp + duration + additionalLeaseMs}. Because {@code renew()}
 * already stores "now + duration" in {@code lastUpdateTimestamp}, a renewed lease effectively
 * expires two durations after its last renewal.
 *
 * @param additionalLeaseMs extra grace time in milliseconds added to the expiry instant
 * @return {@code true} if the lease is expired
 */
public boolean isExpired(long additionalLeaseMs) {
    return (evictionTimestamp > 0 || System.currentTimeMillis() > (lastUpdateTimestamp + duration + additionalLeaseMs));
}
/**
 * Renews the lease by moving {@code lastUpdateTimestamp} to "now + duration".
 *
 * <p>Shown verbatim to illustrate the bug discussed in this section: {@code isExpired} adds
 * {@code duration} again, so the lease outlives the intended expiry by one duration.
 */
public void renew() {
   lastUpdateTimestamp = System.currentTimeMillis() + duration;
    
  //  The correct assignment would be: lastUpdateTimestamp = System.currentTimeMillis()
}

org.springframework.cloud.netflix.eureka.server.InstanceRegistry#internalCancel()

/**
 * Runs the Spring-side cancellation hook and then delegates to the parent registry.
 *
 * @param appName application name of the instance being cancelled
 * @param id instance id
 * @param isReplication whether the call originates from peer replication
 * @return the result of the parent's {@code internalCancel}
 */
@Override
protected boolean internalCancel(String appName, String id, boolean isReplication) {
   // Hook first (presumably publishes the cancel event - confirm), parent registry second.
   handleCancelation(appName, id, isReplication);
   final boolean cancelled = super.internalCancel(appName, id, isReplication);
   return cancelled;
}

com.netflix.eureka.registry.AbstractInstanceRegistry#internalCancel()

/**
 * Removes the lease of the given instance from the local registry.
 *
 * <p>Used both by eviction and by explicit cancellation. The instance is recorded in the
 * recently-cancelled and recently-changed queues, its overridden status is dropped, and the
 * related cache entries are invalidated.
 *
 * @param appName application name of the instance
 * @param id instance id
 * @param isReplication whether the call originates from peer replication
 * @return {@code true} if a lease was found and cancelled, {@code false} if none was registered
 */
protected boolean internalCancel(String appName, String id, boolean isReplication) {
    // Acquire the lock before entering try, so that finally only ever unlocks a lock that is
    // actually held. NOTE: the quoted upstream source calls read.lock() inside the try block.
    read.lock();
    try {
        // Count the cancellation in the monitoring statistics.
        CANCEL.increment(isReplication);
        Map<String, Lease<InstanceInfo>> gMap = registry.get(appName);
        Lease<InstanceInfo> leaseToCancel = null;
        if (gMap != null) {
            // Drop the lease from the per-application map.
            leaseToCancel = gMap.remove(id);
        }
        synchronized (recentCanceledQueue) {
            // Record the cancellation in the recently-cancelled debug queue. According to the
            // article it only feeds the Eureka server admin UI and carries no business logic.
            // Entry layout: key = timestamp at insertion, value = "appName(instanceId)".
            recentCanceledQueue.add(new Pair<Long, String>(System.currentTimeMillis(), appName + "(" + id + ")"));
        }
        // Drop the overridden status kept for this instance.
        InstanceStatus instanceStatus = overriddenInstanceStatusMap.remove(id);
        if (instanceStatus != null) {
            logger.debug("Removed instance id {} from the overridden map which has value {}", id, instanceStatus.name());
        }
        // No lease was registered for this instance.
        if (leaseToCancel == null) {
            // Count the miss in the monitoring statistics.
            CANCEL_NOT_FOUND.increment(isReplication);
            logger.warn("DS: Registry: cancel failed because Lease is not registered for: {}/{}", appName, id);
            return false;
        } else {
            // Mark the lease as cancelled (updates its eviction time).
            leaseToCancel.cancel();
            InstanceInfo instanceInfo = leaseToCancel.getHolder();
            String vip = null;
            String svip = null;
            if (instanceInfo != null) {
                // Tag the instance as DELETED so clients can apply it as a delta update.
                instanceInfo.setActionType(ActionType.DELETED);
                // Queue the change for clients that fetch incremental updates.
                recentlyChangedQueue.add(new RecentlyChangedItem(leaseToCancel));
                instanceInfo.setLastUpdatedTimestamp();
                vip = instanceInfo.getVIPAddress();
                svip = instanceInfo.getSecureVipAddress();
            }
            // Invalidate the cache entries associated with this application and its VIPs.
            invalidateCache(appName, vip, svip);
            logger.info("Cancelled instance {}/{} (replication={})", appName, id, isReplication);
            return true;
        }
    } finally {
        read.unlock();
    }
}

2、服务下架

2.1 入口

com.netflix.eureka.resources.InstanceResource#cancelLease

    /**
     * Handles the DELETE request that cancels the lease of this instance.
     *
     * @param isReplication value of the {@code x-netflix-discovery-replication} header; only
     *        the exact string {@code "true"} is treated as a replicated call
     * @return 200 when the lease was cancelled, 404 when no lease was found, and a server
     *         error response when the cancellation threw
     */
    @DELETE
    public Response cancelLease(@HeaderParam("x-netflix-discovery-replication") String isReplication) {
        try {
            final boolean cancelled = this.registry.cancel(this.app.getName(), this.id, "true".equals(isReplication));
            if (!cancelled) {
                logger.info("Not Found (Cancel): {} - {}", this.app.getName(), this.id);
                return Response.status(Status.NOT_FOUND).build();
            }
            logger.debug("Found (Cancel): {} - {}", this.app.getName(), this.id);
            return Response.ok().build();
        } catch (Throwable failure) {
            logger.error("Error (cancel): {} - {}", new Object[]{this.app.getName(), this.id, failure});
            return Response.serverError().build();
        }
    }

2.2 发布事件

org.springframework.cloud.netflix.eureka.server.InstanceRegistry#cancel

/**
 * Runs the Spring-side cancellation hook and then delegates to the parent registry's
 * {@code cancel}.
 *
 * @param appName application name of the instance being cancelled
 * @param serverId instance id
 * @param isReplication whether the call originates from peer replication
 * @return the result of the parent's {@code cancel}
 */
@Override
public boolean cancel(String appName, String serverId, boolean isReplication) {
   // Hook first (presumably publishes the cancel event - confirm), parent registry second.
   handleCancelation(appName, serverId, isReplication);
   final boolean cancelled = super.cancel(appName, serverId, isReplication);
   return cancelled;
}

com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl#cancel

/**
 * Cancels the lease locally and, on success, passes the cancellation on to the peers and
 * lowers the expected number of renewing clients.
 *
 * @param appName application name of the instance
 * @param id instance id
 * @param isReplication whether the call originates from peer replication
 * @return {@code true} if the local cancellation succeeded, {@code false} otherwise
 */
public boolean cancel(final String appName, final String id,
                      final boolean isReplication) {
    if (!super.cancel(appName, id, isReplication)) {
        return false;
    }

    replicateToPeers(Action.Cancel, appName, id, null, null, isReplication);
    synchronized (lock) {
        if (this.expectedNumberOfClientsSendingRenews > 0) {
            // One client fewer is expected to send renewals from now on.
            this.expectedNumberOfClientsSendingRenews -= 1;
            // Recompute the threshold that triggers self-preservation.
            updateRenewsPerMinThreshold();
        }
    }
    return true;
}

com.netflix.eureka.registry.AbstractInstanceRegistry#cancel():最终与服务剔除调用的是同一个方法 internalCancel()

/**
 * Cancels the lease of the given instance by delegating to {@code internalCancel}, the same
 * method the eviction pass uses.
 *
 * @param appName application name of the instance
 * @param id instance id
 * @param isReplication whether the call originates from peer replication
 * @return the result of {@code internalCancel}
 */
public boolean cancel(String appName, String id, boolean isReplication) {
    final boolean cancelled = internalCancel(appName, id, isReplication);
    return cancelled;
}