1.nacos、eureka上线下线
最近在将服务发现中间件从Eureka切换为Nacos。在采用金丝雀灰度发布的过程中发现:服务已经把原来在Eureka中注册的实例下线了,但是仍然有流量请求进来,这似乎违背了官方宣称的秒级上下线特点。我们服务的负载均衡使用的是Ribbon这个负载均衡组件。下面以Nacos为例进行分析:
/**
 * Handles an instance-update request: reads the service name, the optional namespace id and
 * the caller's client version from the request, then either updates or registers the
 * instance parsed from that same request.
 *
 * @param request HTTP request carrying the service coordinates and the instance data
 * @return the literal {@code "ok"} once the service manager call has returned
 * @throws Exception propagated from parameter extraction, instance parsing or the service manager
 */
public String update(HttpServletRequest request) throws Exception {
    final String serviceName = WebUtils.required(request, CommonParams.SERVICE_NAME);
    final String namespaceId =
            WebUtils.optional(request, CommonParams.NAMESPACE_ID, Constants.DEFAULT_NAMESPACE_ID);

    // Prefer the explicit Client-Version header; fall back to User-Agent when it is blank.
    String clientAgent = request.getHeader("Client-Version");
    if (StringUtils.isBlank(clientAgent)) {
        clientAgent = request.getHeader("User-Agent");
    }
    final ClientInfo client = new ClientInfo(clientAgent);

    // Java clients at version 1.0.0 or newer take the update path; all others register.
    final boolean updatable = client.type == ClientInfo.ClientType.JAVA
            && client.version.compareTo(VersionUtil.parseVersion("1.0.0")) >= 0;
    if (!updatable) {
        serviceManager.registerInstance(namespaceId, serviceName, parseInstance(request));
        return "ok";
    }
    serviceManager.updateInstance(namespaceId, serviceName, parseInstance(request));
    return "ok";
}上边这段代码说明的是nacos进行上线下线的逻辑,可以看到核心逻辑是parseInstance()方法,里面的的实例信息就是从request中获取的。再看一下updateInstance方法:
/**
 * Updates an instance that is already known to the given service.
 *
 * @param namespaceId namespace the service lives in
 * @param serviceName name of the service owning the instance
 * @param instance    instance data to store
 * @throws NacosException with {@code INVALID_PARAM} when the service cannot be found or the
 *                        instance is not among the service's current IPs; also whatever
 *                        {@code addInstance} throws
 */
public void updateInstance(String namespaceId, String serviceName, Instance instance) throws NacosException {
    final Service target = getService(namespaceId, serviceName);
    if (target == null) {
        final String reason = "service not found, namespace: " + namespaceId + ", service: " + serviceName;
        throw new NacosException(NacosException.INVALID_PARAM, reason);
    }

    final boolean known = target.allIPs().contains(instance);
    if (!known) {
        throw new NacosException(NacosException.INVALID_PARAM, "instance not exist: " + instance);
    }

    // The write itself is shared with the add path.
    addInstance(namespaceId, serviceName, instance.isEphemeral(), instance);
}
/**
 * Passes the given instances through {@code addIpAddresses} for the target service and stores
 * the resulting list under the service's instance-list key via the consistency service.
 *
 * @param namespaceId namespace the service lives in
 * @param serviceName name of the service
 * @param ephemeral   forwarded to the key builder and to {@code addIpAddresses}
 * @param ips         instances to add
 * @throws NacosException propagated from the calls made here
 */
public void addInstance(String namespaceId, String serviceName, boolean ephemeral, Instance... ips) throws NacosException {
    final String listKey = KeyBuilder.buildInstanceListKey(namespaceId, serviceName, ephemeral);
    final List<Instance> combined =
            addIpAddresses(getService(namespaceId, serviceName), ephemeral, ips);

    final Instances payload = new Instances();
    payload.setInstanceList(combined);

    // The write goes straight to the consistency service; nothing is deferred in this method.
    consistencyService.put(listKey, payload);
}可以看到实例下线,是立马更新server中的实例信息数据的。
2.Ribbon负载均衡更新机制
NacosServerList继承了AbstractServerList,最终是在DynamicServerListLoadBalancer这个类中对所有的server进行负载的:
/**
 * Server list whose entries are obtained from the Nacos naming service.
 *
 * <p>The initial list and every refreshed list come from the same lookup, so each call to
 * either accessor queries the naming service again.
 */
public class NacosServerList extends AbstractServerList<NacosServer> {

    private NacosDiscoveryProperties discoveryProperties;

    /** Service to look up; assigned in {@link #initWithNiwsConfig(IClientConfig)}. */
    private String serviceId;

    public NacosServerList(NacosDiscoveryProperties discoveryProperties) {
        this.discoveryProperties = discoveryProperties;
    }

    /** Takes the client name from the configuration as the service id to query. */
    @Override
    public void initWithNiwsConfig(IClientConfig iClientConfig) {
        this.serviceId = iClientConfig.getClientName();
    }

    public String getServiceId() {
        return serviceId;
    }

    @Override
    public List<NacosServer> getInitialListOfServers() {
        return getServers();
    }

    @Override
    public List<NacosServer> getUpdatedListOfServers() {
        return getServers();
    }

    /**
     * Queries the naming service for the configured service id and wraps the result.
     *
     * @throws IllegalStateException wrapping any exception raised by the lookup or conversion
     */
    private List<NacosServer> getServers() {
        try {
            // NOTE(review): the `true` flag presumably restricts the result to healthy
            // instances - confirm against the NamingService API.
            return instancesToServerList(
                    discoveryProperties.namingServiceInstance().selectInstances(serviceId, true));
        } catch (Exception e) {
            throw new IllegalStateException(
                    "Can not get service instances from nacos, serviceId=" + serviceId,
                    e);
        }
    }

    /** Wraps each instance in a {@code NacosServer}; a {@code null} input yields an empty list. */
    private List<NacosServer> instancesToServerList(List<Instance> instances) {
        if (instances == null) {
            return new ArrayList<>();
        }
        List<NacosServer> servers = new ArrayList<>();
        for (Instance each : instances) {
            servers.add(new NacosServer(each));
        }
        return servers;
    }
}
// Update callback: every invocation of doUpdate() re-runs updateListOfServers().
// NOTE(review): presumably handed to the ServerListUpdater by
// enableAndInitLearnNewServersFeature(), which is not shown here - confirm.
protected final ServerListUpdater.UpdateAction updateAction = new ServerListUpdater.UpdateAction() {
@Override
public void doUpdate() {
updateListOfServers();
}
};
/**
 * Creates the load balancer and immediately initializes it from the given client
 * configuration by delegating to {@code initWithNiwsConfig(clientConfig)}.
 *
 * @param clientConfig client configuration forwarded to the initializer
 */
public DynamicServerListLoadBalancer(IClientConfig clientConfig) {
initWithNiwsConfig(clientConfig);
}
/**
 * Initializes this load balancer from the client configuration.
 *
 * <p>After the superclass initialization, it reads the server-list class name and the
 * server-list-updater class name from the configuration (each with a default), instantiates
 * both through {@code ClientFactory}, installs the server list's filter when the list is an
 * {@code AbstractServerList}, and finally calls {@code restOfInit}.
 *
 * @param clientConfig configuration to read the class names from
 * @throws RuntimeException wrapping any exception raised during initialization
 */
@Override
public void initWithNiwsConfig(IClientConfig clientConfig) {
try {
super.initWithNiwsConfig(clientConfig);
String niwsServerListClassName = clientConfig.getPropertyAsString( CommonClientConfigKey.NIWSServerListClassName, DefaultClientConfigImpl.DEFAULT_SEVER_LIST_CLASS);
ServerList<T> niwsServerListImpl = (ServerList<T>) ClientFactory
.instantiateInstanceWithClientConfig(niwsServerListClassName, clientConfig);
// Keep the ServerList implementation that supplies the servers
this.serverListImpl = niwsServerListImpl;
if (niwsServerListImpl instanceof AbstractServerList) {
AbstractServerListFilter<T> niwsFilter = ((AbstractServerList) niwsServerListImpl)
.getFilterImpl(clientConfig);
niwsFilter.setLoadBalancerStats(getLoadBalancerStats());
this.filter = niwsFilter;
}
String serverListUpdaterClassName = clientConfig.getPropertyAsString( CommonClientConfigKey.ServerListUpdaterClassName, DefaultClientConfigImpl.DEFAULT_SERVER_LIST_UPDATER_CLASS);
// Instantiate the ServerListUpdater object
this.serverListUpdater = (ServerListUpdater) ClientFactory.instantiateInstanceWithClientConfig(serverListUpdaterClassName, clientConfig);
restOfInit(clientConfig);
} catch (Exception e) {
throw new RuntimeException(
"Exception while initializing NIWSDiscoveryLoadBalancer:"
+ clientConfig.getClientName()
+ ", niwsClientConfig:" + clientConfig, e);
}
}
/**
 * Completes initialization: enables the learn-new-servers feature, loads the server list once,
 * and optionally primes connections to the reachable servers.
 *
 * <p>The priming flag is remembered, switched off while the list is first loaded, and restored
 * at the end, so the statement order here is significant.
 *
 * @param clientConfig configuration; only its client name is read here, for the log line
 */
void restOfInit(IClientConfig clientConfig) {
boolean primeConnection = this.isEnablePrimingConnections();
// turn this off to avoid duplicated asynchronous priming done in BaseLoadBalancer.setServerList()
this.setEnablePrimingConnections(false);
// Start the scheduled task that periodically refreshes the cached instance list
enableAndInitLearnNewServersFeature();// the most important step
// Pull the instance list once right away
updateListOfServers();
if (primeConnection && this.getPrimeConnections() != null) {
this.getPrimeConnections() .primeConnections(getReachableServers());
}
this.setEnablePrimingConnections(primeConnection);
LOGGER.info("DynamicServerListLoadBalancer for client {} initialized: {}", clientConfig.getClientName(), this.toString());
}
/**
 * Refreshes the cached server list. This is where the instance cache actually gets updated:
 * the list is pulled from the configured server-list implementation, passed through the
 * filter when one is set, and then handed to {@code updateAllServerList}.
 *
 * <p>When no server-list implementation is configured, an empty list is published.
 */
@VisibleForTesting
public void updateListOfServers() {
    if (serverListImpl == null) {
        updateAllServerList(new ArrayList<T>());
        return;
    }

    // Pull the latest instances from the discovery source.
    List<T> latest = serverListImpl.getUpdatedListOfServers();
    LOGGER.debug("List of Servers for {} obtained from Discovery client: {}", getIdentifier(), latest);

    // Let the filter narrow down the pulled list, if one is installed.
    if (filter != null) {
        latest = filter.getFilteredListOfServers(latest);
        LOGGER.debug("Filtered List of Servers for {} obtained from Discovery client: {}", getIdentifier(), latest);
    }

    // Publish the new list.
    updateAllServerList(latest);
}之后我们看最重要的enableAndInitLearnNewServersFeature这个方法的操作
/**
 * Starts periodic execution of the given update action. Only the call that flips the
 * active flag from {@code false} to {@code true} schedules the task; any other call just
 * logs and returns.
 *
 * @param updateAction action to run on every refresh cycle
 */
@Override
public synchronized void start(final UpdateAction updateAction) {
    if (!isActive.compareAndSet(false, true)) {
        logger.info("Already active, no-op");
        return;
    }

    final Runnable refreshTask = new Runnable() {
        @Override
        public void run() {
            if (isActive.get()) {
                try {
                    // Runs the refresh supplied by the caller (the server-list update of
                    // DynamicServerListLoadBalancer, per the original note).
                    updateAction.doUpdate();
                    lastUpdated = System.currentTimeMillis();
                } catch (Exception e) {
                    // A failed cycle is logged and the schedule keeps running.
                    logger.warn("Failed one update cycle", e);
                }
                return;
            }
            // No longer active: cancel the schedule if one was recorded.
            if (scheduledFuture != null) {
                scheduledFuture.cancel(true);
            }
        }
    };

    // Fixed-delay schedule. The original note states the default interval is 30s; the
    // actual value of refreshIntervalMs is set outside this method.
    scheduledFuture = getRefreshExecutor().scheduleWithFixedDelay(
            refreshTask,
            initialDelayMs,
            refreshIntervalMs,
            TimeUnit.MILLISECONDS);
}
最终,虽然注册中心实现了秒级的实例上下线,但是在Spring Cloud中,负载均衡组件Ribbon的实例信息更新采用的是定时任务的形式:有可能这个任务上一秒刚刚执行完,下一秒你就执行了实例上下线操作,那么Ribbon要感知这个变化,就必须等待refreshIntervalMs毫秒(默认30秒)之后才可以感知到。