Eureka Server源码解析(二)

250 阅读8分钟

1、服务注册

1.1 入口

  • 入口在eureka-core包下的 com.netflix.eureka.resources.ApplicationResource#addInstance
/**
 * REST entry point for instance registration (HTTP POST, JSON or XML body).
 *
 * <p>Part of the original method body is omitted in this excerpt; only the hand-off to the
 * registry is shown.
 *
 * @param info          the instance information sent in the request body
 * @param isReplication value of the {@code x-netflix-discovery-replication} header; the literal
 *                      string {@code "true"} marks the request as a replication
 * @return a response with status 204
 */
@POST
@Consumes({"application/json", "application/xml"})
public Response addInstance(InstanceInfo info,
                            @HeaderParam("x-netflix-discovery-replication") String isReplication) {
    // Anything other than the exact string "true" (including a missing header) counts as false.
    final boolean replicated = "true".equals(isReplication);
    this.registry.register(info, replicated);
    return Response.status(204).build();
}
}

Resource相当于springmvc的Controller 这里我们只关注业务层

InstanceRegistry extends PeerAwareInstanceRegistryImpl extends AbstractInstanceRegistry

org.springframework.cloud.netflix.eureka.server.InstanceRegistry.register():这个类是springcloud写的

1.2 发布事件

/**
 * Spring Cloud's hook around registration: runs {@code handleRegistration} first, then delegates
 * to the superclass (the Netflix implementation) to perform the actual registration.
 *
 * @param info          the registration payload of the instance
 * @param leaseDuration the lease duration forwarded unchanged to the superclass
 * @param isReplication forwarded unchanged to the superclass
 */
@Override
public void register(InstanceInfo info, int leaseDuration, boolean isReplication) {
    // Presumably publishes the "instance registered" event; the helper's body is not shown here.
    handleRegistration(info, leaseDuration, isReplication);

    // The real registration work happens in the parent class.
    super.register(info, leaseDuration, isReplication);
}

1.3 集群同步(服务注册完后)

com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl#register

/**
 * Registers the instance locally and then replicates the registration to the peer nodes.
 *
 * <p>The lease duration comes from the client's lease info when that is present and positive;
 * otherwise {@code Lease.DEFAULT_DURATION_IN_SECS} is used.
 *
 * @param info          the instance being registered
 * @param isReplication passed on to both the local registration and {@code replicateToPeers}
 */
public void register(final InstanceInfo info, final boolean isReplication) {
    final boolean clientSuppliedDuration =
            info.getLeaseInfo() != null && info.getLeaseInfo().getDurationInSecs() > 0;
    final int leaseDuration = clientSuppliedDuration
            ? info.getLeaseInfo().getDurationInSecs()
            : Lease.DEFAULT_DURATION_IN_SECS;

    // Local registration first ...
    super.register(info, leaseDuration, isReplication);
    // ... then propagate the same action to the other nodes.
    replicateToPeers(Action.Register, info.getAppName(), info.getId(), info, null, isReplication);
}

1.4 服务注册

com.netflix.eureka.registry.AbstractInstanceRegistry#register():真正的服务注册实现

/**
 * Registers a new instance, or replaces the lease of an instance that is already registered.
 *
 * <p>The registry is a two-level map: the outer key is the application name and the inner key is
 * the instance id, so one application can hold many instances.
 *
 * @param registrant    the instance information supplied with the registration request
 * @param leaseDuration the duration handed to the newly created {@code Lease}
 * @param isReplication whether this request was replicated from another node; used for metrics,
 *                      status resolution and logging here
 */
public void register(InstanceInfo registrant, int leaseDuration, boolean isReplication) {
    // Acquire the lock before entering the try block: if lock() itself failed, the finally
    // clause must not try to unlock a lock this thread never obtained.
    read.lock();
    try {
        // Look up the lease group of this application.
        Map<String, Lease<InstanceInfo>> gMap = registry.get(registrant.getAppName());
        REGISTER.increment(isReplication);
        if (gMap == null) {
            // No group yet: create one. putIfAbsent returns the existing map if another thread
            // installed one first, in which case that map is used instead of ours.
            final ConcurrentHashMap<String, Lease<InstanceInfo>> gNewMap = new ConcurrentHashMap<String, Lease<InstanceInfo>>();
            gMap = registry.putIfAbsent(registrant.getAppName(), gNewMap);
            if (gMap == null) {
                gMap = gNewMap;
            }
        }
        // Lease already stored for this instance id, if any.
        Lease<InstanceInfo> existingLease = gMap.get(registrant.getId());
        // Retain the last dirty timestamp without overwriting it, if there is already a lease.
        // A non-null lease means the same instance registered before (e.g. a retried or repeated
        // registration request).
        if (existingLease != null && (existingLease.getHolder() != null)) {
            // Dirty timestamp of the copy held by the server.
            Long existingLastDirtyTimestamp = existingLease.getHolder().getLastDirtyTimestamp();
            // Dirty timestamp carried by the incoming registration.
            Long registrationLastDirtyTimestamp = registrant.getLastDirtyTimestamp();
            logger.debug("Existing lease found (existing={}, provided={}", existingLastDirtyTimestamp, registrationLastDirtyTimestamp);

            // this is a > instead of a >= because if the timestamps are equal, we still take the remote transmitted
            // InstanceInfo instead of the server local copy.
            // In other words: the copy with the newer timestamp wins, and a tie goes to the incoming one.
            if (existingLastDirtyTimestamp > registrationLastDirtyTimestamp) {
                logger.warn("There is an existing lease and the existing lease's dirty timestamp {} is greater" +
                        " than the one that is being registered {}", existingLastDirtyTimestamp, registrationLastDirtyTimestamp);
                logger.warn("Using the existing instanceInfo instead of the new instanceInfo as the registrant");
                registrant = existingLease.getHolder();
            }
        } else {
            // The lease does not exist and hence it is a new registration
            synchronized (lock) {
                if (this.expectedNumberOfClientsSendingRenews > 0) {
                    // Since the client wants to register it, increase the number of clients sending renews
                    this.expectedNumberOfClientsSendingRenews = this.expectedNumberOfClientsSendingRenews + 1;
                    // Recompute the renewals-per-minute threshold from the new client count.
                    updateRenewsPerMinThreshold();
                }
            }
            logger.debug("No previous lease information found; it is new registration");
        }
        // Wrap the (possibly substituted) registrant in a fresh lease.
        Lease<InstanceInfo> lease = new Lease<InstanceInfo>(registrant, leaseDuration);
        if (existingLease != null) {
            // Carry the original service-up time over to the replacement lease.
            lease.setServiceUpTimestamp(existingLease.getServiceUpTimestamp());
        }
        // Store the lease under the instance id.
        gMap.put(registrant.getId(), lease);
        synchronized (recentRegisteredQueue) {
            // Record the registration (timestamp + "app(id)") in the recently-registered queue.
            recentRegisteredQueue.add(new Pair<Long, String>(
                    System.currentTimeMillis(),
                    registrant.getAppName() + "(" + registrant.getId() + ")"));
        }

        // Apply an overridden status previously stored for this instance id, if there is one.
        InstanceStatus overriddenStatusFromMap = overriddenInstanceStatusMap.get(registrant.getId());
        if (overriddenStatusFromMap != null) {
            logger.info("Storing overridden status {} from map", overriddenStatusFromMap);
            registrant.setOverriddenStatus(overriddenStatusFromMap);
        }

        // Resolve the effective status and set it without touching the dirty flag.
        InstanceStatus overriddenInstanceStatus = getOverriddenInstanceStatus(registrant, existingLease, isReplication);
        registrant.setStatusWithoutDirty(overriddenInstanceStatus);

        // An instance that ends up UP gets its service-up timestamp marked on the lease.
        if (InstanceStatus.UP.equals(registrant.getStatus())) {
            lease.serviceUp();
        }
        // Mark the change as an addition; per the original notes this feeds clients' incremental fetches.
        registrant.setActionType(ActionType.ADDED);
        recentlyChangedQueue.add(new RecentlyChangedItem(lease));
        // Stamp the instance info as just updated.
        registrant.setLastUpdatedTimestamp();
        // Invalidate cached entries for this application and its VIP addresses.
        invalidateCache(registrant.getAppName(), registrant.getVIPAddress(), registrant.getSecureVipAddress());
        logger.info("Registered instance {}/{} with status {} (replication={})",
                registrant.getAppName(), registrant.getId(), registrant.getStatus(), isReplication);
    } finally {
        read.unlock();
    }
}

1.5 Lease:租约

装饰eureka client实例

/**
 * Wraps a holder object together with the timestamps that describe the state of its lease.
 *
 * <p>This is an excerpt: the constructor and the accessors of the full class are not shown, so
 * {@code holder} and {@code duration} are never assigned here.
 *
 * @param <T> type of the object the lease is held for
 */
public class Lease<T> {

    enum Action {
        Register, Cancel, Renew
    }

    /** Default lease duration, in seconds. */
    public static final int DEFAULT_DURATION_IN_SECS = 90;

    private T holder;
    // Set once by cancel(); a value > 0 makes the lease count as expired.
    private long evictionTimestamp;
    // Registration time; not written anywhere in this excerpt.
    private long registrationTimestamp;
    // Set once by serviceUp().
    private long serviceUpTimestamp;
    // Make it volatile so that the expiration task would see this quicker.
    // Written by renew() as "now + duration".
    private volatile long lastUpdateTimestamp;
    // Added directly to millisecond timestamps below; how it is populated is not shown here.
    private long duration;

    /**
     * Renews the lease by setting {@code lastUpdateTimestamp} to the current time plus
     * {@code duration}.
     */
    public void renew() {
        lastUpdateTimestamp = System.currentTimeMillis() + duration;
    }

    /**
     * Cancels the lease by recording the eviction time. Only the first call has an effect.
     */
    public void cancel() {
        if (evictionTimestamp <= 0) {
            evictionTimestamp = System.currentTimeMillis();
        }
    }

    /**
     * Marks the service as up by recording the current time. Only the first call has an effect.
     */
    public void serviceUp() {
        if (serviceUpTimestamp == 0) {
            serviceUpTimestamp = System.currentTimeMillis();
        }
    }

    /**
     * Tells whether the lease has expired.
     *
     * <p>The lease is expired when it was cancelled, or when the current time is past
     * {@code lastUpdateTimestamp + duration + additionalLeaseMs}. Because {@link #renew()}
     * already stores "now + duration", the duration is effectively counted twice. That
     * behaviour is kept as-is on purpose; changing it would alter when leases expire.
     *
     * @param additionalLeaseMs extra grace time, in milliseconds, added to the expiry instant
     * @return {@code true} if the lease is cancelled or past its expiry instant
     */
    public boolean isExpired(long additionalLeaseMs) {
        return (evictionTimestamp > 0 || System.currentTimeMillis() > (lastUpdateTimestamp + duration + additionalLeaseMs));
    }
}

2、心跳续约

2.1 入口

  • 入口:com.netflix.eureka.resources.InstanceResource#renewLease()
/**
 * REST entry point for a heartbeat (HTTP PUT) that renews the lease of one instance.
 *
 * @param isReplication      replication header; the literal string {@code "true"} marks the
 *                           request as coming from another node
 * @param overriddenStatus   overridden status supplied by the caller, may be {@code null}
 * @param status             status supplied by the caller (not used in this excerpt)
 * @param lastDirtyTimestamp caller's last dirty timestamp as a string, may be {@code null}
 * @return 404 when the renewal fails (per the original notes, the client reacts by registering
 *         again); otherwise the result of the dirty-timestamp validation, or 200 when that
 *         validation is skipped
 */
@PUT
public Response renewLease(
        @HeaderParam(PeerEurekaNode.HEADER_REPLICATION) String isReplication,
        @QueryParam("overriddenstatus") String overriddenStatus,
        @QueryParam("status") String status,
        @QueryParam("lastDirtyTimestamp") String lastDirtyTimestamp) {
    final boolean fromPeer = "true".equals(isReplication);

    // Renewal failed: answer 404 right away.
    if (!registry.renew(app.getName(), id, fromPeer)) {
        logger.warn("Not Found (Renew): {} - {}", app.getName(), id);
        return Response.status(Status.NOT_FOUND).build();
    }

    final boolean mustValidate =
            lastDirtyTimestamp != null && serverConfig.shouldSyncWhenTimestampDiffers();
    final Response reply;
    if (!mustValidate) {
        reply = Response.ok().build();
    } else {
        reply = this.validateDirtyTimestamp(Long.valueOf(lastDirtyTimestamp), fromPeer);

        // Store the overridden status since the validation found out the node that replicates wins
        final boolean notFound = reply.getStatus() == Response.Status.NOT_FOUND.getStatusCode();
        final boolean hasRealOverride =
                overriddenStatus != null && !InstanceStatus.UNKNOWN.name().equals(overriddenStatus);
        if (notFound && hasRealOverride && fromPeer) {
            registry.storeOverriddenStatusIfRequired(app.getAppName(), id, InstanceStatus.valueOf(overriddenStatus));
        }
    }

    logger.debug("Found (Renew): {} - {}; reply status={}", app.getName(), id, reply.getStatus());
    return reply;
}

com.netflix.eureka.resources.InstanceResource#validateDirtyTimestamp

/**
 * Compares the caller's last dirty timestamp with the one held in the registry for this instance.
 *
 * @param lastDirtyTimestamp the last dirty timestamp reported by the caller, may be {@code null}
 * @param isReplication      whether the request came from another node
 * @return 404 when the caller's timestamp is newer than the registry's; 409 carrying the
 *         registry's copy when the registry is newer and the request is a replication; 200 in
 *         every other case (instance unknown, timestamp missing, timestamps equal, or registry
 *         newer on a non-replication request)
 */
private Response validateDirtyTimestamp(Long lastDirtyTimestamp,
                                        boolean isReplication) {
    // Look the instance up by application name and instance id.
    InstanceInfo appInfo = registry.getInstanceByAppAndId(app.getName(), id, false);
    if (appInfo != null) {
        if ((lastDirtyTimestamp != null) &&
            (!lastDirtyTimestamp.equals(appInfo.getLastDirtyTimestamp()))) {
            if (lastDirtyTimestamp > appInfo.getLastDirtyTimestamp()) {
                // The caller holds newer information than this server.
                return Response.status(Status.NOT_FOUND).build();
            } else if (appInfo.getLastDirtyTimestamp() > lastDirtyTimestamp) {
                if (isReplication) {
                    // This server is newer than the replicating node: send back our copy.
                    return Response.status(Status.CONFLICT).entity(appInfo).build();
                } else {
                    return Response.ok().build();
                }
            }
        }

    }
    return Response.ok().build();
}

2.2 发布事件

具体的业务代码:org.springframework.cloud.netflix.eureka.server.InstanceRegistry#renew

/**
 * Spring Cloud's hook around lease renewal: publishes an {@code EurekaInstanceRenewedEvent} when
 * the application is known, then delegates the renewal itself to the superclass.
 *
 * @param appName       name of the application the instance belongs to
 * @param serverId      id of the instance sending the heartbeat
 * @param isReplication forwarded unchanged to the event and to the superclass
 * @return the result of the superclass renewal
 */
public boolean renew(final String appName, final String serverId,
      boolean isReplication) {
   // Plain string concatenation: no "{}" placeholder belongs in this message.
   log("renew " + appName + " serverId " + serverId + ", isReplication "
         + isReplication);
   List<Application> applications = getSortedApplications();
   // Find the application by name, then the instance by id.
   for (Application input : applications) {
      if (input.getName().equals(appName)) {
         InstanceInfo instance = null;
         for (InstanceInfo info : input.getInstances()) {
            if (info.getId().equals(serverId)) {
               instance = info;
               break;
            }
         }
         // The event is published even when no instance with this id was found; in that case
         // it carries a null instance.
         publishEvent(new EurekaInstanceRenewedEvent(this, appName, serverId,
               instance, isReplication));
         break;
      }
   }
   // The actual renewal is done by the parent class.
   return super.renew(appName, serverId, isReplication);
}

2.3 集群同步(心跳续约后)

com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl#renew

/**
 * Renews the lease locally and, only if that succeeds, replicates the heartbeat to the peers.
 *
 * @param appName       name of the application the instance belongs to
 * @param id            id of the instance
 * @param isReplication passed on to the local renewal and to {@code replicateToPeers}
 * @return {@code true} if the local renewal succeeded, {@code false} otherwise
 */
public boolean renew(final String appName, final String id, final boolean isReplication) {
    final boolean renewedLocally = super.renew(appName, id, isReplication);
    if (!renewedLocally) {
        // Nothing to replicate when the local renewal failed.
        return false;
    }
    replicateToPeers(Action.Heartbeat, appName, id, null, null, isReplication);
    return true;
}

2.4 心跳续约

com.netflix.eureka.registry.AbstractInstanceRegistry#renew

/**
 * Renews the lease of one instance.
 *
 * @param appName       name of the application the instance belongs to
 * @param id            id of the instance
 * @param isReplication whether the request was replicated from another node; used for metrics
 *                      and status resolution here
 * @return {@code false} when no lease exists for the instance or its resolved status is
 *         {@code UNKNOWN}; {@code true} once the lease has been renewed
 */
public boolean renew(String appName, String id, boolean isReplication) {
    RENEW.increment(isReplication);
    // Lease group of the application, then the lease of the instance itself.
    Map<String, Lease<InstanceInfo>> gMap = registry.get(appName);
    Lease<InstanceInfo> leaseToRenew = null;
    if (gMap != null) {
        leaseToRenew = gMap.get(id);
    }
    if (leaseToRenew == null) {
        RENEW_NOT_FOUND.increment(isReplication);
        logger.warn("DS: Registry: lease doesn't exist, registering resource: {} - {}", appName, id);
        return false;
    } else {
        InstanceInfo instanceInfo = leaseToRenew.getHolder();
        if (instanceInfo != null) {
            InstanceStatus overriddenInstanceStatus = this.getOverriddenInstanceStatus(
                    instanceInfo, leaseToRenew, isReplication);
            if (overriddenInstanceStatus == InstanceStatus.UNKNOWN) {
                logger.info("Instance status UNKNOWN possibly due to deleted override for instance {}"
                        + "; re-register required", instanceInfo.getId());
                RENEW_NOT_FOUND.increment(isReplication);
                return false;
            }
            // The instance's status differs from the resolved one: apply the resolved status.
            if (!instanceInfo.getStatus().equals(overriddenInstanceStatus)) {
                // Log the status that is actually compared and applied, not the stored
                // override on the instance. Passing the enum itself keeps this null-safe.
                logger.info(
                        "The instance status {} is different from overridden instance status {} for instance {}. "
                                + "Hence setting the status to overridden status", instanceInfo.getStatus().name(),
                                overriddenInstanceStatus,
                                instanceInfo.getId());
                instanceInfo.setStatusWithoutDirty(overriddenInstanceStatus);

            }
        }
        // Count this renewal in the per-minute counter (per the original notes, used by
        // self-preservation).
        renewsLastMin.increment();
        // Push the lease's last-update timestamp forward.
        leaseToRenew.renew();
        return true;
    }
}

3、集群同步

集群同步原理:

  • Eureka-Server 集群不区分主从节点或者 Primary & Secondary 节点,所有节点相同角色( 也就是没有角色 ),完全对等
  • Eureka-Client 可以向任意 Eureka-Server 发起任意读写操作
  • eureka client选择任意一个Eureka-Server发起服务注册 心跳续约 服务下架 服务状态更新 状态删除操作,eureka server 成功执行这些操作后 会循环eureka server集群内每个节点,将操作复制到另外的 Eureka-Server 以达到最终一致性。注意,Eureka-Server 保证AP。

eureka集群有3个节点 server1 server2 server3

client1选择server2发起注册请求 server2处理完client1的注册请求 (注意 此时并未返回success)会进行集群同步操作 遍历eureka server列表 向server1和server3发出同样的client1注册请求 同时 isReplication为true 表示本次注册请求来自于集群同步 这样server1和server3接收到来自server2的注册请求后 不会再次进行集群同步 从而避免造成死循环

com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl#replicateToPeers负责所有的集群同步

服务注册 心跳续约 服务下架 服务状态更新 状态删除会进行集群同步

/**
 * Replicates one registry action to every peer node except this one.
 *
 * <p>Requests that are themselves replications are only counted, never forwarded again, so a
 * replicated action does not bounce between nodes.
 *
 * @param action        the kind of action to replicate; also supplies the timer
 * @param appName       name of the application
 * @param id            id of the instance
 * @param info          instance information, optional
 * @param newStatus     new status, optional
 * @param isReplication whether the triggering request came from another node
 */
private void replicateToPeers(Action action, String appName, String id,
                              InstanceInfo info /* optional */,
                              InstanceStatus newStatus /* optional */, boolean isReplication) {
    Stopwatch tracer = action.getTimer().start();
    try {
        // If it is a replication already, do not replicate again as this will create a poison replication
        if (isReplication) {
            numberOfReplicationsLastMin.increment();
            return;
        }
        // Meant to detect "no peers configured".
        // NOTE(review): this is an identity comparison against Collections.EMPTY_LIST; confirm
        // that it can ever be true for this field.
        if (peerEurekaNodes == Collections.EMPTY_LIST) {
            return;
        }

        for (final PeerEurekaNode peer : peerEurekaNodes.getPeerEurekaNodes()) {
            // If the url represents this host, do not replicate to yourself.
            if (!peerEurekaNodes.isThisMyUrl(peer.getServiceUrl())) {
                replicateInstanceActionsToPeers(action, appName, id, info, newStatus, peer);
            }
        }
    } finally {
        tracer.stop();
    }
}

com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl#replicateInstanceActionsToPeers()

/**
 * Sends one registry action to a single peer node by calling the matching method on it.
 *
 * <p>Any failure is caught and logged; it is not propagated to the caller.
 *
 * @param action    the kind of action to replicate
 * @param appName   name of the application
 * @param id        id of the instance
 * @param info      instance information, used for {@code Register}
 * @param newStatus new status, used for {@code StatusUpdate}
 * @param node      the peer node to send the action to
 */
private void replicateInstanceActionsToPeers(Action action, String appName,
                                             String id, InstanceInfo info, InstanceStatus newStatus,
                                             PeerEurekaNode node) {
    try {
        CurrentRequestVersion.set(Version.V2);
        // Dispatch on the action; cases that need the registry's copy look it up themselves.
        switch (action) {
            case Cancel:
                node.cancel(appName, id);
                break;
            case Heartbeat: {
                final InstanceStatus storedOverride = overriddenInstanceStatusMap.get(id);
                final InstanceInfo current = getInstanceByAppAndId(appName, id, false);
                node.heartbeat(appName, id, current, storedOverride, false);
                break;
            }
            case Register:
                node.register(info);
                break;
            case StatusUpdate: {
                final InstanceInfo current = getInstanceByAppAndId(appName, id, false);
                node.statusUpdate(appName, id, newStatus, current);
                break;
            }
            case DeleteStatusOverride: {
                final InstanceInfo current = getInstanceByAppAndId(appName, id, false);
                node.deleteStatusOverride(appName, id, current);
                break;
            }
        }
    } catch (Throwable t) {
        logger.error("Cannot replicate information to {} for action {}", node.getServiceUrl(), action.name(), t);
    }
}