1、服务注册
1.1 入口
- 入口在eureka-core包下的 com.netflix.eureka.resources.ApplicationResource#addInstance
/**
 * REST entry point for instance registration (excerpt; part of the upstream body is omitted).
 *
 * @param info          registration data of the instance
 * @param isReplication value of the {@code x-netflix-discovery-replication} header; only the
 *                      literal string {@code "true"} is treated as a replicated request
 * @return a response with status 204 once the registry call has returned
 */
@POST
@Consumes({"application/json", "application/xml"})
public Response addInstance(InstanceInfo info, @HeaderParam("x-netflix-discovery-replication") String isReplication) {
    // (part of the code is omitted here)
    // A null or any non-"true" header value means the request is not a replication.
    final boolean replicated = "true".equals(isReplication);
    this.registry.register(info, replicated);
    return Response.status(204).build();
}
}
Resource相当于springmvc的Controller 这里我们只关注业务层
InstanceRegistry extends PeerAwareInstanceRegistryImpl extends AbstractInstanceRegistry
org.springframework.cloud.netflix.eureka.server.InstanceRegistry.register():这个类是springcloud写的
1.2 发布事件
/**
 * Spring Cloud's registration hook: runs handleRegistration(...) first and then
 * delegates to the parent class's register(...).
 *
 * @param info          registration data of the instance
 * @param leaseDuration lease duration forwarded unchanged to both calls
 * @param isReplication replication flag forwarded unchanged to both calls
 */
@Override
public void register(InstanceInfo info, int leaseDuration, boolean isReplication) {
// info: the service registration data
// Publish the registration event (presumably done inside handleRegistration; its body is not shown here)
handleRegistration(info, leaseDuration, isReplication);
// Call the parent class's register method (this is where the Netflix code begins)
super.register(info, leaseDuration, isReplication);
}
1.3 集群同步(服务注册完后)
com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl#register
/**
 * Registers the instance through the parent class and then replicates the
 * registration to the peer nodes.
 *
 * @param info          registration data of the instance
 * @param isReplication passed through to both the parent register call and replicateToPeers
 */
public void register(final InstanceInfo info, final boolean isReplication) {
    // Use the lease duration sent by the client only when lease info is present and positive;
    // otherwise fall back to the default lease duration.
    final boolean clientSentDuration =
            info.getLeaseInfo() != null && info.getLeaseInfo().getDurationInSecs() > 0;
    final int leaseDuration = clientSentDuration
            ? info.getLeaseInfo().getDurationInSecs()
            : Lease.DEFAULT_DURATION_IN_SECS;

    // Register first ...
    super.register(info, leaseDuration, isReplication);
    // ... then synchronise the registration with the rest of the cluster.
    replicateToPeers(Action.Register, info.getAppName(), info.getId(), info, null, isReplication);
}
1.4 服务注册
com.netflix.eureka.registry.AbstractInstanceRegistry#register():真正的服务注册实现
/**
 * Stores (or replaces) the lease of one instance in the in-memory registry.
 *
 * <p>Steps, all performed while holding {@code read}:
 * look up or create the per-application map, resolve a conflict with an existing lease by
 * comparing last-dirty timestamps, create and store a new {@link Lease}, record the
 * registration in the recent-registration and recently-changed queues, apply any overridden
 * status, and invalidate the response cache for the application.
 *
 * @param registrant    the registration data sent by the Eureka client; replaced by the
 *                      already-stored instance when that one has a newer dirty timestamp
 * @param leaseDuration duration handed to the new {@link Lease}
 * @param isReplication passed to the REGISTER counter and to the overridden-status lookup
 */
public void register(InstanceInfo registrant, int leaseDuration, boolean isReplication) {
    // Acquire the lock before entering try, so finally never unlocks a lock that was not obtained.
    read.lock();
    try {
        // Declared as:
        //   private final ConcurrentHashMap<String, Map<String, Lease<InstanceInfo>>> registry;
        // Outer key: application name. Inner key: instance id. One application can have many instances.
        Map<String, Lease<InstanceInfo>> gMap = registry.get(registrant.getAppName());
        REGISTER.increment(isReplication);
        if (gMap == null) {
            // No group for this application yet: create one. putIfAbsent returns the map that
            // another thread may have inserted in the meantime, in which case that one is used.
            final ConcurrentHashMap<String, Lease<InstanceInfo>> gNewMap = new ConcurrentHashMap<String, Lease<InstanceInfo>>();
            gMap = registry.putIfAbsent(registrant.getAppName(), gNewMap);
            if (gMap == null) {
                gMap = gNewMap;
            }
        }
        // Lease already stored for this instance id, if any.
        Lease<InstanceInfo> existingLease = gMap.get(registrant.getId());
        // Retain the last dirty timestamp without overwriting it, if there is already a lease.
        // A non-null lease means the same instance id is registering again
        // (per the notes: duplicate/retried registration requests or a client reconnect).
        if (existingLease != null && (existingLease.getHolder() != null)) {
            // Timestamp held by the stored instance vs. the one sent with this request.
            Long existingLastDirtyTimestamp = existingLease.getHolder().getLastDirtyTimestamp();
            Long registrationLastDirtyTimestamp = registrant.getLastDirtyTimestamp();
            logger.debug("Existing lease found (existing={}, provided={}", existingLastDirtyTimestamp, registrationLastDirtyTimestamp);

            // this is a > instead of a >= because if the timestamps are equal, we still take the remote transmitted
            // InstanceInfo instead of the server local copy.
            if (existingLastDirtyTimestamp > registrationLastDirtyTimestamp) {
                logger.warn("There is an existing lease and the existing lease's dirty timestamp {} is greater" +
                        " than the one that is being registered {}", existingLastDirtyTimestamp, registrationLastDirtyTimestamp);
                logger.warn("Using the existing instanceInfo instead of the new instanceInfo as the registrant");
                registrant = existingLease.getHolder();
            }
        } else {
            // The lease does not exist and hence it is a new registration:
            // bump the expected-renewals count and recompute the threshold derived from it.
            synchronized (lock) {
                if (this.expectedNumberOfClientsSendingRenews > 0) {
                    // Since the client wants to register it, increase the number of clients sending renews
                    this.expectedNumberOfClientsSendingRenews = this.expectedNumberOfClientsSendingRenews + 1;
                    // Recompute the renews-per-minute threshold (used by self-preservation, per the notes).
                    updateRenewsPerMinThreshold();
                }
            }
            logger.debug("No previous lease information found; it is new registration");
        }
        // Wrap the (possibly substituted) registrant in a fresh lease.
        Lease<InstanceInfo> lease = new Lease<InstanceInfo>(registrant, leaseDuration);
        if (existingLease != null) {
            // Carry the service-up timestamp over from the lease being replaced.
            lease.setServiceUpTimestamp(existingLease.getServiceUpTimestamp());
        }
        // Store the lease under the instance id, replacing any previous one.
        gMap.put(registrant.getId(), lease);
        synchronized (recentRegisteredQueue) {
            // Record "<appName>(<id>)" with the current time so recent registrations can be listed.
            recentRegisteredQueue.add(new Pair<Long, String>(
                    System.currentTimeMillis(),
                    registrant.getAppName() + "(" + registrant.getId() + ")"));
        }
        // An overridden status stored for this instance id is copied onto the registrant.
        InstanceStatus overriddenStatusFromMap = overriddenInstanceStatusMap.get(registrant.getId());
        if (overriddenStatusFromMap != null) {
            logger.info("Storing overridden status {} from map", overriddenStatusFromMap);
            registrant.setOverriddenStatus(overriddenStatusFromMap);
        }

        // The status actually exposed is the one computed by getOverriddenInstanceStatus.
        InstanceStatus overriddenInstanceStatus = getOverriddenInstanceStatus(registrant, existingLease, isReplication);
        registrant.setStatusWithoutDirty(overriddenInstanceStatus);

        if (InstanceStatus.UP.equals(registrant.getStatus())) {
            lease.serviceUp();
        }
        // Mark the instance as ADDED and queue the change
        // (per the notes, both are consumed by clients doing incremental/delta fetches).
        registrant.setActionType(ActionType.ADDED);
        recentlyChangedQueue.add(new RecentlyChangedItem(lease));
        // Stamps the InstanceInfo's last-updated time; note this is on the instance, not on the Lease.
        registrant.setLastUpdatedTimestamp();
        // Invalidate the cached responses for this application and its VIP addresses.
        invalidateCache(registrant.getAppName(), registrant.getVIPAddress(), registrant.getSecureVipAddress());
        logger.info("Registered instance {}/{} with status {} (replication={})",
                registrant.getAppName(), registrant.getId(), registrant.getStatus(), isReplication);
    } finally {
        read.unlock();
    }
}
1.5 Lease:租约器
装饰eureka client实例
/**
 * Excerpt of the lease wrapper placed around a registered instance ({@code holder}).
 *
 * <p>Only the timestamp bookkeeping is shown; constructors and accessors of the full class
 * are omitted from this excerpt.
 *
 * @param <T> type of the object the lease is held for
 */
public class Lease<T> {
    enum Action {
        Register, Cancel, Renew
    };

    // Default lease duration, in seconds.
    public static final int DEFAULT_DURATION_IN_SECS = 90;

    private T holder;
    // Set by cancel(); a value > 0 makes isExpired(...) return true.
    private long evictionTimestamp;
    // Registration timestamp (not written by any method shown in this excerpt).
    private long registrationTimestamp;
    // Set once by serviceUp().
    private long serviceUpTimestamp;
    // Make it volatile so that the expiration task would see this quicker
    // Written by renew() as "now + duration".
    private volatile long lastUpdateTimestamp;
    // Added to currentTimeMillis() in renew(), so presumably in milliseconds -- confirm against the constructor.
    private long duration;

    /** Renews the lease: sets lastUpdateTimestamp to the current time plus the duration. */
    public void renew() {
        lastUpdateTimestamp = System.currentTimeMillis() + duration;
    }

    /** Cancels the lease by recording the eviction time; only the first call has an effect. */
    public void cancel() {
        if (evictionTimestamp <= 0) {
            evictionTimestamp = System.currentTimeMillis();
        }
    }

    /** Records the time the service came up; only the first call has an effect. */
    public void serviceUp() {
        if (serviceUpTimestamp == 0) {
            serviceUpTimestamp = System.currentTimeMillis();
        }
    }

    /**
     * Tells whether the lease is expired.
     *
     * <p>The lease is expired when it was cancelled, or when the current time is past
     * {@code lastUpdateTimestamp + duration + additionalLeaseMs}. Because renew() already
     * stores "now + duration" in lastUpdateTimestamp, the duration is effectively counted
     * twice here; the notes record this as an acknowledged upstream quirk, and it is kept
     * unchanged.
     *
     * @param additionalLeaseMs extra grace time added to the expiry instant
     * @return true if the lease was cancelled or its expiry instant has passed
     */
    public boolean isExpired(long additionalLeaseMs) {
        return (evictionTimestamp > 0 || System.currentTimeMillis() > (lastUpdateTimestamp + duration + additionalLeaseMs));
    }
}
2、心跳续约
2.1 入口
- 入口:com.netflix.eureka.resources.InstanceResource#renewLease()
/**
 * REST entry point for a heartbeat (lease renewal) of one instance.
 *
 * @param isReplication      replication header; only the literal {@code "true"} marks a request
 *                           coming from another node of the cluster
 * @param overriddenStatus   overridden status sent with the request, may be null
 * @param status             status sent with the request (not read by this method)
 * @param lastDirtyTimestamp last dirty timestamp sent with the request, may be null
 * @return 404 when the renewal fails; otherwise the result of the dirty-timestamp validation,
 *         or 200 when that validation is skipped
 */
@PUT
public Response renewLease(
        @HeaderParam(PeerEurekaNode.HEADER_REPLICATION) String isReplication,
        @QueryParam("overriddenstatus") String overriddenStatus,
        @QueryParam("status") String status,
        @QueryParam("lastDirtyTimestamp") String lastDirtyTimestamp) {
    final boolean fromPeer = "true".equals(isReplication);

    // Renewal failed: answer 404 (per the notes, the client re-registers when it receives a 404).
    if (!registry.renew(app.getName(), id, fromPeer)) {
        logger.warn("Not Found (Renew): {} - {}", app.getName(), id);
        return Response.status(Status.NOT_FOUND).build();
    }

    Response reply;
    if (lastDirtyTimestamp == null || !serverConfig.shouldSyncWhenTimestampDiffers()) {
        // No timestamp supplied, or timestamp syncing is switched off: plain 200.
        reply = Response.ok().build();
    } else {
        reply = this.validateDirtyTimestamp(Long.valueOf(lastDirtyTimestamp), fromPeer);
        // Store the overridden status since the validation found out the node that replicates wins
        final boolean answeredNotFound =
                reply.getStatus() == Response.Status.NOT_FOUND.getStatusCode();
        final boolean hasConcreteOverride =
                overriddenStatus != null && !(InstanceStatus.UNKNOWN.name().equals(overriddenStatus));
        if (answeredNotFound && hasConcreteOverride && fromPeer) {
            registry.storeOverriddenStatusIfRequired(app.getAppName(), id, InstanceStatus.valueOf(overriddenStatus));
        }
    }
    logger.debug("Found (Renew): {} - {}; reply status={}", app.getName(), id, reply.getStatus());
    return reply;
}
com.netflix.eureka.resources.InstanceResource#validateDirtyTimestamp
/**
 * Compares the last-dirty timestamp sent with a request against the one held in the registry.
 *
 * <p>Note that this is the instance's {@code lastDirtyTimestamp}, not a heartbeat time.
 *
 * @param lastDirtyTimestamp timestamp sent by the caller, may be null
 * @param isReplication      whether the request came from another node of the cluster
 * @return 404 when the incoming timestamp is greater than the stored one; 409 carrying the
 *         stored instance when the stored one is greater and the request is a replication;
 *         200 in every other case (instance unknown, timestamp null or equal, or stored one
 *         greater on a non-replicated request)
 */
private Response validateDirtyTimestamp(Long lastDirtyTimestamp,
                                        boolean isReplication) {
    // Look the instance up by application name and instance id.
    InstanceInfo appInfo = registry.getInstanceByAppAndId(app.getName(), id, false);

    // Nothing to compare, or both sides agree (the normal case): OK.
    if (appInfo == null
            || lastDirtyTimestamp == null
            || lastDirtyTimestamp.equals(appInfo.getLastDirtyTimestamp())) {
        return Response.ok().build();
    }

    // The caller holds the greater timestamp: answer 404.
    if (lastDirtyTimestamp > appInfo.getLastDirtyTimestamp()) {
        return Response.status(Status.NOT_FOUND).build();
    }

    // The registry holds the greater timestamp: a replicating peer gets the stored instance
    // back with 409; a non-replicated request simply gets 200.
    if (appInfo.getLastDirtyTimestamp() > lastDirtyTimestamp && isReplication) {
        return Response.status(Status.CONFLICT).entity(appInfo).build();
    }
    return Response.ok().build();
}
2.2 发布事件
具体的业务代码:org.springframework.cloud.netflix.eureka.server.InstanceRegistry#renew
/**
 * Spring Cloud's renewal hook: publishes an {@code EurekaInstanceRenewedEvent} for the first
 * application whose name equals {@code appName}, then delegates to the parent's renew.
 *
 * <p>The event is published even when no instance with the given id is found in that
 * application; its instance argument is then null. No event is published when no
 * application matches.
 *
 * @param appName       application name sent by the client
 * @param serverId      instance id sent by the client
 * @param isReplication forwarded to the event and to the parent call
 * @return the result of the parent class's renew
 */
public boolean renew(final String appName, final String serverId,
        boolean isReplication) {
    log("renew " + appName + " serverId " + serverId + ", isReplication {}"
            + isReplication);

    for (Application candidate : getSortedApplications()) {
        if (!candidate.getName().equals(appName)) {
            continue;
        }
        // Find the instance with the requested id inside the matching application.
        InstanceInfo matched = null;
        for (InstanceInfo each : candidate.getInstances()) {
            if (each.getId().equals(serverId)) {
                matched = each;
                break;
            }
        }
        // Publish the renewal event.
        publishEvent(new EurekaInstanceRenewedEvent(this, appName, serverId,
                matched, isReplication));
        // Only the first application with a matching name is considered.
        break;
    }

    // The actual renewal is done by the parent class.
    return super.renew(appName, serverId, isReplication);
}
2.3集群同步(心跳续约后)
com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl#renew
/**
 * Renews the lease through the parent class and, only when that succeeds, replicates the
 * heartbeat to the peer nodes.
 *
 * @param appName       application name
 * @param id            instance id
 * @param isReplication passed to the parent renew and to replicateToPeers
 * @return true if the parent renewal succeeded, false otherwise
 */
public boolean renew(final String appName, final String id, final boolean isReplication) {
    // A failed renewal is not replicated.
    if (!super.renew(appName, id, isReplication)) {
        return false;
    }
    // Cluster synchronisation of the heartbeat.
    replicateToPeers(Action.Heartbeat, appName, id, null, null, isReplication);
    return true;
}
2.4 心跳续约
com.netflix.eureka.registry.AbstractInstanceRegistry#renew
/**
 * Renews the lease of one instance.
 *
 * @param appName       application name used to find the per-application lease map
 * @param id            instance id used to find the lease inside that map
 * @param isReplication passed to the counters and to the overridden-status lookup
 * @return false when no lease exists for the instance or its computed overridden status is
 *         UNKNOWN; true after the lease has been renewed
 */
public boolean renew(String appName, String id, boolean isReplication) {
    RENEW.increment(isReplication);
    // Find the application's lease map, then the lease of this instance.
    Map<String, Lease<InstanceInfo>> gMap = registry.get(appName);
    Lease<InstanceInfo> leaseToRenew = null;
    if (gMap != null) {
        leaseToRenew = gMap.get(id);
    }
    if (leaseToRenew == null) {
        RENEW_NOT_FOUND.increment(isReplication);
        logger.warn("DS: Registry: lease doesn't exist, registering resource: {} - {}", appName, id);
        return false;
    } else {
        InstanceInfo instanceInfo = leaseToRenew.getHolder();
        if (instanceInfo != null) {
            // touchASGCache(instanceInfo.getASGName());
            InstanceStatus overriddenInstanceStatus = this.getOverriddenInstanceStatus(
                    instanceInfo, leaseToRenew, isReplication);
            if (overriddenInstanceStatus == InstanceStatus.UNKNOWN) {
                logger.info("Instance status UNKNOWN possibly due to deleted override for instance {}"
                        + "; re-register required", instanceInfo.getId());
                RENEW_NOT_FOUND.increment(isReplication);
                return false;
            }
            // The instance's status differs from the computed overridden status:
            // the computed status wins and is written back without touching the dirty flag.
            if (!instanceInfo.getStatus().equals(overriddenInstanceStatus)) {
                // Log the status that is compared and applied (overriddenInstanceStatus),
                // not the instance's stored override field, which may hold a different value.
                logger.info(
                        "The instance status {} is different from overridden instance status {} for instance {}. "
                                + "Hence setting the status to overridden status", instanceInfo.getStatus().name(),
                        overriddenInstanceStatus,
                        instanceInfo.getId());
                instanceInfo.setStatusWithoutDirty(overriddenInstanceStatus);
            }
        }
        // Count this renewal (per the notes, the per-minute count feeds self-preservation).
        renewsLastMin.increment();
        // Push the lease's expiry forward.
        leaseToRenew.renew();
        return true;
    }
}
3、集群同步
集群同步原理:
- Eureka-Server 集群不区分主从节点或者 Primary & Secondary 节点,所有节点相同角色( 也就是没有角色 ),完全对等。
- Eureka-Client 可以向任意 Eureka-Server 发起任意读写操作
- eureka client选择任意一个Eureka-Server发起服务注册 心跳续约 服务下架 服务状态更新 状态删除操作,eureka server 成功执行这些操作后 会循环eureka server集群内每个节点,将操作复制到另外的 Eureka-Server 以达到最终一致性。注意,Eureka-Server 保证AP。
eureka集群有3个节点 server1 server2 server3
client1选择server2发起注册请求 server2处理完client1的注册请求后(注意 此时并未返回success)会进行集群同步操作:遍历eureka server列表 向server1和server3发出同样的client1注册请求 同时 isReplication为true 表示本次注册请求来自于集群同步 这样server1和server3接收到来自server2的注册请求后 不会再次进行集群同步 从而避免造成死循环
com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl#replicateToPeers负责所有的集群同步
服务注册 心跳续约 服务下架 服务状态更新 状态删除会进行集群同步
/**
 * Replicates one registry action to every other node of the cluster.
 *
 * <p>Nothing is replicated when the action itself arrived through replication; in that case
 * only the replication counter is incremented.
 *
 * @param action        kind of operation being replicated
 * @param appName       application name
 * @param id            instance id
 * @param info          instance data, optional
 * @param newStatus     new status, optional
 * @param isReplication true when the action came from another server rather than from a client
 */
private void replicateToPeers(Action action, String appName, String id,
                              InstanceInfo info /* optional */,
                              InstanceStatus newStatus /* optional */, boolean isReplication) {
    final Stopwatch tracer = action.getTimer().start();
    try {
        // If it is a replication already, do not replicate again as this will create a poison replication
        if (isReplication) {
            numberOfReplicationsLastMin.increment();
            return;
        }
        // Intended as the "no peers" shortcut (per the notes: a single-node cluster).
        if (peerEurekaNodes == Collections.EMPTY_LIST) {
            return;
        }

        for (final PeerEurekaNode peer : peerEurekaNodes.getPeerEurekaNodes()) {
            // If the url represents this host, do not replicate to yourself.
            if (!peerEurekaNodes.isThisMyUrl(peer.getServiceUrl())) {
                // Send the same operation to the other server.
                replicateInstanceActionsToPeers(action, appName, id, info, newStatus, peer);
            }
        }
    } finally {
        tracer.stop();
    }
}
com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl#replicateInstanceActionsToPeers()
/**
 * Sends one registry action to a single peer node by calling the matching method on it.
 *
 * <p>Any Throwable raised while doing so is logged and not propagated.
 *
 * @param action    kind of operation to replicate
 * @param appName   application name
 * @param id        instance id
 * @param info      instance data, used for Register
 * @param newStatus new status, used for StatusUpdate
 * @param node      the peer to send the action to
 */
private void replicateInstanceActionsToPeers(Action action, String appName,
                                             String id, InstanceInfo info, InstanceStatus newStatus,
                                             PeerEurekaNode node) {
    try {
        CurrentRequestVersion.set(Version.V2);
        // Dispatch on the action; actions other than Register and Cancel first read the
        // instance as currently held in the local registry.
        switch (action) {
            case Register:
                node.register(info);
                break;
            case Cancel:
                node.cancel(appName, id);
                break;
            case Heartbeat: {
                InstanceStatus storedOverride = overriddenInstanceStatusMap.get(id);
                InstanceInfo current = getInstanceByAppAndId(appName, id, false);
                node.heartbeat(appName, id, current, storedOverride, false);
                break;
            }
            case StatusUpdate: {
                InstanceInfo current = getInstanceByAppAndId(appName, id, false);
                node.statusUpdate(appName, id, newStatus, current);
                break;
            }
            case DeleteStatusOverride: {
                InstanceInfo current = getInstanceByAppAndId(appName, id, false);
                node.deleteStatusOverride(appName, id, current);
                break;
            }
        }
    } catch (Throwable t) {
        logger.error("Cannot replicate information to {} for action {}", node.getServiceUrl(), action.name(), t);
    }
}