总结
使用curator访问zookeeper时,有几个状态。注:下面的t是指与zk server协商后的session timeout时间。
- SUSPENDED:如果客户端长时间收不到所连接的zk server的任何响应(包括心跳响应),即超过2t/3没有收到服务端的任何信息,就会进入SUSPENDED状态,此时客户端会尝试重连其他服务器。
- LOST:当客户端因为SUSPENDED状态而发起重连逻辑,并成功连接到server端之后,此时server端发现上次的session过期了,那么就会转变状态为LOST状态,并通知客户端。
- RECONNECTED:上面的zk server通知到客户端LOST之后,重建了session,通知客户端,连接重建了;当然如果客户端只是2t/3之后进入了SUSPENDED状态,之后成功连接到了其他服务器,zk cluster没有认为session过期,那么就不会有LOST状态,而只是进入RECONNECTED。
LOST和RECONNECTED除了session是否过期之外,还有其他区别?或者session过期还触发了什么其他问题? 例如:session过期后,TreeCache是不是需要重新的初始化?session过期,重连上之后,确实是触发了INITIALIZED事件。如果是在重建连接之前,在ZK上发生了数据变更,当重连之后,首先推送CONNECTION_RECONNECTED事件,然后数据变更NODE_ADDED、NODE_REMOVED事件,最后推送INITIALIZED事件。即使没有数据变更,依然会触发INITIALIZED事件。
如果只是进行了RECONNECTED,而session没有过期呢? 不论在重连之前有没有数据变更,都不会有INITIALIZED事件。但是如果有数据变更的话,是可以发现数据变更的。
也就是说,对于Cache来讲,只有当session重建之后才会触发INITIALIZED事件。但不论session是否重建,都会触发数据变更事件。
如果session没有过期,那么session只是进行了透明的转移。依然使用这个session进行重建连接。 那么有个问题,服务端怎么判断这个session有没有过期呢?
session是否过期是由服务端决定的,而不是客户端。服务端告知客户端session过期。
假设session超时时间是t,那么当t时间之后服务器收不到客户端的任何信息,就会认为session过期。对于客户端而言,如果t/3时间内未收到服务端的任何信息,那么就会发送心跳信息给服务端;如果2t/3时间后,还是未收到服务端的任何信息,则会开始尝试其他服务器,此时它有t/3的时间去寻找。
客户端在距离上次向zk服务端发送信息的时间超过sessionTimeout*(2/3)/2(即t/3)之后发送心跳;如果心跳失败,则会在距离上次从zk服务端接收信息的时间超过sessionTimeout*(2/3)(即2t/3)之后,进入客户端的session超时逻辑,并开始重连。
curator的一些Cache会有INITIALIZED事件。对于TreeCache,当所有node信息初始化完成之后(初始化过程中的节点是通过NODE_ADDED事件通知的),会触发INITIALIZED事件。只有收到INITIALIZED事件之后,才能安全地从TreeCache中获取数据,否则会有空指针的问题。
对于TreeCache而言,即使处于SUSPENDED状态,cache并不会失效,依然可以从中获取数据。
测试代码
ZkPathChildrenCacheTest
只能发现node的子节点的变化,如增加子节点、删除子节点、更新子节点的数据。但是无法发现本身节点的数据变化。 下面例子中,/example/cache的数据变化是无法被发现的。
/**
 * Interactive console demo of Curator's {@code PathChildrenCache}.
 *
 * <p>The cache watches the children of {@link #PATH}: child creation, child
 * data updates and child removal are reported to the registered listener.
 * Commands typed at the prompt ({@code set}, {@code remove}, {@code list},
 * {@code quit}) modify or inspect those children.
 */
public class ZkPathChildrenCacheTest {
    private static final String PATH = "/example/cache";
    private static PathChildrenCache cache = null;

    /**
     * Connects to ZooKeeper, starts the cache and runs the command loop.
     * The cache and the client are always closed on exit.
     */
    public static void main(String[] args) throws Exception {
        CuratorFramework client = null;
        String zkServers = "172.16.16.47:2182,172.16.16.48:2182,172.16.16.49:2182";
        try {
            client = CuratorFrameworkFactory.newClient(zkServers,
                    new ExponentialBackoffRetry(1000, 3));
            client.start();
            // in this example we will cache data. Notice that this is optional.
            cache = new PathChildrenCache(client, PATH, true);
            cache.start();
            processCommands(client, cache);
        } finally {
            CloseableUtils.closeQuietly(cache);
            CloseableUtils.closeQuietly(client);
        }
    }

    /**
     * Registers a listener that logs child add / update / remove events.
     * Other event types are ignored.
     */
    private static void addListener(PathChildrenCache cache) {
        // a PathChildrenCacheListener is optional. Here, it's used just to log changes
        PathChildrenCacheListener listener = new PathChildrenCacheListener() {
            @Override
            public void childEvent(CuratorFramework client, PathChildrenCacheEvent event) throws Exception {
                System.out.println("hello.....");
                switch (event.getType()) {
                    case CHILD_ADDED:
                        System.out.println("Node added: " + ZKPaths.getNodeFromPath(event.getData().getPath())
                                + " ,data:" + new String(event.getData().getData()));
                        break;
                    case CHILD_UPDATED:
                        System.out.println("Node changed: " + ZKPaths.getNodeFromPath(event.getData().getPath())
                                + " ,data:" + new String(event.getData().getData()));
                        break;
                    case CHILD_REMOVED:
                        System.out.println("Node removed: " + ZKPaths.getNodeFromPath(event.getData().getPath()));
                        break;
                    default:
                        // only child add / update / remove events are logged
                        break;
                }
            }
        };
        cache.getListenable().addListener(listener);
    }

    /**
     * Simple command-line processor: reads commands from stdin until
     * {@code quit} / {@code q} is entered or stdin reaches end-of-stream.
     * Any failure is printed and ends the loop.
     */
    private static void processCommands(CuratorFramework client, PathChildrenCache cache) throws Exception {
        printHelp();
        try {
            addListener(cache);
            BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
            boolean done = false;
            while (!done) {
                System.out.print("> ");
                String line = in.readLine();
                if (line == null) {
                    break;
                }
                String command = line.trim();
                // "".split(...) returns [""], never an empty array, so test the
                // trimmed line itself to skip blank input.
                if (command.isEmpty()) {
                    continue;
                }
                // Split on runs of whitespace so repeated spaces between
                // arguments do not produce empty tokens.
                String[] parts = command.split("\\s+");
                String operation = parts[0];
                String[] args = Arrays.copyOfRange(parts, 1, parts.length);
                if (operation.equalsIgnoreCase("help") || operation.equalsIgnoreCase("?")) {
                    printHelp();
                } else if (operation.equalsIgnoreCase("q") || operation.equalsIgnoreCase("quit")) {
                    done = true;
                } else if (operation.equals("set")) {
                    setValue(client, command, args);
                } else if (operation.equals("remove")) {
                    remove(client, command, args);
                } else if (operation.equals("list")) {
                    list(cache);
                }
                Thread.sleep(1000); // just to allow the console output to catch up
            }
        } catch (Throwable throwable) {
            throwable.printStackTrace();
        }
    }

    /** Prints every child currently held by the cache as {@code path = data}. */
    private static void list(PathChildrenCache cache) {
        if (cache.getCurrentData().size() == 0) {
            System.out.println("* empty *");
        } else {
            for (ChildData data : cache.getCurrentData()) {
                System.out.println(data.getPath() + " = " + new String(data.getData()));
            }
        }
    }

    /**
     * Deletes the child {@code args[0]} under {@link #PATH}.
     * A child that does not exist is silently ignored.
     */
    private static void remove(CuratorFramework client, String command, String[] args) throws Exception {
        if (args.length != 1) {
            System.err.println("syntax error (expected remove <path>): " + command);
            return;
        }
        String name = args[0];
        if (name.contains("/")) {
            System.err.println("Invalid node name: " + name);
            return;
        }
        String path = ZKPaths.makePath(PATH, name);
        try {
            client.delete().forPath(path);
        } catch (KeeperException.NoNodeException ignored) {
            // the node is already gone, nothing to delete
        }
    }

    /**
     * Sets the data of child {@code args[0]} under {@link #PATH} to
     * {@code args[1]}, creating the node (and missing parents) when it does
     * not exist yet.
     */
    private static void setValue(CuratorFramework client, String command, String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("syntax error (expected set <path> <value>): " + command);
            return;
        }
        String name = args[0];
        if (name.contains("/")) {
            System.err.println("Invalid node name: " + name);
            return;
        }
        String path = ZKPaths.makePath(PATH, name);
        byte[] bytes = args[1].getBytes();
        try {
            client.setData().forPath(path, bytes);
        } catch (KeeperException.NoNodeException e) {
            // first write to this child: create it instead of updating
            client.create().creatingParentContainersIfNeeded().forPath(path, bytes);
        }
    }

    /** Prints the list of supported commands. */
    private static void printHelp() {
        System.out.println("An example of using PathChildrenCache. This example is driven by entering commands at the prompt:\n");
        System.out.println("set <name> <value>: Adds or updates a node with the given name");
        System.out.println("remove <name>: Deletes the node with the given name");
        System.out.println("list: List the nodes/values in the cache");
        System.out.println("quit: Quit the example");
        System.out.println();
    }
}
ZkNodeCacheTest
只能发现本身节点的数据变化。本身节点被删除也能发现,但需要注意回调方法的写法:节点被删除后getCurrentData()返回null,回调中需要判空,防止出现空指针。本身节点的创建也可以被发现。
public class ZkNodeCacheTest {
private static final String PATH = "/example/cache";
public static void main(String[] args) throws Exception {
String zkServers = "172.16.16.47:2182,172.16.16.48:2182,172.16.16.49:2182";
CuratorFramework client = CuratorFrameworkFactory.builder().connectString(zkServers)
.retryPolicy(new ExponentialBackoffRetry(1000, 3)).build();
client.start();
NodeCache nodeCache = new NodeCache(client, PATH);
nodeCache.getListenable().addListener(new NodeCacheListener() {
@Override
public void nodeChanged() throws Exception {
System.out.println("node cache changed, path: " + nodeCache.getCurrentData().getPath()
+ " ,data: " + new String(nodeCache.getCurrentData().getData()));
}
});
nodeCache.start();
System.in.read();
}
}
ZkTreeCacheTest
既可以发现本身节点的数据变化,又可以发现子节点的创建、子节点的数据更新、子节点的删除等。
/**
 * Demo of Curator's {@code TreeCache} rooted at {@link #PATH}.
 *
 * <p>Every cache event first dumps the cached root, its children and its
 * grandchildren, then logs the event itself. After the first
 * {@code INITIALIZED} event a background thread polls the children of
 * {@code /example/cache/test} every 500 ms.
 */
public class ZkTreeCacheTest {
    private static final String PATH = "/example/cache";
    /** Released by the first INITIALIZED event. */
    private static final CountDownLatch LATCH = new CountDownLatch(1);

    /**
     * Connects to ZooKeeper, starts the tree cache, waits for it to be
     * initialized and then blocks on stdin. The cache and the client are
     * closed before returning.
     */
    public static void main(String[] args) throws Exception {
        String zkServer = "172.16.16.47:2182,172.16.16.48:2182,172.16.16.49:2182";
        CuratorFramework client = CuratorFrameworkFactory.builder().connectString(zkServer)
                .retryPolicy(new ExponentialBackoffRetry(1000, 3))
                .build();
        client.start();
        try {
            TreeCache treeCache = new TreeCache(client, PATH);
            treeCache.getListenable().addListener(new TreeCacheListener() {
                @Override
                public void childEvent(CuratorFramework client, TreeCacheEvent event) throws Exception {
                    printSnapshot(treeCache);
                    switch (event.getType()) {
                        case INITIALIZED:
                            LATCH.countDown();
                            System.out.println("INITIALIZED");
                            break;
                        case NODE_ADDED:
                            System.out.println("NODE_ADDED, path: " + event.getData().getPath() + " ,data: " + dataAsString(event.getData()));
                            break;
                        case NODE_UPDATED:
                            System.out.println("NODE_UPDATED, path: " + event.getData().getPath() + " ,data: " + dataAsString(event.getData()));
                            break;
                        case NODE_REMOVED:
                            System.out.println("NODE_REMOVED, path: " + event.getData().getPath() + " ,data: " + dataAsString(event.getData()));
                            break;
                        case CONNECTION_RECONNECTED:
                            System.out.println("CONNECTION_RECONNECTED");
                            break;
                        case CONNECTION_LOST:
                            System.out.println("CONNECTION_LOST");
                            break;
                        case CONNECTION_SUSPENDED:
                            System.out.println("CONNECTION_SUSPENDED");
                            break;
                        default:
                            System.out.println("default");
                            break;
                    }
                }
            });
            try {
                treeCache.start();
                LATCH.await();
                System.out.println("xxxxx, " + PATH + ": data, " + dataAsString(treeCache.getCurrentData(PATH)));
                Thread thread = new Thread(new Runnable() {
                    @Override
                    public void run() {
                        while (true) {
                            try {
                                // null when the node is not in the cache
                                Map<String, ChildData> map = treeCache.getCurrentChildren("/example/cache/test");
                                if (map != null) {
                                    for (Map.Entry<String, ChildData> entry : map.entrySet()) {
                                        System.out.println("XXXXXXXXXXXXXXX");
                                        System.out.println(entry.getKey());
                                        System.out.println(entry.getValue().getPath());
                                        System.out.println(dataAsString(entry.getValue()));
                                    }
                                }
                            } catch (Exception e) {
                                e.printStackTrace();
                            }
                            try {
                                TimeUnit.MILLISECONDS.sleep(500);
                            } catch (InterruptedException e) {
                                // restore the flag and stop polling
                                Thread.currentThread().interrupt();
                                return;
                            }
                        }
                    }
                });
                // daemon: the poller must not keep the JVM alive once main returns
                thread.setDaemon(true);
                thread.start();
                Map<String, ChildData> map = treeCache.getCurrentChildren("/not/exist");
                System.out.println("FFFFFFFFFFFFFFFFFF, /not/exist, children: " + (MapUtils.isEmpty(map) ? "NULL" : map.keySet()));
                if (map == null) {
                    // printed when the path is not present in the cache
                    System.out.println("NNNNNNNNNNNNNuLL");
                }
                // keep the process alive until the user presses a key
                System.in.read();
            } finally {
                treeCache.close();
            }
        } finally {
            client.close();
        }
    }

    /**
     * Prints the cached data of {@link #PATH}, each of its children and each
     * grandchild. Lookups that return {@code null} (node not in the cache)
     * are skipped, so an absent root never throws out of the listener before
     * the event itself is handled.
     */
    private static void printSnapshot(TreeCache treeCache) {
        System.out.println("data: " + dataAsString(treeCache.getCurrentData(PATH)));
        Map<String, ChildData> map = treeCache.getCurrentChildren(PATH);
        if (map == null) {
            return;
        }
        for (Map.Entry<String, ChildData> entry : map.entrySet()) {
            String nodeName = entry.getKey();
            System.out.println("child, key: " + entry.getKey() +
                    " .child, path: " + entry.getValue().getPath() +
                    " , data: " + dataAsString(entry.getValue()));
            Map<String, ChildData> children = treeCache.getCurrentChildren(ZKPaths.makePath(PATH, nodeName));
            if (children == null) {
                continue;
            }
            for (Map.Entry<String, ChildData> childDataEntry : children.entrySet()) {
                System.out.println("children: key: " + childDataEntry.getKey() +
                        " .child, path: " + childDataEntry.getValue().getPath() +
                        " , data: " + dataAsString(childDataEntry.getValue()));
            }
        }
    }

    /**
     * Returns the node payload as text, or {@code "null"} when the node or
     * its payload is absent.
     */
    private static String dataAsString(ChildData childData) {
        if (childData == null || childData.getData() == null) {
            return "null";
        }
        return new String(childData.getData());
    }
}
参考
- ZOOKEEPER-2184: Zookeeper Client should re-resolve hosts when connection attempts fail
- ZOOKEEPER-1506: Re-try DNS hostname -> IP resolution if node connection fails