从深度嵌套的JSON结构中提取值

77 阅读1分钟

0612(1).png

  1. 问题背景
    在某些情况下，我们可能需要从深度嵌套的JSON结构中提取值。例如，给定以下JSON结构：
{
    "foo_code": 404,
    "foo_rbody": {
        "query": {
            "info": {
                "acme_no": "444444",
                "road_runner": "123"
            },
            "error": "no_lunch",
            "message": "runner problem."
        }
    },
    "acme_no": "444444",
    "road_runner": "123",
    "xyzzy_code": 200,
    "xyzzy_rbody": {
        "api": {
            "items": [
                {
                    "desc": "OK",
                    "id": 198,
                    "acme_no": "789",
                    "road_runner": "123",
                    "params": {
                        "bicycle": "2wheel",
                        "willie": "hungry",
                        "height": "1",
                        "coyote_id": "1511111"
                    },
                    "activity": "TRAP",
                    "state": "active",
                    "status": 200,
                    "type": "chase"
                }
            ]
        }
    }
}

我们可能需要提取以下值:

  • foo_rbody.query.info.acme_no
  • foo_rbody.query.info.road_runner
  • xyzzy_rbody.api.items[0].params.bicycle
  2. 解决方案
    有多种方法可以从深度嵌套的JSON结构中提取值。以下是一些最常用的方法:
  • 使用get_path()函数
import re

def get_path(dct, path):
    """Return the value found by following ``path`` through nested containers.

    ``path`` is a string of keys separated by ``.``, with sequence indexes
    written either as ``[n]`` or as a bare number segment, e.g.
    ``"xyzzy_rbody.api.items[0].params.bicycle"``.

    Args:
        dct: The nested dict/list structure to descend into.
        path: The dotted/bracketed path to resolve.

    Returns:
        The object stored at ``path``.

    Raises:
        KeyError, IndexError, TypeError: If a segment cannot be resolved
            against the container reached so far.
    """
    # Tokenise on the separators rather than on character classes, so keys
    # such as "2wheel" or "foo-bar" stay in one piece.
    for index, key in re.findall(r'\[(\d+)\]|([^.\[\]]+)', path):
        if index:
            dct = dct[int(index)]
        elif key.isdecimal() and not (isinstance(dct, dict) and key in dct):
            # A bare number is an index unless the current dict really has
            # that string as a key (JSON object keys are always strings).
            dct = dct[int(key)]
        else:
            dct = dct[key]
    return dct

# Assumes `data` holds the parsed JSON document shown above; the path then
# points at the "bicycle" entry under the first element of "items".
value = get_path(data, "xyzzy_rbody.api.items[0].params.bicycle")
  • 使用递归函数
def _find_key(node, name):
    """Depth-first search for the first dict key (or list index) equal to ``name``."""
    if isinstance(node, dict):
        for key, value in node.items():
            if key == name:
                return value
            found = _find_key(value, name)
            if found is not None:
                return found
    elif isinstance(node, list):
        for index, item in enumerate(node):
            if index == name:
                return item
            found = _find_key(item, name)
            if found is not None:
                return found
    return None


def _walk_path(node, segments):
    """Follow ``segments`` one container at a time; return None if a step is missing."""
    for segment in segments:
        if isinstance(node, dict):
            if segment not in node:
                return None
            node = node[segment]
        elif isinstance(node, list):
            if not segment.isdecimal() or int(segment) >= len(node):
                return None
            node = node[int(segment)]
        else:
            # Reached a scalar while path segments remain.
            return None
    return node


def get_value(dct, path):
    """Look up ``path`` in a nested dict/list structure.

    ``path`` is first treated as a single key (or list index) and searched
    for recursively, depth first; the first non-None match is returned.
    If that finds nothing and ``path`` is a string with several segments
    (``.`` separated, list indexes as ``[n]``), it is resolved step by step
    from the root, e.g. ``"xyzzy_rbody.api.items[0].params.bicycle"``.

    Args:
        dct: The nested dict/list structure to search.
        path: A key name, a list index, or a dotted/bracketed path string.

    Returns:
        The matching value, or None when nothing matches.
    """
    found = _find_key(dct, path)
    if found is not None or not isinstance(path, str):
        return found
    # Normalise "items[0]" to "items.0" so one split covers both notations.
    segments = [s for s in path.replace("[", ".").replace("]", "").split(".") if s]
    if len(segments) < 2:
        # A single name was already covered by the key search above.
        return None
    return _walk_path(dct, segments)

# Assumes `data` holds the parsed JSON document shown above.
# NOTE(review): get_value compares each key against the whole `path` string —
# confirm that a dotted/bracketed path is supported before relying on this call.
value = get_value(data, "xyzzy_rbody.api.items[0].params.bicycle")
  • 使用Pynq库
import pynq

# Sample document: the same nested structure as the JSON shown at the top.
data = {
    "foo_code": 404,
    "foo_rbody": {
        "query": {
            "info": {
                "acme_no": "444444",
                "road_runner": "123"
            },
            "error": "no_lunch",
            "message": "runner problem."
        }
    },
    "acme_no": "444444",
    "road_runner": "123",
    "xyzzy_code": 200,
    "xyzzy_rbody": {
        "api": {
            "items": [
                {
                    "desc": "OK",
                    "id": 198,
                    "acme_no": "789",
                    "road_runner": "123",
                    "params": {
                        "bicycle": "2wheel",
                        "willie": "hungry",
                        "height": "1",
                        "coyote_id": "1511111"
                    },
                    "activity": "TRAP",
                    "state": "active",
                    "status": 200,
                    "type": "chase"
                }
            ]
        }
    }
}

# NOTE(review): `pynq.select` is used here as a factory that takes a path
# string and returns a callable applied to `data` — confirm that the installed
# `pynq` package actually exposes this API.
query = pynq.select("xyzzy_rbody.api.items[0].params.bicycle")
value = query(data)