python json字符串类型的value换行方案
按照标准json语法,字符串类型的value是不能换行写的.
例如,以下是错误的写法
{
"key":"hello
world"
}
但是遇到了需要在json中写代码与服务器交互的情况,无奈只能这样写:
{
"key":"var a = 1
var b = 2
var c=a+b"
}
代码行数少了还好,多了极其痛苦(难以阅读和维护),于是想办法让json中可以换行写代码.
尝试1: json5
JSON5号称”JSON for Humans”,相比标准json主要特点如下:
- 字符串value支持换行
- key可以不加引号
- key和value可以使用单引号('')代替双引号("")
- 支持注释
- 支持十六进制数值
栗子:
以下是一个合法的json5
{
// comments
unquoted: 'and you can quote me on that',
singleQuotes: 'I can use "double quotes" here',
lineBreaks: "Look, Mom! \
No \\n's!",
hexadecimal: 0xdecaf,
leadingDecimalPoint: .8675309, andTrailing: 8675309.,
positiveSign: +1,
trailingComma: "in objects", andIn: ["arrays",],
"backwardsCompatible": "with JSON",
}
优点:现成
缺点:
- 换行需要用 \ 标识;
- 解析之后因为没有了换行,代码无法执行,所以是达不到目的的;
{
"key":"var a = 1 \
var b = 2 \
var c=a+b"
}
解析之后的结果为
{
"key":"var a = 1var b = 2var c=a+b"
}
尝试2: 编写自己的json解释器
- 优点: 可根据自己定义的规则解析数据
- 缺点: 成本高.
以下为用python实现的json解释器,支持换行,目前没有做完整的错误处理,请不要用于生产:
"""
遇到问题没人解答?小编创建了一个Python学习交流群:778463939
寻找有志同道合的小伙伴,互帮互助,群里还有不错的视频学习教程和PDF电子书!
Topic: 下降解析器
Desc :
"""
import re
import collections
# Token specification.
# Every pattern is wrapped in a named group so that ``match.lastgroup``
# reports which kind of token was recognised.
NUM = r"(?P<NUM>\d+)"  # unsigned integer literal
# Double-quoted string. ``[^"]`` also matches line breaks, which is what
# allows a value to span several lines. Escaped quotes (\") are not supported.
STR_D = r'"(?P<STR_D>[^"]*?)"'
# Single-quoted string. ``.`` matches line breaks because the master pattern
# is compiled with re.S.
STR_S = r"(?P<STR_S>'.*?')"
COLON = r"(?P<COLON>:)"
COMMA = r"(?P<COMMA>,)"
LLB = r"(?P<LLB>\[)"  # left list bracket
RLB = r"(?P<RLB>\])"  # right list bracket
LDB = r"(?P<LDB>\{)"  # left dict brace
RDB = r"(?P<RDB>\})"  # right dict brace
WS = r"(?P<WS>\s+)"
NULL = r"(?P<NULL>null)"
FALSE = r"(?P<FALSE>false)"
TRUE = r"(?P<TRUE>true)"
# Alternatives are tried left to right at each position.
master_pat = re.compile(
    "|".join([NUM, STR_D, STR_S, LLB, RLB,
              LDB, RDB, COLON, COMMA, FALSE, TRUE, NULL, WS]),
    re.M | re.S,
)

# Tokenizer
# A token is a (type, value) pair; ``type`` is one of the group names above.
Token = collections.namedtuple("Token", ["type", "value"])
def generate_tokens(text):
    """Yield the non-whitespace tokens of *text* in order.

    Each yielded item is a ``Token(type, value)`` where ``type`` is the name
    of the ``master_pat`` group that matched and ``value`` is the matched
    text (string tokens keep their surrounding quotes).

    Raises:
        ValueError: if a character sequence that matches no token pattern is
            reached, instead of silently dropping the rest of the input.
    """
    scanner = master_pat.scanner(text)
    end = 0  # offset just past the last successfully matched token
    # scanner.match() returns None once nothing matches at the current
    # position, which terminates the two-argument iter().
    for m in iter(scanner.match, None):
        end = m.end()
        if m.lastgroup != "WS":
            yield Token(m.lastgroup, m.group())
    if end != len(text):
        raise ValueError(
            "cannot tokenize input at offset {}: {!r}".format(end, text[end:end + 20])
        )
def simple_join(text):
    """Re-emit *text* as a single string built from its tokens.

    Whitespace between tokens is dropped (``generate_tokens`` skips it).
    Single-quoted strings have their single quotes turned into double
    quotes, and raw line breaks inside any string token are rewritten as the
    two-character escapes ``\\n`` / ``\\r``.

    Single-quoted strings previously skipped the line-break escaping because
    the two cases were mutually exclusive branches; both kinds are escaped now.
    """
    parts = []
    for kind, value in generate_tokens(text):
        if kind.startswith("STR_"):
            if kind == "STR_S":
                value = value.replace("'", '"')
            # Turn real line breaks into their escaped spelling.
            value = value.replace("\n", "\\n").replace("\r", "\\r")
        parts.append(value)
    return "".join(parts)
class Evaluator:
    """Recursive-descent parser that turns relaxed-JSON text into Python objects.

    Tokens come from ``generate_tokens``. The parser keeps the current token
    in ``self.tok`` and one token of look-ahead in ``self.nexttok``. Every
    parsing method returns with ``self.tok`` still on the LAST token of the
    construct it parsed; the caller advances past it.
    """

    def __init__(self, text):
        """Remember *text*; nothing is parsed until ``parse()`` is called."""
        self.text = text

    def parse(self):
        """Parse ``self.text`` and return the resulting Python value.

        Raises:
            ValueError: on empty input, unknown tokens, or malformed
                objects/lists.
        """
        self.tokens = generate_tokens(self.text)
        self.nexttok = next(self.tokens, None)
        self._advance()
        return self._parse()

    def _advance(self):
        """Advance one token ahead"""
        self.tok, self.nexttok = self.nexttok, next(self.tokens, None)

    def _tok_type(self):
        """Return the type of the current token, or None at end of input."""
        return None if self.tok is None else self.tok[0]

    def _parse(self):
        """Parse the value that starts at the current token and return it."""
        if self.tok is None:
            raise ValueError("unexpected end of input")
        kind, value = self.tok
        if kind == "LLB":
            return self.get_list()
        if kind == "LDB":
            return self.get_dict()
        if kind.startswith("STR_"):
            # Drop exactly one delimiter from each end. strip('"') left the
            # quotes of single-quoted strings in place.
            return value[1:-1]
        if kind == "NUM":
            return int(value)
        if kind == "NULL":
            return None
        if kind == "FALSE":
            return False
        if kind == "TRUE":
            return True
        raise ValueError("未知token:{}".format(value))

    def get_dict(self):
        """Parse an object and return it as a dict.

        The current token must be ``{``. The content is a comma-separated
        list of ``key: value`` pairs with string keys, and it must end with
        ``}``. A repeated key keeps the last value.

        Raises:
            ValueError: if a key is not a string, a ``:`` is missing, or the
                closing ``}`` is absent.
        """
        res = {}
        self._advance()  # step past "{"
        if self._tok_type() == "RDB":  # empty object
            return res

        def parse_pair():
            kind = self._tok_type()
            if kind is None:
                raise ValueError("unexpected end of input while reading an object key")
            if not kind.startswith("STR_"):
                raise ValueError(f"KEY需是字符串形式,{self.tok[1]}")
            new_key = self._parse()
            self._advance()
            if self._tok_type() != "COLON":
                raise ValueError("KEY和VALUE需用:分割")
            self._advance()
            res[new_key] = self._parse()
            self._advance()  # move to "," or "}"

        parse_pair()
        while self._tok_type() == "COMMA":
            self._advance()
            parse_pair()
        if self._tok_type() != "RDB":
            raise ValueError("expected '}' to close the object")
        return res

    def get_list(self):
        """Parse an array and return it as a list.

        The current token must be ``[``. Elements are comma-separated values
        of any supported kind, and the array must end with ``]``.

        Raises:
            ValueError: if an element is not a valid value or the closing
                ``]`` is absent.
        """
        res = []
        self._advance()  # step past "["
        if self._tok_type() == "RLB":  # empty list
            return res

        # _parse dispatches scalars, nested lists and nested objects, and
        # rejects anything else instead of silently skipping it.
        res.append(self._parse())
        self._advance()
        while self._tok_type() == "COMMA":
            self._advance()
            res.append(self._parse())
            self._advance()
        if self._tok_type() != "RLB":
            raise ValueError("expected ']' to close the list")
        return res
if __name__ == "__main__":
    # Demo input: note the multi-line "code" value and the repeated "k2" key.
    text = """
{
"k1":1,
"k2":"v2",
"is_true":true,
"is_none":null,
"k2":"v2",
"k3":{
"a1":"a1",
"a2":"a2",
"code":"
var code = 0
code = 1
"
},
"list":[1,2,3,4]
}
"""
    e = Evaluator(text)
    res = e.parse()
    print(res)
    # Expected output (line breaks inside "code" are preserved):
    # {'k1': 1, 'k2': 'v2', 'is_true': True, 'is_none': None,
    #  'k3': {'a1': 'a1', 'a2': 'a2', 'code': '\nvar code = 0\ncode = 1\n'},
    #  'list': [1, 2, 3, 4]}
尝试3: 全局替换
将字符串中的换行符全局替换为 \n
优点:简单粗暴
{
"key":"
var a = 1
var b = 2
var c=a+b"
}
替换之后的结果为
{
"key":"\nvar a = 1\nvar b = 2\nvar c=a+b"
}
缺点:虽然写的时候有换行,但是上传到服务器再查询的时候只能看到 \n,依然缺乏可读性;
尝试4(最终采用方案):
结合UI将json生成树状节点,将代码类型的value单独显示.
优点
- 难度一般
- 按常规方式解析json
- 上传到服务器再查询的时候也能看到换行.