中国供应商字体解密
https://www.gys.cn/buy/5420348615.html

字体中的每个字形都是由一系列相对坐标指令绘制出来的,关键指令有:rmoveto、rlineto、rrcurveto 等

"""
<CharString name="uni100d1">
126 15 -1 rmoveto
0 0 rlineto
-2 7 rlineto
43 15 25 25 7 35 rrcurveto
0 0 rlineto
-11 -11 -12 -6 -13 0 rrcurveto
0 0 rlineto
-27 3 -15 18 -2 33 rrcurveto
0 0 rlineto
1 41 17 21 33 1 rrcurveto
0 0 rlineto
35 -1 18 -25 1 -49 rrcurveto
0 0 rlineto
0 -59 -33 -36 -66 -12 rrcurveto
75 95 rmoveto
0 0 rlineto
2 53 -10 26 -22 -1 rrcurveto
0 0 rlineto
-19 0 -9 -16 0 -32 rrcurveto
0 0 rlineto
-1 -33 10 -16 21 1 rrcurveto
0 0 rlineto
11 1 9 6 8 11 rrcurveto
endchar
</CharString>
"""
import ddddocr
import matplotlib.pyplot as plt
from matplotlib.path import Path
import matplotlib.patches as patches
# Path commands transcribed from the <CharString name="uni100d1"> dump.
commands = [
    # In a Type 2 charstring the first stack-clearing operator may carry one
    # extra leading operand: the glyph's advance width.  Here 126 is that
    # width and (15, -1) is the real relative move.
    ("rmoveto", 126, 15, -1),
    ("rlineto", 0, 0),
    ("rlineto", -2, 7),
    ("rrcurveto", 43, 15, 25, 25, 7, 35),
    ("rlineto", 0, 0),
    ("rrcurveto", -11, -11, -12, -6, -13, 0),
    ("rlineto", 0, 0),
    ("rrcurveto", -27, 3, -15, 18, -2, 33),
    ("rlineto", 0, 0),
    ("rrcurveto", 1, 41, 17, 21, 33, 1),
    ("rlineto", 0, 0),
    ("rrcurveto", 35, -1, 18, -25, 1, -49),
    ("rlineto", 0, 0),
    ("rrcurveto", 0, -59, -33, -36, -66, -12),
    ("rmoveto", 75, 95),
    ("rlineto", 0, 0),
    ("rrcurveto", 2, 53, -10, 26, -22, -1),
    ("rlineto", 0, 0),
    ("rrcurveto", -19, 0, -9, -16, 0, -32),
    ("rlineto", 0, 0),
    ("rrcurveto", -1, -33, 10, -16, 21, 1),
    ("rlineto", 0, 0),
    ("rrcurveto", 11, 1, 9, 6, 8, 11),
    ("endchar",),
]
# Current pen position; every operand is relative to it.
x, y = 0, 0
# Accumulated (matplotlib path code, absolute vertex) pairs.
path_data = []
for command in commands:
    if command[0] == "rmoveto":
        # Use the LAST two operands so that an optional leading width value
        # is not mistaken for dx.
        x += command[-2]
        y += command[-1]
        path_data.append((Path.MOVETO, (x, y)))
    elif command[0] == "rlineto":
        x += command[1]
        y += command[2]
        path_data.append((Path.LINETO, (x, y)))
    elif command[0] == "rrcurveto":
        # Each operand pair is relative to the previous point of the curve:
        # control point 1, control point 2, then the end point.
        x1, y1 = x + command[1], y + command[2]
        x2, y2 = x1 + command[3], y1 + command[4]
        x3, y3 = x2 + command[5], y2 + command[6]
        path_data.append((Path.CURVE4, (x1, y1)))
        path_data.append((Path.CURVE4, (x2, y2)))
        path_data.append((Path.CURVE4, (x3, y3)))
        x, y = x3, y3
    # "endchar" carries no geometry and is intentionally ignored.
# Build the matplotlib path.
codes, verts = zip(*path_data)
path = Path(verts, codes)
# Draw the glyph filled in black.
fig, ax = plt.subplots()
patch = patches.PathPatch(path, facecolor="black", lw=2)
ax.add_patch(patch)
ax.set_aspect("equal")
ax.autoscale()
# Hide axes and tick labels so that only the glyph ends up in the image
# handed to the OCR engine.
ax.axis("off")
# Save the rendered glyph, then release the figure.
fig.savefig("charstring.png")
plt.close(fig)
# Recognise the character in the saved image with ddddocr.
ocr = ddddocr.DdddOcr()
with open("charstring.png", "rb") as f:
    img_bytes = f.read()
result = ocr.classification(img_bytes)
print("识别结果:", result)
画出来的结果:

ddddocr识别的结果:

使用matplotlib实现
import io
import base64
import ddddocr
from fontTools.ttLib import TTFont
from fontTools.pens.basePen import BasePen
from matplotlib import pyplot as plt
from matplotlib.path import Path
from matplotlib.patches import PathPatch
# Module-level DdddOcr instance, created once and reused by get_font_data for every glyph.
ocr = ddddocr.DdddOcr()
class MatplotlibPen(BasePen):
    """Pen that records a glyph outline as matplotlib path segments.

    Every drawing callback appends ``(path_code, point)`` tuples to
    ``path_data``; the caller can unzip that list into the ``codes`` and
    ``vertices`` arguments of ``matplotlib.path.Path``.
    """

    def __init__(self, glyphSet):
        super().__init__(glyphSet)
        # Collected (Path code, (x, y)) pairs, in drawing order.
        self.path_data = []
        # Start point of the contour currently being drawn.
        self._contour_start = None

    def _moveTo(self, p0):
        # A move starts a new contour; remember where it began so that
        # _closePath can refer back to it.
        self._contour_start = p0
        self.path_data.append((Path.MOVETO, p0))

    def _lineTo(self, p1):
        self.path_data.append((Path.LINETO, p1))

    def _curveToOne(self, p1, p2, p3):
        # A cubic segment is stored as three consecutive CURVE4 vertices:
        # two control points followed by the end point.
        self.path_data.extend((Path.CURVE4, pt) for pt in (p1, p2, p3))

    def _closePath(self):
        # Close against the start of the *current* contour, not the first
        # vertex of the whole glyph; do nothing if no contour was started.
        if self._contour_start is None:
            return
        self.path_data.append((Path.CLOSEPOLY, self._contour_start))
def get_font_data(font_str):
    """Build a glyph-name -> recognised-text map for a base64-encoded font.

    Each glyph outline is recorded with ``MatplotlibPen``, drawn as a filled
    black patch, rendered to an in-memory PNG and passed to the module-level
    ``ocr`` instance.

    Args:
        font_str: Base64-encoded contents of the font file.

    Returns:
        dict mapping glyph name to the OCR result.  Glyphs with an empty
        outline map to ``""``; glyphs whose rendering or recognition raised
        are reported on stdout and left out of the dict.
    """
    font = TTFont(io.BytesIO(base64.b64decode(font_str)))
    glyph_set = font.getGlyphSet()
    result = {}
    for glyph_name in font.getGlyphNames():
        fig = None
        try:
            # Replay the glyph outline into a fresh pen.
            pen = MatplotlibPen(glyph_set)
            glyph_set[glyph_name].draw(pen)
            if not pen.path_data:
                # Nothing to draw, so nothing to recognise.
                result[glyph_name] = ""
                continue
            codes, verts = zip(*pen.path_data)
            fig, ax = plt.subplots()
            ax.add_patch(PathPatch(Path(verts, codes), facecolor="black", lw=2))
            ax.set_aspect("equal")
            ax.autoscale_view()
            ax.axis("off")  # keep axes and tick labels out of the OCR input
            # Render to memory instead of a file on disk.
            with io.BytesIO() as img_bytes:
                fig.savefig(img_bytes, format="png")
                result[glyph_name] = ocr.classification(img_bytes.getvalue())
        except Exception as e:
            # Best effort: one bad glyph must not abort the whole mapping,
            # but the failure is reported instead of being swallowed.
            print(f"Failed to recognise glyph {glyph_name}: {e}")
        finally:
            # Always release the figure, including on the error path.
            if fig is not None:
                plt.close(fig)
    return result
if __name__ == "__main__":
font_str = "T1RUTwAJAIAAAwAQQ0ZGIIpi0aIAAATIAAADjk9TLzJlk+AaAAABAAAAAGBjbWFwACgMUgAABAQAAACkaGVhZCZ5+TwAAACcAAAANmhoZWEA3wBMAAAA1AAAACRobXR4B3IAAAAACFgAAAAgbWF4cAAIUAAAAAD4AAAABm5hbWUKXYQxAAABYAAAAqNwb3N0AAMAAAAABKgAAAAgAAEAAAABAACUNrWcXw889QADAQAAAAAA43NdSAAAAADjc11IAAT//wB4ALUAAAADAAIAAAAAAAAAAQAAAN3/xQAAAH4AAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAgAAFAAAAgAAAADAH4B9AAFAAACigK7AAAAjAKKArsAAAHfADEBAgAAAAAAAAAAAAAAAIAAAAEAAAAAAAAAAAAAAABYWFhYAEAAywDRAN3/xQAAALUAAQAAAAEAAAAAAG8AtQAAAAAAAAAAACIBngABAAAAAAAAAAEAQgABAAAAAAABAAwAAAABAAAAAAACAAYAJAABAAAAAAADABUAxgABAAAAAAAEABMANgABAAAAAAAFAAsApQABAAAAAAAGABIAbwABAAAAAAAHAAEAQgABAAAAAAAIAAEAQgABAAAAAAAJAAEAQgABAAAAAAAKAAEAQgABAAAAAAALAAEAQgABAAAAAAAMAAEAQgABAAAAAAANAAEAQgABAAAAAAAOAAEAQgABAAAAAAAQAAwAAAABAAAAAAARAAYAJAADAAEECQAAAAIAYQADAAEECQABABgADAADAAEECQACAAwAKgADAAEECQADACoA2wADAAEECQAEACYASQADAAEECQAFABYAsAADAAEECQAGACQAgQADAAEECQAHAAIAYQADAAEECQAIAAIAYQADAAEECQAJAAIAYQADAAEECQAKAAIAYQADAAEECQALAAIAYQADAAEECQAMAAIAYQADAAEECQANAAIAYQADAAEECQAOAAIAYQADAAEECQAQABgADAADAAEECQARAAwAKk9wZW5UeXBlU2FucwBPAHAAZQBuAFQAeQBwAGUAUwBhAG4Ac01lZGl1bQBNAGUAZABpAHUAbU9wZW5UeXBlU2FucyBNZWRpdW0ATwBwAGUAbgBUAHkAcABlAFMAYQBuAHMAIABNAGUAZABpAHUAbU9wZW5UeXBlU2Fuc01lZGl1bQBPAHAAZQBuAFQAeQBwAGUAUwBhAG4AcwBNAGUAZABpAHUAbVZlcnNpb24gMC4xAFYAZQByAHMAaQBvAG4AIAAwAC4AMSA6T3BlblR5cGVTYW5zIE1lZGl1bQAgADoATwBwAGUAbgBUAHkAcABlAFMAYQBuAHMAIABNAGUAZABpAHUAbQAAAAACAAMAAQAAABQAAwAKAAAANAAEACAAAAAEAAQAAQAAAAD//wAAAAD//wAAAAEAAAAAAAwAAAAAAHAAAAAAAAAACAAAAAAAAAAAAAAAAAABAMsAAQDLAAAAAQABAMwAAQDMAAAAAgABAM0AAQDNAAAAAwABAM4AAQDOAAAABAABAM8AAQDPAAAABQABANAAAQDQAAAABgABANEAAQDRAAAABwADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAEAQABAQETT3BlblR5cGVTYW5zTWVkaXVtAAEBAT/4GwD4HAL4HQP4HgSLivdx9xIFHQAAANcPHQAAAOYRix0AAAOOEh4KADkGJf8eDx4PHgoAOQYl/x4PHg8MBwALAQEMHysxOUFJUVlhaVZlcnNpb24gMC4xT3BlblR5cGVTYW5zIE1lZGl1bU9wZW5UeXBlU2Fuc01lZGl1bXVuaTEwMGNidW5pMTAwY2N1bmkxMDBjZHVuaTEwMGNldW5pMTAwY2Z1bmkxMDBkMHVuaTEwMGQxAAAAAYsBjAGNAY4BjwGQAZEACAIAAQAEAEsAxQEfAVQBggIkApT6lA73Eu2OFUeLBYuSBZuL
BZCLjo6LkgiLiwWL9xoFi5GJjYaICIuLBXuDBYiTBbKhBZKHBYv7MQWLhI6IkIsIi4sFm4sFi4QFDvcS7fcwFVCLBYNnBb2MpHaMYQiLiwWIZnV3YYgIi4sFd4uBkIqUCIuLBYuTjo+SiwiLiwWQjJGIk4QIi4sFk4aRiI6LCIuLBZyMlJmMpQiLiwWNq3eaYYoIi4sFiIuKjYuOCIuLBZrYBdWLBYd7BYuIiYmIiwiLiwUO9xLK90EVi4sFdoyAb4tTCIuLBYpRlm+hjQiLiwWhipanisQIi4sFi8OAp3aKCIuTFYuLBayHnm2QUgiLiwWIUnhtaIgIi4sFaI14qYnGCIuLBY/EnqmsjggO9xLpxxWkiwWLdwVyiwWLYgV3iwWLtAVFiwWLngXZ9w4Fl4sFi/sNBTyLFcaLBYvnBVAvBQ73Epb3RRXwiwWLhQVW+0AFdosFv/cxBVOLBYWMiIiKhQiLiwWEdQWFiwWLvgUO9xKy3xWLiwWJkwWnlZmaip4Ii4sFi5uEk36LCIuLBXyMfYF+dwiLiwWFjgWWqpyboowIi4sFpYqYf4xzCIuLBYt8gn56fwiLiwWkiJd8i3EIi4sFiWRzd1yJCIuLBXmMgpCKlAiLiwWMko6PkYwIi4sFjouQiJKGCIuLBYqMjIuNigiLiwWShpGJkIsIi4sFno6VmY2kCIuLBY6qepZmgwgO9xKaihWLiwWJkgW2mqSkkq4Ii4sFgIB/hX6LCIuLBXCOfJ2JrAiLiwWMtJygrIwIi4sFroqdcoxaCIuLBYtQamdJfwjW6hWLiwWNwIGldYoIi4sFeIuCe4trCIuLBYpqlXugjAiLiwWWjJSRk5YIDgAABAAAAAB+AAAAfgAAAH4AAAB+AAAAfgAAAH4AAAB+AAA="
font_map = get_font_data(font_str)
# 15014777339  (presumably the expected decoded text -- confirm)
txt = "𐃋𐃌𐃍𐃋𐃎𐃏𐃏𐃏𐃐𐃐𐃑"
for key, value in font_map.items():
    # Form 1: the text still contains HTML numeric entities such as
    # "&#x100cb;" (glyph name "uni100cb").
    entity = key.replace("uni", "&#x").lower() + ";"
    txt = txt.replace(entity, value)
    # Form 2: the entities were already decoded to the literal code points,
    # as in the txt literal above; replace the character itself as well.
    if key.startswith("uni"):
        try:
            txt = txt.replace(chr(int(key[3:], 16)), value)
        except (ValueError, OverflowError):
            # Glyph name does not encode a single valid code point.
            pass
print(txt)
输出结果:

使用 PIL 实现:内存占用更少、速度更快,但识别准确率较低;如果只是识别纯数字,基本没有问题
import io
import base64
import ddddocr
from pathlib import Path
from PIL import Image, ImageDraw
from fontTools.ttLib import TTFont
from fontTools.pens.recordingPen import RecordingPen
# Directory that contains this script.
FILE_DIR = Path(__file__).parent
# Module-level DdddOcr instance, created once and reused by render_and_ocr_charstring.
ocr = ddddocr.DdddOcr()
def render_and_ocr_charstring(points):
    """Rasterise outline points as one filled polygon and OCR the image.

    Args:
        points: Sequence of ``(x, y)`` coordinates describing the outline.

    Returns:
        The string returned by ``ocr.classification`` for the rendered
        image, or ``""`` when ``points`` is empty or rendering/recognition
        fails.
    """
    if not points:
        # Nothing to draw; skip rendering instead of failing inside min().
        return ""
    try:
        # White 1-bit canvas (keeps memory usage low).
        width, height = 300, 300
        img = Image.new("1", (width, height), "white")
        draw = ImageDraw.Draw(img)
        # Shift the outline so that no coordinate is negative.
        min_x = min(px for px, _ in points)
        min_y = min(py for _, py in points)
        offset_x = -min_x if min_x < 0 else 0
        offset_y = -min_y if min_y < 0 else 0
        shifted = [(px + offset_x, py + offset_y) for px, py in points]
        # Shrink uniformly when the outline would not fit on the canvas;
        # outlines that already fit are left untouched.
        max_x = max(px for px, _ in shifted)
        max_y = max(py for _, py in shifted)
        scale = min(
            1.0,
            width / max_x if max_x > 0 else 1.0,
            height / max_y if max_y > 0 else 1.0,
        )
        if scale < 1.0:
            shifted = [(px * scale, py * scale) for px, py in shifted]
        # Flip vertically: image rows grow downwards.
        flipped = [(px, height - py) for px, py in shifted]
        # Fill the enclosed area in black.
        draw.polygon(flipped, fill="black")
        img = img.convert("L")  # grayscale for the OCR engine
        with io.BytesIO() as img_bytes:
            img.save(img_bytes, format="PNG")
            img.close()  # release the image's internal resources
            return ocr.classification(img_bytes.getvalue())
    except Exception as e:
        print(f"OCR failed: {e}")
        return ""
def extract_charstring_points(char_string):
    """Replay a CharString into a RecordingPen and flatten its coordinates.

    Args:
        char_string: Object exposing ``draw(pen)``.

    Returns:
        A flat list holding every coordinate of every recorded operation, in
        drawing order; ``[]`` if drawing fails.
    """
    try:
        recorder = RecordingPen()
        char_string.draw(recorder)
        # Operations without arguments contribute nothing to the result.
        return [pt for _op, args in recorder.value if args for pt in args]
    except Exception as e:
        print(f"Failed to extract with pen: {e}")
        return []
def extract_charstrings(font_str):
    """Decode a base64 font and return its CFF charstrings and glyph names.

    Args:
        font_str: Base64-encoded contents of the font file.

    Returns:
        ``(CharStrings, glyph_names)`` taken from the first CFF top dict,
        with the first charset entry (.notdef) dropped; ``({}, [])`` if
        decoding or parsing fails.
    """
    try:
        stream = io.BytesIO(base64.b64decode(font_str))
        cff_top = TTFont(stream)["CFF "].cff.topDictIndex[0]
        glyph_programs = cff_top.CharStrings
        glyph_order = cff_top.charset[1:]  # skip .notdef
        return glyph_programs, glyph_order
    except Exception as e:
        print(f"Failed to extract charstrings: {e}")
        return {}, []
def map_glyphs_to_text(char_strings, glyph_names):
    """Return a dict mapping each glyph name to its OCR-recognised text.

    Args:
        char_strings: Mapping from glyph name to its CharString.
        glyph_names: Iterable of glyph names to process.
    """
    return {
        name: render_and_ocr_charstring(
            extract_charstring_points(char_strings[name])
        )
        for name in glyph_names
    }
if __name__ == "__main__":
font_str = "T1RUTwAJAIAAAwAQQ0ZGIIpi0aIAAATIAAADjk9TLzJlk+AaAAABAAAAAGBjbWFwACgMUgAABAQAAACkaGVhZCZ5+TwAAACcAAAANmhoZWEA3wBMAAAA1AAAACRobXR4B3IAAAAACFgAAAAgbWF4cAAIUAAAAAD4AAAABm5hbWUKXYQxAAABYAAAAqNwb3N0AAMAAAAABKgAAAAgAAEAAAABAACUNrWcXw889QADAQAAAAAA43NdSAAAAADjc11IAAT//wB4ALUAAAADAAIAAAAAAAAAAQAAAN3/xQAAAH4AAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAgAAFAAAAgAAAADAH4B9AAFAAACigK7AAAAjAKKArsAAAHfADEBAgAAAAAAAAAAAAAAAIAAAAEAAAAAAAAAAAAAAABYWFhYAEAAywDRAN3/xQAAALUAAQAAAAEAAAAAAG8AtQAAAAAAAAAAACIBngABAAAAAAAAAAEAQgABAAAAAAABAAwAAAABAAAAAAACAAYAJAABAAAAAAADABUAxgABAAAAAAAEABMANgABAAAAAAAFAAsApQABAAAAAAAGABIAbwABAAAAAAAHAAEAQgABAAAAAAAIAAEAQgABAAAAAAAJAAEAQgABAAAAAAAKAAEAQgABAAAAAAALAAEAQgABAAAAAAAMAAEAQgABAAAAAAANAAEAQgABAAAAAAAOAAEAQgABAAAAAAAQAAwAAAABAAAAAAARAAYAJAADAAEECQAAAAIAYQADAAEECQABABgADAADAAEECQACAAwAKgADAAEECQADACoA2wADAAEECQAEACYASQADAAEECQAFABYAsAADAAEECQAGACQAgQADAAEECQAHAAIAYQADAAEECQAIAAIAYQADAAEECQAJAAIAYQADAAEECQAKAAIAYQADAAEECQALAAIAYQADAAEECQAMAAIAYQADAAEECQANAAIAYQADAAEECQAOAAIAYQADAAEECQAQABgADAADAAEECQARAAwAKk9wZW5UeXBlU2FucwBPAHAAZQBuAFQAeQBwAGUAUwBhAG4Ac01lZGl1bQBNAGUAZABpAHUAbU9wZW5UeXBlU2FucyBNZWRpdW0ATwBwAGUAbgBUAHkAcABlAFMAYQBuAHMAIABNAGUAZABpAHUAbU9wZW5UeXBlU2Fuc01lZGl1bQBPAHAAZQBuAFQAeQBwAGUAUwBhAG4AcwBNAGUAZABpAHUAbVZlcnNpb24gMC4xAFYAZQByAHMAaQBvAG4AIAAwAC4AMSA6T3BlblR5cGVTYW5zIE1lZGl1bQAgADoATwBwAGUAbgBUAHkAcABlAFMAYQBuAHMAIABNAGUAZABpAHUAbQAAAAACAAMAAQAAABQAAwAKAAAANAAEACAAAAAEAAQAAQAAAAD//wAAAAD//wAAAAEAAAAAAAwAAAAAAHAAAAAAAAAACAAAAAAAAAAAAAAAAAABAMsAAQDLAAAAAQABAMwAAQDMAAAAAgABAM0AAQDNAAAAAwABAM4AAQDOAAAABAABAM8AAQDPAAAABQABANAAAQDQAAAABgABANEAAQDRAAAABwADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAEAQABAQETT3BlblR5cGVTYW5zTWVkaXVtAAEBAT/4GwD4HAL4HQP4HgSLivdx9xIFHQAAANcPHQAAAOYRix0AAAOOEh4KADkGJf8eDx4PHgoAOQYl/x4PHg8MBwALAQEMHysxOUFJUVlhaVZlcnNpb24gMC4xT3BlblR5cGVTYW5zIE1lZGl1bU9wZW5UeXBlU2Fuc01lZGl1bXVuaTEwMGNidW5pMTAwY2N1bmkxMDBjZHVuaTEwMGNldW5pMTAwY2Z1bmkxMDBkMHVuaTEwMGQxAAAAAYsBjAGNAY4BjwGQAZEACAIAAQAEAEsAxQEfAVQBggIkApT6lA73Eu2OFUeLBYuSBZuL
BZCLjo6LkgiLiwWL9xoFi5GJjYaICIuLBXuDBYiTBbKhBZKHBYv7MQWLhI6IkIsIi4sFm4sFi4QFDvcS7fcwFVCLBYNnBb2MpHaMYQiLiwWIZnV3YYgIi4sFd4uBkIqUCIuLBYuTjo+SiwiLiwWQjJGIk4QIi4sFk4aRiI6LCIuLBZyMlJmMpQiLiwWNq3eaYYoIi4sFiIuKjYuOCIuLBZrYBdWLBYd7BYuIiYmIiwiLiwUO9xLK90EVi4sFdoyAb4tTCIuLBYpRlm+hjQiLiwWhipanisQIi4sFi8OAp3aKCIuTFYuLBayHnm2QUgiLiwWIUnhtaIgIi4sFaI14qYnGCIuLBY/EnqmsjggO9xLpxxWkiwWLdwVyiwWLYgV3iwWLtAVFiwWLngXZ9w4Fl4sFi/sNBTyLFcaLBYvnBVAvBQ73Epb3RRXwiwWLhQVW+0AFdosFv/cxBVOLBYWMiIiKhQiLiwWEdQWFiwWLvgUO9xKy3xWLiwWJkwWnlZmaip4Ii4sFi5uEk36LCIuLBXyMfYF+dwiLiwWFjgWWqpyboowIi4sFpYqYf4xzCIuLBYt8gn56fwiLiwWkiJd8i3EIi4sFiWRzd1yJCIuLBXmMgpCKlAiLiwWMko6PkYwIi4sFjouQiJKGCIuLBYqMjIuNigiLiwWShpGJkIsIi4sFno6VmY2kCIuLBY6qepZmgwgO9xKaihWLiwWJkgW2mqSkkq4Ii4sFgIB/hX6LCIuLBXCOfJ2JrAiLiwWMtJygrIwIi4sFroqdcoxaCIuLBYtQamdJfwjW6hWLiwWNwIGldYoIi4sFeIuCe4trCIuLBYpqlXugjAiLiwWWjJSRk5YIDgAABAAAAAB+AAAAfgAAAH4AAAB+AAAAfgAAAH4AAAB+AAA="
# 15014777339  (presumably the expected decoded text -- confirm)
txt = "𐃋𐃌𐃍𐃋𐃎𐃏𐃏𐃏𐃐𐃐𐃑"
char_strings, glyph_names = extract_charstrings(font_str)
glyph_map = map_glyphs_to_text(char_strings, glyph_names)
for key, value in glyph_map.items():
    # Form 1: the text still contains HTML numeric entities such as
    # "&#x100cb;" (glyph name "uni100cb").
    txt = txt.replace(key.replace("uni", "&#x").lower() + ";", value)
    # Form 2: the entities were already decoded to the literal code points,
    # as in the txt literal above; replace the character itself as well.
    if key.startswith("uni"):
        try:
            txt = txt.replace(chr(int(key[3:], 16)), value)
        except (ValueError, OverflowError):
            # Glyph name does not encode a single valid code point.
            pass
print(txt)
浙公网安备 33010602011771号