中国供应商字体解密

https://www.gys.cn/buy/5420348615.html

字形轮廓是由 CharString 中的相对坐标指令画出来的,关键指令:rmoveto、rlineto、rrcurveto 等。

"""
<CharString name="uni100d1">
    126 15 -1 rmoveto
    0 0 rlineto
    -2 7 rlineto
    43 15 25 25 7 35 rrcurveto
    0 0 rlineto
    -11 -11 -12 -6 -13 0 rrcurveto
    0 0 rlineto
    -27 3 -15 18 -2 33 rrcurveto
    0 0 rlineto
    1 41 17 21 33 1 rrcurveto
    0 0 rlineto
    35 -1 18 -25 1 -49 rrcurveto
    0 0 rlineto
    0 -59 -33 -36 -66 -12 rrcurveto
    75 95 rmoveto
    0 0 rlineto
    2 53 -10 26 -22 -1 rrcurveto
    0 0 rlineto
    -19 0 -9 -16 0 -32 rrcurveto
    0 0 rlineto
    -1 -33 10 -16 21 1 rrcurveto
    0 0 rlineto
    11 1 9 6 8 11 rrcurveto
    endchar
</CharString>
"""
import ddddocr
import matplotlib.pyplot as plt
from matplotlib.path import Path
import matplotlib.patches as patches


# Type 2 CharString program of glyph "uni100d1", one tuple per operator:
# (operator name, *operands), copied verbatim from the font dump.
commands = [
    ("rmoveto", 126, 15, -1),
    ("rlineto", 0, 0),
    ("rlineto", -2, 7),
    ("rrcurveto", 43, 15, 25, 25, 7, 35),
    ("rlineto", 0, 0),
    ("rrcurveto", -11, -11, -12, -6, -13, 0),
    ("rlineto", 0, 0),
    ("rrcurveto", -27, 3, -15, 18, -2, 33),
    ("rlineto", 0, 0),
    ("rrcurveto", 1, 41, 17, 21, 33, 1),
    ("rlineto", 0, 0),
    ("rrcurveto", 35, -1, 18, -25, 1, -49),
    ("rlineto", 0, 0),
    ("rrcurveto", 0, -59, -33, -36, -66, -12),
    ("rmoveto", 75, 95),
    ("rlineto", 0, 0),
    ("rrcurveto", 2, 53, -10, 26, -22, -1),
    ("rlineto", 0, 0),
    ("rrcurveto", -19, 0, -9, -16, 0, -32),
    ("rlineto", 0, 0),
    ("rrcurveto", -1, -33, 10, -16, 21, 1),
    ("rlineto", 0, 0),
    ("rrcurveto", 11, 1, 9, 6, 8, 11),
    ("endchar",),
]

# Current pen position; every operand in the program is relative to it.
x, y = 0, 0

# Convert the relative operators into absolute (matplotlib path code, vertex) pairs.
path_data = []
for command in commands:
    op, *args = command
    if op == "rmoveto":
        # The first stack-clearing operator of a Type 2 CharString may carry the
        # glyph width as an extra leading operand (the 126 in the first command),
        # so dx/dy are always the LAST two operands, never the first two.
        dx, dy = args[-2:]
        x += dx
        y += dy
        path_data.append((Path.MOVETO, (x, y)))
    elif op == "rlineto":
        dx, dy = args
        x += dx
        y += dy
        path_data.append((Path.LINETO, (x, y)))
    elif op == "rrcurveto":
        # Six operands = three (dx, dy) pairs, each relative to the previous
        # point: control point 1, control point 2, end point.
        for i in range(0, 6, 2):
            x += args[i]
            y += args[i + 1]
            path_data.append((Path.CURVE4, (x, y)))
    # "endchar" carries no geometry and is skipped on purpose.

# Build the matplotlib path; zip(*...) splits the (code, vertex) pairs
# into one tuple of codes and one tuple of vertices.
codes, verts = zip(*path_data)
path = Path(verts, codes)

# Draw the outline as a black-filled patch on a fresh figure.
fig, ax = plt.subplots()
patch = patches.PathPatch(path, facecolor="black", lw=2)
ax.add_patch(patch)
ax.set_aspect("equal")
ax.autoscale()

# Write the rendering to disk, then release the figure so it does not stay
# registered in pyplot after the image has been saved.
fig.savefig("charstring.png")
plt.close(fig)

# Recognize the character in the saved image with ddddocr.
ocr = ddddocr.DdddOcr()
with open("charstring.png", "rb") as f:
    img_bytes = f.read()
result = ocr.classification(img_bytes)

print("识别结果:", result)

画出来的结果:

ddddocr识别的结果:

使用 fontTools 解析字体 + matplotlib 绘制实现

import io
import base64
import ddddocr
from fontTools.ttLib import TTFont
from fontTools.pens.basePen import BasePen

from matplotlib import pyplot as plt
from matplotlib.path import Path
from matplotlib.patches import PathPatch


# Module-level ddddocr recognizer: built once at import time and reused by
# get_font_data() for every glyph.
ocr = ddddocr.DdddOcr()


class MatplotlibPen(BasePen):
    """Pen that records a glyph outline as matplotlib path data.

    Every drawing callback appends ``(Path code, point)`` pairs to
    ``self.path_data``: one pair per move/line, three pairs per cubic curve,
    and one ``CLOSEPOLY`` pair when a contour is closed.
    """

    def __init__(self, glyphSet):
        super().__init__(glyphSet)
        self.path_data = []
        # Start point of the contour being drawn; None until the first moveTo.
        self._contour_start = None

    def _moveTo(self, p0):
        # A move starts a new contour, so remember where it has to close.
        self._contour_start = p0
        self.path_data.append((Path.MOVETO, p0))

    def _lineTo(self, p1):
        self.path_data.append((Path.LINETO, p1))

    def _curveToOne(self, p1, p2, p3):
        # matplotlib expects a cubic segment as three consecutive CURVE4
        # entries: control point, control point, end point.
        self.path_data.append((Path.CURVE4, p1))
        self.path_data.append((Path.CURVE4, p2))
        self.path_data.append((Path.CURVE4, p3))

    def _closePath(self):
        # Nothing has been drawn yet, so there is no contour to close.
        if self._contour_start is None:
            return
        # Close on the start of the CURRENT contour rather than on the first
        # point of the whole glyph, so multi-contour glyphs record correct data.
        self.path_data.append((Path.CLOSEPOLY, self._contour_start))


def get_font_data(font_str):
    """Map each glyph name of a base64-encoded font to the text OCR reads from it.

    Every glyph outline is drawn with ``MatplotlibPen``, rendered off-screen
    as a black-filled patch, and the PNG bytes are passed to the module-level
    ``ocr`` recognizer.

    Args:
        font_str: Base64 text of the font file.

    Returns:
        dict of glyph name -> recognized string. Glyphs without any outline
        map to ``""``. Glyphs whose rendering or recognition raises are
        reported on stdout and left out of the mapping.
    """
    font_data = base64.b64decode(font_str)
    font = TTFont(io.BytesIO(font_data))

    result = {}

    glyph_set = font.getGlyphSet()
    for glyph_name in font.getGlyphNames():
        fig = None  # set once a figure exists so `finally` knows what to close
        try:
            # Record the glyph outline as matplotlib path data.
            pen = MatplotlibPen(glyph_set)
            glyph_set[glyph_name].draw(pen)

            # A glyph with no outline has nothing to recognize.
            if not pen.path_data:
                result[glyph_name] = ""
                continue

            codes, verts = zip(*pen.path_data)

            # Render the outline as a black-filled shape with the axes hidden.
            fig, ax = plt.subplots()
            ax.add_patch(PathPatch(Path(verts, codes), facecolor="black", lw=2))
            ax.set_aspect("equal")
            ax.autoscale_view()
            ax.axis("off")

            # Keep the PNG in memory; OCR only needs the bytes.
            with io.BytesIO() as img_bytes:
                fig.savefig(img_bytes, format="png")
                result[glyph_name] = ocr.classification(img_bytes.getvalue())
        except Exception as e:
            # Best effort: one bad glyph must not abort the whole mapping,
            # but the failure should be visible instead of silently dropped.
            print(f"Failed to recognize glyph {glyph_name!r}: {e}")
        finally:
            # Close the figure on the error path too, so failed glyphs do not
            # leave figures open.
            if fig is not None:
                plt.close(fig)

    return result


if __name__ == "__main__":
    font_str = "T1RUTwAJAIAAAwAQQ0ZGIIpi0aIAAATIAAADjk9TLzJlk+AaAAABAAAAAGBjbWFwACgMUgAABAQAAACkaGVhZCZ5+TwAAACcAAAANmhoZWEA3wBMAAAA1AAAACRobXR4B3IAAAAACFgAAAAgbWF4cAAIUAAAAAD4AAAABm5hbWUKXYQxAAABYAAAAqNwb3N0AAMAAAAABKgAAAAgAAEAAAABAACUNrWcXw889QADAQAAAAAA43NdSAAAAADjc11IAAT//wB4ALUAAAADAAIAAAAAAAAAAQAAAN3/xQAAAH4AAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAgAAFAAAAgAAAADAH4B9AAFAAACigK7AAAAjAKKArsAAAHfADEBAgAAAAAAAAAAAAAAAIAAAAEAAAAAAAAAAAAAAABYWFhYAEAAywDRAN3/xQAAALUAAQAAAAEAAAAAAG8AtQAAAAAAAAAAACIBngABAAAAAAAAAAEAQgABAAAAAAABAAwAAAABAAAAAAACAAYAJAABAAAAAAADABUAxgABAAAAAAAEABMANgABAAAAAAAFAAsApQABAAAAAAAGABIAbwABAAAAAAAHAAEAQgABAAAAAAAIAAEAQgABAAAAAAAJAAEAQgABAAAAAAAKAAEAQgABAAAAAAALAAEAQgABAAAAAAAMAAEAQgABAAAAAAANAAEAQgABAAAAAAAOAAEAQgABAAAAAAAQAAwAAAABAAAAAAARAAYAJAADAAEECQAAAAIAYQADAAEECQABABgADAADAAEECQACAAwAKgADAAEECQADACoA2wADAAEECQAEACYASQADAAEECQAFABYAsAADAAEECQAGACQAgQADAAEECQAHAAIAYQADAAEECQAIAAIAYQADAAEECQAJAAIAYQADAAEECQAKAAIAYQADAAEECQALAAIAYQADAAEECQAMAAIAYQADAAEECQANAAIAYQADAAEECQAOAAIAYQADAAEECQAQABgADAADAAEECQARAAwAKk9wZW5UeXBlU2FucwBPAHAAZQBuAFQAeQBwAGUAUwBhAG4Ac01lZGl1bQBNAGUAZABpAHUAbU9wZW5UeXBlU2FucyBNZWRpdW0ATwBwAGUAbgBUAHkAcABlAFMAYQBuAHMAIABNAGUAZABpAHUAbU9wZW5UeXBlU2Fuc01lZGl1bQBPAHAAZQBuAFQAeQBwAGUAUwBhAG4AcwBNAGUAZABpAHUAbVZlcnNpb24gMC4xAFYAZQByAHMAaQBvAG4AIAAwAC4AMSA6T3BlblR5cGVTYW5zIE1lZGl1bQAgADoATwBwAGUAbgBUAHkAcABlAFMAYQBuAHMAIABNAGUAZABpAHUAbQAAAAACAAMAAQAAABQAAwAKAAAANAAEACAAAAAEAAQAAQAAAAD//wAAAAD//wAAAAEAAAAAAAwAAAAAAHAAAAAAAAAACAAAAAAAAAAAAAAAAAABAMsAAQDLAAAAAQABAMwAAQDMAAAAAgABAM0AAQDNAAAAAwABAM4AAQDOAAAABAABAM8AAQDPAAAABQABANAAAQDQAAAABgABANEAAQDRAAAABwADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAEAQABAQETT3BlblR5cGVTYW5zTWVkaXVtAAEBAT/4GwD4HAL4HQP4HgSLivdx9xIFHQAAANcPHQAAAOYRix0AAAOOEh4KADkGJf8eDx4PHgoAOQYl/x4PHg8MBwALAQEMHysxOUFJUVlhaVZlcnNpb24gMC4xT3BlblR5cGVTYW5zIE1lZGl1bU9wZW5UeXBlU2Fuc01lZGl1bXVuaTEwMGNidW5pMTAwY2N1bmkxMDBjZHVuaTEwMGNldW5pMTAwY2Z1bmkxMDBkMHVuaTEwMGQxAAAAAYsBjAGNAY4BjwGQAZEACAIAAQAEAEsAxQEfAVQBggIkApT6lA73Eu2OFUeLBYuS
BZuLBZCLjo6LkgiLiwWL9xoFi5GJjYaICIuLBXuDBYiTBbKhBZKHBYv7MQWLhI6IkIsIi4sFm4sFi4QFDvcS7fcwFVCLBYNnBb2MpHaMYQiLiwWIZnV3YYgIi4sFd4uBkIqUCIuLBYuTjo+SiwiLiwWQjJGIk4QIi4sFk4aRiI6LCIuLBZyMlJmMpQiLiwWNq3eaYYoIi4sFiIuKjYuOCIuLBZrYBdWLBYd7BYuIiYmIiwiLiwUO9xLK90EVi4sFdoyAb4tTCIuLBYpRlm+hjQiLiwWhipanisQIi4sFi8OAp3aKCIuTFYuLBayHnm2QUgiLiwWIUnhtaIgIi4sFaI14qYnGCIuLBY/EnqmsjggO9xLpxxWkiwWLdwVyiwWLYgV3iwWLtAVFiwWLngXZ9w4Fl4sFi/sNBTyLFcaLBYvnBVAvBQ73Epb3RRXwiwWLhQVW+0AFdosFv/cxBVOLBYWMiIiKhQiLiwWEdQWFiwWLvgUO9xKy3xWLiwWJkwWnlZmaip4Ii4sFi5uEk36LCIuLBXyMfYF+dwiLiwWFjgWWqpyboowIi4sFpYqYf4xzCIuLBYt8gn56fwiLiwWkiJd8i3EIi4sFiWRzd1yJCIuLBXmMgpCKlAiLiwWMko6PkYwIi4sFjouQiJKGCIuLBYqMjIuNigiLiwWShpGJkIsIi4sFno6VmY2kCIuLBY6qepZmgwgO9xKaihWLiwWJkgW2mqSkkq4Ii4sFgIB/hX6LCIuLBXCOfJ2JrAiLiwWMtJygrIwIi4sFroqdcoxaCIuLBYtQamdJfwjW6hWLiwWNwIGldYoIi4sFeIuCe4trCIuLBYpqlXugjAiLiwWWjJSRk5YIDgAABAAAAAB+AAAAfgAAAH4AAAB+AAAAfgAAAH4AAAB+AAA="
    # NOTE(review): the base64 literal assigned to font_str above is wrapped
    # onto two physical lines; a plain "..." literal cannot contain a raw
    # newline, so join it back onto one line before running this script.
    font_map = get_font_data(font_str)

    # Obfuscated page text; it encodes the phone number 15014777339.
    txt = "&#x100cb;&#x100cc;&#x100cd;&#x100cb;&#x100ce;&#x100cf;&#x100cf;&#x100cf;&#x100d0;&#x100d0;&#x100d1;"

    # Rewrite every glyph name ("uni100cb") as the HTML entity used in the
    # page ("&#x100cb;"), paired with the character OCR recognized for it.
    replacements = [
        (glyph_name.replace("uni", "&#x").lower() + ";", recognized)
        for glyph_name, recognized in font_map.items()
    ]
    for entity, recognized in replacements:
        txt = txt.replace(entity, recognized)
    print(txt)

输出结果:

使用 PIL 实现:内存占用更少、速度更快,但识别准确率较低;如果只是识别纯数字,则基本没有问题。

import io
import base64
import ddddocr
from pathlib import Path
from PIL import Image, ImageDraw
from fontTools.ttLib import TTFont
from fontTools.pens.recordingPen import RecordingPen


# Directory that contains this script file.
FILE_DIR = Path(__file__).parent


# Module-level ddddocr recognizer: built once at import time and reused by
# render_and_ocr_charstring() for every glyph.
ocr = ddddocr.DdddOcr()


def render_and_ocr_charstring(points):
    """Rasterize outline points as one filled polygon and OCR the bitmap.

    Args:
        points: Sequence of ``(x, y)`` pairs in font units (y grows upwards).

    Returns:
        The string returned by ``ocr.classification`` for the rendered
        image, or ``""`` when ``points`` is empty or when rendering or
        recognition raises (the error is printed).
    """
    # An empty outline has nothing to draw; answer "" directly instead of
    # relying on min() raising on an empty sequence.
    if not points:
        return ""

    try:
        # White 1-bit canvas (1 bit per pixel keeps memory low).
        width, height = 300, 300
        img = Image.new("1", (width, height), "white")
        draw = ImageDraw.Draw(img)

        # Shift the outline only when it reaches into negative coordinates,
        # so that every point ends up non-negative.
        min_x = min(p[0] for p in points)
        min_y = min(p[1] for p in points)
        offset_x = -min_x if min_x < 0 else 0
        offset_y = -min_y if min_y < 0 else 0

        # Apply the shift and flip vertically in one pass: font y grows
        # upwards, image rows grow downwards. Rows are numbered 0..height-1,
        # so y == 0 must land on row height-1 (row `height` is off-canvas).
        points = [(x + offset_x, height - 1 - (y + offset_y)) for x, y in points]

        # Fill the closed region in black, then convert to grayscale.
        draw.polygon(points, fill="black")
        img = img.convert("L")
        # img.save(f"{FILE_DIR}/charstring_pillow.png")

        with io.BytesIO() as img_bytes:
            img.save(img_bytes, format="PNG")
            img.close()  # release the image's internal resources
            return ocr.classification(img_bytes.getvalue())
    except Exception as e:
        print(f"OCR failed: {e}")
        return ""


def extract_charstring_points(char_string):
    """Collect every point that drawing ``char_string`` emits, in order.

    The charstring is drawn into a ``RecordingPen`` and the operands of all
    recorded operations are concatenated into one flat list; operations
    without operands contribute nothing.

    Returns:
        The flat list of recorded points, or ``[]`` when drawing raises
        (the error is printed).
    """
    try:
        recorder = RecordingPen()
        char_string.draw(recorder)
        return [
            point
            for _operator, operands in recorder.value
            if operands
            for point in operands
        ]
    except Exception as e:
        print(f"Failed to extract with pen: {e}")
        return []


def extract_charstrings(font_str):
    """Decode a base64 font and return its CFF charstrings and glyph names.

    Returns:
        ``(CharStrings, glyph_names)`` taken from the first CFF top dict,
        with the first charset entry (.notdef) dropped from the names.
        On any failure the error is printed and ``({}, [])`` is returned.
    """
    try:
        font_stream = io.BytesIO(base64.b64decode(font_str))
        cff_table = TTFont(font_stream)["CFF "]
        first_top_dict = cff_table.cff.topDictIndex[0]
        outlines = first_top_dict.CharStrings
        names_without_notdef = first_top_dict.charset[1:]  # skip .notdef
        return outlines, names_without_notdef
    except Exception as e:
        print(f"Failed to extract charstrings: {e}")
        return {}, []


def map_glyphs_to_text(char_strings, glyph_names):
    """Recognize every named glyph and return ``{glyph name: recognized text}``.

    For each name, the charstring is looked up in ``char_strings``, its
    points are extracted, and the rendered shape is passed through OCR.
    """
    return {
        name: render_and_ocr_charstring(
            extract_charstring_points(char_strings[name])
        )
        for name in glyph_names
    }


if __name__ == "__main__":
    font_str = "T1RUTwAJAIAAAwAQQ0ZGIIpi0aIAAATIAAADjk9TLzJlk+AaAAABAAAAAGBjbWFwACgMUgAABAQAAACkaGVhZCZ5+TwAAACcAAAANmhoZWEA3wBMAAAA1AAAACRobXR4B3IAAAAACFgAAAAgbWF4cAAIUAAAAAD4AAAABm5hbWUKXYQxAAABYAAAAqNwb3N0AAMAAAAABKgAAAAgAAEAAAABAACUNrWcXw889QADAQAAAAAA43NdSAAAAADjc11IAAT//wB4ALUAAAADAAIAAAAAAAAAAQAAAN3/xQAAAH4AAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAgAAFAAAAgAAAADAH4B9AAFAAACigK7AAAAjAKKArsAAAHfADEBAgAAAAAAAAAAAAAAAIAAAAEAAAAAAAAAAAAAAABYWFhYAEAAywDRAN3/xQAAALUAAQAAAAEAAAAAAG8AtQAAAAAAAAAAACIBngABAAAAAAAAAAEAQgABAAAAAAABAAwAAAABAAAAAAACAAYAJAABAAAAAAADABUAxgABAAAAAAAEABMANgABAAAAAAAFAAsApQABAAAAAAAGABIAbwABAAAAAAAHAAEAQgABAAAAAAAIAAEAQgABAAAAAAAJAAEAQgABAAAAAAAKAAEAQgABAAAAAAALAAEAQgABAAAAAAAMAAEAQgABAAAAAAANAAEAQgABAAAAAAAOAAEAQgABAAAAAAAQAAwAAAABAAAAAAARAAYAJAADAAEECQAAAAIAYQADAAEECQABABgADAADAAEECQACAAwAKgADAAEECQADACoA2wADAAEECQAEACYASQADAAEECQAFABYAsAADAAEECQAGACQAgQADAAEECQAHAAIAYQADAAEECQAIAAIAYQADAAEECQAJAAIAYQADAAEECQAKAAIAYQADAAEECQALAAIAYQADAAEECQAMAAIAYQADAAEECQANAAIAYQADAAEECQAOAAIAYQADAAEECQAQABgADAADAAEECQARAAwAKk9wZW5UeXBlU2FucwBPAHAAZQBuAFQAeQBwAGUAUwBhAG4Ac01lZGl1bQBNAGUAZABpAHUAbU9wZW5UeXBlU2FucyBNZWRpdW0ATwBwAGUAbgBUAHkAcABlAFMAYQBuAHMAIABNAGUAZABpAHUAbU9wZW5UeXBlU2Fuc01lZGl1bQBPAHAAZQBuAFQAeQBwAGUAUwBhAG4AcwBNAGUAZABpAHUAbVZlcnNpb24gMC4xAFYAZQByAHMAaQBvAG4AIAAwAC4AMSA6T3BlblR5cGVTYW5zIE1lZGl1bQAgADoATwBwAGUAbgBUAHkAcABlAFMAYQBuAHMAIABNAGUAZABpAHUAbQAAAAACAAMAAQAAABQAAwAKAAAANAAEACAAAAAEAAQAAQAAAAD//wAAAAD//wAAAAEAAAAAAAwAAAAAAHAAAAAAAAAACAAAAAAAAAAAAAAAAAABAMsAAQDLAAAAAQABAMwAAQDMAAAAAgABAM0AAQDNAAAAAwABAM4AAQDOAAAABAABAM8AAQDPAAAABQABANAAAQDQAAAABgABANEAAQDRAAAABwADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAEAQABAQETT3BlblR5cGVTYW5zTWVkaXVtAAEBAT/4GwD4HAL4HQP4HgSLivdx9xIFHQAAANcPHQAAAOYRix0AAAOOEh4KADkGJf8eDx4PHgoAOQYl/x4PHg8MBwALAQEMHysxOUFJUVlhaVZlcnNpb24gMC4xT3BlblR5cGVTYW5zIE1lZGl1bU9wZW5UeXBlU2Fuc01lZGl1bXVuaTEwMGNidW5pMTAwY2N1bmkxMDBjZHVuaTEwMGNldW5pMTAwY2Z1bmkxMDBkMHVuaTEwMGQxAAAAAYsBjAGNAY4BjwGQAZEACAIAAQAEAEsAxQEfAVQBggIkApT6lA73Eu2OFUeLBYuS
BZuLBZCLjo6LkgiLiwWL9xoFi5GJjYaICIuLBXuDBYiTBbKhBZKHBYv7MQWLhI6IkIsIi4sFm4sFi4QFDvcS7fcwFVCLBYNnBb2MpHaMYQiLiwWIZnV3YYgIi4sFd4uBkIqUCIuLBYuTjo+SiwiLiwWQjJGIk4QIi4sFk4aRiI6LCIuLBZyMlJmMpQiLiwWNq3eaYYoIi4sFiIuKjYuOCIuLBZrYBdWLBYd7BYuIiYmIiwiLiwUO9xLK90EVi4sFdoyAb4tTCIuLBYpRlm+hjQiLiwWhipanisQIi4sFi8OAp3aKCIuTFYuLBayHnm2QUgiLiwWIUnhtaIgIi4sFaI14qYnGCIuLBY/EnqmsjggO9xLpxxWkiwWLdwVyiwWLYgV3iwWLtAVFiwWLngXZ9w4Fl4sFi/sNBTyLFcaLBYvnBVAvBQ73Epb3RRXwiwWLhQVW+0AFdosFv/cxBVOLBYWMiIiKhQiLiwWEdQWFiwWLvgUO9xKy3xWLiwWJkwWnlZmaip4Ii4sFi5uEk36LCIuLBXyMfYF+dwiLiwWFjgWWqpyboowIi4sFpYqYf4xzCIuLBYt8gn56fwiLiwWkiJd8i3EIi4sFiWRzd1yJCIuLBXmMgpCKlAiLiwWMko6PkYwIi4sFjouQiJKGCIuLBYqMjIuNigiLiwWShpGJkIsIi4sFno6VmY2kCIuLBY6qepZmgwgO9xKaihWLiwWJkgW2mqSkkq4Ii4sFgIB/hX6LCIuLBXCOfJ2JrAiLiwWMtJygrIwIi4sFroqdcoxaCIuLBYtQamdJfwjW6hWLiwWNwIGldYoIi4sFeIuCe4trCIuLBYpqlXugjAiLiwWWjJSRk5YIDgAABAAAAAB+AAAAfgAAAH4AAAB+AAAAfgAAAH4AAAB+AAA="
    # NOTE(review): the base64 literal assigned to font_str above is wrapped
    # onto two physical lines; a plain "..." literal cannot contain a raw
    # newline, so join it back onto one line before running this script.
    char_strings, glyph_names = extract_charstrings(font_str)
    glyph_map = map_glyphs_to_text(char_strings, glyph_names)

    # Obfuscated page text; it encodes the phone number 15014777339.
    txt = "&#x100cb;&#x100cc;&#x100cd;&#x100cb;&#x100ce;&#x100cf;&#x100cf;&#x100cf;&#x100d0;&#x100d0;&#x100d1;"

    # Swap each HTML entity ("&#x100cb;") for the character OCR recognized
    # for the glyph of the same code ("uni100cb").
    for glyph_name, recognized in glyph_map.items():
        entity = glyph_name.replace("uni", "&#x").lower() + ";"
        txt = txt.replace(entity, recognized)

    print(txt)

posted @ 2024-12-06 09:49  二二二狗子  阅读(53)  评论(0)    收藏  举报