实现 json path 来评估函数式解析器的损耗

目的

之前就考虑评估过 函数式解析器 在dotnet这些面向对象语言上有着一些损耗,虽然表意性很强,与ABNF范式结合使用,维护性大大提升

不过由于性能考虑(以及之前认为也或许没有太多机会实现解析器),就没打算继续深究

不过现实中确实多次需要实现解析器,每次从0手写,太费时间了,毕竟很多实现并不需要极致的解析性能(特别是在现在 ai code 疯狂的时代,老板哪会给那么多时间让你慢慢抠性能,老板们都常说ai几秒钟的事情,你个辣鸡还花几天)

所以为了血压和身体健康,只要性能将就,对于不太在意解析性能的场景,以后就这样偷懒吧 (比如今年搞的 VKProxy 里面的动态条件和简单模板替换, 路子可行的话也许后面还是换掉吧,维护多累呀)

当然也有高手搞过类似的库,比如 Parlot , 不过其不支持 stream, 理想情况还是想stream 也能支持, 所以现在先简单搞一波:参考 Parlot 简单实现 string 解析器底层以及 json path 简单版

然后和 Newtonsoft.json 在简单的 json path 场景比较,如果打不过,那恐怕就没有继续的必要了

简要说明函数式解析器

这里以最简单 json path "$.Name" 举例,

我们所需要解析即为 $ 开头, . 之后字符为属性名

换成代码大致为 Parser = Char('$').And(Char('.')).And(AnyExclude("[]().,\" '\r\n@$!=<\\?&|*:")).Eof()

像 Char And 这些方法在面向对象中我们会让方法生成解析算子实例,以达到与函数式相同效果, 这也是性能损耗的一个大点

比如 Char 方法

 /// <summary>
 /// Builds a parser operator instance for the single character <paramref name="c"/>.
 /// </summary>
 /// <param name="c">The character the returned <see cref="CharLiteral"/> is constructed with.</param>
 /// <returns>A new <see cref="CharLiteral"/> wrapping <paramref name="c"/>.</returns>
 public static Parser<char> Char(char c)
 {
     return new CharLiteral(c);
 }

具体算子实现

/// <summary>
/// Parser operator that accepts one character out of a fixed set and yields that character.
/// </summary>
public class CharLiteral : Parser<char>
{
    // Set of characters this parser accepts at the current cursor position.
    private SearchValues<char> _accepted;

    /// <summary>The accepted character(s) in string form, exactly as handed to the constructor.</summary>
    public string Value { get; private set; }

    /// <summary>Creates a parser that accepts only <paramref name="c"/>.</summary>
    public CharLiteral(char c)
    {
        Value = c.ToString();
        _accepted = SearchValues.Create(new[] { c });
    }

    /// <summary>Creates a parser that accepts any single character contained in <paramref name="c"/>.</summary>
    public CharLiteral(string c)
    {
        Value = c;
        _accepted = SearchValues.Create(c);
    }

    /// <summary>
    /// Tries to consume one accepted character at the cursor.
    /// </summary>
    /// <param name="context">Parse context supplying the cursor; notified on enter and exit.</param>
    /// <param name="result">On success, set to the consumed character with its start and end offsets.</param>
    /// <returns><c>true</c> when a character was consumed; <c>false</c> at end of input or on a mismatch (cursor not advanced).</returns>
    public override bool Parse(CharParseContext context, ref ParseResult<char> result)
    {
        context.EnterParser(this);
        var cursor = context.Cursor;

        var matched = !cursor.Eof && _accepted.Contains(cursor.Current);
        if (matched)
        {
            var ch = cursor.Current;
            var begin = cursor.Offset;
            cursor.Advance();
            result.Set(begin, cursor.Offset, ch);
        }

        // Single exit point: the context is always notified before returning.
        context.ExitParser(this);
        return matched;
    }
}

好了,借用经典话语:这是攻,这是防,这是boss,去吧

按照 [rfc9535](https://www.rfc-editor.org/rfc/rfc9535.html) 标准, 最终 json path 解析器大致如下:


/// <summary>
/// JSON path grammar expressed as static parser-combinator fields (the surrounding text states it
/// follows RFC 9535). <see cref="Parser"/> is the entry point: a full query followed by end of input.
/// Recursive rules are declared as <c>Deferred</c> placeholders and wired up in the static constructor.
/// </summary>
public class JsonPathParser
{
    //public static Parser<char> B = Char(new char[]
    //{ (char)0x20, // Space
    //  (char)0x09, //Horizontal tab
    //  (char)0x0A, // Line feed or New line
    //  (char)0x0D // Carriage return
    //});

    public static readonly Parser<char> RootIdentifier = Char('$').Name(nameof(RootIdentifier));
    public static readonly Parser<int> Int = Int().Name(nameof(Int));
    public static readonly Parser<char> DoubleQuoted = Char('"').Name(nameof(DoubleQuoted));
    public static readonly Parser<char> SingleQuoted = Char('\'').Name(nameof(SingleQuoted));
    public static readonly Parser<IStatement> WildcardSelector = Char('*').Then<IStatement>(static x => new WildcardSelectorStatment()).Name(nameof(WildcardSelector));
    public static readonly Parser<IStatement> IndexSelector = Int.Then<IStatement>(static x => new IndexSelectorStatment() { Index = x }).Name(nameof(IndexSelector));
    public static readonly Parser<TextSpan> StringLiteral = Between(DoubleQuoted, ZeroOrOne(Any("\"", mustHasEnd: true, escape: '\\')), DoubleQuoted).Or(Between(SingleQuoted, ZeroOrOne(Any("'", mustHasEnd: true, escape: '\\')), SingleQuoted)).Name(nameof(StringLiteral));
    public static readonly Parser<IStatement> NameSelector = StringLiteral.Then<IStatement>(static x => new Member() { Name = x.Span.ToString() }).Name(nameof(NameSelector));
    public static readonly Parser<int> Start = Int;
    public static readonly Parser<int> End = Int;
    public static readonly Parser<int> Step = Int;

    // Whitespace that is consumed and discarded between tokens.
    public static readonly Parser<Nothing> S = IgnoreChar(new char[]
    { (char)0x20, // Space
      (char)0x09, //Horizontal tab
      (char)0x0A, // Line feed or New line
      (char)0x0D // Carriage return
    }).Name(nameof(S));

    public static readonly Parser<char> CurrentNodeIdentifier = Char('@').Name(nameof(CurrentNodeIdentifier));
    public static readonly Parser<char> LogicalNotOp = Char('!').Name(nameof(LogicalNotOp));
    public static readonly Parser<string> ComparisonOp = Text("==").Or(Text("!=")).Or(Text("<=")).Or(Text(">=")).Or(Text("<")).Or(Text(">")).Name(nameof(ComparisonOp));
    public static readonly Parser<IStatement> Num = Decimal(NumberOptions.Float).Then<IStatement>(static x => new NumberValue(x)).Name(nameof(Num));
    public static readonly Parser<IStatement> True = Text("true").Then<IStatement>(static x => BoolValue.True).Name(nameof(True));
    public static readonly Parser<IStatement> False = Text("false").Then<IStatement>(static x => BoolValue.False).Name(nameof(False));
    public static readonly Parser<IStatement> Null = Text("null").Then<IStatement>(static x => NullValue.Value).Name(nameof(Null));

    // Characters excluded from shorthand member names and function names.
    private const string name = "[]().,\" '\r\n@$!=<\\?&|*:";

    //public static Parser<char> LCALPHA = Char('a', 'z');
    //public static Parser<char> DIGIT = Char('0', '9');
    //public static Parser<char> ALPHA = Char((char)0x41, (char)0x5A).Or(Char((char)0x61, (char)0x7A));
    public static readonly Parser<IStatement> MemberNameShorthand = AnyExclude(name).Then<IStatement>(static x => new Member { Name = x.Span.ToString() }).Name(nameof(MemberNameShorthand));

    //public static Parser<char> NameFirst = ALPHA.Or(Char('_')).Or(Char((char)0x80, (char)0xD7FF)).Or(Char((char)0xE000, (char)0xFFFF));
    //public static Parser<char> NameChar = NameFirst.Or(DIGIT);
    //public static Parser<char> FunctionNameFirst = LCALPHA;

    //public static Parser<char> FunctionNameChar = FunctionNameFirst.Or(Char('_')).Or(DIGIT);
    //public static Parser<string> FunctionName = FunctionNameFirst.And(ZeroOrMany(FunctionNameChar)).Then<string>(static x => throw new NotImplementedException());
    public static readonly Parser<string> FunctionName = AnyExclude(name).Then<string>(static x => x.Span.ToString()).Name(nameof(FunctionName));

    // [start] ':' [end] [':' [step]] with optional whitespace; missing parts become null.
    public static readonly Parser<IStatement> SliceSelector = Optional<int?>(Start.And(S).Then<int?>(static x => x.Item1), null).And(Char(':')).And(S).And(Optional<int?>(End.And(S).Then<int?>(static x => x.Item1), null)).And(Optional<int?>(Char(':').And(Optional<int?>(S.And(Step).Then<int?>(static x => x.Item2), null)).Then<int?>(static x => x.Item2))).Then<IStatement>(static x => new SliceStatement() { Start = x.Item1, End = x.Item4, Step = x.Item5 })
        .Name(nameof(SliceSelector));

    // Placeholder for the recursive logical expression rule; bound in the static constructor.
    public static readonly Deferred<IStatement> LogicalExpr = Deferred<IStatement>(nameof(LogicalExpr));

    public static readonly Parser<IStatement> FilterSelector = Char('?').And(S).And(LogicalExpr).Then<IStatement>(static x => new FilterSelectorStatement()
    {
        Statement = x.Item3
    }).Name(nameof(FilterSelector));

    public static readonly Parser<IStatement> Selector = NameSelector.Or(WildcardSelector).Or(SliceSelector).Or(IndexSelector).Or(FilterSelector).Name(nameof(Selector));

    // Parenthesized expression; the operator is "!" when a leading '!' was parsed, otherwise "(".
    public static readonly Parser<IStatement> ParenExpr = Optional(LogicalNotOp.And(S)).And(Char('(')).And(S).And(LogicalExpr).And(S).And(Char(')'))
        .Then<IStatement>(static x => new UnaryOperaterStatement()
        {
            Operator = x.Item1.Item1 == '!' ? "!" : "(",
            Statement = x.Item4
        }).Name(nameof(ParenExpr));

    // Placeholders for mutually recursive rules; bound in the static constructor.
    public static readonly Deferred<IReadOnlyList<(Nothing, IStatement)>> Segments = Deferred<IReadOnlyList<(Nothing, IStatement)>>(nameof(Segments));

    public static readonly Deferred<IStatement> FunctionExpr = Deferred<IStatement>(nameof(FunctionExpr));
    public static readonly Deferred<IStatement> JsonPathQuery = Deferred<IStatement>(nameof(JsonPathQuery));
    public static readonly Parser<IStatement> RelQuery = CurrentNodeIdentifier.And(Segments).Then<IStatement>(static x => new CurrentNode() { Child = ConvertSegments(x.Item2) }).Name(nameof(RelQuery));
    public static readonly Parser<IStatement> Literal = Num.Or(StringLiteral.Then<IStatement>(static x => new StringValue(x.Span.ToString()))).Or(True).Or(False).Or(Null).Name(nameof(Literal));
    public static readonly Parser<IStatement> NameSegment = Char('[').And(NameSelector).And(Char(']')).Then<IStatement>(static x => x.Item2).Or(Char('.').And(MemberNameShorthand).Then<IStatement>(static x => x.Item2)).Name(nameof(NameSegment));
    public static readonly Parser<IStatement> IndexSegment = Char('[').And(IndexSelector).And(Char(']')).Then<IStatement>(static x => x.Item2).Name(nameof(IndexSegment));

    public static readonly Parser<IStatement> SingularQuerySegments = ZeroOrMany(S.And(NameSegment.Or(IndexSegment))).Then<IStatement>(ConvertSegments).Name(nameof(SingularQuerySegments));

    public static readonly Parser<IStatement> RelSingularQuery = CurrentNodeIdentifier.And(SingularQuerySegments).Then<IStatement>(static x => new CurrentNode() { Child = x.Item2 }).Name(nameof(RelSingularQuery));
    public static readonly Parser<IStatement> AbsSingularQuery = RootIdentifier.And(SingularQuerySegments).Then<IStatement>(static x => new RootNode() { Child = x.Item2 }).Name(nameof(AbsSingularQuery));
    public static readonly Parser<IStatement> SingularQuery = RelSingularQuery.Or(AbsSingularQuery).Name(nameof(SingularQuery));
    public static readonly Parser<IStatement> Comparable = Literal.Or(SingularQuery).Or(FunctionExpr).Name(nameof(Comparable));
    public static readonly Parser<IStatement> ComparisonExpr = Comparable.And(S).And(ComparisonOp).And(S).And(Comparable).Then<IStatement>(static x => new OperatorStatement() { Left = x.Item1, Operator = x.Item3, Right = x.Item5 }).Name(nameof(ComparisonExpr));
    public static readonly Parser<IStatement> FilterQuery = RelQuery.Or(JsonPathQuery).Name(nameof(FilterQuery));
    public static readonly Parser<IStatement> FunctionArgument = FilterQuery.Or(LogicalExpr).Or(FunctionExpr).Or(Literal).Name(nameof(FunctionArgument));
    public static readonly Parser<IStatement> TestExpr = Optional(LogicalNotOp.And(S)).And(FilterQuery.Or(FunctionExpr)).Then<IStatement>(static x => x.Item1.Item1 == '!' ? new UnaryOperaterStatement() { Operator = "!", Statement = x.Item2 } : x.Item2).Name(nameof(TestExpr));
    public static readonly Parser<IStatement> BasicExpr = ParenExpr.Or(ComparisonExpr).Or(TestExpr).Name(nameof(BasicExpr));

    // Folds "a && b && c" into left-nested AndStatement nodes.
    public static readonly Parser<IStatement> LogicalAndExpr = BasicExpr.And(ZeroOrMany(S.And(Text("&&")).And(S).And(BasicExpr))).Then<IStatement>(static x =>
    {
        IStatement current = x.Item1;
        if (x.Item2 != null && x.Item2.Count > 0)
        {
            foreach (var item in x.Item2)
            {
                current = new AndStatement() { Left = current, Right = item.Item4 };
            }
        }
        return current;
    }).Name(nameof(LogicalAndExpr));

    // Folds "a || b || c" into left-nested OrStatement nodes.
    public static readonly Parser<IStatement> LogicalOrExpr = LogicalAndExpr.And(ZeroOrMany(S.And(Text("||")).And(S).And(LogicalAndExpr))).Then<IStatement>(static x =>
    {
        IStatement current = x.Item1;
        if (x.Item2 != null && x.Item2.Count > 0)
        {
            foreach (var item in x.Item2)
            {
                current = new OrStatement() { Left = current, Right = item.Item4 };
            }
        }
        return current;
    }).Name(nameof(LogicalOrExpr));

    // '[' selector (',' selector)* ']': a single selector is returned as-is, several are wrapped in a union.
    public static readonly Parser<IStatement> BracketedSelection = Char('[').And(S).And(Selector).And(ZeroOrMany(S.And(Char(',')).And(S).And(Selector))).And(S).And(Char(']'))
        .Then<IStatement>(static x =>
    {
        var list = new List<IStatement> { x.Item3 };
        if (x.Item4 != null)
            list.AddRange(x.Item4.Select(y => y.Item4));
        if (list.Count == 0)
            return null;
        return list.Count == 1 ? list[0] : new UnionSelectionStatement(list);
    }).Name(nameof(BracketedSelection));

    public static readonly Parser<IStatement> ChildSegment = BracketedSelection.Or(Char('.').And(WildcardSelector.Or(MemberNameShorthand)).Then<IStatement>(static x => x.Item2)).Name(nameof(ChildSegment));

    public static readonly Parser<IStatement> DescendantSegment = Char('.').And(Char('.')).And(BracketedSelection.Or(WildcardSelector).Or(MemberNameShorthand)).Then<IStatement>(static x => new WildcardSelectorStatment() { Child = x.Item3 }).Name(nameof(DescendantSegment));
    public static readonly Parser<IStatement> Segment = ChildSegment.Or(DescendantSegment).Name(nameof(Segment));

    /// <summary>Entry point: a complete JSON path query that must be followed by end of input.</summary>
    public static readonly Parser<IStatement> Parser;

    // Binds the deferred (recursive) rules once all the static fields above have been initialized.
    static JsonPathParser()
    {
        LogicalExpr.Parser = LogicalOrExpr;
        Segments.Parser = ZeroOrMany(S.And(Segment));
        //MemberNameShorthand.Parser = NameFirst.And(ZeroOrMany(NameChar)).Then<IStatement>(static x => new Member { Name = x.Item1 + new string(x.Item2.ToArray()) });
        FunctionExpr.Parser = FunctionName.And(Char('(')).And(S).And(Optional(FunctionArgument.And(ZeroOrMany(S.And(Char(',')).And(S).And(FunctionArgument))))).And(S).And(Char(')')).Then<IStatement>(static x =>
        {
            // Item4 is the optional "first argument + remaining arguments" pair.
            var args = new List<IStatement>();
            if (x.Item4.Item1 != null)
            {
                args.Add(x.Item4.Item1);
            }
            if (x.Item4.Item2 != null)
            {
                args.AddRange(x.Item4.Item2.Select(y => y.Item4));
            }
            var func = new FunctionStatement()
            {
                Name = x.Item1,
                Arguments = args.Count == 0 ? Array.Empty<IStatement>() : args.ToArray()
            };

            return func;
        });
        JsonPathQuery.Parser = RootIdentifier.And(Segments).Then<IStatement>(static x => new RootNode() { Child = ConvertSegments(x.Item2) });
        Parser = JsonPathQuery.Eof().Name(nameof(Parser));
    }

    /// <summary>
    /// Links a flat list of parsed segments into one chain, working from the last segment backwards:
    /// each earlier segment receives the chain built so far at the end of its own child chain.
    /// </summary>
    /// <param name="x">Parsed (whitespace, statement) pairs; only the statement part is used.</param>
    /// <returns>
    /// <c>null</c> when <paramref name="x"/> is null or empty, the single statement when it has one
    /// element, otherwise the first segment with all later segments chained beneath it.
    /// </returns>
    /// <exception cref="NotSupportedException">
    /// A non-final segment, or a statement already inside its child chain, is not an
    /// <c>IParentStatement</c> and therefore cannot take a child.
    /// </exception>
    private static IStatement ConvertSegments(IReadOnlyList<(Nothing, IStatement)> x)
    {
        if (x == null || x.Count == 0)
        {
            return null;
        }
        else if (x.Count == 1)
            return x[0].Item2;
        else
        {
            IStatement current = x[x.Count - 1].Item2;
            for (int i = x.Count - 2; i >= 0; i--)
            {
                if (x[i].Item2 is IParentStatement p)
                {
                    // Walk down to the deepest statement of this segment's existing chain.
                    // The walk must advance through pp (not p); otherwise it never moves past
                    // the first level and loops forever on chains deeper than one child.
                    var pp = p;
                    while (pp.Child != null)
                    {
                        if (pp.Child is not IParentStatement pc)
                            throw new NotSupportedException($"Cannot set child for statement of type {pp.Child.GetType().FullName}");
                        pp = pc;
                    }
                    pp.Child = current;
                    current = p;
                }
                else
                {
                    throw new NotSupportedException($"Cannot set child for statement of type {x[i].Item2.GetType().FullName}");
                }
            }
            return current;
        }
    }
}

性能测试

测试代码

/// <summary>
/// BenchmarkDotNet suite comparing this project's JSON path parser/evaluator with
/// Newtonsoft.Json (<c>SelectTokens</c>) and <c>JsonPath.Parse</c>/<c>Evaluate</c> on one small document.
/// </summary>
[MemoryDiagnoser, GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory)]
public class JsonPathBenchmarks
{
    // Sample document; serialized once into <see cref="json"/> in the constructor.
    private object data = new
    {
        Num = -3.4,
        Nu = null as string,
        Array = new object[]
        {
            new { Name = "Alice", Age = 30 },
            new { Name = "Bob", Age = 25 },
            new { Name = "Charlie", Age = 35 }
        },
    };

    // The JSON path used by every benchmark.
    private string path = "$.Array[1]['Name','Age']";

    private string json;

    // Pre-parsed statement for the "cached" benchmark of this project's parser.
    private IStatement cache;

    // Pre-parsed path for the "cached" JsonPath benchmark.
    private readonly JsonPath pc;

    public JsonPathBenchmarks()
    {
        json = JsonSerializer.Serialize(data);
        JsonPathParser.Parser.TryParseResult(path, out var result, out var error);
        cache = result.Value;
        pc = JsonPath.Parse(path);
    }

    /// <summary>Evaluates the pre-parsed path against the JSON text.</summary>
    [Benchmark]
    public object CacheTest()
    {
        return cache.EvaluateJson(json);
    }

    /// <summary>Parses the path and evaluates it against the JSON text on every call.</summary>
    [Benchmark]
    public object NoCacheTest()
    {
        JsonPathParser.Parser.TryParseResult(path, out var result, out var error);
        return result.Value.EvaluateJson(json);
    }

    // Minimal token, used so that SelectTokens has something to be called on.
    private Newtonsoft.Json.Linq.JToken testTo = Newtonsoft.Json.Linq.JToken.Parse("null");

    /// <summary>
    /// Calls SelectTokens on a trivial token without enumerating the result.
    /// NOTE(review): intended to measure path parsing only; this relies on SelectTokens parsing the
    /// path eagerly and evaluating lazily — confirm against the Newtonsoft.Json version in use.
    /// </summary>
    [Benchmark]
    public object NewtonsoftOnlyParseTest()
    {
        return testTo.SelectTokens(path);
    }

    /// <summary>Parses the JSON text, then parses and evaluates the path with Newtonsoft.Json.</summary>
    [Benchmark]
    public object NewtonsoftTest()
    {
        Newtonsoft.Json.Linq.JToken token = Newtonsoft.Json.Linq.JToken.Parse(json);
        // SelectTokens hands back a deferred sequence; materialize it so the evaluation itself
        // is part of the measurement instead of only the document and path parsing.
        return System.Linq.Enumerable.ToList(token.SelectTokens(path));
    }

    /// <summary>Parses both the path and the JSON text, then evaluates, on every call.</summary>
    [Benchmark]
    public object JsonPathNetTest()
    {
        var p = JsonPath.Parse(path);
        var instance = JsonNode.Parse(json);
        return p.Evaluate(instance);
    }

    /// <summary>Parses the JSON text and evaluates the pre-parsed path.</summary>
    [Benchmark]
    public object JsonPathNetCacheTest()
    {
        var instance = JsonNode.Parse(json);
        return pc.Evaluate(instance);
    }

    /// <summary>Parses the path only, using JsonPath.Parse.</summary>
    [Benchmark]
    public object JsonPathNetOnlyParseTest()
    {
        return JsonPath.Parse(path);
    }

    /// <summary>Parses the path only, using this project's parser.</summary>
    [Benchmark]
    public object OnlyParseTest()
    {
        JsonPathParser.Parser.TryParseResult(path, out var result, out var error);
        return result.Value;
    }
}

效果


BenchmarkDotNet v0.15.4, Windows 11 (10.0.26100.6584/24H2/2024Update/HudsonValley)
Intel Core i7-10700 CPU 2.90GHz, 1 CPU, 16 logical and 8 physical cores
.NET SDK 9.0.304
  [Host]     : .NET 9.0.8 (9.0.8, 9.0.825.36511), X64 RyuJIT x86-64-v3
  DefaultJob : .NET 9.0.8 (9.0.8, 9.0.825.36511), X64 RyuJIT x86-64-v3


Method Mean Error StdDev Gen0 Gen1 Allocated
CacheTest 1,488.1 ns 15.42 ns 14.42 ns 0.2537 - 2136 B
NoCacheTest 2,650.5 ns 11.44 ns 10.14 ns 0.4196 - 3528 B
NewtonsoftOnlyParseTest 204.0 ns 2.65 ns 2.35 ns 0.1128 - 944 B
NewtonsoftTest 3,039.1 ns 33.40 ns 26.07 ns 0.9651 0.0191 8088 B
JsonPathNetTest 1,266.9 ns 25.03 ns 35.90 ns 0.2728 - 2288 B
JsonPathNetCacheTest 855.0 ns 9.16 ns 7.65 ns 0.1469 - 1232 B
JsonPathNetOnlyParseTest 346.3 ns 3.19 ns 2.66 ns 0.1259 - 1056 B
OnlyParseTest 1,057.0 ns 5.24 ns 4.90 ns 0.1659 - 1392 B

嗯,在 system.text.json 上解析 比 Newtonsoft.json 还是快了点,具体实现都是最基本的,没有像 jsonpath 那些做优化设计,性能感觉还是可以的

所有 实现放在 https://github.com/fs7744/Lmzzz

后续看什么时候有空搞搞 stream ,能行以后就这样偷懒搞吧

posted @ 2025-11-05 17:28  victor.x.qu  阅读(0)  评论(0)    收藏  举报