chrome浏览器 V8是怎么实现 JSON.parse 的?

昨天跟友人一起大块欢饮之后,问了一个比较好玩的问题? JSON.parse 是怎么实现?当时草草的中规中矩的回答了一番,但终究对自己无知不是很满意

今天上午想起来之后,便迅速翻出node的源码来一探究竟!

对js的解析无疑是 v8的专属,但由于v8是由c++编写,我对c++的各种语法了解比较一般,如果有描述错误的地方,还请各位大神斧正.

v8的git地址 https://chromium.googlesource.com/v8/v8.git  需要翻墙

node的官网 https://nodejs.org   , 如果速度慢可以直接访问 http://nodejs.cn/

我是在node源码的deps找到的v8的相关代码

在v8的src文件夹中 直接全局搜索 JSON::Parse ( c++的语法 ) 可以找到对应代码编写在 api.cc文件中,具体代码如下:

在刚刚的搜索结果中,找到一个简单的json parse,我们就从这里入手

这个例子里面关键的代码不多,主要是

ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
                               JsonParser(isolate, source).ParseJson(), Object);

对string做处理的是 JSONParse对象的ParseJson函数,然后继续全局搜索 JSONParse,在 json-parser.cc中找到了ParseJson的相关代码

template <bool seq_one_byte>
MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() {
  // Advance to the first character (possibly EOS)
  AdvanceSkipWhitespace();
  Handle<Object> result = ParseJsonValue();
  if (result.is_null() || c0_ != kEndOfString) {
    // Some exception (for example stack overflow) is already pending.
    if (isolate_->has_pending_exception()) return Handle<Object>::null();

    // Parse failed. Current character is the unexpected token.
    Factory* factory = this->factory();
    MessageTemplate::Template message;
    Handle<Object> arg1 = Handle<Smi>(Smi::FromInt(position_), isolate());
    Handle<Object> arg2;

    switch (c0_) {
      case kEndOfString:
        message = MessageTemplate::kJsonParseUnexpectedEOS;
        break;
      case '-':
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        message = MessageTemplate::kJsonParseUnexpectedTokenNumber;
        break;
      case '"':
        message = MessageTemplate::kJsonParseUnexpectedTokenString;
        break;
      default:
        message = MessageTemplate::kJsonParseUnexpectedToken;
        arg2 = arg1;
        arg1 = factory->LookupSingleCharacterStringFromCode(c0_);
        break;
    }

    Handle<Script> script(factory->NewScript(source_));
    // We should sent compile error event because we compile JSON object in
    // separated source file.
    isolate()->debug()->OnCompileError(script);
    MessageLocation location(script, position_, position_ + 1);
    Handle<Object> error = factory->NewSyntaxError(message, arg1, arg2);
    return isolate()->template Throw<Object>(error, &location);
  }
  return result;
}
ParseJson

很明显,这个函数的代码中最关键的就是ParseJsonValue这个函数

// Parse any JSON value.
template <bool seq_one_byte>
Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() {
  StackLimitCheck stack_check(isolate_);
  if (stack_check.HasOverflowed()) {
    isolate_->StackOverflow();
    return Handle<Object>::null();
  }

  if (stack_check.InterruptRequested() &&
      isolate_->stack_guard()->HandleInterrupts()->IsException(isolate_)) {
    return Handle<Object>::null();
  }

  if (c0_ == '"') return ParseJsonString();
  if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber();
  if (c0_ == '{') return ParseJsonObject();
  if (c0_ == '[') return ParseJsonArray();
  if (c0_ == 'f') {
    if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
        AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
      AdvanceSkipWhitespace();
      return factory()->false_value();
    }
    return ReportUnexpectedCharacter();
  }
  if (c0_ == 't') {
    if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
        AdvanceGetChar() == 'e') {
      AdvanceSkipWhitespace();
      return factory()->true_value();
    }
    return ReportUnexpectedCharacter();
  }
  if (c0_ == 'n') {
    if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
        AdvanceGetChar() == 'l') {
      AdvanceSkipWhitespace();
      return factory()->null_value();
    }
    return ReportUnexpectedCharacter();
  }
  return ReportUnexpectedCharacter();
}
ParseJsonValue

string在解析之前,进行了一次字符串格式的整理,这样可以保证c0_ 是string的第一个有效字符,而不是空格之类的无用字符.

接下来就比较简单了,根据不同的第一个字符进行不同的处理,其他都会报未期待的字符错误

 字符 调用函数

"

ParseJsonString
数字或 - ParseJsonNumber
{ ParseJsonObject
[ ParseJsonArray
f 判断是否是false
t 判断是否是true
n 判断是否是null

ParseJsonString 继续验证string的有效性,因为本身就string类型,所以不需要类型转换

template <bool seq_one_byte>
bool JsonParser<seq_one_byte>::ParseJsonString(Handle<String> expected) {
  int length = expected->length();
  if (source_->length() - position_ - 1 > length) {
    DisallowHeapAllocation no_gc;
    String::FlatContent content = expected->GetFlatContent();
    if (content.IsOneByte()) {
      DCHECK_EQ('"', c0_);
      const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1;
      const uint8_t* expected_chars = content.ToOneByteVector().start();
      for (int i = 0; i < length; i++) {
        uint8_t c0 = input_chars[i];
        if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') {
          return false;
        }
      }
      if (input_chars[length] == '"') {
        position_ = position_ + length + 1;
        AdvanceSkipWhitespace();
        return true;
      }
    }
  }
  return false;
}
ParseJsonString

ParseJsonNumber 验证有效性后,利用 StringToDouble 把string转化为数字

template <bool seq_one_byte>
Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() {
  bool negative = false;
  int beg_pos = position_;
  if (c0_ == '-') {
    Advance();
    negative = true;
  }
  if (c0_ == '0') {
    Advance();
    // Prefix zero is only allowed if it's the only digit before
    // a decimal point or exponent.
    if (IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
  } else {
    int i = 0;
    int digits = 0;
    if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
    do {
      i = i * 10 + c0_ - '0';
      digits++;
      Advance();
    } while (IsDecimalDigit(c0_));
    if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
      SkipWhitespace();
      return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate());
    }
  }
  if (c0_ == '.') {
    Advance();
    if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
    do {
      Advance();
    } while (IsDecimalDigit(c0_));
  }
  if (AsciiAlphaToLower(c0_) == 'e') {
    Advance();
    if (c0_ == '-' || c0_ == '+') Advance();
    if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
    do {
      Advance();
    } while (IsDecimalDigit(c0_));
  }
  int length = position_ - beg_pos;
  double number;
  if (seq_one_byte) {
    Vector<const uint8_t> chars(seq_source_->GetChars() + beg_pos, length);
    number = StringToDouble(isolate()->unicode_cache(), chars,
                            NO_FLAGS,  // Hex, octal or trailing junk.
                            std::numeric_limits<double>::quiet_NaN());
  } else {
    Vector<uint8_t> buffer = Vector<uint8_t>::New(length);
    String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
    Vector<const uint8_t> result =
        Vector<const uint8_t>(buffer.start(), length);
    number = StringToDouble(isolate()->unicode_cache(), result,
                            NO_FLAGS,  // Hex, octal or trailing junk.
                            0.0);
    buffer.Dispose();
  }
  SkipWhitespace();
  return factory()->NewNumber(number, pretenure_);
}
ParseJsonNumber

ParseJsonObject  先建立了一个空对象,对整个字符串开始循环检查语法匹配,并将对应的key,value全部找到后添加到之前的空对象中

// Parse a JSON object. Position must be right at '{'.
template <bool seq_one_byte>
Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() {
  HandleScope scope(isolate());
  Handle<JSObject> json_object =
      factory()->NewJSObject(object_constructor(), pretenure_);
  Handle<Map> map(json_object->map());
  int descriptor = 0;
  ZoneList<Handle<Object> > properties(8, zone());
  DCHECK_EQ(c0_, '{');

  bool transitioning = true;

  AdvanceSkipWhitespace();
  if (c0_ != '}') {
    do {
      if (c0_ != '"') return ReportUnexpectedCharacter();

      int start_position = position_;
      Advance();

      if (IsDecimalDigit(c0_)) {
        ParseElementResult element_result = ParseElement(json_object);
        if (element_result == kNullHandle) return Handle<Object>::null();
        if (element_result == kElementFound) continue;
      }
      // Not an index, fallback to the slow path.

      position_ = start_position;
#ifdef DEBUG
      c0_ = '"';
#endif

      Handle<String> key;
      Handle<Object> value;

      // Try to follow existing transitions as long as possible. Once we stop
      // transitioning, no transition can be found anymore.
      DCHECK(transitioning);
      // First check whether there is a single expected transition. If so, try
      // to parse it first.
      bool follow_expected = false;
      Handle<Map> target;
      if (seq_one_byte) {
        key = TransitionArray::ExpectedTransitionKey(map);
        follow_expected = !key.is_null() && ParseJsonString(key);
      }
      // If the expected transition hits, follow it.
      if (follow_expected) {
        target = TransitionArray::ExpectedTransitionTarget(map);
      } else {
        // If the expected transition failed, parse an internalized string and
        // try to find a matching transition.
        key = ParseJsonInternalizedString();
        if (key.is_null()) return ReportUnexpectedCharacter();

        target = TransitionArray::FindTransitionToField(map, key);
        // If a transition was found, follow it and continue.
        transitioning = !target.is_null();
      }
      if (c0_ != ':') return ReportUnexpectedCharacter();

      AdvanceSkipWhitespace();
      value = ParseJsonValue();
      if (value.is_null()) return ReportUnexpectedCharacter();

      if (transitioning) {
        PropertyDetails details =
            target->instance_descriptors()->GetDetails(descriptor);
        Representation expected_representation = details.representation();

        if (value->FitsRepresentation(expected_representation)) {
          if (expected_representation.IsHeapObject() &&
              !target->instance_descriptors()
                   ->GetFieldType(descriptor)
                   ->NowContains(value)) {
            Handle<FieldType> value_type(
                value->OptimalType(isolate(), expected_representation));
            Map::GeneralizeField(target, descriptor, details.constness(),
                                 expected_representation, value_type);
          }
          DCHECK(target->instance_descriptors()
                     ->GetFieldType(descriptor)
                     ->NowContains(value));
          properties.Add(value, zone());
          map = target;
          descriptor++;
          continue;
        } else {
          transitioning = false;
        }
      }

      DCHECK(!transitioning);

      // Commit the intermediate state to the object and stop transitioning.
      CommitStateToJsonObject(json_object, map, &properties);

      JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, value)
          .Check();
    } while (transitioning && MatchSkipWhiteSpace(','));

    // If we transitioned until the very end, transition the map now.
    if (transitioning) {
      CommitStateToJsonObject(json_object, map, &properties);
    } else {
      while (MatchSkipWhiteSpace(',')) {
        HandleScope local_scope(isolate());
        if (c0_ != '"') return ReportUnexpectedCharacter();

        int start_position = position_;
        Advance();

        if (IsDecimalDigit(c0_)) {
          ParseElementResult element_result = ParseElement(json_object);
          if (element_result == kNullHandle) return Handle<Object>::null();
          if (element_result == kElementFound) continue;
        }
        // Not an index, fallback to the slow path.

        position_ = start_position;
#ifdef DEBUG
        c0_ = '"';
#endif

        Handle<String> key;
        Handle<Object> value;

        key = ParseJsonInternalizedString();
        if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();

        AdvanceSkipWhitespace();
        value = ParseJsonValue();
        if (value.is_null()) return ReportUnexpectedCharacter();

        JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key,
                                                          value)
            .Check();
      }
    }

    if (c0_ != '}') {
      return ReportUnexpectedCharacter();
    }
  }
  AdvanceSkipWhitespace();
  return scope.CloseAndEscape(json_object);
}
ParseJsonObject

ParseJsonArray  大致上跟ParseJsonObject比较相同,

// Parse a JSON array. Position must be right at '['.
template <bool seq_one_byte>
Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() {
  HandleScope scope(isolate());
  ZoneList<Handle<Object> > elements(4, zone());
  DCHECK_EQ(c0_, '[');

  ElementKindLattice lattice;

  AdvanceSkipWhitespace();
  if (c0_ != ']') {
    do {
      Handle<Object> element = ParseJsonValue();
      if (element.is_null()) return ReportUnexpectedCharacter();
      elements.Add(element, zone());
      lattice.Update(element);
    } while (MatchSkipWhiteSpace(','));
    if (c0_ != ']') {
      return ReportUnexpectedCharacter();
    }
  }
  AdvanceSkipWhitespace();

  // Allocate a fixed array with all the elements.

  Handle<Object> json_array;
  const ElementsKind kind = lattice.GetElementsKind();

  switch (kind) {
    case PACKED_ELEMENTS:
    case PACKED_SMI_ELEMENTS: {
      Handle<FixedArray> elems =
          factory()->NewFixedArray(elements.length(), pretenure_);
      for (int i = 0; i < elements.length(); i++) elems->set(i, *elements[i]);
      json_array = factory()->NewJSArrayWithElements(elems, kind, pretenure_);
      break;
    }
    case PACKED_DOUBLE_ELEMENTS: {
      Handle<FixedDoubleArray> elems = Handle<FixedDoubleArray>::cast(
          factory()->NewFixedDoubleArray(elements.length(), pretenure_));
      for (int i = 0; i < elements.length(); i++) {
        elems->set(i, elements[i]->Number());
      }
      json_array = factory()->NewJSArrayWithElements(elems, kind, pretenure_);
      break;
    }
    default:
      UNREACHABLE();
  }

  return scope.CloseAndEscape(json_array);
}
ParseJsonArray

这几个方法中对ParseJsonValue进行的递归,来保证一次循环中读取的值是一个完整的值,这个想法非常棒,从而保证了对象的属性放置对象,数字,数组,字符串,null...等等的兼容.

学而时习之,不亦说乎.

第一次写技术博客,难免有所欠缺,
欢迎江湖大侠前来指导.

posted @ 2017-12-22 16:07  十一云子  阅读(1245)  评论(0编辑  收藏