解析 data URI,获取 MIME type 和原始数据(raw data)



data:text/html;base64,PCFET0NUWVBFIGh0bWw+DQo8aHRtDQo=

原始数据格式
 

data:text/html;base64,PCFET0NUWVBFIGh0bWw+DQo8aHRtbCBjbGFzcz0ibG9hZGluZyI+DQogIDxoZWFkPg0KICAgIDx0aXRsZT5jYW52YXNraXQgQVBJIOa1i+ivlTwvdGl0bGU+DQogICAgPG1ldGEgY2hhcnNldD0idXRmLTgiIC8+DQogIDwvaGVhZD4NCiAgPGJvZHkgc3R5bGU9ImZvbnQtZmFtaWx5OiBNaWNyb3NvZnQgWWFIZWkiPg0KICAgIDxjYW52YXMgaWQ9ImNvbnRlbnQiIHdpZHRoPSI0MDAwIiBoZWlnaHQ9IjQwMDAiPjwvY2FudmFzPg0KICAgIDxzY3JpcHQgdHlwZT0idGV4dC9qYXZhc2NyaXB0IiBzcmM9Ii4vYXBwL2Jpbi90ZXN0L2NhbnZhc2tpdC5qcyI+PC9zY3JpcHQ+DQogICAgPHNjcmlwdD4NCiAgICAgIGNvbnN0IGNrTG9hZGVkID0gQ2FudmFzS2l0SW5pdCh7DQogICAgICAgIGxvY2F0ZUZpbGU6IChmaWxlKSA9PiAiLi9hcHAvYmluL3Rlc3QvIiArIGZpbGUsDQogICAgICB9KTsNCg0KICAgICAgUHJvbWlzZS5hbGwoW2NrTG9hZGVkXSkudGhlbigoW0NhbnZhc0tpdF0pID0+IHsNCiAgICAgICAgY29uc3Qgc3VyZmFjZSA9IENhbnZhc0tpdC5NYWtlQ2FudmFzU3VyZmFjZSgiY29udGVudCIpOw0KICAgICAgICBpZiAoIXN1cmZhY2UpIHsNCiAgICAgICAgICBjb25zb2xlLmVycm9yKCJDb3VsZCBub3QgbWFrZSBzdXJmYWNlIik7DQogICAgICAgICAgcmV0dXJuOw0KICAgICAgICB9DQoNCiAgICAgICAgY29uc3QgY2FudmFzID0gc3VyZmFjZS5nZXRDYW52YXMoKTsNCg0KICAgICAgICB2YXIgb2xkX2RhdGEgPSBbMHgwMSwgMHgwMiwgMHgwM107DQogICAgICAgIHZhciBwYXRjaCA9IFsNCiAgICAgICAgICAweDQ3LCAweDQyLCAweDUzLCAweDQ0LCAweDQ5LCAweDQ2LCAweDM0LCAweDMyLCAweDAzLCAweGUyLCAweGZmLA0KICAgICAgICAgIDB4OGQsIDB4ZDIsIDB4MGEsIDB4MDMsIDB4ZWEsIDB4OWEsIDB4Y2EsIDB4MDksIDB4MGEsIDB4MDEsIDB4MDEsDQogICAgICAgICAgMHgwMSwgMHgwMSwgMHgwMCwgMHgwMiwgMHgwMCwgMHgwMCwgMHgwMCwgMHgwMCwgMHgwMSwgMHgwMiwgMHgwMSwNCiAgICAgICAgICAweDAxLCAweDA0LCAweDAzLA0KICAgICAgICBdOw0KICAgICAgICBwaWMgPSBDYW52YXNLaXQuQXBwbHlQYXRjaChvbGRfZGF0YSxwYXRjaCk7DQoNCiAgICAgICAgcGljID0gQ2FudmFzS2l0Lk1ha2VQaWN0dXJlKG9sZF9kYXRhKTsNCg0KICAgICAgICB7DQogICAgICAgICAgY2FudmFzLmRyYXdQaWN0dXJlKHBpYyk7DQogICAgICAgICAgc3VyZmFjZS5mbHVzaCgpOw0KICAgICAgICAgIHN1cmZhY2UuZmx1c2goKTsNCiAgICAgICAgfQ0KDQogICAgICAgIHN1cmZhY2UuZmx1c2goKTsNCiAgICAgIH0pOw0KICAgIDwvc2NyaXB0Pg0KICA8L2JvZHk+DQo8L2h0bWw+DQo=

 

解析方法内部实现参考:


// On success returns a pair of <mime_type, data>.
// On error returns a pair of <string, nullptr>.
// mime_type should be ignored if data is nullptr.
std::pair<std::string, scoped_refptr<base::RefCountedString>>
ParseEncodedImageData(const std::string& encoded_image_data) {
  std::pair<std::string, scoped_refptr<base::RefCountedString>> result;

  GURL encoded_image_uri(encoded_image_data);
  if (!encoded_image_uri.is_valid() ||
      !encoded_image_uri.SchemeIs(url::kDataScheme)) {
    return result;
  }
  std::string content = encoded_image_uri.GetContent();
  // The content should look like this: "image/png;base64,aaa..." (where
  // "aaa..." is the base64-encoded image data).
  size_t mime_type_end = content.find_first_of(';');
  if (mime_type_end == std::string::npos)
    return result;

  std::string mime_type = content.substr(0, mime_type_end);

  size_t base64_begin = mime_type_end + 1;
  size_t base64_end = content.find_first_of(',', base64_begin);
  if (base64_end == std::string::npos)
    return result;
  auto base64 = base::MakeStringPiece(content.begin() + base64_begin,
                                      content.begin() + base64_end);
  if (base64 != "base64")
    return result;

  size_t data_begin = base64_end + 1;
  auto data =
      base::MakeStringPiece(content.begin() + data_begin, content.end());

  std::string decoded_data;
  if (!base::Base64Decode(data, &decoded_data))
    return result;

  result.first = mime_type;
  result.second =
      base::MakeRefCounted<base::RefCountedString>(std::move(decoded_data));
  return result;
}

D:\chromium110\chromium\src\net\base\data_url.cc :

// Parses the data: URL |url| into its media type, charset and (optionally)
// its decoded payload.
//
// |mime_type| and |charset| must be non-null and empty on entry. |data| may be
// null when the caller does not need the payload; otherwise it must be empty.
//
// Returns false when |url| is invalid or has no scheme, when the content has
// no ',' separator, when a charset parameter is not a valid token, or when a
// base64 payload fails to decode. |mime_type| and |charset| are only written
// on success. An empty or unparsable media type falls back to "text/plain".
bool DataURL::Parse(const GURL& url,
                    std::string* mime_type,
                    std::string* charset,
                    std::string* data) {
  if (!url.is_valid() || !url.has_scheme())
    return false;

  DCHECK(mime_type->empty());
  DCHECK(charset->empty());
  DCHECK(!data || data->empty());

  // |content| views either the URL's own storage or |content_string|, so
  // |content_string| has to stay alive for the rest of the function.
  base::StringPiece content;
  std::string content_string;
  if (base::FeatureList::IsEnabled(base::features::kOptimizeDataUrls)) {
    // Avoid copying the URL content which can be expensive for large URLs.
    content = url.GetContentPiece();
  } else {
    content_string = url.GetContent();
    content = content_string;
  }

  // The first ',' separates the metadata (media type and parameters) from the
  // body.
  base::StringPiece::const_iterator comma = base::ranges::find(content, ',');
  if (comma == content.end())
    return false;

  std::vector<base::StringPiece> meta_data =
      base::SplitStringPiece(base::MakeStringPiece(content.begin(), comma), ";",
                             base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);

  // These are moved to |mime_type| and |charset| on success.
  std::string mime_type_value;
  std::string charset_value;
  // The first ';'-separated field is the media type; the remaining fields are
  // parameters.
  auto iter = meta_data.cbegin();
  if (iter != meta_data.cend()) {
    mime_type_value = base::ToLowerASCII(*iter);
    ++iter;
  }

  static constexpr base::StringPiece kBase64Tag("base64");
  static constexpr base::StringPiece kCharsetTag("charset=");

  // Only the first "base64" tag and the first "charset=" parameter are
  // honored; every other parameter is ignored.
  bool base64_encoded = false;
  for (; iter != meta_data.cend(); ++iter) {
    if (!base64_encoded &&
        base::EqualsCaseInsensitiveASCII(*iter, kBase64Tag)) {
      base64_encoded = true;
    } else if (charset_value.empty() &&
               base::StartsWith(*iter, kCharsetTag,
                                base::CompareCase::INSENSITIVE_ASCII)) {
      charset_value = std::string(iter->substr(kCharsetTag.size()));
      // The grammar for charset is not specially defined in RFC2045 and
      // RFC2397. It just needs to be a token.
      if (!HttpUtil::IsToken(charset_value))
        return false;
    }
  }

  if (mime_type_value.empty()) {
    // Fallback to the default if nothing specified in the mediatype part as
    // specified in RFC2045. As specified in RFC2397, we use |charset| even if
    // |mime_type| is empty.
    mime_type_value = "text/plain";
    if (charset_value.empty())
      charset_value = "US-ASCII";
  } else if (!ParseMimeTypeWithoutParameter(mime_type_value, nullptr,
                                            nullptr)) {
    // Fallback to the default as recommended in RFC2045 when the mediatype
    // value is invalid. For this case, we don't respect |charset| but force it
    // set to "US-ASCII".
    mime_type_value = "text/plain";
    charset_value = "US-ASCII";
  }

  // The caller may not be interested in receiving the data.
  if (data) {
    // Preserve spaces if dealing with text or xml input, same as mozilla:
    //   https://bugzilla.mozilla.org/show_bug.cgi?id=138052
    // but strip them otherwise:
    //   https://bugzilla.mozilla.org/show_bug.cgi?id=37200
    // (Spaces in a data URL should be escaped, which is handled below, so any
    // spaces now are wrong. People expect to be able to enter them in the URL
    // bar for text, and it can't hurt, so we allow it.)
    //
    // TODO(mmenke): Is removing all spaces reasonable? GURL removes trailing
    // spaces itself, anyways. Should we just trim leading spaces instead?
    // Allowing random intermediary spaces seems unnecessary.

    auto raw_body = base::MakeStringPiece(comma + 1, content.end());

    // For base64, we may have url-escaped whitespace which is not part
    // of the data, and should be stripped. Otherwise, the escaped whitespace
    // could be part of the payload, so don't strip it.
    if (base64_encoded) {
      // If the data URL is well formed, we can decode it immediately.
      if (base::FeatureList::IsEnabled(base::features::kOptimizeDataUrls) &&
          IsDataURLReadyForDecode(raw_body)) {
        if (!base::Base64Decode(raw_body, data))
          return false;
      } else {
        // Slow path: unescape first, then decode with the forgiving policy.
        std::string unescaped_body = base::UnescapeBinaryURLComponent(raw_body);
        if (!base::Base64Decode(unescaped_body, data,
                                base::Base64DecodePolicy::kForgiving))
          return false;
      }
    } else {
      // Strip whitespace for non-text MIME types.
      // |temp| is declared outside the if so that |raw_body| can keep pointing
      // into it until the unescape call below.
      std::string temp;
      if (!(mime_type_value.compare(0, 5, "text/") == 0 ||
            mime_type_value.find("xml") != std::string::npos)) {
        temp = std::string(raw_body);
        base::EraseIf(temp, base::IsAsciiWhitespace<char>);
        raw_body = temp;
      }

      *data = base::UnescapeBinaryURLComponent(raw_body);
    }
  }

  *mime_type = std::move(mime_type_value);
  *charset = std::move(charset_value);
  return true;
}

解析方法2:

D:\chromium110\chromium\src\third_party\blink\renderer\platform\network\network_utils.cc
D:\chromium110\chromium\src\net\base\data_url_unittest.cc

#include "net/base/data_url.h"
#include "url/gurl.h"

#include "net/base/net_errors.h"
#include "net/http/http_response_headers.h"

// Example call site: parse |url_| and take ownership of the decoded payload
// only when parsing succeeds; |response_body| stays null otherwise.
std::string mime_type, charset, data;
  std::unique_ptr<std::string> response_body;
  if (net::DataURL::Parse(url_, &mime_type, &charset, &data))
    response_body = std::make_unique<std::string>(std::move(data));


//////////////////////////////////////////

// One row of the table-driven DataURL::Parse test: an input URL together with
// the expected return value and expected outputs.
struct ParseTestData {
  // The data: URL handed to the parser.
  const char* url;
  // Expected return value of the parse call.
  bool is_valid;
  // Expected MIME type output.
  const char* mime_type;
  // Expected charset output.
  const char* charset;
  // Expected payload output.
  const std::string data;
};

}  // namespace

// Table-driven check of DataURL::Parse: every row supplies a URL plus the
// expected return value, MIME type, charset and payload.
TEST(DataURLTest, Parse) {
  const ParseTestData tests[] = {
      {"data:", false, "", "", ""},

      {"data:,", true, "text/plain", "US-ASCII", ""},
  };

  for (const auto& test : tests) {
    // Attach the URL to any failure message so the failing row is obvious.
    SCOPED_TRACE(test.url);

    std::string mime_type;
    std::string charset;
    std::string data;
    bool ok = DataURL::Parse(GURL(test.url), &mime_type, &charset, &data);
    EXPECT_EQ(ok, test.is_valid);
    EXPECT_EQ(test.mime_type, mime_type);
    EXPECT_EQ(test.charset, charset);
    EXPECT_EQ(test.data, data);
  }
}
    ////////////////////////////////////////////////////////////
 // Fuzzer-style usage: build a GURL from the remaining input bytes, try to
 // parse it as a data: URL, and pass the resulting MIME type, charset and data
 // to FillStaticResponse.
 GURL url(provider.ConsumeRemainingBytesAsString());
  std::string mime_type, charset, data;
  if (!net::DataURL::Parse(url, &mime_type, &charset, &data)) {
    // This case is only here to allow cluster fuzz pass any url,
    // to unblock further fuzzing.
    mime_type = "text/html";
    charset = "UTF-8";
  }
  blink::WebNavigationParams::FillStaticResponse(
      navigation_params.get(), blink::WebString::FromUTF8(mime_type),
      blink::WebString::FromUTF8(charset), data);
////////////////////////////////////////
    // Test a slightly larger data URL.
// Runs a base64-encoded PNG (the Chrome logo) through DataURL::BuildResponse
// and checks the decoded size, the MIME type, the empty charset and the
// generated response headers.
TEST(DataURLTest, Image) {
  static constexpr char kChromeLogoDataUrl[] =
      "data:image/png;base64,"
      "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAADVklEQVQ4jX2TfUwUB"
      "BjG3w1y+HGcd9dxhXR8T4awOccJGgOSWclHImznLkTlSw0DDQXkrmgYgbUYnlQTqQ"
      "xIEVxitD5UMCATRA1CEEg+Qjw3bWDxIauJv/5oumqs39/P827vnucRmYN0gyF01GI"
      "5MpCVdW0gO7tvNC+vqSEtbZefk5NuLv1jdJ46p/zw0HeH4+PHr3h7c1mjoV2t5rKz"
      "Mx1+fg9bAgK6zHq9cU5z+LpA3xOtx34+vTeT21onRuzssC3zxbbSwC13d/pFuC7Ck"
      "IMDxQpF7r/MWq12UctI1dWWm99ypqSYmRUBdKem8MkrO/kgaTt1O7YzlpzE5GIVd0"
      "WYUqt57yWf2McHTObYPbVD+ZwbtlLTVMZ3BW+TnLyXLaWtmEq6WJVbT3HBh3Svj2H"
      "QQcm43XwmtoYM6vVKleh0uoWvnzW3v3MpidruPTQPf0bia7sJOtBM0ufTWNvus/nk"
      "DFHF9ZS+uYVjRUasMeHUmyLYtcklTvzWGFZnNOXczThvpKIzjcahSqIzkvDLayDq6"
      "D3eOjtBbNUEIZYyqsvj4V4wY92eNJ4IoyhTbxXX1T5xsV9tm9r4TQwHLiZw/pdDZJ"
      "ea8TKmsmR/K0uLh/GwnCHghTja6lPhphezPfO5/5MrVvMzNaI3+ERHfrFzPKQukrQ"
      "GI4d/3EFD/3E2mVNYvi4at7CXWREaxZGD+3hg28zD3gVMd6q5c8GdosynKmSeRuGz"
      "pjyl1/9UDGtPR5HeaKT8Wjo17WXk579BXVUhN64ehF9fhRtq/uxxZKzNiZFGD0wRC"
      "3NFROZ5mwIPL/96K/rKMMLrIzF9uhHr+/sYH7DAbwlgC4J+R2Z7FUx1qLnV7MGF40"
      "smVSoJ/jvHRfYhQeUJd/SnYtGWhPHR0Sz+GE2F2yth0B36Vcz2KpnufBJbsysjjW4"
      "kblBUiIjiURUWqJY65zxbnTy57GQyH58zgy0QBtTQv5gH15XMdKkYu+TGaJMnlm2O"
      "34uI4b9tflqp1+QEFGzoW/ulmcofcpkZCYJhDfSpme7QcrHa+Xfji8paEQkTkSfmm"
      "oRWRNZr/F1KfVMjW+IKEnv2FwZfKdzt0BQR6lClcZR0EfEXEfv/G6W9iLiIyCoReV"
      "5EnhORIBHx+ufPj/gLB/zGI/G4Bk0AAAAASUVORK5CYII=";
  const GURL image_url(kChromeLogoDataUrl);

  std::string mime_type, charset, body;
  scoped_refptr<HttpResponseHeaders> response_headers;

  const auto result = DataURL::BuildResponse(image_url, "GET", &mime_type,
                                             &charset, &body,
                                             &response_headers);
  EXPECT_EQ(OK, result);

  // The decoded payload and its metadata.
  EXPECT_EQ(911u, body.size());
  EXPECT_EQ("image/png", mime_type);
  EXPECT_TRUE(charset.empty());

  // The headers built for the response; stop here if none were produced.
  ASSERT_TRUE(response_headers);
  std::string content_type;
  EXPECT_EQ(response_headers->GetStatusLine(), "HTTP/1.1 200 OK");
  EXPECT_TRUE(
      response_headers->GetNormalizedHeader("Content-Type", &content_type));
  EXPECT_EQ(content_type, "image/png");
}

 

生成方法:

// Builds a "data:image/<mime_subtype>;base64,<payload>" URI, where the payload
// is the base64 encoding of |image_data|.
std::string MakeDataURIForImage(base::span<const uint8_t> image_data,
                                base::StringPiece mime_subtype) {
  return "data:image/" + std::string(mime_subtype) + ";base64," +
         base::Base64Encode(image_data);
}

 

posted @ 2024-03-01 16:09  Bigben  阅读(29)  评论(0编辑  收藏  举报