Python 字符串全面解析 - 详解

文章目录

Python 字符串全面解析
C++ 字符串全面解析

Python 字符串全面解析

1. 字符串创建

# 使用单引号、双引号、三引号
s1 = 'hello'            # 单引号
s2 = "world"            # 双引号
s3 = '''多行
字符串'''                # 三引号（可以跨行）
s4 = """也是
多行字符串"""
# 字符串构造函数
s5 = str(123)          # "123"
s6 = str(3.14)         # "3.14"
s7 = str([1, 2, 3])    # "[1, 2, 3]"

2. 基本操作

访问和索引

text = "Hello Python"
# 索引访问（从0开始）
print(text[0])      # H
print(text[6])      # P
# 负索引（从-1开始，反向）
print(text[-1])     # n
print(text[-6])     # P
# 切片 [start:end:step]
print(text[0:5])    # Hello
print(text[6:])     # Python
print(text[:5])     # Hello
print(text[::2])    # HloPto（步长为2）
print(text[::-1])   # nohtyP olleH（反转字符串）
# 字符串长度
print(len(text))    # 12

拼接和重复

# 拼接
s1 = "Hello" + " " + "World"  # Hello World
s2 = "Hi " * 3                # Hi Hi Hi 
# 使用join方法（更高效）
words = ["Python", "is", "awesome"]
s3 = " ".join(words)          # Python is awesome
s4 = "-".join(["2024", "01", "01"])  # 2024-01-01

3. 字符串方法

查找和替换

text = "Python is great. Python is powerful."
# 查找
print(text.find("Python"))     # 0（第一次出现的位置）
print(text.find("Java"))       # -1（未找到）
print(text.index("great"))     # 10（找不到会抛出异常）
print("Python" in text)        # True
# 计数
print(text.count("Python"))    # 2
# 替换
new_text = text.replace("Python", "Java", 1)  # 只替换第一个
print(new_text)  # "Java is great. Python is powerful."

大小写转换

text = "Hello World"
print(text.upper())       # HELLO WORLD
print(text.lower())       # hello world
print(text.title())       # Hello World
print(text.capitalize())  # Hello world
print(text.swapcase())    # hELLO wORLD

空白字符处理

text = "  Hello World  \n"
print(text.strip())       # "Hello World"（移除两端空白）
print(text.lstrip())      # "Hello World  \n"（移除左端）
print(text.rstrip())      # "  Hello World"（移除右端）
print("  test  ".strip()) # "test"

分割和连接

# 分割
csv = "apple,banana,orange"
print(csv.split(","))      # ['apple', 'banana', 'orange']
sentence = "Python is fun"
print(sentence.split())    # ['Python', 'is', 'fun']（默认按空白分割）
print(sentence.split(" ", 1))  # ['Python', 'is fun']（只分割一次）
# 按行分割
multi_line = "line1\nline2\nline3"
print(multi_line.splitlines())  # ['line1', 'line2', 'line3']
# 分区（partition）
text = "Python:3.10:release"
print(text.partition(":"))  # ('Python', ':', '3.10:release')

判断方法

s1 = "hello123"
s2 = "HELLO"
s3 = "123"
s4 = "  "
s5 = "Hello World"
print(s1.isalnum())    # True（字母或数字）
print(s1.isalpha())    # False（纯字母）
print(s3.isdigit())    # True（纯数字）
print(s2.isupper())    # True（全大写）
print(s1.islower())    # True（全小写）
print(s4.isspace())    # True（全空白字符）
print(s5.istitle())    # True（标题格式，每个单词首字母大写）
print(s1.startswith("he"))  # True
print(s1.endswith("123"))   # True

4. 字符串格式化

f-string（Python 3.6+ 推荐）

name = "Alice"
age = 25
height = 1.68
# 基础用法
print(f"My name is {name}")  # My name is Alice
# 表达式
print(f"I'm {age + 1} years old")  # I'm 26 years old
# 格式化数字
print(f"Height: {height:.2f}m")    # Height: 1.68m
print(f"Age: {age:03d}")           # Age: 025
# 字典值
person = {"name": "Bob", "age": 30}
print(f"{person['name']} is {person['age']}")  # Bob is 30

format() 方法

# 位置参数
print("{} is {} years old".format("Alice", 25))  # Alice is 25 years old
print("{1} comes after {0}".format("first", "second"))  # second comes after first
# 关键字参数
print("{name} is {age} years old".format(name="Bob", age=30))
# 格式化规范
print("PI: {:.2f}".format(3.14159))      # PI: 3.14
print("Number: {:05d}".format(42))       # Number: 00042
print("Percent: {:.1%}".format(0.75))    # Percent: 75.0%
# 对齐
print("{:<10}".format("left"))      # 左对齐
print("{:>10}".format("right"))     # 右对齐
print("{:^10}".format("center"))    # 居中对齐

% 格式化（旧式）

name = "Charlie"
age = 35
print("Name: %s, Age: %d" % (name, age))       # Name: Charlie, Age: 35
print("Float: %.3f" % 3.14159)                 # Float: 3.142
print("Hex: 0x%x" % 255)                       # Hex: 0xff

5. 转义字符和原始字符串

# 常见转义字符
print("Line1\nLine2")      # 换行
print("Tab\tseparated")    # 制表符
print("Backslash: \\")     # 反斜杠
print("Quote: \'\"")       # 单引号和双引号
# 原始字符串（不转义）
path = r"C:\Users\Name\Documents"  # 常用于Windows路径
regex = r"\d+\s+\w+"               # 常用于正则表达式
print(path)  # C:\Users\Name\Documents
# 多行字符串（自动包含换行符）
multiline = """第一行
第二行
第三行"""

6. 字符串编码

# 编码转换
text = "你好，世界"
# 编码为字节
utf8_bytes = text.encode("utf-8")    # b'\xe4\xbd\xa0\xe5\xa5\xbd\xef\xbc\x8c\xe4\xb8\x96\xe7\x95\x8c'
gbk_bytes = text.encode("gbk")       # b'\xc4\xe3\xba\xc3\xa3\xac\xca\xc0\xbd\xe7'
# 解码为字符串
decoded_text = utf8_bytes.decode("utf-8")  # 你好，世界
# 字符和Unicode码点
print(ord("A"))    # 65（字符→Unicode）
print(chr(65))     # A（Unicode→字符）
print("\u4f60\u597d")  # 你好（Unicode转义）

7. 字符串检查和处理

# 移除前缀/后缀（Python 3.9+）
url = "https://example.com"
print(url.removeprefix("https://"))  # example.com
filename = "document.txt"
print(filename.removesuffix(".txt"))  # document
# 填充
text = "Hello"
print(text.ljust(10, "-"))  # Hello-----（左对齐填充）
print(text.rjust(10, "*"))  # *****Hello（右对齐填充）
print(text.center(11, "="))  # ===Hello===（居中对齐）
# 展开制表符
print("a\tb\tc".expandtabs(4))  # a   b   c（制表符扩展到下一个 4 的倍数列，而不是固定替换为 4 个空格）

8. 实战示例

# 示例1：统计词频
def word_frequency(text):
    """Count how often each word occurs in *text*, ignoring case.

    The text is lower-cased and split on whitespace. Each token then has the
    characters ``.,!?;:`` stripped from both ends; tokens that end up empty
    are skipped.

    Returns a dict mapping each word to its number of occurrences.
    """
    frequency = {}
    for word in text.lower().split():
        # strip() only trims the ends, so inner punctuation ("don't") is kept.
        word = word.strip(".,!?;:")
        if word:
            frequency[word] = frequency.get(word, 0) + 1
    return frequency
text = "Hello world! Hello Python. Python is great."
print(word_frequency(text))
# {'hello': 2, 'world': 1, 'python': 2, 'is': 1, 'great': 1}
# 示例2：检查回文
def is_palindrome(s):
    """Return True if *s* reads the same forwards and backwards.

    Only alphanumeric characters are compared, and case is ignored.
    """
    normalized = ''.join(c.lower() for c in s if c.isalnum())
    return normalized == normalized[::-1]
print(is_palindrome("A man, a plan, a canal: Panama"))  # True
# 示例3：密码强度检查
def check_password(password):
    """Return a message describing the first strength rule *password* fails.

    Rules are checked in order: at least 8 characters, at least one
    uppercase letter, at least one lowercase letter, at least one digit.
    If every rule passes, the "strong enough" message is returned.
    """
    if len(password) < 8:
        return "密码太短"
    if not any(c.isupper() for c in password):
        return "需要大写字母"
    if not any(c.islower() for c in password):
        return "需要小写字母"
    if not any(c.isdigit() for c in password):
        return "需要数字"
    return "密码强度足够"
print(check_password("Pass1234"))  # 密码强度足够
# 示例4：格式化表格数据
def format_table(data):
    """Print the rows of *data* as left-aligned columns joined by " | ".

    Every cell is converted with ``str()`` and padded to the width of the
    widest cell in its column. Because ``zip`` stops at the shortest input,
    cells beyond the length of the shortest row are not printed.
    """
    # Width of the widest cell in each column.
    col_widths = [max(len(str(item)) for item in col) for col in zip(*data)]
    for row in data:
        formatted_row = " | ".join(
            f"{str(item):<{width}}" for item, width in zip(row, col_widths)
        )
        print(formatted_row)
data = [
["Name", "Age", "City"],
["Alice", "25", "New York"],
["Bob", "30", "London"],
["Charlie", "35", "Paris"]
]
format_table(data)

9. 性能提示

# 1. Avoid building a large string with repeated + / +=
# Slower: each += may create a brand-new string object
result = ""
for i in range(10000):
    result += str(i)
# Better: feed a generator expression to str.join
result = "".join(str(i) for i in range(10000))
# 2. Use the `in` operator to test for a substring
large_string = "a long piece of text to search through"  # sample data for the demo
if "search" in large_string:  # fast
    pass
# 3. Strings are immutable
s = "hello"
s2 = s.upper()  # creates a new string; the original is unchanged
print(s)        # "hello"
print(s2)       # "HELLO"

总结

字符串是不可变序列
支持丰富的内置方法
f-string是最推荐的格式化方式
使用join()拼接大量字符串更高效
注意编码问题，特别是处理多语言文本时

C++ 字符串全面解析

C++ 提供了多种处理字符串的方式，包括 C 风格字符串和现代 C++ 的 std::string 类。以下是详细指南：

1. 字符串类型和创建

#include <iostream>
  #include <string>
    #include <cstring>  // C 风格字符串
      #include <sstream>  // 字符串流
        #include <format>   // C++20 格式化
          int main() {
          // 1. C 风格字符串（字符数组）
          char cstr1[] = "Hello";          // 自动添加 \0
          char cstr2[10] = "World";        // 预留空间
          const char* cstr3 = "Hello";     // 只读，存储在常量区
          // 2. std::string（推荐）
          std::string s1 = "Hello";        // 从C字符串构造
          std::string s2("World");         // 构造函数
          std::string s3(5, 'A');          // "AAAAA"，重复字符
          std::string s4(s1);              // 拷贝构造
          // 3. 原始字符串字面量（C++11）
          std::string raw_str = R"(Line1\nLine2\tTab)";  // 不转义
          std::string path = R"(C:\Users\Name\Documents)";
          // 多行原始字符串
          std::string multiline = R"(
          First line
          Second line
          Third line
          )";
          // 4. 宽字符串
          std::wstring wstr = L"宽字符串";
          std::u16string utf16_str = u"UTF-16 字符串";
          std::u32string utf32_str = U"UTF-32 字符串";
          return 0;
          }

2. 基本操作

访问和索引

#include <iostream>
  #include <string>
    int main() {
    std::string text = "Hello C++";
    // 1. 索引访问（operator[]，不检查边界）
    std::cout << text[0] << std::endl;   // H
    std::cout << text[7] << std::endl;   // +
    // 2. at() 方法（检查边界，越界抛出异常）
    std::cout << text.at(1) << std::endl; // e
    // 3. 前/后字符访问
    std::cout << text.front() << std::endl; // H
    std::cout << text.back() << std::endl;  // +
    // 4. 获取C风格字符串
    const char* cstr = text.c_str();
    const char* data_ptr = text.data();  // C++11 起保证以 \0 结尾（与 c_str() 等价）；C++17 起另提供非 const 重载
    // 5. 字符串长度
    std::cout << "Length: " << text.length() << std::endl;     // 9
    std::cout << "Size: " << text.size() << std::endl;         // 9
    std::cout << "Capacity: " << text.capacity() << std::endl; // 容量
    std::cout << "Empty? " << text.empty() << std::endl;       // 是否为空
    return 0;
    }

切片（子串）

#include <iostream>
  #include <string>
    int main() {
    std::string text = "Hello World";
    // 1. substr() 获取子串
    std::string sub1 = text.substr(0, 5);   // Hello (位置0，长度5)
    std::string sub2 = text.substr(6);      // World (位置6到末尾)
    std::string sub3 = text.substr(6, 5);   // World
    // 2. 复制子串到字符数组
    char buffer[20];
    text.copy(buffer, 5, 0);  // 从位置0复制5个字符
    buffer[5] = '\0';         // 手动添加结束符
    std::cout << buffer << std::endl;  // Hello
    return 0;
    }

拼接和重复

#include <iostream>
  #include <string>
    int main() {
    // 1. 使用 + 运算符
    std::string s1 = "Hello";
    std::string s2 = "World";
    std::string s3 = s1 + " " + s2;  // Hello World
    // 2. 使用 += 运算符
    std::string result = "Hello";
    result += " ";
    result += s2;  // Hello World
    // 3. 使用 append()
    std::string s4 = "Hello";
    s4.append(" ").append("World");  // Hello World
    s4.append(3, '!');               // Hello World!!!
    // 4. 使用 push_back() 添加单个字符
    s4.push_back('?');
    // 5. 字符串重复（std::string 没有 repeat() 成员函数，需要手动循环拼接）
    std::string repeated;
    for (int i = 0; i < 3; ++i) {
    repeated += "Hi ";
    }
    // 提示：重复单个字符可直接用构造函数，如 std::string(3, '!')；重复子串仍需循环拼接
    return 0;
    }

3. 字符串方法

查找和替换

#include <iostream>
  #include <string>
    #include <algorithm>
      int main() {
      std::string text = "C++ is powerful. C++ is fast.";
      // 1. 查找
      size_t pos1 = text.find("C++");        // 0
      size_t pos2 = text.find("C++", 1);     // 17（从位置1开始找）
      size_t pos3 = text.find("Java");       // std::string::npos
      // 2. 反向查找
      size_t rpos = text.rfind("C++");       // 17（从后往前）
      // 3. 查找任意字符
      size_t pos4 = text.find_first_of("+-*/");  // 1（找到+）
      size_t pos5 = text.find_last_of(".");      // 28（找到.）
      // 4. 检查是否包含（C++23）
      // bool contains = text.contains("C++");
      // 5. 替换
      std::string text2 = text;
      text2.replace(0, 3, "Rust");          // 从0开始替换3个字符
      // Rust is powerful. C++ is fast.
      // 替换所有出现的子串
      std::string text3 = text;
      size_t start_pos = 0;
      while ((start_pos = text3.find("C++", start_pos)) != std::string::npos) {
      text3.replace(start_pos, 3, "Go");
      start_pos += 2; // 移动到替换后位置
      }
      // 6. 计数
      int count = 0;
      size_t pos = 0;
      while ((pos = text.find("C++", pos)) != std::string::npos) {
      ++count;
      pos += 3; // 跳过找到的字符串长度
      }
      std::cout << "Count: " << count << std::endl;  // 2
      return 0;
      }

大小写转换

#include <iostream>
  #include <string>
    #include <algorithm>
      #include <cctype>
        int main() {
        std::string text = "Hello World";
        // 1. 转换为大写
        std::string upper = text;
        std::transform(upper.begin(), upper.end(), upper.begin(), ::toupper);
        std::cout << upper << std::endl;  // HELLO WORLD
        // 2. 转换为小写
        std::string lower = text;
        std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower);
        std::cout << lower << std::endl;  // hello world
        // 3. 手动实现首字母大写、其余小写（类似 Python 的 capitalize()）
        std::string capitalized = text;
        if (!capitalized.empty()) {
        capitalized[0] = std::toupper(capitalized[0]);
        for (size_t i = 1; i < capitalized.length(); ++i) {
        capitalized[i] = std::tolower(capitalized[i]);
        }
        }
        std::cout << capitalized << std::endl;  // Hello world
        return 0;
        }

空白字符处理

#include <iostream>
  #include <string>
    #include <algorithm>
      #include <cctype>
        // 修剪字符串辅助函数
        std::string trim_left(const std::string& str) {
        auto it = std::find_if(str.begin(), str.end(),
        [](unsigned char ch) { return !std::isspace(ch); });
        return std::string(it, str.end());
        }
        std::string trim_right(const std::string& str) {
        auto it = std::find_if(str.rbegin(), str.rend(),
        [](unsigned char ch) { return !std::isspace(ch); });
        return std::string(str.begin(), it.base());
        }
        std::string trim(const std::string& str) {
        return trim_right(trim_left(str));
        }
        int main() {
        std::string text = "  Hello World  \n\t";
        // 使用辅助函数
        std::cout << "Trim left: '" << trim_left(text) << "'" << std::endl;
        std::cout << "Trim right: '" << trim_right(text) << "'" << std::endl;
        std::cout << "Trim both: '" << trim(text) << "'" << std::endl;
        return 0;
        }

分割和连接

#include <iostream>
  #include <string>
    #include <vector>
      #include <sstream>
        #include <algorithm>
          #include <iterator>
            // 分割字符串
            std::vector<std::string> split(const std::string& str, char delimiter) {
              std::vector<std::string> tokens;
                std::string token;
                std::istringstream tokenStream(str);
                while (std::getline(tokenStream, token, delimiter)) {
                tokens.push_back(token);
                }
                return tokens;
                }
                // 使用 find 分割
                std::vector<std::string> split_find(const std::string& str,
                  const std::string& delimiter) {
                  std::vector<std::string> tokens;
                    size_t start = 0, end = 0;
                    while ((end = str.find(delimiter, start)) != std::string::npos) {
                    tokens.push_back(str.substr(start, end - start));
                    start = end + delimiter.length();
                    }
                    tokens.push_back(str.substr(start));
                    return tokens;
                    }
                    int main() {
                    std::string csv = "apple,banana,orange";
                    // 1. 分割
                    std::vector<std::string> fruits = split(csv, ',');
                      for (const auto& fruit : fruits) {
                      std::cout << fruit << std::endl;
                      }
                      // 2. 连接
                      std::vector<std::string> words = {"C++", "is", "awesome"};
                        // 方法1：使用循环
                        std::string result1;
                        for (size_t i = 0; i < words.size(); ++i) {
                        result1 += words[i];
                        if (i != words.size() - 1) {
                        result1 += " ";
                        }
                        }
                        // 方法2：使用 stringstream（推荐）
                        std::ostringstream oss;
                        for (size_t i = 0; i < words.size(); ++i) {
                        if (i != 0) oss << " ";
                        oss << words[i];
                        }
                        std::string result2 = oss.str();
                        // 方法3：自定义连接函数
                        auto join = [](const std::vector<std::string>& vec,
                          const std::string& delimiter) {
                          if (vec.empty()) return std::string();
                          std::string result = vec[0];
                          for (size_t i = 1; i < vec.size(); ++i) {
                          result += delimiter + vec[i];
                          }
                          return result;
                          };
                          std::string result3 = join(words, " - ");
                          std::cout << result3 << std::endl;  // C++ - is - awesome
                          return 0;
                          }

4. 字符串格式化

std::format（C++20 推荐）

#include <iostream>
#include <string>
#include <format>

// Demonstrates std::format (C++20): plain and positional placeholders,
// numeric precision/width, hexadecimal output, and alignment with fill.
int main() {
    std::string name = "Alice";
    int age = 25;
    double height = 1.68;
    // 1. Basic formatting
    std::string s1 = std::format("My name is {}", name);
    std::cout << s1 << std::endl;  // My name is Alice
    // 2. Positional arguments
    std::string s2 = std::format("{1} comes after {0}", "first", "second");
    std::cout << s2 << std::endl;  // second comes after first
    // 3. Number formatting
    std::string s3 = std::format("Height: {:.2f}m", height);  // Height: 1.68m
    std::string s4 = std::format("Age: {:03d}", age);         // Age: 025
    std::string s5 = std::format("Hex: 0x{:x}", 255);         // Hex: 0xff
    // Unlike Python's str.format, std::format has no '%' presentation type;
    // "{:.1%}" is rejected by the compile-time format check. Scale the value
    // and write the percent sign as literal text instead.
    std::string s6 = std::format("Percent: {:.1f}%", 0.75 * 100);  // Percent: 75.0%
    // 4. Alignment and fill
    std::string s7 = std::format("{:<10}", "left");     // left-aligned
    std::string s8 = std::format("{:>10}", "right");    // right-aligned
    std::string s9 = std::format("{:^10}", "center");   // centered
    std::string s10 = std::format("{:*^10}", "center"); // centered, padded with '*'
    return 0;
}

std::stringstream（C++98 及以后）

#include <iostream>
  #include <string>
    #include <sstream>
      #include <iomanip>
        int main() {
        std::string name = "Bob";
        int age = 30;
        double salary = 12345.67;
        // 1. 基本使用
        std::ostringstream oss;
        oss << "Name: " << name << ", Age: " << age;
        std::string result = oss.str();
        std::cout << result << std::endl;
        // 2. 格式化数字
        std::ostringstream oss2;
        oss2 << std::fixed << std::setprecision(2)
        << "Salary: $" << salary;
        std::cout << oss2.str() << std::endl;  // Salary: $12345.67
        // 3. 填充和对齐
        std::ostringstream oss3;
        oss3 << std::left << std::setw(10) << "Name"
        << std::right << std::setw(8) << "Age" << std::endl
        << std::left << std::setw(10) << "Alice"
        << std::right << std::setw(8) << 25 << std::endl
        << std::left << std::setw(10) << "Bob"
        << std::right << std::setw(8) << 30;
        std::cout << oss3.str() << std::endl;
        return 0;
        }

sprintf（C 风格，需要谨慎使用）

#include <iostream>
  #include <cstdio>
    #include <string>
      int main() {
      char buffer[100];
      const char* name = "Charlie";
      int age = 35;
      // 使用 sprintf（注意缓冲区溢出风险！）
      std::sprintf(buffer, "Name: %s, Age: %d", name, age);
      std::string result = buffer;
      std::cout << result << std::endl;
      // C++11 更安全的 snprintf
      std::snprintf(buffer, sizeof(buffer),
      "Name: %s, Age: %03d", name, age);
      std::cout << buffer << std::endl;
      return 0;
      }

5. 字符串比较和查找

#include <iostream>
  #include <string>
    #include <algorithm>
      #include <cstring>
        int main() {
        std::string s1 = "Hello";
        std::string s2 = "World";
        std::string s3 = "hello";
        // 1. 比较运算符
        std::cout << std::boolalpha;
        std::cout << (s1 == "Hello") << std::endl;   // true
        std::cout << (s1 != s2) << std::endl;        // true
        std::cout << (s1 < s2) << std::endl;         // true (字典序)
        std::cout << (s1 > s2) << std::endl;         // false
          // 2. compare() 方法
          int result = s1.compare(s2);
          if (result < 0) {
          std::cout << s1 << " < " << s2 << std::endl;
          } else if (result > 0) {
          std::cout << s1 << " > " << s2 << std::endl;
            } else {
            std::cout << s1 << " == " << s2 << std::endl;
            }
            // 3. 大小写不敏感比较
            auto case_insensitive_compare = [](const std::string& a,
            const std::string& b) {
            if (a.length() != b.length()) return false;
            for (size_t i = 0; i < a.length(); ++i) {
            if (std::tolower(a[i]) != std::tolower(b[i])) {
            return false;
            }
            }
            return true;
            };
            std::cout << "Case insensitive compare: "
            << case_insensitive_compare(s1, s3) << std::endl;  // true
            // 4. C风格字符串比较
            const char* cstr1 = "Hello";
            const char* cstr2 = "World";
            int cmp = std::strcmp(cstr1, cstr2);
            std::cout << "strcmp result: " << cmp << std::endl;
            return 0;
            }

6. 字符串修改和操作

#include <iostream>
  #include <string>
    #include <algorithm>
      int main() {
      // 1. 插入
      std::string text = "Hello World";
      text.insert(5, " C++");      // Hello C++ World
      text.insert(text.end(), '!'); // 末尾插入字符
      // 2. 删除
      std::string text2 = text;
      text2.erase(5, 4);           // 删除从位置5开始的4个字符
      text2.pop_back();            // 删除最后一个字符（C++11）
      // 3. 清空
      std::string text3 = text;
      text3.clear();               // 清空字符串
      std::cout << "Is empty: " << text3.empty() << std::endl;
      // 4. 调整大小
      std::string text4 = "Hello";
      text4.resize(10, '!');       // Hello!!!!!
      text4.resize(3);             // Hel
      // 5. 交换
      std::string a = "Hello";
      std::string b = "World";
      a.swap(b);                    // a="World", b="Hello"
      std::swap(a, b);              // 再交换一次（效果与成员 swap 相同），此时恢复为 a="Hello", b="World"
      // 6. 反转
      std::string text5 = "Hello";
      std::reverse(text5.begin(), text5.end());
      std::cout << text5 << std::endl;  // olleH
      // 7. 移除特定字符
      std::string text6 = "Hello, World!";
      text6.erase(std::remove(text6.begin(), text6.end(), ','),
      text6.end());
      std::cout << text6 << std::endl;  // Hello World!
      return 0;
      }

7. 字符串编码和转换

#include <iostream>
  #include <string>
    #include <locale>
      #include <codecvt>  // C++17前可用，C++17后不推荐
        #include <cstdlib>
          int main() {
          // 1. 宽窄字符串转换（C++11-C++17）
          // 注意：<codecvt> 与 std::wstring_convert 在 C++17 中被弃用，并在 C++26 中移除
          // 实际项目建议使用第三方库如iconv、ICU
          /*
          // UTF-8 转 wstring
          std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
            std::string utf8_str = "你好，世界";
            std::wstring wide_str = converter.from_bytes(utf8_str);
            // wstring 转 UTF-8
            std::string utf8_str2 = converter.to_bytes(wide_str);
            */
            // 2. 字符编码判断
            auto is_ascii = [](const std::string& str) {
            for (unsigned char c : str) {
            if (c > 127) return false;
            }
            return true;
            };
            std::cout << "Is ASCII: " << is_ascii("Hello") << std::endl;      // true
            std::cout << "Is ASCII: " << is_ascii("你好") << std::endl;        // false
            // 3. 大小写转换（考虑本地化）
            std::locale loc;
            std::string text = "Hello World";
            for (char& c : text) {
            c = std::tolower(c, loc);
            }
            std::cout << text << std::endl;  // hello world
            return 0;
            }

8. 字符串视图（C++17）

#include <iostream>
#include <string>
#include <string_view>

// Prints the length, the first five characters and the position of "World"
// for the given view. string_view is a read-only, non-owning view.
void process_string_view(std::string_view sv) {
    std::cout << "Length: " << sv.length() << std::endl;
    std::cout << "Substring: " << sv.substr(0, 5) << std::endl;
    std::cout << "Find: " << sv.find("World") << std::endl;
}

// DANGEROUS example, shown only as a warning and never called: the returned
// view refers to `temp`, which is destroyed when the function returns, so
// the view dangles. (C++ does not allow defining a function inside main(),
// hence it lives at namespace scope.)
std::string_view dangerous() {
    std::string temp = "Temporary";
    return temp;  // temp is destroyed here; the string_view dangles!
}

int main() {
    // 1. Create string_view objects from various sources
    std::string str = "Hello World";
    const char* cstr = "Hello World";
    char arr[] = "Hello World";
    std::string_view sv1(str);           // from std::string
    std::string_view sv2(cstr);          // from a C string
    std::string_view sv3(arr);           // from a char array
    std::string_view sv4("Literal");     // from a string literal
    // 2. Pass by view to avoid copying the argument
    process_string_view(str);
    process_string_view(cstr);
    process_string_view("Direct literal");
    // 3. string_view operations (similar to std::string)
    std::string_view text = "Hello World";
    std::cout << text[0] << std::endl;        // H
    std::cout << text.substr(6) << std::endl; // World
    std::cout << text.find("World") << std::endl; // 6
    // 4. Caveat: string_view does not manage lifetime.
    //    See dangerous() above for a function that returns a dangling view.
    return 0;
}

9. 正则表达式（C++11）

#include <iostream>
#include <string>
#include <regex>

// Demonstrates std::regex (C++11): searching, iterating over all matches,
// replacing with a capture group, and validating a whole string.
int main() {
    std::string text = "Email: test@example.com, Phone: 123-456-7890";
    // 1. Search
    // Inside a character class '|' is a literal character, not alternation,
    // so the top-level-domain class is written [A-Za-z].
    std::regex email_pattern(R"(\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b)");
    if (std::regex_search(text, email_pattern)) {
        std::cout << "Found email address" << std::endl;
    }
    // 2. Extract every match
    std::regex phone_pattern(R"(\d{3}-\d{3}-\d{4})");
    std::sregex_iterator begin(text.begin(), text.end(), phone_pattern);
    std::sregex_iterator end;
    for (auto it = begin; it != end; ++it) {
        std::smatch match = *it;
        std::cout << "Phone: " << match.str() << std::endl;
    }
    // 3. Replace
    // The raw literal's own R"( ... )" delimiters wrap the pattern, and the
    // prefix is a capture group so that "$1" in the replacement refers to it.
    std::regex hidden_pattern(R"((\d{3}-\d{3}-)\d{4})");
    std::string hidden = std::regex_replace(text, hidden_pattern, "$1****");
    std::cout << "Hidden: " << hidden << std::endl;  // ... Phone: 123-456-****
    // 4. Validate (regex_match requires the entire string to match)
    std::regex date_pattern(R"(\d{4}-\d{2}-\d{2})");
    std::string date = "2024-01-01";
    if (std::regex_match(date, date_pattern)) {
        std::cout << "Valid date format" << std::endl;
    }
    return 0;
}

10. 实战示例

#include <algorithm>
#include <cctype>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <vector>
              // 示例1：统计词频
              std::map<std::string, int> word_frequency(const std::string& text) {
                std::map<std::string, int> frequency;
                  std::istringstream iss(text);
                  std::string word;
                  while (iss >> word) {
                  // 移除标点并转换为小写
                  word.erase(std::remove_if(word.begin(), word.end(), ::ispunct),
                  word.end());
                  std::transform(word.begin(), word.end(), word.begin(), ::tolower);
                  if (!word.empty()) {
                  ++frequency[word];
                  }
                  }
                  return frequency;
                  }
                  // 示例2：检查回文
                  // Returns true if `str` reads the same forwards and backwards when only
                  // alphanumeric characters are considered and case is ignored.
                  bool is_palindrome(const std::string& str) {
                      std::string cleaned;
                      // Iterate as unsigned char: passing a negative char value to the
                      // <cctype> classification/conversion functions is undefined behaviour.
                      for (unsigned char ch : str) {
                          if (std::isalnum(ch)) {
                              cleaned.push_back(static_cast<char>(std::tolower(ch)));
                          }
                      }
                      // Compare the first half against the reversed string.
                      return std::equal(cleaned.begin(), cleaned.begin() + cleaned.size() / 2,
                                        cleaned.rbegin());
                  }
                  // 示例3：密码强度检查
                  std::string check_password(const std::string& password) {
                  if (password.length() < 8) {
                  return "密码太短";
                  }
                  bool has_upper = std::any_of(password.begin(), password.end(), ::isupper);
                  bool has_lower = std::any_of(password.begin(), password.end(), ::islower);
                  bool has_digit = std::any_of(password.begin(), password.end(), ::isdigit);
                  if (!has_upper) return "需要大写字母";
                  if (!has_lower) return "需要小写字母";
                  if (!has_digit) return "需要数字";
                  return "密码强度足够";
                  }
                  // 示例4：格式化表格
                  void format_table(const std::vector<std::vector<std::string>>& data) {
                    if (data.empty()) return;
                    // 计算每列最大宽度
                    std::vector<size_t> col_widths(data[0].size(), 0);
                      for (const auto& row : data) {
                      for (size_t i = 0; i < row.size(); ++i) {
                      col_widths[i] = std::max(col_widths[i], row[i].length());
                      }
                      }
                      // 打印表格
                      for (const auto& row : data) {
                      for (size_t i = 0; i < row.size(); ++i) {
                      std::cout << std::left << std::setw(col_widths[i] + 2)
                      << row[i];
                      }
                      std::cout << std::endl;
                      }
                      }
                      int main() {
                      // 测试词频统计
                      std::string text = "Hello world! Hello C++. C++ is great.";
                      auto freq = word_frequency(text);
                      for (const auto& [word, count] : freq) {
                      std::cout << word << ": " << count << std::endl;
                      }
                      // 测试回文检查
                      std::cout << "Is palindrome: "
                      << is_palindrome("A man, a plan, a canal: Panama")
                      << std::endl;
                      // 测试密码检查
                      std::cout << "Password check: "
                      << check_password("Pass1234") << std::endl;
                      // 测试表格格式化
                      std::vector<std::vector<std::string>> table = {
                        {"Name", "Age", "City"},
                        {"Alice", "25", "New York"},
                        {"Bob", "30", "London"},
                        {"Charlie", "35", "Paris"}
                        };
                        format_table(table);
                        return 0;
                        }

11. 性能提示

#include <iostream>
#include <string>
#include <chrono>
#include <sstream>
#include <string_view>

// 1. Avoid unnecessary string copies.
// C++ does not allow defining functions inside main(), so the three
// variants are declared at namespace scope.

// Less efficient: the by-value parameter is one copy and `result` is another.
std::string process_string(std::string input) {
    std::string result = input;  // copy
    result += " processed";
    return result;  // may be moved or elided (RVO/NRVO)
}

// Better: a const reference avoids copying the argument.
std::string process_string_fast(const std::string& input) {
    std::string result = input;
    result += " processed";
    return result;
}

// Alternative (C++17): string_view also accepts literals and C strings
// without first constructing a temporary std::string.
std::string process_with_view(std::string_view input) {
    std::string result(input);
    result += " processed";
    return result;
}

int main() {
    // 2. Reserve capacity up front to avoid repeated reallocation
    std::string large_string;
    large_string.reserve(10000);
    for (int i = 0; i < 10000; ++i) {
        large_string += "a";
    }
    // 3. Move semantics (C++11)
    std::string source = "Very long string...";
    std::string destination = std::move(source);  // moved, not copied
    // 4. Building a string piece by piece
    // Without reserve(), += may reallocate several times as the string grows
    std::string slow;
    for (int i = 0; i < 1000; ++i) {
        slow += "word ";
    }
    // Alternative: accumulate in a std::ostringstream and extract once
    std::ostringstream oss;
    for (int i = 0; i < 1000; ++i) {
        oss << "word ";
    }
    std::string fast = oss.str();
    return 0;
}

总结

核心要点：

优先使用 std::string 代替 C 风格字符串
C++20 推荐 std::format 进行字符串格式化
C++17 的 std::string_view 用于只读访问，避免拷贝
使用 std::stringstream 进行复杂字符串构建
注意编码问题，特别是多语言文本处理
利用算法库（<algorithm>）进行字符串操作
C++11 的正则表达式<regex> 用于模式匹配

选择指南：

简单操作：使用 std::string 成员函数
格式化：C++20 用 std::format，之前用 std::stringstream
只读访问：使用 std::string_view
模式匹配：使用 std::regex
高性能场景：考虑预分配、移动语义、避免拷贝
跨平台编码：使用 UTF-8，考虑第三方编码库

C++ 的字符串处理虽然比 Python 更底层，但通过标准库提供的丰富工具，同样可以高效、安全地处理各种字符串操作需求。

发表于 2026-01-06 21:51 jzssuanfa 阅读(6) 评论(0) 收藏举报

刷新页面返回顶部

Python 字符串全面解析 - 详解

文章目录

Python 字符串全面解析

1. 字符串创建

2. 基本操作

访问和索引

拼接和重复

3. 字符串方法

查找和替换

大小写转换

空白字符处理

分割和连接

判断方法

4. 字符串格式化

f-string（Python 3.6+ 推荐）

format() 方法

% 格式化（旧式）

5. 转义字符和原始字符串

6. 字符串编码

7. 字符串检查和处理

8. 实战示例

9. 性能提示

总结

C++ 字符串全面解析

1. 字符串类型和创建

2. 基本操作

访问和索引

切片（子串）

拼接和重复

3. 字符串方法

查找和替换

大小写转换

空白字符处理

分割和连接

4. 字符串格式化

std::format（C++20 推荐）

std::stringstream（C++98 及以后）

sprintf（C 风格，需要谨慎使用）

5. 字符串比较和查找

6. 字符串修改和操作

7. 字符串编码和转换

8. 字符串视图（C++17）

9. 正则表达式（C++11）

10. 实战示例

11. 性能提示

总结

核心要点：

选择指南：

导航