Python 字符串全面解析

1. 字符串创建

# 使用单引号、双引号、三引号
s1 = 'hello'            # 单引号
s2 = "world"            # 双引号
s3 = '''多行
字符串'''                # 三引号(可以跨行)
s4 = """也是
多行字符串"""
# 字符串构造函数
s5 = str(123)          # "123"
s6 = str(3.14)         # "3.14"
s7 = str([1, 2, 3])    # "[1, 2, 3]"

2. 基本操作

访问和索引

text = "Hello Python"
# 索引访问(从0开始)
print(text[0])      # H
print(text[6])      # P
# 负索引(从-1开始,反向)
print(text[-1])     # n
print(text[-6])     # P
# 切片 [start:end:step]
print(text[0:5])    # Hello
print(text[6:])     # Python
print(text[:5])     # Hello
print(text[::2])    # HloPto(步长为2)
print(text[::-1])   # nohtyP olleH(反转字符串)
# 字符串长度
print(len(text))    # 12

拼接和重复

# 拼接
s1 = "Hello" + " " + "World"  # Hello World
s2 = "Hi " * 3                # Hi Hi Hi 
# 使用join方法(更高效)
words = ["Python", "is", "awesome"]
s3 = " ".join(words)          # Python is awesome
s4 = "-".join(["2024", "01", "01"])  # 2024-01-01

3. 字符串方法

查找和替换

text = "Python is great. Python is powerful."
# 查找
print(text.find("Python"))     # 0(第一次出现的位置)
print(text.find("Java"))       # -1(未找到)
print(text.index("great"))     # 10(找不到会抛出异常)
print("Python" in text)        # True
# 计数
print(text.count("Python"))    # 2
# 替换
new_text = text.replace("Python", "Java", 1)  # 只替换第一个
print(new_text)  # "Java is great. Python is powerful."

大小写转换

text = "Hello World"
print(text.upper())       # HELLO WORLD
print(text.lower())       # hello world
print(text.title())       # Hello World
print(text.capitalize())  # Hello world
print(text.swapcase())    # hELLO wORLD

空白字符处理

text = "  Hello World  \n"
print(text.strip())       # "Hello World"(移除两端空白)
print(text.lstrip())      # "Hello World  \n"(移除左端)
print(text.rstrip())      # "  Hello World"(移除右端)
print("  test  ".strip()) # "test"

分割和连接

# 分割
csv = "apple,banana,orange"
print(csv.split(","))      # ['apple', 'banana', 'orange']
sentence = "Python is fun"
print(sentence.split())    # ['Python', 'is', 'fun'](默认按空白分割)
print(sentence.split(" ", 1))  # ['Python', 'is fun'](只分割一次)
# 按行分割
multi_line = "line1\nline2\nline3"
print(multi_line.splitlines())  # ['line1', 'line2', 'line3']
# 分区(partition)
text = "Python:3.10:release"
print(text.partition(":"))  # ('Python', ':', '3.10:release')

判断方法

s1 = "hello123"
s2 = "HELLO"
s3 = "123"
s4 = "  "
s5 = "Hello World"
print(s1.isalnum())    # True(字母或数字)
print(s1.isalpha())    # False(纯字母)
print(s3.isdigit())    # True(纯数字)
print(s2.isupper())    # True(全大写)
print(s1.islower())    # True(全小写)
print(s4.isspace())    # True(全空白字符)
print(s5.istitle())    # True(标题格式,每个单词首字母大写)
print(s1.startswith("he"))  # True
print(s1.endswith("123"))   # True

4. 字符串格式化

f-string(Python 3.6+ 推荐)

name = "Alice"
age = 25
height = 1.68
# 基础用法
print(f"My name is {name}")  # My name is Alice
# 表达式
print(f"I'm {age + 1} years old")  # I'm 26 years old
# 格式化数字
print(f"Height: {height:.2f}m")    # Height: 1.68m
print(f"Age: {age:03d}")           # Age: 025
# 字典值
person = {"name": "Bob", "age": 30}
print(f"{person['name']} is {person['age']}")  # Bob is 30

format() 方法

# 位置参数
print("{} is {} years old".format("Alice", 25))  # Alice is 25 years old
print("{1} comes after {0}".format("first", "second"))  # second comes after first
# 关键字参数
print("{name} is {age} years old".format(name="Bob", age=30))
# 格式化规范
print("PI: {:.2f}".format(3.14159))      # PI: 3.14
print("Number: {:05d}".format(42))       # Number: 00042
print("Percent: {:.1%}".format(0.75))    # Percent: 75.0%
# 对齐
print("{:<10}".format("left"))      # 左对齐
print("{:>10}".format("right"))     # 右对齐
print("{:^10}".format("center"))    # 居中对齐

% 格式化(旧式)

name = "Charlie"
age = 35
print("Name: %s, Age: %d" % (name, age))       # Name: Charlie, Age: 35
print("Float: %.3f" % 3.14159)                 # Float: 3.142
print("Hex: 0x%x" % 255)                       # Hex: 0xff

5. 转义字符和原始字符串

# 常见转义字符
print("Line1\nLine2")      # 换行
print("Tab\tseparated")    # 制表符
print("Backslash: \\")     # 反斜杠
print("Quote: \'\"")       # 单引号和双引号
# 原始字符串(不转义)
path = r"C:\Users\Name\Documents"  # 常用于Windows路径
regex = r"\d+\s+\w+"               # 常用于正则表达式
print(path)  # C:\Users\Name\Documents
# 多行字符串(自动包含换行符)
multiline = """第一行
第二行
第三行"""

6. 字符串编码

# 编码转换
text = "你好,世界"
# 编码为字节
utf8_bytes = text.encode("utf-8")    # b'\xe4\xbd\xa0\xe5\xa5\xbd,\xe4\xb8\x96\xe7\x95\x8c'
gbk_bytes = text.encode("gbk")       # b'\xc4\xe3\xba\xc3,\xca\xc0\xbd\xe7'
# 解码为字符串
decoded_text = utf8_bytes.decode("utf-8")  # 你好,世界
# 字符和Unicode码点
print(ord("A"))    # 65(字符→Unicode)
print(chr(65))     # A(Unicode→字符)
print("\u4f60\u597d")  # 你好(Unicode转义)

7. 字符串检查和处理

# 移除前缀/后缀(Python 3.9+)
url = "https://example.com"
print(url.removeprefix("https://"))  # example.com
filename = "document.txt"
print(filename.removesuffix(".txt"))  # document
# 填充
text = "Hello"
print(text.ljust(10, "-"))  # Hello-----(左对齐填充)
print(text.rjust(10, "*"))  # *****Hello(右对齐填充)
print(text.center(11, "="))  # ===Hello===(居中对齐)
# 展开制表符
print("a\tb\tc".expandtabs(4))  # a   b   c(每个制表符补空格到下一个 4 的倍数列,而不是固定替换为 4 个空格)

8. 实战示例

# 示例1:统计词频
def word_frequency(text):
    """Count how often each word occurs in ``text``.

    The text is lower-cased and split on whitespace. Leading and trailing
    characters from the set ``.,!?;:`` are stripped from every token, and
    tokens that end up empty are ignored.

    Args:
        text: The string to analyse.

    Returns:
        A dict mapping each word to its number of occurrences, in order of
        first appearance.
    """
    frequency = {}
    for word in text.lower().split():
        # Only surrounding punctuation is removed; inner characters are kept.
        word = word.strip(".,!?;:")
        if word:
            frequency[word] = frequency.get(word, 0) + 1
    return frequency
text = "Hello world! Hello Python. Python is great."
print(word_frequency(text))
# {'hello': 2, 'world': 1, 'python': 2, 'is': 1, 'great': 1}
# 示例2:检查回文
def is_palindrome(s):
    """Return True if ``s`` reads the same forwards and backwards.

    Only alphanumeric characters are compared, and the comparison ignores
    case, so spaces and punctuation do not matter.

    Args:
        s: The string to check.

    Returns:
        True when the normalized string equals its own reverse (an empty
        normalized string counts as a palindrome), otherwise False.
    """
    normalized = "".join(c.lower() for c in s if c.isalnum())
    return normalized == normalized[::-1]
print(is_palindrome("A man, a plan, a canal: Panama"))  # True
# 示例3:密码强度检查
def check_password(password):
    """Run a simple strength check on ``password``.

    The checks run in this order and the first failure wins: minimum length
    of 8, at least one uppercase letter, at least one lowercase letter, at
    least one digit.

    Args:
        password: The candidate password string.

    Returns:
        A message naming the first failed requirement, or the "strong
        enough" message when every check passes.
    """
    if len(password) < 8:
        return "密码太短"
    if not any(c.isupper() for c in password):
        return "需要大写字母"
    if not any(c.islower() for c in password):
        return "需要小写字母"
    if not any(c.isdigit() for c in password):
        return "需要数字"
    return "密码强度足够"
print(check_password("Pass1234"))  # 密码强度足够
# 示例4:格式化表格数据
def format_table(data):
    """Print ``data`` as a left-aligned text table.

    Each column is padded to the width of its longest cell and cells are
    separated by ``" | "``. Cells are converted with ``str()``.

    Args:
        data: A sequence of rows, each row a sequence of cells. Columns are
            formed with ``zip``, so cells beyond the shortest row's length
            are not printed.

    Returns:
        None. One line per row is written to standard output.
    """
    # Widest cell of every column.
    col_widths = [max(len(str(item)) for item in col) for col in zip(*data)]
    for row in data:
        formatted_row = " | ".join(
            f"{str(item):<{width}}" for item, width in zip(row, col_widths)
        )
        print(formatted_row)
data = [
["Name", "Age", "City"],
["Alice", "25", "New York"],
["Bob", "30", "London"],
["Charlie", "35", "Paris"]
]
format_table(data)

9. 性能提示

# 1. Avoid building a large string with repeated "+" / "+="
# Slow: each += may allocate a brand-new string
result = ""
for i in range(10000):
    result += str(i)
# Better: feed a generator expression to join
result = "".join(str(i) for i in range(10000))
# 2. Use the `in` operator to test for a substring
large_string = "some long text to search through"  # sample data for the demo
if "search" in large_string:  # fast
    pass
# 3. Strings are immutable
s = "hello"
s2 = s.upper()  # creates a new string; the original is unchanged
print(s)        # "hello"
print(s2)       # "HELLO"

总结

  • 字符串是不可变序列
  • 支持丰富的内置方法
  • f-string是最推荐的格式化方式
  • 使用join()拼接大量字符串更高效
  • 注意编码问题,特别是处理多语言文本时.


C++ 字符串全面解析

C++ 提供了多种处理字符串的方式,包括 C 风格字符串和现代 C++ 的 std::string 类。以下是详细指南:

1. 字符串类型和创建

#include <iostream>
  #include <string>
    #include <cstring>  // C 风格字符串
      #include <sstream>  // 字符串流
        #include <format>   // C++20 格式化
          int main() {
          // 1. C 风格字符串(字符数组)
          char cstr1[] = "Hello";          // 自动添加 \0
          char cstr2[10] = "World";        // 预留空间
          const char* cstr3 = "Hello";     // 只读,存储在常量区
          // 2. std::string(推荐)
          std::string s1 = "Hello";        // 从C字符串构造
          std::string s2("World");         // 构造函数
          std::string s3(5, 'A');          // "AAAAA",重复字符
          std::string s4(s1);              // 拷贝构造
          // 3. 原始字符串字面量(C++11)
          std::string raw_str = R"(Line1\nLine2\tTab)";  // 不转义
          std::string path = R"(C:\Users\Name\Documents)";
          // 多行原始字符串
          std::string multiline = R"(
          First line
          Second line
          Third line
          )";
          // 4. 宽字符串
          std::wstring wstr = L"宽字符串";
          std::u16string utf16_str = u"UTF-16 字符串";
          std::u32string utf32_str = U"UTF-32 字符串";
          return 0;
          }

2. 基本操作

访问和索引

#include <iostream>
  #include <string>
    int main() {
    std::string text = "Hello C++";
    // 1. 索引访问(operator[],不检查边界)
    std::cout << text[0] << std::endl;   // H
    std::cout << text[7] << std::endl;   // +
    // 2. at() 方法(检查边界,越界抛出异常)
    std::cout << text.at(1) << std::endl; // e
    // 3. 前/后字符访问
    std::cout << text.front() << std::endl; // H
    std::cout << text.back() << std::endl;  // +
    // 4. 获取C风格字符串
    const char* cstr = text.c_str();
    const char* data_ptr = text.data();  // C++11 起保证以 \0 结尾;C++17 起新增非 const 重载
    // 5. 字符串长度
    std::cout << "Length: " << text.length() << std::endl;     // 9
    std::cout << "Size: " << text.size() << std::endl;         // 9
    std::cout << "Capacity: " << text.capacity() << std::endl; // 容量
    std::cout << "Empty? " << text.empty() << std::endl;       // 是否为空
    return 0;
    }

切片(子串)

#include <iostream>
  #include <string>
    int main() {
    std::string text = "Hello World";
    // 1. substr() 获取子串
    std::string sub1 = text.substr(0, 5);   // Hello (位置0,长度5)
    std::string sub2 = text.substr(6);      // World (位置6到末尾)
    std::string sub3 = text.substr(6, 5);   // World
    // 2. 复制子串到字符数组
    char buffer[20];
    text.copy(buffer, 5, 0);  // 从位置0复制5个字符
    buffer[5] = '\0';         // 手动添加结束符
    std::cout << buffer << std::endl;  // Hello
    return 0;
    }

拼接和重复

#include <iostream>
  #include <string>
    int main() {
    // 1. 使用 + 运算符
    std::string s1 = "Hello";
    std::string s2 = "World";
    std::string s3 = s1 + " " + s2;  // Hello World
    // 2. 使用 += 运算符
    std::string result = "Hello";
    result += " ";
    result += s2;  // Hello World
    // 3. 使用 append()
    std::string s4 = "Hello";
    s4.append(" ").append("World");  // Hello World
    s4.append(3, '!');               // Hello World!!!
    // 4. 使用 push_back() 添加单个字符
    s4.push_back('?');
    // 5. 字符串重复(标准库没有 repeat() 成员函数,需要手动循环)
    std::string repeated;
    for (int i = 0; i < 3; ++i) {
    repeated += "Hi ";
    }
    // 注意:std::string(3, 'x') 只能重复单个字符;包括 C++23 在内,标准库都没有重复整个字符串的成员函数
    return 0;
    }

3. 字符串方法

查找和替换

#include <iostream>
  #include <string>
    #include <algorithm>
      int main() {
      std::string text = "C++ is powerful. C++ is fast.";
      // 1. 查找
      size_t pos1 = text.find("C++");        // 0
      size_t pos2 = text.find("C++", 1);     // 17(从位置1开始找)
      size_t pos3 = text.find("Java");       // std::string::npos
      // 2. 反向查找
      size_t rpos = text.rfind("C++");       // 17(从后往前)
      // 3. 查找任意字符
      size_t pos4 = text.find_first_of("+-*/");  // 1(找到+)
      size_t pos5 = text.find_last_of(".");      // 28(找到.)
      // 4. 检查是否包含(C++23)
      // bool contains = text.contains("C++");
      // 5. 替换
      std::string text2 = text;
      text2.replace(0, 3, "Rust");          // 从0开始替换3个字符
      // Rust is powerful. C++ is fast.
      // 替换所有出现的子串
      std::string text3 = text;
      size_t start_pos = 0;
      while ((start_pos = text3.find("C++", start_pos)) != std::string::npos) {
      text3.replace(start_pos, 3, "Go");
      start_pos += 2; // 移动到替换后位置
      }
      // 6. 计数
      int count = 0;
      size_t pos = 0;
      while ((pos = text.find("C++", pos)) != std::string::npos) {
      ++count;
      pos += 3; // 跳过找到的字符串长度
      }
      std::cout << "Count: " << count << std::endl;  // 2
      return 0;
      }

大小写转换

#include <iostream>
  #include <string>
    #include <algorithm>
      #include <cctype>
        int main() {
        std::string text = "Hello World";
        // 1. 转换为大写
        std::string upper = text;
        std::transform(upper.begin(), upper.end(), upper.begin(), ::toupper);
        std::cout << upper << std::endl;  // HELLO WORLD
        // 2. 转换为小写
        std::string lower = text;
        std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower);
        std::cout << lower << std::endl;  // hello world
        // 3. 首字母大写、其余小写(手动循环实现)
        std::string capitalized = text;
        if (!capitalized.empty()) {
        capitalized[0] = std::toupper(capitalized[0]);
        for (size_t i = 1; i < capitalized.length(); ++i) {
        capitalized[i] = std::tolower(capitalized[i]);
        }
        }
        std::cout << capitalized << std::endl;  // Hello world
        return 0;
        }

空白字符处理

#include <iostream>
  #include <string>
    #include <algorithm>
      #include <cctype>
        // 修剪字符串辅助函数
        // Returns a copy of `str` without its leading whitespace
        // (as classified by std::isspace).
        std::string trim_left(const std::string& str) {
            std::string::size_type first = 0;
            // Cast to unsigned char: std::isspace is undefined for negative values.
            while (first < str.size() &&
                   std::isspace(static_cast<unsigned char>(str[first]))) {
                ++first;
            }
            return str.substr(first);
        }
        // Returns a copy of `str` without its trailing whitespace
        // (as classified by std::isspace).
        std::string trim_right(const std::string& str) {
            std::string::size_type last = str.size();
            // Walk back from the end until a non-space character is found.
            while (last > 0 &&
                   std::isspace(static_cast<unsigned char>(str[last - 1]))) {
                --last;
            }
            return str.substr(0, last);
        }
        // Returns a copy of `str` with whitespace removed from both ends.
        std::string trim(const std::string& str) {
            const std::string without_leading = trim_left(str);
            return trim_right(without_leading);
        }
        int main() {
        std::string text = "  Hello World  \n\t";
        // 使用辅助函数
        std::cout << "Trim left: '" << trim_left(text) << "'" << std::endl;
        std::cout << "Trim right: '" << trim_right(text) << "'" << std::endl;
        std::cout << "Trim both: '" << trim(text) << "'" << std::endl;
        return 0;
        }

分割和连接

#include <iostream>
  #include <string>
    #include <vector>
      #include <sstream>
        #include <algorithm>
          #include <iterator>
            // 分割字符串
            // Splits `str` at every `delimiter` character using std::getline.
            // Returns the pieces in order; an empty input yields an empty vector.
            std::vector<std::string> split(const std::string& str, char delimiter) {
                std::istringstream stream(str);
                std::vector<std::string> parts;
                for (std::string piece; std::getline(stream, piece, delimiter);) {
                    parts.push_back(piece);
                }
                return parts;
            }
                // 使用 find 分割
                // Splits `str` at every occurrence of the substring `delimiter`
                // using std::string::find.
                //
                // Returns the pieces in order, including empty pieces between
                // adjacent delimiters and after a trailing delimiter. When
                // `delimiter` is empty the whole input is returned as a single
                // piece.
                std::vector<std::string> split_find(const std::string& str,
                                                    const std::string& delimiter) {
                    std::vector<std::string> tokens;
                    // find("") matches at `start` without ever advancing, which
                    // would loop forever, so an empty delimiter means "no split".
                    if (delimiter.empty()) {
                        tokens.push_back(str);
                        return tokens;
                    }
                    size_t start = 0;
                    size_t end = 0;
                    while ((end = str.find(delimiter, start)) != std::string::npos) {
                        tokens.push_back(str.substr(start, end - start));
                        start = end + delimiter.length();  // resume after the match
                    }
                    tokens.push_back(str.substr(start));  // tail after the last delimiter
                    return tokens;
                }
                    int main() {
                    std::string csv = "apple,banana,orange";
                    // 1. 分割
                    std::vector<std::string> fruits = split(csv, ',');
                      for (const auto& fruit : fruits) {
                      std::cout << fruit << std::endl;
                      }
                      // 2. 连接
                      std::vector<std::string> words = {"C++", "is", "awesome"};
                        // 方法1:使用循环
                        std::string result1;
                        for (size_t i = 0; i < words.size(); ++i) {
                        result1 += words[i];
                        if (i != words.size() - 1) {
                        result1 += " ";
                        }
                        }
                        // 方法2:使用 stringstream(推荐)
                        std::ostringstream oss;
                        for (size_t i = 0; i < words.size(); ++i) {
                        if (i != 0) oss << " ";
                        oss << words[i];
                        }
                        std::string result2 = oss.str();
                        // 方法3:自定义连接函数
                        auto join = [](const std::vector<std::string>& vec,
                          const std::string& delimiter) {
                          if (vec.empty()) return std::string();
                          std::string result = vec[0];
                          for (size_t i = 1; i < vec.size(); ++i) {
                          result += delimiter + vec[i];
                          }
                          return result;
                          };
                          std::string result3 = join(words, " - ");
                          std::cout << result3 << std::endl;  // C++ - is - awesome
                          return 0;
                          }

4. 字符串格式化

std::format(C++20 推荐)

#include <iostream>
  #include <string>
    #include <format>
      // Demonstrates std::format (C++20): plain substitution, positional
      // arguments, numeric format specs, and alignment/fill.
      int main() {
          std::string name = "Alice";
          int age = 25;
          double height = 1.68;
          // 1. Basic formatting
          std::string s1 = std::format("My name is {}", name);
          std::cout << s1 << std::endl;  // My name is Alice
          // 2. Positional arguments
          std::string s2 = std::format("{1} comes after {0}", "first", "second");
          std::cout << s2 << std::endl;  // second comes after first
          // 3. Numeric formatting
          std::string s3 = std::format("Height: {:.2f}m", height);  // Height: 1.68m
          std::string s4 = std::format("Age: {:03d}", age);         // Age: 025
          std::string s5 = std::format("Hex: 0x{:x}", 255);         // Hex: 0xff
          // Unlike Python's str.format, std::format has no '%' presentation
          // type, so scale the value and append the percent sign by hand.
          std::string s6 = std::format("Percent: {:.1f}%", 0.75 * 100);  // Percent: 75.0%
          // 4. Alignment and fill
          std::string s7 = std::format("{:<10}", "left");     // left-aligned
          std::string s8 = std::format("{:>10}", "right");    // right-aligned
          std::string s9 = std::format("{:^10}", "center");   // centered
          std::string s10 = std::format("{:*^10}", "center"); // centered, filled with '*'
          return 0;
      }

std::stringstream(C++98 及以后)

#include <iostream>
  #include <string>
    #include <sstream>
      #include <iomanip>
        int main() {
        std::string name = "Bob";
        int age = 30;
        double salary = 12345.67;
        // 1. 基本使用
        std::ostringstream oss;
        oss << "Name: " << name << ", Age: " << age;
        std::string result = oss.str();
        std::cout << result << std::endl;
        // 2. 格式化数字
        std::ostringstream oss2;
        oss2 << std::fixed << std::setprecision(2)
        << "Salary: $" << salary;
        std::cout << oss2.str() << std::endl;  // Salary: $12345.67
        // 3. 填充和对齐
        std::ostringstream oss3;
        oss3 << std::left << std::setw(10) << "Name"
        << std::right << std::setw(8) << "Age" << std::endl
        << std::left << std::setw(10) << "Alice"
        << std::right << std::setw(8) << 25 << std::endl
        << std::left << std::setw(10) << "Bob"
        << std::right << std::setw(8) << 30;
        std::cout << oss3.str() << std::endl;
        return 0;
        }

sprintf(C 风格,需要谨慎使用)

#include <iostream>
  #include <cstdio>
    #include <string>
      int main() {
      char buffer[100];
      const char* name = "Charlie";
      int age = 35;
      // 使用 sprintf(注意缓冲区溢出风险!)
      std::sprintf(buffer, "Name: %s, Age: %d", name, age);
      std::string result = buffer;
      std::cout << result << std::endl;
      // C++11 更安全的 snprintf
      std::snprintf(buffer, sizeof(buffer),
      "Name: %s, Age: %03d", name, age);
      std::cout << buffer << std::endl;
      return 0;
      }

5. 字符串比较和查找

#include <iostream>
  #include <string>
    #include <algorithm>
      #include <cstring>
        int main() {
        std::string s1 = "Hello";
        std::string s2 = "World";
        std::string s3 = "hello";
        // 1. 比较运算符
        std::cout << std::boolalpha;
        std::cout << (s1 == "Hello") << std::endl;   // true
        std::cout << (s1 != s2) << std::endl;        // true
        std::cout << (s1 < s2) << std::endl;         // true (字典序)
        std::cout << (s1 > s2) << std::endl;         // false
          // 2. compare() 方法
          int result = s1.compare(s2);
          if (result < 0) {
          std::cout << s1 << " < " << s2 << std::endl;
          } else if (result > 0) {
          std::cout << s1 << " > " << s2 << std::endl;
            } else {
            std::cout << s1 << " == " << s2 << std::endl;
            }
            // 3. 大小写不敏感比较
            auto case_insensitive_compare = [](const std::string& a,
            const std::string& b) {
            if (a.length() != b.length()) return false;
            for (size_t i = 0; i < a.length(); ++i) {
            if (std::tolower(a[i]) != std::tolower(b[i])) {
            return false;
            }
            }
            return true;
            };
            std::cout << "Case insensitive compare: "
            << case_insensitive_compare(s1, s3) << std::endl;  // true
            // 4. C风格字符串比较
            const char* cstr1 = "Hello";
            const char* cstr2 = "World";
            int cmp = std::strcmp(cstr1, cstr2);
            std::cout << "strcmp result: " << cmp << std::endl;
            return 0;
            }

6. 字符串修改和操作

#include <iostream>
  #include <string>
    #include <algorithm>
      int main() {
      // 1. 插入
      std::string text = "Hello World";
      text.insert(5, " C++");      // Hello C++ World
      text.insert(text.end(), '!'); // 末尾插入字符
      // 2. 删除
      std::string text2 = text;
      text2.erase(5, 4);           // 删除从位置5开始的4个字符
      text2.pop_back();            // 删除最后一个字符(C++11)
      // 3. 清空
      std::string text3 = text;
      text3.clear();               // 清空字符串
      std::cout << "Is empty: " << text3.empty() << std::endl;
      // 4. 调整大小
      std::string text4 = "Hello";
      text4.resize(10, '!');       // Hello!!!!!
      text4.resize(3);             // Hel
      // 5. 交换
      std::string a = "Hello";
      std::string b = "World";
      a.swap(b);                    // a="World", b="Hello"
      std::swap(a, b);              // 同上
      // 6. 反转
      std::string text5 = "Hello";
      std::reverse(text5.begin(), text5.end());
      std::cout << text5 << std::endl;  // olleH
      // 7. 移除特定字符
      std::string text6 = "Hello, World!";
      text6.erase(std::remove(text6.begin(), text6.end(), ','),
      text6.end());
      std::cout << text6 << std::endl;  // Hello World!
      return 0;
      }

7. 字符串编码和转换

#include <iostream>
  #include <string>
    #include <locale>
      #include <codecvt>  // C++17前可用,C++17后不推荐
        #include <cstdlib>
          int main() {
          // 1. 宽窄字符串转换(C++11-C++17)
          // 注意:<codecvt> 与 std::wstring_convert 在 C++17 已弃用,并在 C++26 中移除
          // 实际项目建议使用第三方库如iconv、ICU
          /*
          // UTF-8 转 wstring
          std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
            std::string utf8_str = "你好,世界";
            std::wstring wide_str = converter.from_bytes(utf8_str);
            // wstring 转 UTF-8
            std::string utf8_str2 = converter.to_bytes(wide_str);
            */
            // 2. 字符编码判断
            auto is_ascii = [](const std::string& str) {
            for (unsigned char c : str) {
            if (c > 127) return false;
            }
            return true;
            };
            std::cout << "Is ASCII: " << is_ascii("Hello") << std::endl;      // true
            std::cout << "Is ASCII: " << is_ascii("你好") << std::endl;        // false
            // 3. 大小写转换(考虑本地化)
            std::locale loc;
            std::string text = "Hello World";
            for (char& c : text) {
            c = std::tolower(c, loc);
            }
            std::cout << text << std::endl;  // hello world
            return 0;
            }

8. 字符串视图(C++17)

#include <iostream>
  #include <string>
    #include <string_view>
      void process_string_view(std::string_view sv) {
      // string_view 是只读视图,不拥有数据
      std::cout << "Length: " << sv.length() << std::endl;
      std::cout << "Substring: " << sv.substr(0, 5) << std::endl;
      std::cout << "Find: " << sv.find("World") << std::endl;
      }
      // Counter-example only; never call this function. string_view does
      // not own its characters: the returned view refers to `temp`, which
      // is destroyed when the function returns. It is defined at namespace
      // scope because C++ does not allow a function definition inside
      // another function.
      std::string_view dangerous() {
          std::string temp = "Temporary";
          return temp;  // temp is destroyed here, so the view dangles
      }
      // Demonstrates creating std::string_view from several sources and
      // using its read-only, std::string-like operations.
      int main() {
          // 1. Creating a string_view from various sources
          std::string str = "Hello World";
          const char* cstr = "Hello World";
          char arr[] = "Hello World";
          std::string_view sv1(str);           // from std::string
          std::string_view sv2(cstr);          // from a C string
          std::string_view sv3(arr);           // from a char array
          std::string_view sv4("Literal");     // from a string literal
          // 2. Passing by string_view avoids copying the characters
          process_string_view(str);
          process_string_view(cstr);
          process_string_view("Direct literal");
          // 3. string_view operations (similar to std::string)
          std::string_view text = "Hello World";
          std::cout << text[0] << std::endl;        // H
          std::cout << text.substr(6) << std::endl; // World
          std::cout << text.find("World") << std::endl; // 6
          // 4. Caveat: string_view does not manage lifetime; see dangerous()
          //    above for a function that returns a dangling view.
          return 0;
      }

9. 正则表达式(C++11)

#include <iostream>
  #include <string>
    #include <regex>
      // Demonstrates std::regex (C++11): searching, iterating over all
      // matches, replacing with a capture group, and whole-string validation.
      int main() {
          std::string text = "Email: test@example.com, Phone: 123-456-7890";
          // 1. Search
          // The TLD class is [A-Za-z]; a '|' inside [...] would be a literal
          // pipe character, not alternation.
          std::regex email_pattern(R"(\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b)");
          if (std::regex_search(text, email_pattern)) {
              std::cout << "Found email address" << std::endl;
          }
          // 2. Extract every match
          std::regex phone_pattern(R"(\d{3}-\d{3}-\d{4})");
          std::sregex_iterator begin(text.begin(), text.end(), phone_pattern);
          std::sregex_iterator end;
          for (auto it = begin; it != end; ++it) {
              std::smatch match = *it;
              std::cout << "Phone: " << match.str() << std::endl;
          }
          // 3. Replace
          // The raw literal is R"( ... )"; the inner parentheses form capture
          // group 1 (the first two number groups), which "$1" refers to.
          std::regex hidden_pattern(R"((\d{3}-\d{3}-)\d{4})");
          std::string hidden = std::regex_replace(text, hidden_pattern, "$1****");
          std::cout << "Hidden: " << hidden << std::endl;
          // 4. Validate (regex_match requires the whole string to match)
          std::regex date_pattern(R"(\d{4}-\d{2}-\d{2})");
          std::string date = "2024-01-01";
          if (std::regex_match(date, date_pattern)) {
              std::cout << "Valid date format" << std::endl;
          }
          return 0;
      }

10. 实战示例

#include <algorithm>
#include <cctype>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <vector>
              // 示例1:统计词频
              // Counts how often each whitespace-separated word occurs in `text`.
              //
              // Each token has leading/trailing characters from ".,!?;:" removed
              // and is lower-cased; tokens that consist only of those characters
              // are skipped. Punctuation inside a token is kept, so "C++." is
              // counted as "c++" rather than being reduced to "c".
              //
              // Returns a map from word to occurrence count.
              std::map<std::string, int> word_frequency(const std::string& text) {
                  static const char* const kPunctuation = ".,!?;:";
                  std::map<std::string, int> frequency;
                  std::istringstream iss(text);
                  std::string word;
                  while (iss >> word) {
                      const std::string::size_type first = word.find_first_not_of(kPunctuation);
                      if (first == std::string::npos) {
                          continue;  // nothing but punctuation
                      }
                      const std::string::size_type last = word.find_last_not_of(kPunctuation);
                      word = word.substr(first, last - first + 1);
                      // Go through unsigned char: std::tolower is undefined for
                      // negative char values (e.g. bytes of UTF-8 text).
                      std::transform(word.begin(), word.end(), word.begin(),
                                     [](unsigned char ch) {
                                         return static_cast<char>(std::tolower(ch));
                                     });
                      ++frequency[word];
                  }
                  return frequency;
              }
                  // 示例2:检查回文
                  // Returns true if `str` reads the same forwards and backwards,
                  // comparing only alphanumeric characters and ignoring case.
                  // A string with no alphanumeric characters counts as a palindrome.
                  bool is_palindrome(const std::string& str) {
                      std::string cleaned;
                      cleaned.reserve(str.size());
                      // Iterate as unsigned char: std::isalnum / std::tolower are
                      // undefined for negative char values (non-ASCII bytes).
                      for (unsigned char ch : str) {
                          if (std::isalnum(ch)) {
                              cleaned.push_back(static_cast<char>(std::tolower(ch)));
                          }
                      }
                      // Compare the first half with the reversed second half.
                      return std::equal(cleaned.begin(), cleaned.begin() + cleaned.size() / 2,
                                        cleaned.rbegin());
                  }
                  // 示例3:密码强度检查
                  // Runs a simple strength check on `password`.
                  //
                  // Checks, in order: length of at least 8 (counted in bytes, via
                  // std::string::length), an uppercase letter, a lowercase letter,
                  // a digit. Returns a message naming the first failed requirement,
                  // or the "strong enough" message when every check passes.
                  std::string check_password(const std::string& password) {
                      if (password.length() < 8) {
                          return "密码太短";
                      }
                      // The predicates take unsigned char because the <cctype>
                      // classifiers are undefined for negative char values.
                      const bool has_upper = std::any_of(
                          password.begin(), password.end(),
                          [](unsigned char ch) { return std::isupper(ch) != 0; });
                      const bool has_lower = std::any_of(
                          password.begin(), password.end(),
                          [](unsigned char ch) { return std::islower(ch) != 0; });
                      const bool has_digit = std::any_of(
                          password.begin(), password.end(),
                          [](unsigned char ch) { return std::isdigit(ch) != 0; });
                      if (!has_upper) return "需要大写字母";
                      if (!has_lower) return "需要小写字母";
                      if (!has_digit) return "需要数字";
                      return "密码强度足够";
                  }
                  // 示例4:格式化表格
                  // Prints `data` to std::cout as a left-aligned table: every cell
                  // is padded to its column's widest cell plus two spaces, one
                  // output line per row. Rows may have different lengths. Prints
                  // nothing when `data` is empty.
                  void format_table(const std::vector<std::vector<std::string>>& data) {
                      if (data.empty()) return;
                      // Widest cell per column. The vector grows whenever a row has
                      // more cells than any row seen so far, so a ragged table can
                      // never index past the end.
                      std::vector<size_t> col_widths;
                      for (const auto& row : data) {
                          if (row.size() > col_widths.size()) {
                              col_widths.resize(row.size(), 0);
                          }
                          for (size_t i = 0; i < row.size(); ++i) {
                              col_widths[i] = std::max(col_widths[i], row[i].length());
                          }
                      }
                      // Print the table.
                      for (const auto& row : data) {
                          for (size_t i = 0; i < row.size(); ++i) {
                              std::cout << std::left
                                        << std::setw(static_cast<int>(col_widths[i] + 2))
                                        << row[i];
                          }
                          std::cout << std::endl;
                      }
                  }
                      int main() {
                      // 测试词频统计
                      std::string text = "Hello world! Hello C++. C++ is great.";
                      auto freq = word_frequency(text);
                      for (const auto& [word, count] : freq) {
                      std::cout << word << ": " << count << std::endl;
                      }
                      // 测试回文检查
                      std::cout << "Is palindrome: "
                      << is_palindrome("A man, a plan, a canal: Panama")
                      << std::endl;
                      // 测试密码检查
                      std::cout << "Password check: "
                      << check_password("Pass1234") << std::endl;
                      // 测试表格格式化
                      std::vector<std::vector<std::string>> table = {
                        {"Name", "Age", "City"},
                        {"Alice", "25", "New York"},
                        {"Bob", "30", "London"},
                        {"Charlie", "35", "Paris"}
                        };
                        format_table(table);
                        return 0;
                        }

11. 性能提示

#include <chrono>
#include <iostream>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>

// 1. Avoid unnecessary string copies.
// (These helpers live at namespace scope: C++ does not allow defining a
// function inside another function.)

// Wasteful: the by-value parameter is already a copy of the caller's
// string, and `result` copies it a second time.
std::string process_string(std::string input) {
    std::string result = input;  // second copy
    result += " processed";
    return result;  // may be copied again unless RVO/NRVO applies
}

// Better: take a const reference, so the only copy is the one into `result`.
std::string process_string_fast(const std::string& input) {
    std::string result = input;
    result += " processed";
    return result;
}

// C++17 alternative: take a std::string_view parameter.
std::string process_with_view(std::string_view input) {
    std::string result(input);
    result += " processed";
    return result;
}

// Demonstrates reserve(), move semantics, and two ways of building a string
// piece by piece.
int main() {
    // 2. Reserve capacity up front to avoid reallocation while appending
    std::string large_string;
    large_string.reserve(10000);
    for (int i = 0; i < 10000; ++i) {
        large_string += "a";
    }
    // 3. Move semantics (C++11)
    std::string source = "Very long string...";
    std::string destination = std::move(source);  // moved, not copied
    // 4. Building a string incrementally
    // Without reserve(), += may reallocate several times as the string grows.
    std::string slow;
    for (int i = 0; i < 1000; ++i) {
        slow += "word ";
    }
    // Alternative: std::ostringstream.
    // NOTE(review): whether this beats += is implementation-dependent;
    // measure before relying on it. When the final size is known,
    // reserve() as shown in (2) avoids the reallocations directly.
    std::ostringstream oss;
    for (int i = 0; i < 1000; ++i) {
        oss << "word ";
    }
    std::string fast = oss.str();
    return 0;
}

总结

核心要点:

  1. 优先使用 std::string 代替 C 风格字符串
  2. C++20 推荐 std::format 进行字符串格式化
  3. C++17 的 std::string_view 用于只读访问,避免拷贝
  4. 使用 std::stringstream 进行复杂字符串构建
  5. 注意编码问题,特别是多语言文本处理
  6. 利用算法库(<algorithm>)进行字符串操作
  7. C++11 的正则表达式(<regex>)用于模式匹配

选择指南:

  • 简单操作:使用 std::string 成员函数
  • 格式化:C++20 用 std::format,之前用 std::stringstream
  • 只读访问:使用 std::string_view
  • 模式匹配:使用 std::regex
  • 高性能场景:考虑预分配、移动语义、避免拷贝
  • 跨平台编码:使用 UTF-8,考虑第三方编码库

C++ 的字符串处理虽然比 Python 更底层,但通过标准库提供的丰富工具,同样可以高效、安全地处理各种字符串操作需求。