python字符串处理

2025-01-12 技术教程

字符串

字符串：不可变有序序列，在python可使用 "abc" , """abc""" ,'abc' 的形式表示，属于一种字面常量，python3中字符均属于Unicode编码。

字符串可以被迭代、遍历、切片、索引。字符串一旦定义，不可增加、删除、修改自身元素。对字符串的修改操作，实质是创建一个新的字符串对象并重新赋值。

# 迭代遍历
s = " this is a string\n"
for i in s:
    print(i)
# 索引,切片
print(s[4], s[2:5], s[2:-3])

字符串遍历生成列表，元组

lis = list(s) # 遍历 s 中的每一个元素，组成一个列表返回
tp = tuple(s) # 遍历 s 中的每一个元素，组成一个元组返回

列表，元组生成字符串

# 列表，元组，拼接为字符串，使用str.join()方法
lis = ['h','e','l','l','o']
# 遍历lis,将lis中的每一个元素拼接为字符串并返回，
# 注：lis中每一个元素必须是字符串，否则不能拼接，可用str()转换
num_list = [1,2,3,4,5]
# s2 = "".join(num_list) ==> 将会报错，无法拼接数值
s1 = "".join(map(str, num_list)) # 将数值全部转化为字符串拼接
s2 = "".join(lis)
# 也可以指定 "," 作为连接符号进行拼接
s3 = ",".join(lis)
print(s1, s2, s3)

字符串的常用方法

字符串的拼接：

返回的是一个全新的字符串对象，s1,s2本身均未发生改变

s3 = s1 + s2

字符串分割

s = "this is a string\n"
s_1 = s.split() # 以连续空白进行分割，返回lis ==> ['this', 'is', 'a', 'string']
s_2 = s.split(" ") # 以空格字符进行分割, \n不是空格字符 ==> ['this', 'is', 'a', 'string\n']
s_3 = s.rsplit(" ",maxsplit=2) # 右边开始，最大切割次数为 2次 ==> ['this is', 'a', 'string\n']
# 行切割，splitlines()
s = "firstline \n secondline \r\n thirdline \r"
s_4 = s.splitlines() # ==> ['firstline ', ' secondline ', ' thirdline ']
s_5 = s.splitlines(True) # 保留切割符 ==> ['firstline \n', ' secondline \r\n', ' thirdline \r']
# 只切割一次，无论是否切割，返回 3个元素的元组（pre, sep, tail）==> ('firstline', ' ', '\n secondline \r\n thirdline \r')
s_6 = s.partition(" ")
# 从字符串右边开始进行分割 ==> ('firstline \n secondline \r\n thirdline', ' ', '\r')
s_7 = s.rpartition(" ")

其他方法

s = "HEllo world"
print(s.upper()) # ==> HELLO WORLD 全大写
print(s.capitalize()) # ==> Hello world 首部大写
print(s.title()) # ==> Hello World 每个单词首字母大写（标题格式）
print(s.lower()) # ==> hello world 全小写
print(s.swapcase()) # ==> heLLO WORLD 交换大小写

格式化输出

s = "hello"
print(s.center(20, "#")) # #######hello######## 指定宽度和填充字符,居中
print(s.zfill(20)) # 000000000000000hello 右对齐，0填充
print(s.ljust(20, "#")) # hello############### 指定宽度和填充字符，左对齐
print(s.rjust(20, "#")) # ###############hello 指定宽度和填充字符，右对齐

字符替换

s = "heffo worfd"
print(s.replace("f", "l")) # hello world
print(s.replace("f", "l", 2)) # hello worfd
print(s.replace("ff", "l")) # helo worfd
print(s) # heffo worfd 均返回新的字符串，s 未做任何改变
# 批量替换
in_tab = "abxy"
out_tab = "1234"
str_trantab = str.maketrans(in_tab,out_tab) # 创建一个转化表, abxy --> 1234
s = "abcdefghijklmnopqrstuvwxyz"
s.translate(str_trantab) # s将会根据转换表替换对应的字符 ==> 12cdefghijklmnopqrstuvw34z

strip() 删除两端字符

s = " \t hello world \n "
print(s.strip()) # 'hello world'
print(s.strip("\t")) # ' \t hello world \n ' 两端不是 \t，未删除任何字符
print(s.strip("\n")) # ' \t hello world \n ' 两端不是 \n，未删除任何字符
print(s.strip(" ")) # '\t hello world \n'
print(s.strip(" h\t")) # 'ello world \n'
s.lstrip() # 只对字符串左边进行操作
s.rstrip() # 只对字符串右边进行操作

字符串查找

# find("", start, stop)
# 指定查找的字符，并可以选择查找范围，start -> stop 索引范围
# 找到匹配字符串返回索引，未找到返回 -1
s = "this is a long long long string"
print(s.find("a")) # 返回索引号 8
print(s.find("long")) # 返回第一个索引号 10
print(s.find("long", 11, -1)) # 从索引11开始查找，找到的第一个long索引为 15
print(s.count("long", 15, -1)) # 从索引15开始查找，只能找到后面两个long
# 字符串的查找都需要对字符串进行遍历，当字符串较长，将会耗费较大的时间

# endswith, startswith 判断字符串是否使用该字符结尾或者开头
# 例如：检查一个文件是否是Python文件格式
file_name = "hello.py"
file_name.endswith(".py") # 是否是.py结尾
"hello".startswith("he") # 是否 he开头

字符检测方法，返回 True或者 False

# 字符检测方法，返回 True或者 False
# endswith, startswith 判断字符串是否使用该字符结尾或者开头
# 例如：检查一个文件是否是Python文件格式
file_name = "hello.py"
file_name.endswith(".py") # 是否是.py结尾
"hello".startswith("he") # 是否 he开头
"hell_fa".isidentifier() # 检查是否符合标识符规则，是不是字母和下划线开头，其他都是字母数字、下划线
"abc".isalpha() # 是否全部为字母
"123".isdigit() # 10进制数字
"123".isdecimal() # (0-9)的数字
"123abc".isalnum() # 是否是字母或者数字
"abc".islower() # 全小写
"ABC".isupper() # 全大写
"\n\t\f\r".isspace() # 只包含空白, \t\n\r\f等均为空白字符
# 以上结果全部为 True

字符串格式化输出

1. c语言风格格式化输出

s = "hello world"
print("%s,%r" %(s,s)) # %s 调用字符串对象的 __str__方法输出，%r会调用__repr__ 方法，输出不同
# 输出 hello world,'hello world'

示例：

s = "hello world"
class String(str):
    def __str__(self):
        return "__str__:{}".format(super().__repr__())
    def __repr__(self):
        return "__repr__:{}".format(super().__repr__())
string = String(s)
print("%s, %r" %(string,string))
# %s 输出结果： __str__:'hello world', %r 输出结果： __repr__:'hello world'
# %s 调用了对象的 __str__方法，%r 调用了__repr__ 方法

常用输出形式

"%d" %10 #'10'
"%#d" %10 #'10'（# 标志对 %d 没有效果）
"%-05d" %10 #'10   '（左对齐，宽度为 5；同时指定 - 和 0 时，0 填充被忽略）
"%d" %10 #'10'
# 可通过以上方式设置对齐方式和宽度等
"%f" %10 #'10.000000'
"%5.3f" %10 #'10.000'
"%10.2f" %10 #'     10.00'
"%-10.2f" %10 #'10.00     '

进制转化

# 无前缀
"%x"%10 # ==> 'a'
"%o"%10 # ==> '12'
# 带前缀
"%#x"%10 # ==> '0xa'
"%#o"%10 # ==> '0o12'
# 对齐方式，宽度，显示精度均可设置
# 科学计数法表示
"%-10.2e" %10 #'1.00e+01  '
# 百分数表示
"%-.2f%%" %10 #'10.00%' 在值后面拼接 %， %% ==> 输出一个%

format 格式化函数

# format 格式化函数
a,b,c = 1,2,"tom"
"{}, {name}, {}".format(a,b,name=c) # 位置参数和关键字参数传入 ==> '1, tom, 2'
# 设置填充字符 "#", 对齐方式，宽度
"{}".format(10) #'10'
"{:#<10}".format(10) #'10########'
"{:0>10}".format(10) #'0000000010'
"{:$^10}".format(10) #'$$$$10$$$$'
#进制转化
"{:x}".format(10) #'a' 16进制
"{:o}".format(10) #'12' 8进制
"{:b}".format(10) #'1010' 2进制
#带进制前缀符输出
"{:#b}".format(10) #'0b1010' 0b表示2进制
"{:#x}".format(10) #'0xa' 0x表示16进制
"{:#o}".format(10) #'0o12' 0o表示8进制
# 百分号表示，科学计数
"{:.2%}".format(0.5) #'50.00%' 计算的结果转化为 % 形式表示
"{:.2e}".format(0.5) #'5.00e-01'
"{:.2E}".format(50) #'5.00E+01'