import re

# Sample markup: a single HTML element whose tag name and inner text are
# extracted below.
s = '<h1>python爬虫正则匹配1个标签里的内容内容</h1>'
s2 = '<h1>python爬虫正则匹配1个标签里的内容内容</h1>'

# `(?P<htmlTag>\w+)` names the opening tag, and `(?P=htmlTag)` is a
# backreference that forces the closing tag to repeat that same name.
# `(?P<content>.+)` names the text between the tags. The trailing `$` makes
# the closing tag end the string (re.match already anchors at the start).
TAG_PATTERN = re.compile(r'<(?P<htmlTag>\w+)>(?P<content>.+)</(?P=htmlTag)>$')

result = TAG_PATTERN.match(s2)
if result is None:
    # re.match returns None on failure; fail with a clear message rather
    # than an AttributeError on `.group`.
    raise ValueError('input is not a single <tag>content</tag> element: %r' % s2)

print(result.group('htmlTag'))
print(result.group('content'))