Commit b66928ca by BellCodeEditor

auto save

parent 1c74bdbe
Showing with 60 additions and 2 deletions
import re
import requests
# Page to scrape; replace with the URL you want to crawl.
url = 'https://v.qq.com/'

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops it from parsing an HTTP error page as content.
response = requests.get(url, timeout=10)
response.raise_for_status()
html_content = response.text

# Extract the page title with a regular expression.
# `\b[^>]*` tolerates attributes on the tag; IGNORECASE/DOTALL accept
# upper-case tags and titles that span several lines.
title_pattern = re.compile(
    r'<title\b[^>]*>(.*?)</title>', re.IGNORECASE | re.DOTALL
)
title = title_pattern.search(html_content)
if title:
    print(title.group(1).strip())

# Extract the content of every paragraph with a regular expression.
# `\b` prevents the pattern from also matching <pre>, <param>, etc.
paragraph_pattern = re.compile(
    r'<p\b[^>]*>(.*?)</p>', re.IGNORECASE | re.DOTALL
)
paragraphs = paragraph_pattern.findall(html_content)
for paragraph in paragraphs:
    print(paragraph)
\ No newline at end of file
...@@ -2,4 +2,7 @@ from pyecharts.charts import Bar ...@@ -2,4 +2,7 @@ from pyecharts.charts import Bar
# Names shown along the x-axis.
subjects = ["浈浈", "聪聪", "小智", "波奇"]
scores1 = [92, 95, 82, 88]  # midterm scores
scores2 = [95, 79, 93, 90]  # final-exam scores

bar = Bar()
bar.add_xaxis(subjects)
# add_yaxis() requires a series name and the values to plot; calling it with
# no arguments raises TypeError. Plot one series per score list.
bar.add_yaxis("期中成绩", scores1)
bar.add_yaxis("期末成绩", scores2)
\ No newline at end of file
import requests
from bs4 import BeautifulSoup
def fetch_webpage(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests may block indefinitely on a stalled connection.

    Returns:
        The decoded response body, or None if the request failed (connection
        error, timeout, or an HTTP error status such as 404 or 500).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Raises HTTPError for 4xx/5xx responses.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return None

    # When the server declares no charset, requests falls back to ISO-8859-1
    # for text content, which garbles non-Latin pages. Use the encoding
    # detected from the body instead.
    if "charset" not in response.headers.get("Content-Type", "").lower():
        response.encoding = response.apparent_encoding
    return response.text
def parse_html(html_content):
    """Parse *html_content* and return every h1-h6 tag it contains.

    The result is grouped by heading level (all h1 tags first, then all h2
    tags, and so on), not ordered by position in the document.
    """
    document = BeautifulSoup(html_content, 'html.parser')
    heading_names = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return [
        tag
        for heading_name in heading_names
        for tag in document.find_all(heading_name)
    ]
def print_headers(headers):
    """Print one "<tag name>: <stripped text>" line per heading tag."""
    for tag in headers:
        label = tag.name
        text = tag.get_text(strip=True)
        print(f"{label}: {text}")
def main():
    """Fetch the target page and print every heading found in it."""
    # Replace with the URL of the page you want to crawl.
    url = 'https://v.qq.com/'
    html_content = fetch_webpage(url)
    if not html_content:
        # Fetch failed or returned an empty body: nothing to parse.
        return
    print_headers(parse_html(html_content))


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment