- Excel 包含两列数据:product_code、product_detail
import pandas as pd
from bs4 import BeautifulSoup
# Extract every <img> URL from each product's HTML detail and export one row
# per image (product code, image sequence number, image URL) to a new Excel
# workbook. The input workbook must provide the columns `product_code` and
# `product_detail`.
INPUT_PATH = 'C:/Users/xxx/Desktop/product_detail0711.xlsx'
OUTPUT_PATH = 'C:/Users/xxx/Desktop/product_detail_result0711-xqt.xlsx'
# Header names of the output sheet; kept in one place so an empty result
# still produces a workbook with the expected header row.
OUTPUT_COLUMNS = ['商品编码', '图片序号', '图片链接']

df = pd.read_excel(INPUT_PATH)
# Empty cells are read as NaN; normalise them to '' so the HTML parser
# always receives a string.
df['product_detail'] = df['product_detail'].fillna('').astype(str)

data = []
for product_code, html_content in zip(df['product_code'], df['product_detail']):
    soup = BeautifulSoup(html_content, 'html.parser')
    # Use .get() instead of tag['src']: an <img> without a src attribute
    # would otherwise raise KeyError and abort the whole export. Such tags
    # are skipped, and the sequence number counts only exported images.
    img_urls = [
        img_url
        for img_url in (img_tag.get('src') for img_tag in soup.find_all('img'))
        if img_url is not None
    ]
    data.extend(
        {
            '商品编码': product_code,
            '图片序号': seq,
            '图片链接': img_url,
        }
        for seq, img_url in enumerate(img_urls, start=1)
    )

# Passing `columns` guarantees the header row even when no image was found.
result_df = pd.DataFrame(data, columns=OUTPUT_COLUMNS)
result_df.to_excel(OUTPUT_PATH, index=False)