1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
| import pandas as pd from bs4 import BeautifulSoup
# Extract every image URL embedded in each product's HTML detail and write
# one output row per image: (product code, 1-based image number, image URL).

# Output schema, declared once so the result sheet always carries its header
# row, even when no images are found at all.
RESULT_COLUMNS = ['商品编码', '图片序号', '图片链接']

df = pd.read_excel('C:/Users/xxx/Desktop/product_detail0711.xlsx')

# Missing details become empty strings so the HTML parser always gets a str.
df['product_detail'] = df['product_detail'].fillna('').astype(str)

data = []
# Only two columns are needed, so iterate them directly instead of paying for
# a full Series per row with iterrows().
for product_code, html_content in zip(df['product_code'], df['product_detail']):
    if not html_content:
        # Nothing to parse for products without a detail page.
        continue

    soup = BeautifulSoup(html_content, 'html.parser')

    # src=True keeps only <img> tags that actually carry a src attribute;
    # indexing tag['src'] on a tag without one would raise KeyError and abort
    # the whole export.
    img_tags = soup.find_all('img', src=True)

    for i, img_tag in enumerate(img_tags, start=1):
        data.append({
            '商品编码': product_code,
            '图片序号': i,
            '图片链接': img_tag['src'],
        })

# Passing columns explicitly fixes the column order and keeps the header when
# `data` is empty.
result_df = pd.DataFrame(data, columns=RESULT_COLUMNS)
result_df.to_excel('C:/Users/xxx/Desktop/product_detail_result0711-xqt.xlsx', index=False)
|