玖叶教程网

前端编程开发入门

爬取阿里拍卖.司法公告(阿里拍卖公示)

运行环境:Python 2.x

#coding=utf-8

from bs4 import BeautifulSoup

import cookie_test

# Disable insecure-request warnings

import urllib3

urllib3.disable_warnings()

def sifapaimai():
    """Fetch the judicial-notice list page and print each notice link.

    Downloads https://sf.taobao.com/notice_list.htm through
    ``cookie_test.get``, then walks every ``<div class="item-side">``
    element, printing its first ``<a>`` tag and extracting the link URL,
    the title and the date text found in ``<span class="data">`` elements.

    Returns:
        False when the download reports failure (``ret['issuccess']`` is
        falsy); otherwise None.
    """
    ret = cookie_test.get('https://sf.taobao.com/notice_list.htm')
    if not ret['issuccess']:
        return False

    soup = BeautifulSoup(ret['message'], 'lxml')
    for item in soup.find_all('div', {"class": "item-side"}):
        # find_all() already yields searchable tags, so query the item
        # directly instead of serialising and re-parsing it.
        link = item.find('a')
        if link is None:
            # An entry without an anchor has nothing to report; skip it
            # rather than crash on link['href'].
            continue
        print(link)

        # Tag.get() tolerates a missing attribute (plain indexing raises).
        href = link.get('href', '')
        if href.startswith('//'):
            # Only protocol-relative links need a scheme; an absolute URL
            # must not get a second one prepended.
            href = 'http:%s' % href
        title = link.get('title', '')

        # .string is None for a span with several children; leave those out
        # so the literal text "None" never ends up in the date. Joining
        # without str() keeps non-ASCII text intact on Python 2.
        parts = [span.string for span in item.find_all('span', class_='data')]
        dtime = ' '.join(part for part in parts if part is not None).lstrip()

        # href, title and dtime are not consumed yet: the original left a
        # `detail(url=href)` call disabled at this point.

def detail(url):
    """Fetch one notice page and print the text of its content block.

    Args:
        url: Address of the notice page, passed to ``cookie_test.get``.

    Returns:
        False when the download reports failure (``ret['issuccess']`` is
        falsy) or the page has no ``<div class="notice-content">``;
        otherwise None.
    """
    ret = cookie_test.get(url)
    if not ret['issuccess']:
        return False

    soup = BeautifulSoup(ret['message'], 'lxml')
    content = soup.find('div', class_='notice-content')
    if content is None:
        # find() returns None when the element is absent; report failure
        # instead of raising AttributeError on `.text`.
        return False

    print(content.text)

# Script entry point: run the list scraper when executed directly.
if __name__ == "__main__":
    sifapaimai()

发表评论:

控制面板
您好,欢迎到访网站!
  查看权限
网站分类
最新留言