# Counter program
import time

from get_parting import url_list

# Report how many URLs have been collected so far, every 5 seconds.
while True:
    print(url_list.count_documents({}))
    time.sleep(5)
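# Sketch (assumption, not in the original): a variant of the counter that
# watches both collections used below, so the list stage and the detail stage
# can be seen converging. Collection names come from the url_list program.
import time

import pymongo

client = pymongo.MongoClient('localhost', 27017)
jort58 = client['jort58_']

while True:
    print('url_list:', jort58['url_list'].count_documents({}),
          'item_info:', jort58['item_info'].count_documents({}))
    time.sleep(5)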
# Main program with multiprocessing
from multiprocessing import Pool

from channl_cates import channl_list
from page_parting import get_countent, url_list, item_info, get_links_from

# URLs collected in the list stage vs. URLs whose detail pages were parsed.
db_urls = [item['url'] for item in url_list.find()]
index_urls = [item['url'] for item in item_info.find()]
# rest_of_urls: collected but not yet parsed, presumably fed to get_countent
# in a second stage (not shown) so a crashed crawl can resume where it left off.
rest_of_urls = set(db_urls) - set(index_urls)

def get_all_links_from(channel):
    # Walk pages 2..100 of each channel's listing.
    for num in range(2, 101):
        get_links_from(channel, 1, num)

if __name__ == '__main__':
    pool = Pool()
    pool.map(get_all_links_from, channl_list.split())
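# Sketch (assumption, not in the original): the channl_cates module imported
# above is not shown. Because main calls channl_list.split(), channl_list must
# be a single whitespace-separated string of channel URLs. One way to build it
# from a 58.com category index page; the start URL, host prefix, and CSS
# selector are illustrative guesses.
import requests
from bs4 import BeautifulSoup

start_url = 'http://bj.58.com/sale.shtml'  # hypothetical index page
url_host = 'http://bj.58.com'

def get_channl_list():
    wb_data = requests.get(start_url)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    links = soup.select('ul.ym-submnu li b a')  # hypothetical selector
    # One URL per line, so channl_list.split() yields the channel list.
    return '\n'.join(url_host + link.get('href') for link in links)

channl_list = get_channl_list()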
# Program to fetch url_list
import time

import pymongo
import requests
from bs4 import BeautifulSoup

client = pymongo.MongoClient('localhost', 27017)
jort58 = client['jort58_']
url_list = jort58['url_list']
item_info = jort58['item_info']

def get_links_from(channl, who_sell, pages):
    # Listing page URL pattern: <channel><who_sell>/pn<page>
    links_views = '{}{}/pn{}'.format(channl, str(who_sell), str(pages))
    wb_data = requests.get(links_views)
    time.sleep(1)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    # Only real listing pages contain td.t cells; skip empty/overflow pages.
    if soup.find('td', 't'):
        for link in soup.select('td.t a.t'):
            item_links = link.get('href').split('?')[0]
            # Skip URLs that are already stored, to avoid duplicates.
            if not url_list.find_one({'url': item_links}):
                url_list.insert_one({'url': item_links})
                print(item_links)
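# Sketch (assumption, not in the original): main imports get_countent from
# page_parting, but it is not defined above. Appended to the program above
# (reusing its imports and the item_info collection), a minimal detail-page
# parser might look like this; the parsed fields are illustrative guesses.
def get_countent(url):
    wb_data = requests.get(url)
    time.sleep(1)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    title = soup.title.text.strip() if soup.title else None
    # Avoid re-inserting pages that were already parsed.
    if not item_info.find_one({'url': url}):
        item_info.insert_one({'url': url, 'title': title})
        print(url, title)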