How to Increase Website Traffic with Python Crawler Proxies
This article explains how to use proxies with a Python crawler to increase website traffic. The example code is covered in detail and is a useful reference, so if you are interested, read through to the end!
Once you have a list of free proxies, there is a lot you can do with them: for example, crawl a site without the risk of having your IP banned, or, as in this article, increase a website's traffic.
Full code:
# coding:utf-8
import urllib2
import cookielib
import time
import json
import random

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities


class Spide:
    def __init__(self, proxy_ip, proxy_type, proxy_port, use_proxy=False):
        print 'using the proxy info :', proxy_ip
        self.proxy_ip = proxy_ip
        self.proxy_type = proxy_type
        self.proxy_port = proxy_port
        self.proxy = urllib2.ProxyHandler({proxy_type: proxy_ip + ":" + proxy_port})
        self.usercode = ""
        self.userid = ""
        # cookie-aware opener; switch to the proxy opener when use_proxy is True
        self.cj = cookielib.LWPCookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        if use_proxy:
            self.opener = urllib2.build_opener(self.proxy)
        urllib2.install_opener(self.opener)

    def add_view(self):
        """Open the homepage through PhantomJS, then visit one random post."""
        print '---> start adding view'
        print '---> proxy info', self.proxy_ip
        service_args = [
            '--proxy=' + self.proxy_ip + ':' + self.proxy_port,
            '--proxy-type=' + self.proxy_type,
        ]
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
            "(KHTML, like Gecko) Chrome/15.0.87"
        )
        driver = webdriver.PhantomJS(executable_path='/home/bin/phantomjs',
                                     service_args=service_args,
                                     desired_capabilities=dcap)
        driver.set_page_load_timeout(90)
        driver.get("http://www.503error.com/")
        soup = BeautifulSoup(driver.page_source, 'xml')
        titles = soup.find_all('h2', {'class': 'entry-title'})
        # pick a random post from the homepage list (len(titles) - 1 keeps the index in range)
        ranCount = random.randint(0, len(titles) - 1)
        print 'random find a link of the website to access, random is:', ranCount
        randomlink = titles[ranCount].a.attrs['href']
        driver.get(randomlink)
        driver.close()
        print 'finish once'

    def get_proxy(self):
        """Fetch a single proxy (one JSON object) from the first free-proxy site."""
        proxy_info_json = ""
        print '--> using the ip ' + self.proxy_ip + ' to get the proxy info'
        try:
            reqRequest_proxy = urllib2.Request('url2')
            reqRequest_proxy.add_header('Accept', '*/*')
            reqRequest_proxy.add_header('Accept-Language', 'zh-CN,zh;q=0.8')
            reqRequest_proxy.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36')
            reqRequest_proxy.add_header('Content-Type', 'application/x-www-form-urlencoded')
            proxy_info = urllib2.urlopen(reqRequest_proxy).read()
            print proxy_info
            proxy_info_json = json.loads(proxy_info)
            return_str = proxy_info_json['protocol'] + ":" + proxy_info_json['ip'] + proxy_info_json['port']
        except Exception as e:
            print 'proxy have problem'
        return proxy_info_json

    def get_proxys100(self):
        """Fetch a list of proxies (JSON array) from the second free-proxy site."""
        proxy_info_json = ""
        print '--> using the ip ' + self.proxy_ip + ' to get the proxy info 100'
        try:
            reqRequest_proxy = urllib2.Request('url1')
            reqRequest_proxy.add_header('Accept', '*/*')
            reqRequest_proxy.add_header('Accept-Language', 'zh-CN,zh;q=0.8')
            reqRequest_proxy.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36')
            reqRequest_proxy.add_header('Content-Type', 'application/x-www-form-urlencoded')
            proxy_info = urllib2.urlopen(reqRequest_proxy).read()
            proxy_info_json = json.loads(proxy_info)
            return proxy_info_json
        except Exception as e:
            print 'proxy have problem'


if __name__ == "__main__":
    # first get the proxy list, then cycle through it adding views
    print 'START ADDING VIEW:'
    print 'Getting the new proxy info for the first time'
    print '----------------------------------------------------------'
    for count in range(1):
        test = Spide(proxy_ip='youproxyip', proxy_type='http', proxy_port='3128', use_proxy=False)
        proxy_list = test.get_proxy()
        print '-> this is the:', count
        print '-> getting the new proxy info:'
        print '-> using the proxy to get the proxy list, in case of getting banned'
        print '-> proxy info', proxy_list
        proxy100 = test.get_proxys100()
        for proxy1 in proxy100:
            try:
                print 'proxy1:', proxy1
                Spide1 = Spide(proxy_ip=proxy1['ip'], proxy_type=proxy1['type'],
                               proxy_port=proxy1['port'], use_proxy=True)
                print 'before add view'
                Spide1.add_view()
                print '-> sleep 15 s'
                time.sleep(15)
                # sleep an extra random amount of time before the next proxy
                ranTime = random.randint(10, 50)
                print '-> sleep random time:', ranTime
                time.sleep(ranTime)
                print '-> getting new proxy'
                # proxy_list = Spide1.get_proxy()
            except Exception as e:
                print '-> something wrong, hahah, next'
A few quick notes:
The overall flow is: 1. get a proxy -> 2. visit the homepage -> 3. collect the list of posts on the homepage and visit one at random -> 4. wait a random number of seconds -> back to step 1. (A minimal, modernized sketch of this loop follows the notes below.)
1: Change youproxyip to a proxy IP you already have, or leave it as-is, since use_proxy=False is passed later; in that case just make sure you can reach the two proxy-list sites in the code without using a proxy.
2: /home/bin/phantomjs is the path where phantomjs is installed; change it to match your own installation.
3: The code contains two methods for fetching proxies and the example uses one of them (don't flame me for the loop below that only runs once; this version originally had a real outer loop).
4: The addresses of the free proxy sources are omitted; url1 and url2 stand in for the hidden free-proxy sites. (The response shape the code expects is sketched right after these notes.)
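Since url1 and url2 are hidden, you have to plug in your own proxy source. The exact response format is not shown, but judging from how the parsed JSON is indexed (proxy1['ip'], proxy1['type'], proxy1['port'] in the main loop, and 'protocol'/'ip'/'port' in get_proxy), responses along the following lines should work; the shapes and values below are only an assumption for illustration:

# Assumed (illustrative) response shapes; real field names depend on your proxy source.
proxys100_response = [                    # what url1 / get_proxys100() is expected to return
    {"ip": "1.2.3.4", "port": "8080", "type": "http"},
    {"ip": "5.6.7.8", "port": "3128", "type": "http"},
]
proxy_response = {                        # what url2 / get_proxy() is expected to return
    "protocol": "http", "ip": "1.2.3.4", "port": "8080",
}

If your proxy source uses different field names, adjust the key lookups in get_proxy, get_proxys100 and the main loop accordingly.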
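For reference, here is a minimal Python 3 sketch of the same "get proxy -> homepage -> random post -> random wait" loop. It is not the original script: PhantomJS has been discontinued, so this assumes Selenium 4 with headless Chrome, and fetch_proxies() is a hypothetical placeholder standing in for whatever proxy source you use.

# Minimal Python 3 sketch of the flow, assuming Selenium 4 + headless Chrome.
import random
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


def fetch_proxies():
    # placeholder: return a list of {"ip": ..., "port": ..., "type": ...} dicts
    return [{"ip": "1.2.3.4", "port": "8080", "type": "http"}]


def add_view(proxy, site="http://www.503error.com/"):
    options = Options()
    options.add_argument("--headless=new")
    options.add_argument("--proxy-server=%s://%s:%s" % (proxy["type"], proxy["ip"], proxy["port"]))
    driver = webdriver.Chrome(options=options)
    try:
        driver.set_page_load_timeout(90)
        driver.get(site)                                     # step 2: visit the homepage
        soup = BeautifulSoup(driver.page_source, "html.parser")
        titles = soup.find_all("h2", class_="entry-title")   # step 3: collect post links
        if titles:
            driver.get(random.choice(titles).a["href"])      # visit one post at random
    finally:
        driver.quit()


if __name__ == "__main__":
    for proxy in fetch_proxies():                # step 1: get a proxy
        try:
            add_view(proxy)
            time.sleep(random.randint(10, 50))   # step 4: wait a random number of seconds
        except Exception as exc:
            print("proxy failed, moving on:", exc)

The target site and the h2.entry-title selector are taken from the original code; swap them for your own site's post list.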
That is all there is to increasing website traffic with Python crawler proxies. Thanks for reading! I hope the content shared here is helpful; for more on related topics, follow the 亿速云 industry news channel!