用户注册



邮箱:

密码:

用户登录


邮箱:

密码:
记住登录一个月
忘记密码?

发表随想


还能输入:200字
云代码 - python代码库

修改ip

2019-08-02 作者:云代码会员 举报

[python]代码库

# -*- coding:utf-8 -*-
import time
import asyncio
import aiohttp
from aiohttp_requests import requests
import json
import pymssql
# Number of to_url() calls made so far (incremented at the top of every call).
num = 0
# Limits how many to_url() bodies may run concurrently (three permits).
semaphore = asyncio.Semaphore(3)
# URLs still waiting to be fetched; to_url() re-adds URLs whose response was
# unusable and get_data() adds newly generated URLs and pops from it.
stopset = set()
# Declared global in get_data() but never read or written by the code visible here.
okset = set()
# Database
# NOTE(review): the connection is opened at import time with hard-coded
# credentials (presumably server, user, password, database -- confirm against
# the pymssql docs); consider loading them from configuration instead.
conn = pymssql.connect("127.0.0.1", "sa", "zxc1230.", "GJC_Key")
# Single cursor shared by every SQL helper in this file.
cursor = conn.cursor()


# ipnum = 0
# async def getip():
#     global ipnum
#     url = 'http://http.tiqu.qingjuhe.cn/getip?num=1&type=1&pack=17534&port=1&lb=1&pb=4&regions='
#     response = await requests.get(url)
#     text = await response.text()
#     ip = text.strip()
#     httpip = 'http://' + ip
#     ipnum += 1
#     print(ipnum)
#     print(httpip)
#     print('='*50+'IP'+'='*50)
#     with open('IP.txt', 'ab+') as f:
#         f.write(httpip.encode('utf-8'))
#         f.write('\n'.encode('utf-8'))
#         f.close()
#     return httpip

class proxies():
    """Time-throttled cache in front of the proxy-extraction HTTP API.

    A proxy URL is fetched once on construction and then reused; a new one
    is only requested when more than ``miao`` seconds have passed since the
    last fetch.
    """

    def __init__(self, miao):
        """Fetch the first proxy immediately.

        Args:
            miao: Minimum number of seconds a fetched proxy is reused before
                ``getip`` asks the API for a new one.
        """
        self.miao = miao
        self.tm = time.time()
        self.ip = self.get_ip()

    def getip(self):
        """Return the cached proxy URL, refreshing it first if it has expired."""
        if time.time() - self.tm > self.miao:
            self.tm = time.time()
            self.ip = self.get_ip()
        return self.ip

    def get_ip(self):
        """Request a new proxy from the API, log it to IP.txt and return it.

        Returns:
            The stripped API response body prefixed with ``'http://'``.
        """
        # Imported locally on purpose: the module-level name ``requests`` is
        # the async client from aiohttp_requests, this call needs the
        # synchronous library.
        import requests
        url = 'http://http.tiqu.qingjuhe.cn/getip?num=1&type=1&pack=17534&port=1&lb=1&pb=4&regions='
        response = requests.get(url)
        httpip = 'http://' + response.text.strip()
        print(httpip)
        print('=' * 50 + 'IP' + '=' * 50)
        # Keep an append-only record of every proxy that was handed out.
        with open('IP.txt', 'ab+') as f:
            f.write((httpip + '\n').encode('utf-8'))
        return httpip

# Limits the number of real proxy-API requests: the same IP is returned for 5 seconds.
myip=proxies(5)

def getip():
    """Fetch one proxy from the extraction API, log it to IP.txt and return it.

    Returns:
        The stripped API response body prefixed with ``'http://'``.
    """
    # Local import: the module-level ``requests`` name is the async client
    # from aiohttp_requests; this function uses the synchronous library.
    import requests
    api_url = 'http://http.tiqu.qingjuhe.cn/getip?num=1&type=1&pack=17534&port=1&lb=1&pb=4&regions='
    proxy_url = 'http://' + requests.get(api_url).text.strip()
    print(proxy_url)
    print('=' * 50 + 'IP' + '=' * 50)
    # Append the proxy to the log file, one entry per line.
    with open('IP.txt', 'ab+') as log_file:
        log_file.write((proxy_url + '\n').encode('utf-8'))
    return proxy_url



def _log_get_error(url, exc):
    """Append a failed URL together with the exception's repr to GetError.txt."""
    with open('GetError.txt', 'ab+') as f:
        f.write((url + '----GetError----' + repr(exc) + '\n').encode('utf-8'))


async def to_url(url):
    """Fetch one keyword-suggestion URL through the current proxy.

    Args:
        url: The suggestion endpoint URL to request.

    Returns:
        * a list of distinct keyword strings when the response is usable
          (possibly empty),
        * ``[]`` when the payload lacks the expected keys,
        * ``0`` when the proxy/connection failed or the response was not the
          expected payload (the caller rotates the proxy on ``0``),
        * ``None`` when the request or JSON decoding failed in another way;
          the error is logged to GetError.txt.
    """
    global num, httpip, stopset
    num += 1
    async with semaphore:
        # Fixed delay before every request.
        await asyncio.sleep(2)
        try:
            Headers = {'Referer': 'https://www.aliexpress.com/'}
            print(httpip)
            reponse = await requests.get(url=url, headers=Headers, proxy=httpip)
        except (aiohttp.ClientProxyConnectionError, aiohttp.ClientOSError):
            # Proxy no longer works / remote connection failed.
            return 0
        except Exception as exc:
            _log_get_error(url, exc)
            return None

        status = reponse.status
        text = await reponse.text()
        if not ('window.null' in text and status == 200):
            # Queue the URL again so it is retried later.
            stopset.add(url)
            # The response is not the expected payload.
            print(text)
            print('-'*50+'Error'+'-'*50)
            return 0

        try:
            payload = json.loads(text.replace('window.null=', ''))
        except ValueError as exc:
            # Malformed JSON must not propagate and kill every coroutine
            # that is awaiting this one; treat it like any other logged error.
            _log_get_error(url, exc)
            return None

        # Extract the distinct keywords.
        try:
            return list({entry['keywords'] for entry in payload['keyWordDTOs']})
        except (KeyError, TypeError) as exc:
            # Expected keys missing or payload has an unexpected shape.
            print(repr(exc))
            return []



# Produce new URLs and write data
async def _crawl_next_batch(limit=10):
    """Pop up to ``limit`` queued URLs from ``stopset`` and crawl them concurrently."""
    # Never pop more than what is queued: set.pop() on an empty set raises
    # KeyError, which would drop the URLs already popped for this batch.
    batch = [stopset.pop() for _ in range(min(limit, len(stopset)))]
    to_get = [get_data(queued_url) for queued_url in batch]
    print(len(stopset))
    print('-' * 50 + 'LEN' + '-' * 50)
    await asyncio.gather(*to_get)


async def get_data(url):
    """Crawl one URL, persist its results, then continue with queued URLs.

    Steps: fetch ``url`` via ``to_url``; record the URL's keyword fragment
    via ``pj_to_sql``; if keywords came back, store them with ``write_data``
    and queue the follow-up URLs generated by ``nurl`` for the newly stored
    ones; if ``to_url`` returned ``0``, switch to the proxy returned by
    ``myip.getip()``. In every case up to 10 queued URLs are then crawled.

    Args:
        url: The suggestion endpoint URL to process.
    """
    global stopset, okset, httpip
    # e.g. ['3d printer', '3d wallpaper', '3d pen', ...]
    keystrlist = await to_url(url)
    await pj_to_sql(url)
    print(len(stopset))
    if keystrlist:
        # Write to the database.
        Oklist = await write_data(keystrlist)
        # Build the new URLs and queue them (in place; no copy of the set).
        if Oklist:
            stopset.update(await nurl(Oklist))
    elif keystrlist == 0:
        # ``0`` signals a proxy/response failure: ask for a (possibly new) proxy.
        httpip = myip.getip()
    await _crawl_next_batch()




# Write to the keyword table
async def write_data(datalist):
    """Insert each keyword into ``Keywordsinfo`` and report which were stored.

    The values are passed as query parameters rather than interpolated into
    the SQL text, so keywords containing quotes are stored correctly and
    cannot alter the statement.

    Args:
        datalist: Iterable of keywords to insert.

    Returns:
        The list of keywords whose insert was committed. Keywords rejected
        with ``pymssql.IntegrityError`` (duplicates) or any other error are
        left out.
    """
    sql = "insert into Keywordsinfo(Keywords,UpdateTime) values(%s,%s)"
    OK = []
    for data in datalist:
        params = (str(data), time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))
        try:
            cursor.execute(sql, params)
            conn.commit()
        except pymssql.IntegrityError:
            # Duplicate keyword: skip it.
            continue
        except Exception:
            print(sql, params)
            print('-' * 50 + 'SqlError' + '-' * 50)
            continue
        else:
            OK.append(data)
    return OK


# Generate new URLs from the usable keyword strings
async def nurl(npjlist):
    """Build follow-up suggestion URLs for every keyword in ``npjlist``.

    Each keyword is extended with one extra character, the digits ``0``-``9``
    and the letters ``a``-``z``, giving 36 variants per keyword. Spaces are
    encoded as ``%20`` and the current time in milliseconds is appended as
    the ``_`` query parameter.

    Args:
        npjlist: Keywords to extend.

    Returns:
        A set of URL strings.
    """
    print(npjlist, len(npjlist))
    print('='*25+'OK'+'='*25)
    suffixes = [str(digit) for digit in range(10)] + [chr(code) for code in range(97, 123)]
    template = 'https://connectkeyword.aliexpress.com/lenoIframeJson.htm?keyword=%s&_=%d'
    urls = set()
    for base in npjlist:
        for suffix in suffixes:
            keyword = (base + suffix).replace(' ', '%20')
            urls.add(template % (keyword, int(time.time() * 1000)))
    return urls
    # sleeptime = random.randrange(20, 120)
    # await asyncio.sleep(sleeptime)
    # 形成新的链接
    # urlsplit = URL.split('&')
    # return set([urlsplit[0] + chr(num) + '&' + str(int(time.time() * 1000)) for num in range(97,123)] + [urlsplit[0] + str(num) + '&' + str(int(time.time() * 1000)) for num in range(0, 10)])


async def main(loop):
    """Seed the crawl with the single-character keywords a-z and 0-9.

    Args:
        loop: Event loop passed in by the caller; not used by this function.
    """
    global httpip
    template = 'https://connectkeyword.aliexpress.com/lenoIframeJson.htm?keyword=%s&_=%d'
    seeds = [chr(code) for code in range(97, 123)] + [str(digit) for digit in range(0, 10)]
    URLlist = [template % (seed, int(time.time() * 1000)) for seed in seeds]
    # The coroutines are only created here; nothing runs until gather() below.
    to_get = [get_data(seed_url) for seed_url in URLlist]

    # Get the proxy IP before any request is issued.
    httpip = myip.getip()
    # Run all seed crawls concurrently.
    await asyncio.gather(*to_get)
    # asyncio.ensure_future()


# Write to the concatenated-keyword table
async def write_pj_data(sql):
    """Execute and commit one ready-made SQL statement.

    Args:
        sql: Complete SQL statement to run on the shared cursor.

    Returns:
        ``True`` when the statement was committed or was rejected with
        ``pymssql.IntegrityError`` (duplicate rows are skipped); ``False``
        for any other error, which is printed together with the statement.
    """
    try:
        cursor.execute(sql)
        conn.commit()
    except pymssql.IntegrityError:
        # Duplicate row: skip it and report success.
        return True
    except Exception as err:
        print(repr(err))
        print(sql)
        print('-' * 50 + 'SqlError' + '-' * 50)
        return False
    return True

# Write the concatenated keyword string to the database
async def pj_to_sql(url):
    """Record the keyword fragment of ``url`` in ``ConnectKeywords``.

    The fragment is whatever sits between ``keyword=`` and ``&_=`` in the
    URL. When ``write_pj_data`` reports failure, the URL and the statement
    are appended to pjError.txt.

    Args:
        url: A suggestion endpoint URL as produced by ``main``/``nurl``.
    """
    pjstr = url.replace('https://connectkeyword.aliexpress.com/lenoIframeJson.htm?keyword=','').split('&_=')[0]
    # write_pj_data() only accepts a finished SQL string, so the value cannot
    # be passed as a query parameter here; double any single quote so a
    # keyword such as "men's" neither breaks nor alters the statement.
    escaped = str(pjstr).replace("'", "''")
    sql = "insert into ConnectKeywords(Keywords,UpdateTime) values('%s','%s')" % (
        escaped, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))
    if not await write_pj_data(sql):
        with open('pjError.txt', 'ab+') as f:
            f.write((url + '----pjError----' + sql + '\n').encode('utf-8'))



# Do not run the query (original author's note)
async def jc_to_sql(keylist):
    """Return the keys from ``keylist`` that are not yet in ``OneKey``.

    Args:
        keylist: Candidate ``key_key`` values.

    Returns:
        The set of candidates for which the query returned no matching
        ``key_key`` row. An empty ``keylist`` yields an empty set without
        touching the database.
    """
    keyset = set(keylist)
    if not keyset:
        # Nothing to look up; an empty WHERE clause would be invalid SQL.
        return keyset
    keys = tuple(keyset)
    # One placeholder per key; the driver quotes the values.
    placeholders = ', '.join(['%s'] * len(keys))
    sql = 'select key_key from OneKey where key_key in (' + placeholders + ')'
    cursor.execute(sql, keys)
    # Each fetched row is a sequence of column values; collect them all.
    testset = set().union(*cursor.fetchall())
    # Return the data that can still be written.
    return keyset.difference(testset)




if __name__ == '__main__':
    # Schedule main() as a task on the default event loop.
    loop = asyncio.get_event_loop()
    asyncio.ensure_future(main(loop))
    # run_forever() is never stopped by code in this file, so the process
    # keeps running after the crawl ends and has to be interrupted manually.
    # NOTE(review): an exception raised inside main() stays in the task and
    # is not re-raised here.
    loop.run_forever()


分享到:
更多

网友评论    (发表评论)


发表评论:

评论须知:

  • 1、评论每次加2分,每天上限为30;
  • 2、请文明用语,共同创建干净的技术交流环境;
  • 3、若被发现提交非法信息,评论将会被删除,并且给予扣分处理,严重者给予封号处理;
  • 4、请勿发布广告信息或其他无关评论,否则将会删除评论并扣分,严重者给予封号处理。