使用Python机器人自动提交HDU OJ题目并验证是否正确

整天胡思乱想乱想乱想就想做。前面明明还有那么多东西没做完,做完这个之后必须把前面的想法做完再去做新东西不然就剁手剁手剁手立贴为证::>_<::::>_<::::>_<::
这个刷题机器人是这阶段最后一个要做的东西,做完这个之后,先把前面准备做但没做完的完成,不然就剁手!

要做刷题机器人,当然是机器人自己找代码去提交。暂时又没有智能到自己做题的机器人,只能网上找代码了。跟以前一样,python来找代码。

最开始的想法是:张浩斌老师维护了一个公众号hustoj,可以提供各大OJ题解,我打算用C#写模拟点击的程序,利用微信电脑端给公众号发消息来获取代码。有这想法是因为我以为张浩斌老师有各大OJ的代码数据库,可是后来发现并不是这样的,所以就自己去抓代码吧。

找来找去有一个特别适合抓代码的网站,这个网站本来就收集了各大牛博客贴的题解,我再抓来放到自己数据库就OK了(放到数据库是因为打算用数据库跟提交代码的机器人中转通信,为以后写vjudge做准备练手用)。

做起来挺快的,也感谢学校老师借给我一台Windows服务器用,可以放心地把代码放到服务器上跑了(自己的aliyun Linux服务器经常跑着跑着就退出了,不知道是我写的代码有问题还是我没配置好,可是代码在Windows上跑就没问题)。

半个多小时已经抓到ID 1700了,有很多题目是没有题解的,看看电影到凌晨两三点估计就能抓完了。
抓取代码如下:

import HTMLParser
import urlparse
import urllib
import urllib2
from urllib2 import Request, urlopen, URLError, HTTPError  
import cookielib
import bs4
from bs4 import BeautifulSoup
import MySQLdb
import sys
import datetime

# Crawl accepted.com.cn for HDU OJ solutions and store one row per problem id
# in the `oj` table. A row whose `source` is 'none' means that no solution
# could be fetched for that problem.
url = 'http://accepted.com.cn/hdoj'

opener = urllib2.build_opener()

insert_sql = ('insert into `oj` (`oj_name`, `problem_id`,`source`, `if_submit`, `success`, `time`)'
              'values (%s,%s,%s,%s,%s,%s)')

for i in range(1224, 5567):
    print('ID %d :' % i)
    # Start from the "no solution" marker so that every failure path below
    # still leaves a defined value to store.
    source = 'none'

    try:
        response = opener.open(Request(url + str(i)))
    except URLError as e:
        # HTTPError carries .code; a plain URLError only carries .reason.
        if hasattr(e, 'code'):
            print('Error code:  %s' % (e.code,))
        elif hasattr(e, 'reason'):
            print('We failed to reach a server.')
            print('Reason:  %s' % (e.reason,))
    else:
        soup = BeautifulSoup(response, 'html.parser')
        a_target = soup.find_all(class_='crayon-plain-wrap')
        if a_target:
            # Only the first matching element on the page is kept.
            source = a_target[0].text.encode("utf-8")
            print('get code success')
        else:
            print('no code')

    # NOTE: host/user/passwd/db are placeholders; fill them in before running.
    # 3306 is MySQL's default port -- change it if your server differs.
    conn = MySQLdb.connect(host='yourdatabaseserver', user='', passwd='', db='', port=3306)
    try:
        cur = conn.cursor()
        cur.execute('set names utf8')
        cur.execute(insert_sql,
                    ('hdu', i, source, '0', '0', datetime.datetime.now()))
        conn.commit()
        cur.close()
    finally:
        # Close the connection even when the insert fails.
        conn.close()

放一张勤劳工作图
勤劳工作图.png

没找到好看的电影,处理了一下,开多任务同时抓,不到半个小时就把能抓到的都抓了。
又过了一会儿,又把提交的代码写出来了,如下:

import HTMLParser 
import urlparse 
import urllib 
import urllib2
from urllib2 import Request, urlopen, URLError, HTTPError  
import cookielib 
import string
import bs4
from bs4 import BeautifulSoup
import xml.dom.minidom
import re
import os
import PIL
import StringIO
from PIL import Image
from PIL import ImageEnhance  
from PIL import ImageFilter
import glob
import pytesser
from pytesser import* 
import sys
import time
import MySQLdb


# Log in to HDU OJ and submit every stored solution, one problem at a time.
login_url = 'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0'

que_url = 'http://acm.hdu.edu.cn/submit.php?action=submit'
headers = {
    'Host':'acm.hdu.edu.cn',
    'Origin':'http://acm.hdu.edu.cn',
    'Referer':'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0',
    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language':'en,zh;q=0.8,zh-CN;q=0.6',
    'Connection':'keep-alive',
    'Cache-Control':'max-age=0',
    'Content-Type':'application/x-www-form-urlencoded',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36',
}

# The cookie processor keeps the login session across requests.
cookies = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookies)

account = {
    'username':'vividtest',
    'userpass': ''
}
# Encoded login form. It must never be reassigned inside the loop: the
# submission payload lives in its own variable so that each login request
# still posts the credentials.
data = urllib.urlencode(account)

# Upper bound 5567 matches the scraper, so problem 5566 is included.
for i in range(1000, 5567):
    # NOTE: connection settings are placeholders; 3306 is MySQL's default port.
    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
    try:
        cur = conn.cursor()
        cur.execute('set names utf8')
        cur.execute("SELECT `source` FROM `oj` WHERE `problem_id`=%s", (i,))
        row = cur.fetchone()
        cur.close()
    finally:
        conn.close()

    # Skip problems that were never scraped (no row) or have no solution.
    if row is None or 'none' == row[0]:
        continue
    ans_code = row[0]

    try:
        opener.open(urllib2.Request(login_url, data, headers))
    except URLError as e:
        if hasattr(e, 'code'):
            print('Error code:  %s' % (e.code,))
        elif hasattr(e, 'reason'):
            print('Failed to reach a server.')
            print('Reason:  %s' % (e.reason,))
        continue

    submit_data = urllib.urlencode({
        'language':0,
        'usercode':ans_code,
        'problemid':i
    })
    try:
        response = opener.open(urllib2.Request(que_url, submit_data, headers))
    except URLError:
        # One failed submission should not abort the whole run.
        print('submit failed')
        continue
    print(response.read())
    # Throttle submissions to avoid burdening the judge.
    time.sleep(10)

为了尽量避免给杭电OJ的管理员们带来麻烦,代码休眠十秒提交一次,这是在测试,等开始正式刷题之后我会随机休眠1-10分钟提交一次。
攻陷杭电OJ.png

现在已经可以去攻占 杭电OJ 了。
BUT, THINK HIGHER:
干脆把获取题目评测状态一起写了吧,写完这个功能就可以整合到oj做vjudge了。

写完了,发代码:

import HTMLParser 
import urllib 
import urllib2
from urllib2 import Request, urlopen, URLError, HTTPError  
import cookielib 
import bs4
from bs4 import BeautifulSoup
import sys
import time
import MySQLdb
import random

def getStatus(status):
    """Translate a verdict string into the numeric code used by the submit loop.

    Returns 1 for 'Accepted', 2 while the submission is still in progress
    ('Queuing', 'Compiling' or 'Running'), and 0 for every other verdict.
    """
    if status in ('Queuing', 'Compiling', 'Running'):
        return 2
    return 1 if status == 'Accepted' else 0

# Submit every stored solution to HDU OJ, poll the status page until the
# verdict is final, and record the outcome in the `oj` table.
login_url = 'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0'
submit_url = 'http://acm.hdu.edu.cn/submit.php?action=submit'
status_url = 'http://acm.hdu.edu.cn/status.php?user=vividtest&pid='

headers = {
    'Host':'acm.hdu.edu.cn',
    'Origin':'http://acm.hdu.edu.cn',
    'Referer':'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0',
    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language':'en,zh;q=0.8,zh-CN;q=0.6',
    'Connection':'keep-alive',
    'Cache-Control':'max-age=0',
    'Content-Type':'application/x-www-form-urlencoded',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36',
}

# The cookie processor keeps the login session across requests.
cookies = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookies)

account = {
    'username':'vividtest',
    'userpass': ''
}
# Encoded login form. It is never reassigned below, so every login request
# posts the credentials rather than a previous submission payload.
data = urllib.urlencode(account)

for i in range(1000, 5567):
    print('ID %d' % i)
    # NOTE: connection settings are placeholders; 3306 is MySQL's default port.
    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
    try:
        cur = conn.cursor()
        cur.execute('set names utf8')
        cur.execute("SELECT `source` FROM `oj` WHERE `problem_id`=%s", (i,))
        row = cur.fetchone()
        cur.close()
    finally:
        conn.close()

    # Skip problems that were never scraped (no row) or have no solution.
    if row is None or 'none' == row[0]:
        print('none code')
        continue
    ans_code = row[0]

    try:
        opener.open(urllib2.Request(login_url, data, headers))
    except URLError as e:
        if hasattr(e, 'code'):
            print('Error code:  %s' % (e.code,))
        elif hasattr(e, 'reason'):
            print('We failed to reach a server.')
            print('Reason:  %s' % (e.reason,))
        continue

    submit_data = urllib.urlencode({
        'language':0,
        'usercode':ans_code,
        'problemid':i
    })
    try:
        opener.open(urllib2.Request(submit_url, submit_data, headers))
    except URLError:
        print('submit failed')
        continue
    print('submit success')
    time.sleep(1)

    # Poll until getStatus stops reporting "still judging" (code 2).
    status_request = urllib2.Request(status_url + str(i), None, headers)
    status_code = 2
    while 2 == status_code:
        try:
            response = opener.open(status_request)
        except URLError:
            print('get status failed---once,I will again')
            # Pause before retrying so a network outage is not a busy loop.
            time.sleep(1)
            continue
        soup = BeautifulSoup(response, 'html.parser')
        try:
            # The verdict is read from the <font> tag of the 8th <tr>.
            status = soup.find_all('tr')[7].font.text
        except (IndexError, AttributeError):
            # Unexpected page layout: give up on this problem without
            # recording a verdict.
            print('get status failed')
            break
        print(status)
        status_code = getStatus(status)
        time.sleep(1)

    if 2 == status_code:
        # The verdict could not be read; leave the row unmarked.
        continue

    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
    try:
        cur = conn.cursor()
        cur.execute('set names utf8')
        # The driver quotes parameters itself, so the placeholders stay bare.
        cur.execute("UPDATE oj SET `if_submit`=1,`success`=%s WHERE `problem_id`=%s",
                    (status_code, i))
        conn.commit()
        cur.close()
    finally:
        conn.close()
    print('judge complete, statue: ' + status)
    # Random pause between submissions to avoid burdening the judge.
    random_time = random.randint(60,600)
    print("I'm sleeping, I will be back in %d s" % random_time)
    time.sleep(random_time)

Server运行效果图一张:
server.png

又做修改,加入获取不到状态时异常处理,加入编译错误信息记录

import HTMLParser 
import urllib 
import urllib2
from urllib2 import Request, urlopen, URLError, HTTPError  
import cookielib 
import bs4
from bs4 import BeautifulSoup
import sys
import time
import MySQLdb
import random

def getStatus(status):
    """Translate a verdict string into the numeric code used by the submit loop.

    Returns 1 for 'Accepted', 2 while the submission is still in progress
    ('Queuing', 'Compiling' or 'Running'), 3 for 'Compilation Error', and 0
    for every other verdict.
    """
    verdict_codes = (
        ('Accepted', 1),
        ('Queuing', 2),
        ('Compiling', 2),
        ('Running', 2),
        ('Compilation Error', 3),
    )
    for verdict, code in verdict_codes:
        if status == verdict:
            return code
    return 0

# Submit every stored solution to HDU OJ, poll the status page until the
# verdict is final, and record the outcome (including compilation errors)
# in the `oj` table. Problems whose status page cannot be parsed are skipped.
login_url = 'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0'
submit_url = 'http://acm.hdu.edu.cn/submit.php?action=submit'
status_url = 'http://acm.hdu.edu.cn/status.php?user=vividtest&pid='

headers = {
    'Host':'acm.hdu.edu.cn',
    'Origin':'http://acm.hdu.edu.cn',
    'Referer':'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0',
    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language':'en,zh;q=0.8,zh-CN;q=0.6',
    'Connection':'keep-alive',
    'Cache-Control':'max-age=0',
    'Content-Type':'application/x-www-form-urlencoded',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36',
}

# The cookie processor keeps the login session across requests.
cookies = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookies)

account = {
    'username':'vividtest',
    'userpass': ''
}
# Encoded login form. It is never reassigned below, so every login request
# posts the credentials rather than a previous submission payload.
data = urllib.urlencode(account)

for i in range(1053, 5567):
    print('ID %d' % i)
    # NOTE: connection settings are placeholders; 3306 is MySQL's default port.
    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
    try:
        cur = conn.cursor()
        cur.execute('set names utf8')
        cur.execute("SELECT `source` FROM `oj` WHERE `problem_id`=%s", (i,))
        row = cur.fetchone()
        cur.close()
    finally:
        conn.close()

    # Skip problems that were never scraped (no row) or have no solution.
    if row is None or 'none' == row[0]:
        print('none code')
        continue
    ans_code = row[0]

    try:
        opener.open(urllib2.Request(login_url, data, headers))
    except URLError as e:
        if hasattr(e, 'code'):
            print('Error code:  %s' % (e.code,))
        elif hasattr(e, 'reason'):
            print('We failed to reach a server.')
            print('Reason:  %s' % (e.reason,))
        continue

    submit_data = urllib.urlencode({
        'language':0,
        'usercode':ans_code,
        'problemid':i
    })
    try:
        opener.open(urllib2.Request(submit_url, submit_data, headers))
    except URLError:
        print('submit failed')
        continue
    print('submit success')
    time.sleep(1)

    # Poll until getStatus stops reporting "still judging" (code 2).
    status_request = urllib2.Request(status_url + str(i), None, headers)
    status_code = 2
    while 2 == status_code:
        try:
            response = opener.open(status_request)
        except URLError:
            print('get status failed---once,I will again')
            # Pause before retrying so a network outage is not a busy loop.
            time.sleep(1)
            continue
        soup = BeautifulSoup(response, 'html.parser')
        try:
            # The verdict is read from the <font> tag of the 8th <tr>.
            status = soup.find_all('tr')[7].font.text
        except (IndexError, AttributeError):
            # Unexpected page layout: skip this problem. status_code stays 2,
            # which the check after the loop uses to detect the failure.
            print('Error I acn not handle')
            break
        print(status)
        status_code = getStatus(status)
        time.sleep(1)

    if 2 == status_code:
        # The verdict could not be read; leave the row unmarked.
        continue

    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
    try:
        cur = conn.cursor()
        cur.execute('set names utf8')
        # The driver quotes parameters itself, so the placeholders stay bare.
        cur.execute("UPDATE oj SET `if_submit`=1,`success`=%s WHERE `problem_id`=%s",
                    (status_code, i))
        conn.commit()
        cur.close()
    finally:
        conn.close()
    print('judge complete, statue: ' + status)
    # Random pause between submissions to avoid burdening the judge.
    random_time = random.randint(10,60)
    print("I'm sleeping, I will be back in %d s" % random_time)
    time.sleep(random_time)

再改一次:用sql语句查出还没有提交过的代码自动提交,而不是人工更改题目id。这个已经可以当作vjudge评测提交服务运行了,等汤大神有空了,再请教他让python程序以守护进程方式常驻运行的办法。代码:

import HTMLParser 
import urllib 
import urllib2
from urllib2 import Request, urlopen, URLError, HTTPError  
import cookielib 
import bs4
from bs4 import BeautifulSoup
import sys
import time
import MySQLdb
import random

def getStatus(status):
    """Translate a verdict string into the numeric code used by the submit loop.

    Returns 1 for 'Accepted', 2 while the submission is still in progress
    ('Queuing', 'Compiling' or 'Running'), 3 for 'Compilation Error', and 0
    for every other verdict.
    """
    if status == 'Accepted':
        return 1
    if status in ('Queuing', 'Compiling', 'Running'):
        return 2
    if status == 'Compilation Error':
        return 3
    return 0

# Judge service loop: repeatedly pick the oldest not-yet-submitted solution
# from the `oj` table, submit it to HDU OJ, poll for the verdict, and store
# the result.
login_url = 'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0'
submit_url = 'http://acm.hdu.edu.cn/submit.php?action=submit'
status_url = 'http://acm.hdu.edu.cn/status.php?user=vividtest&pid='

headers = {
    'Host':'acm.hdu.edu.cn',
    'Origin':'http://acm.hdu.edu.cn',
    'Referer':'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0',
    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language':'en,zh;q=0.8,zh-CN;q=0.6',
    'Connection':'keep-alive',
    'Cache-Control':'max-age=0',
    'Content-Type':'application/x-www-form-urlencoded',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36',
}

# The cookie processor keeps the login session across requests.
cookies = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookies)

account = {
    'username':'vividtest',
    'userpass': ''
}
# Encoded login form. It is never reassigned below, so every login request
# posts the credentials rather than a previous submission payload.
data = urllib.urlencode(account)
# Common C++ headers prepended to every submission.
head_file = '#include <iostream>\n#include <cstring>\n#include <cstdio>\n#include <cstdlib>\n#include <cmath>\n#include <string>\n#include <vector>\n#include <list>\n#include <map>\n#include <queue>\n#include <stack>\n#include <bitset>\n#include <algorithm>\n#include <numeric>\n#include <functional>\nusing namespace std;\n'
# Seconds to wait before polling the queue again after a failure or when
# nothing is pending; without it the loop would spin on the same row.
retry_delay = 10

while True:
    # NOTE: connection settings are placeholders; 3306 is MySQL's default port.
    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
    try:
        cur = conn.cursor()
        cur.execute('set names utf8')
        cur.execute("SELECT `source`,`problem_id` FROM `oj` WHERE `if_submit`=0 AND `source` != 'none' ORDER BY `time` ASC LIMIT 0, 1")
        row = cur.fetchone()
        cur.close()
    finally:
        conn.close()

    if row is None:
        # Queue is empty: wait for new work instead of crashing.
        time.sleep(retry_delay)
        continue
    # The query selects exactly two columns: source first, problem id second.
    ans_code, i = row
    print('ID %s' % (i,))

    try:
        opener.open(urllib2.Request(login_url, data, headers))
    except URLError as e:
        if hasattr(e, 'code'):
            print('Error code:  %s' % (e.code,))
        elif hasattr(e, 'reason'):
            print('We failed to reach a server.')
            print('Reason:  %s' % (e.reason,))
        time.sleep(retry_delay)
        continue

    submit_data = urllib.urlencode({
        'language':0,
        'usercode':head_file+ans_code,
        'problemid':i
    })
    try:
        opener.open(urllib2.Request(submit_url, submit_data, headers))
    except URLError:
        print('submit failed')
        time.sleep(retry_delay)
        continue
    print('submit success')
    time.sleep(1)

    # Poll until getStatus stops reporting "still judging" (code 2).
    status_request = urllib2.Request(status_url + str(i), None, headers)
    status_code = 2
    while 2 == status_code:
        try:
            response = opener.open(status_request)
        except URLError:
            print('get status failed---once,I will again')
            time.sleep(2)
            continue
        soup = BeautifulSoup(response, 'html.parser')
        try:
            # The verdict is read from the <font> tag of the 8th <tr>.
            status = soup.find_all('tr')[7].font.text
        except (IndexError, AttributeError):
            # Unexpected page layout. status_code stays 2, which the check
            # after the loop uses to detect the failure.
            print('Error I acn not handle')
            break
        print(status)
        status_code = getStatus(status)
        time.sleep(1)

    if 2 == status_code:
        # The verdict could not be read. The row stays unsubmitted and will
        # be picked up again, so back off before the next attempt.
        time.sleep(retry_delay)
        continue

    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
    try:
        cur = conn.cursor()
        cur.execute('set names utf8')
        # The driver quotes parameters itself, so the placeholders stay bare.
        cur.execute("UPDATE oj SET `if_submit`=1,`success`=%s WHERE `problem_id`=%s",
                    (status_code, i))
        conn.commit()
        cur.close()
    finally:
        conn.close()
    print('judge complete, status: ' + status)
    # Random pause between submissions to avoid burdening the judge.
    random_time = random.randint(10,60)
    print("I'm sleeping, I will be back in %d s" % random_time)
    time.sleep(random_time)

攻陷杭电图
IMG_4059.PNG

Tag: none

Leave a new comment