抓彩票信息.md
import json
import os
import sys
from collections import defaultdict
from functools import reduce
from lxml import etree
import requests
# Directory of the script being run (sys.argv[0] with symlinks resolved);
# the absolute paths below are built from it.
base_path = os.path.dirname(os.path.realpath(sys.argv[0]))

# Bare file names of the saved history page and of the parsed draw history.
file_html, file_json = "lottery.html", "lottery.json"

# The same two files addressed inside the script directory.
file_html_path, file_json_path = (
    os.path.join(base_path, name) for name in (file_html, file_json)
)
def handler_data(html_path='./lottery.html'):
    """Parse the saved history page into a list of draw records.

    Every ``<tr class="t_tr1">`` inside ``<tbody id="tdata">`` is one draw
    made of 15 ``<td>`` cells. Only the issue number, the seven drawn
    numbers and the draw date are kept.

    Args:
        html_path: Path of the HTML file to parse. Defaults to
            ``./lottery.html`` relative to the current working directory.

    Returns:
        A list of dicts with the keys ``'qihao'`` (cell 0), ``'1'`` to
        ``'7'`` (cells 1-7) and ``'day'`` (cell 14). Values are the cell
        texts as strings, or ``None`` for an empty cell.
    """
    tree = etree.parse(html_path, etree.HTMLParser())
    records = []
    # Walk the table row by row so that every row -- including the first
    # one on the page -- yields a record.
    for row in tree.xpath("//tbody[@id='tdata']/tr[@class='t_tr1']"):
        cols = row.findall('td')
        if len(cols) < 15:
            # Not a complete draw row; skipping it avoids an IndexError.
            continue
        records.append({
            'qihao': cols[0].text,
            '1': cols[1].text,
            '2': cols[2].text,
            '3': cols[3].text,
            '4': cols[4].text,
            '5': cols[5].text,
            '6': cols[6].text,
            '7': cols[7].text,
            'day': cols[14].text,
        })
    return records
def init_data():
    """Create the JSON draw history on first run.

    Does nothing when the JSON file already exists at ``file_json_path``.
    Otherwise downloads the full history page, saves it as ``lottery.html``
    in the current working directory, parses it with ``handler_data`` and
    writes the resulting records to ``file_json_path``.

    Raises:
        requests.RequestException: If the download fails, times out or the
            server answers with an HTTP error status.
    """
    # Check the same path that is written below and read by read_data();
    # the bare file name would be resolved against the working directory.
    if os.path.exists(file_json_path):
        return

    url = 'https://datachart.500.com/dlt/history/newinc/history.php?limit=100000&sort=0'
    ret = requests.get(url, timeout=30)
    # Fail loudly instead of parsing an error page into an empty history
    # file that would then block every later initialisation.
    ret.raise_for_status()
    # Store the response bytes untouched: going through ret.text decodes
    # with the charset requests guessed and can garble non-ASCII labels.
    with open("lottery.html", "wb") as f:
        f.write(ret.content)

    r = handler_data()
    with open(file_json_path, "w") as f:
        f.write(json.dumps(r, ensure_ascii=False) + '\n')
def get_new_data():
    """Download the 30 most recent draws and save the page as ``lottery.html``.

    The file is written to the current working directory and replaces any
    previous copy.

    Raises:
        requests.RequestException: If the download fails, times out or the
            server answers with an HTTP error status.
    """
    url = 'https://datachart.500.com/dlt/history/newinc/history.php?limit=30&sort=0'
    ret = requests.get(url, timeout=30)
    # Do not overwrite the saved page with an HTTP error document.
    ret.raise_for_status()
    # Store the response bytes untouched: going through ret.text decodes
    # with the charset requests guessed and can garble non-ASCII labels.
    with open("lottery.html", "wb") as f:
        f.write(ret.content)
def read_data():
    """Load the stored draw history.

    Returns:
        The object decoded from the JSON file at ``file_json_path``.
    """
    with open(file_json_path, 'r') as history_file:
        return json.load(history_file)
def add_data():
    """Fetch the latest draws and merge the unseen ones into the history.

    Downloads the recent-draws page, parses it, appends every record that
    is not already present in the stored history (compared as whole
    records) and rewrites the JSON file at ``file_json_path``.
    """
    get_new_data()
    fetched = handler_data()
    history = read_data()

    for record in fetched:
        if record in history:
            continue
        history.append(record)

    with open(file_json_path, "w") as out:
        payload = json.dumps(history, ensure_ascii=False)
        out.write(payload + '\n')
def random_weight(weight_data):
    """Pick one key at random, with probability proportional to its weight.

    Args:
        weight_data: Mapping of candidate key -> non-negative numeric weight.

    Returns:
        The selected key, or ``None`` when ``weight_data`` is empty.
    """
    import random

    total = sum(weight_data.values())  # sum of all weights
    ra = random.uniform(0, total)  # random point between 0 and the weight sum
    curr_sum = 0
    ret = None
    for k, weight in weight_data.items():
        curr_sum += weight  # running total of the weights seen so far
        # Remember the key before testing: with float weights the running
        # total can end up marginally below `total`, and the last key must
        # then still be returned instead of None.
        ret = k
        if ra <= curr_sum:  # the random point falls inside this key's span
            break
    return ret
def gen_num_queue(src, count):
    """Draw ``count`` distinct keys from ``src`` by weighted random choice.

    Keys are drawn one at a time with ``random_weight``; a key that was
    already picked is discarded and the draw is repeated.

    Args:
        src: Mapping of candidate key -> non-negative numeric weight.
        count: Number of distinct keys wanted.

    Returns:
        A list of ``count`` distinct keys in the order they were drawn
        (empty when ``count`` is zero or negative).

    Raises:
        ValueError: If ``count`` exceeds the number of keys with a positive
            weight, in which case the redraw loop could not finish.
    """
    # Keys with zero weight are practically never drawn, so only keys with
    # a positive weight count as available.
    drawable = sum(1 for weight in src.values() if weight > 0)
    if count > drawable:
        raise ValueError(
            "cannot draw %d distinct keys from %d weighted candidates"
            % (count, drawable)
        )

    ret = []
    while len(ret) < count:
        num = random_weight(src)
        if num not in ret:
            ret.append(num)
    return ret
def handler():
    """Print five number lines weighted by how often each value was drawn.

    Counts, over the stored history, how often each value appears in
    columns ``'1'``-``'5'`` and in columns ``'6'``-``'7'``. Each printed
    line holds five distinct values picked from the first tally followed by
    two distinct values picked from the second, separated by spaces and
    followed by an empty line.
    """
    draws = read_data()

    first_tally = defaultdict(int)   # occurrences in columns '1'..'5'
    second_tally = defaultdict(int)  # occurrences in columns '6'..'7'
    for draw in draws:
        for column in ('1', '2', '3', '4', '5'):
            first_tally[draw[column]] += 1
        for column in ('6', '7'):
            second_tally[draw[column]] += 1

    for _ in range(5):
        picks = gen_num_queue(first_tally, 5) + gen_num_queue(second_tally, 2)
        line = " ".join(str(pick) for pick in picks)
        print(line + '\n')
if __name__ == '__main__':
    # Build the history file if it is missing, merge the newest draws into
    # it, then print the generated number lines.
    for step in (init_data, add_data, handler):
        step()
lottery.html
<div id="container">
<div class="warp">
<div class="wrap_datachart">
<table width="100%" border="0" cellspacing="0" cellpadding="0">
<tr>
<td>
<div class="wrap_datachart">
<div class="tubiao_box">
<div class="tubiao_box_t">
<table width="81%" border="0" cellspacing="0" cellpadding="0">
<tr>
<td width="200">
<span class="span_left">超级大乐透：<span class="cfont2">开奖信息</span></span></td>
<td align="center" style="font-weight: normal">
<a href="javascript:;" style="color:red">最近30期</a>
<a href="javascript:;" onclick="getChartdata(50,0)">最近50期</a>
<a href="javascript:;" onclick="getChartdata(100,0)">最近100期</a>
<input id="start" name="start" value="19147" size="10" />
期 至<input id="end" name="end" value="20026" size="10" />
期 <img src="/images/info/tubiao/cx01.gif" align="absmiddle" border="0" style="cursor:pointer;" onclick="getChartdata(30,0,1)" />
</td>
</tr>
</table>
<div style="width: 950px; height: 0px; clear: both; line-height: 0px; font-size: 0px;"></div>
</div>
</div>
<div class="chart">
<table width="100%" border="1" cellpadding="3" cellspacing="0" bordercolor="#C0DCF8" id="tablelist">
<tr>
<!--<td class="td_title1 dot_paixu2">æå¥ç¨ç/å¥</td>-->
<td class="td_title1" style="cursor:pointer;"width="80" align="center" onclick="getChartdata(30,1);" rowspan="2"><strong>期号</strong><img src="/images/info/tubiao/dot_paixu2.gif" border="0" align="absmiddle" id="imgorder"> <img src="/images/info/tubiao/paixu.gif" border="0" align="top" /></td>
<td class="td_title1" colspan="5" >前区号码</td>
<td class="td_title1" colspan="2" rowspan="2">后区</td>
<td class="td_title1" rowspan="2">奖池奖金(元)</td>
<td class="td_title1" colspan="2">一等奖</td>
<td class="td_title1" colspan="2">二等奖</td>
<td class="td_title1" rowspan="2">总投注额(元)</td>
<td class="td_title1" rowspan="2">开奖日期</td>
</tr>
<tr class="t_tr1">
<td class="t_tr1">1</td>
<td class="t_tr1">2</td>
<td class="t_tr1">3</td>
<td class="t_tr1">4</td>
<td class="t_tr1">5</td>
<td class="t_tr1">注数</td>
<td class="t_tr1">奖金(元)</td>
<td class="t_tr1">注数</td>
<td class="t_tr1">奖金(元)</td>
</tr>
<tbody id="tdata">
<tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20026</td><td class="cfont2">03</td><td class="cfont2">15</td><td class="cfont2">26</td><td class="cfont2">32</td><td class="cfont2">34</td><td class="cfont4">08</td><td class="cfont4">12</td><td class="t_tr1">1,423,923,368</td><td class="t_tr1">7</td><td class="t_tr1">9,777,969</td><td class="t_tr1">97</td><td class="t_tr1">148,263</td><td class="t_tr1">267,647,668</td><td class="t_tr1">2020-04-20</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20025</td><td class="cfont2">02</td><td class="cfont2">14</td><td class="cfont2">21</td><td class="cfont2">22</td><td class="cfont2">26</td><td class="cfont4">05</td><td class="cfont4">07</td><td class="t_tr1">1,442,053,199</td><td class="t_tr1">5</td><td class="t_tr1">10,000,000</td><td class="t_tr1">81</td><td class="t_tr1">158,628</td><td class="t_tr1">290,980,102</td><td class="t_tr1">2020-04-18</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20024</td><td class="cfont2">03</td><td class="cfont2">05</td><td class="cfont2">06</td><td class="cfont2">24</td><td class="cfont2">30</td><td class="cfont4">03</td><td class="cfont4">07</td><td class="t_tr1">1,437,049,655</td><td class="t_tr1">4</td><td class="t_tr1">9,810,302</td><td class="t_tr1">143</td><td class="t_tr1">56,756</td><td class="t_tr1">274,944,706</td><td class="t_tr1">2020-04-15</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20023</td><td class="cfont2">04</td><td class="cfont2">11</td><td class="cfont2">23</td><td class="cfont2">26</td><td class="cfont2">30</td><td class="cfont4">01</td><td class="cfont4">06</td><td class="t_tr1">1,448,119,562</td><td class="t_tr1">5</td><td class="t_tr1">10,000,000</td><td class="t_tr1">76</td><td class="t_tr1">144,157</td><td class="t_tr1">266,526,290</td><td class="t_tr1">2020-04-13</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20022</td><td class="cfont2">04</td><td class="cfont2">14</td><td 
class="cfont2">20</td><td class="cfont2">28</td><td class="cfont2">35</td><td class="cfont4">02</td><td class="cfont4">03</td><td class="t_tr1">1,459,915,832</td><td class="t_tr1">1</td><td class="t_tr1">10,000,000</td><td class="t_tr1">85</td><td class="t_tr1">132,699</td><td class="t_tr1">279,808,308</td><td class="t_tr1">2020-04-11</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20021</td><td class="cfont2">10</td><td class="cfont2">12</td><td class="cfont2">24</td><td class="cfont2">31</td><td class="cfont2">33</td><td class="cfont4">04</td><td class="cfont4">09</td><td class="t_tr1">1,418,256,965</td><td class="t_tr1">9</td><td class="t_tr1">8,087,069</td><td class="t_tr1">129</td><td class="t_tr1">94,599</td><td class="t_tr1">255,667,147</td><td class="t_tr1">2020-04-08</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20020</td><td class="cfont2">08</td><td class="cfont2">10</td><td class="cfont2">12</td><td class="cfont2">26</td><td class="cfont2">32</td><td class="cfont4">05</td><td class="cfont4">09</td><td class="t_tr1">1,452,932,120</td><td class="t_tr1">3</td><td class="t_tr1">10,000,000</td><td class="t_tr1">76</td><td class="t_tr1">130,721</td><td class="t_tr1">229,103,689</td><td class="t_tr1">2020-04-06</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20019</td><td class="cfont2">01</td><td class="cfont2">12</td><td class="cfont2">22</td><td class="cfont2">23</td><td class="cfont2">33</td><td class="cfont4">01</td><td class="cfont4">12</td><td class="t_tr1">1,445,697,714</td><td class="t_tr1">4</td><td class="t_tr1">10,000,000</td><td class="t_tr1">65</td><td class="t_tr1">206,046</td><td class="t_tr1">251,571,436</td><td class="t_tr1">2020-04-04</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20018</td><td class="cfont2">09</td><td class="cfont2">14</td><td class="cfont2">16</td><td class="cfont2">24</td><td class="cfont2">26</td><td class="cfont4">04</td><td class="cfont4">12</td><td 
class="t_tr1">1,443,693,869</td><td class="t_tr1">0</td><td class="t_tr1">0</td><td class="t_tr1">69</td><td class="t_tr1">160,807</td><td class="t_tr1">230,454,828</td><td class="t_tr1">2020-04-01</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20017</td><td class="cfont2">01</td><td class="cfont2">06</td><td class="cfont2">07</td><td class="cfont2">09</td><td class="cfont2">10</td><td class="cfont4">02</td><td class="cfont4">11</td><td class="t_tr1">1,394,320,154</td><td class="t_tr1">6</td><td class="t_tr1">8,730,653</td><td class="t_tr1">150</td><td class="t_tr1">56,374</td><td class="t_tr1">220,830,654</td><td class="t_tr1">2020-03-30</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20016</td><td class="cfont2">04</td><td class="cfont2">08</td><td class="cfont2">19</td><td class="cfont2">31</td><td class="cfont2">35</td><td class="cfont4">01</td><td class="cfont4">02</td><td class="t_tr1">1,414,113,824</td><td class="t_tr1">3</td><td class="t_tr1">10,000,000</td><td class="t_tr1">61</td><td class="t_tr1">217,312</td><td class="t_tr1">235,862,341</td><td class="t_tr1">2020-03-28</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20015</td><td class="cfont2">01</td><td class="cfont2">07</td><td class="cfont2">23</td><td class="cfont2">24</td><td class="cfont2">26</td><td class="cfont4">03</td><td class="cfont4">07</td><td class="t_tr1">1,389,102,104</td><td class="t_tr1">2</td><td class="t_tr1">10,000,000</td><td class="t_tr1">42</td><td class="t_tr1">280,807</td><td class="t_tr1">213,742,602</td><td class="t_tr1">2020-03-25</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20014</td><td class="cfont2">03</td><td class="cfont2">07</td><td class="cfont2">09</td><td class="cfont2">17</td><td class="cfont2">21</td><td class="cfont4">03</td><td class="cfont4">06</td><td class="t_tr1">1,368,119,121</td><td class="t_tr1">4</td><td class="t_tr1">8,706,884</td><td class="t_tr1">149</td><td class="t_tr1">44,032</td><td 
class="t_tr1">203,414,775</td><td class="t_tr1">2020-03-23</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20013</td><td class="cfont2">01</td><td class="cfont2">04</td><td class="cfont2">05</td><td class="cfont2">11</td><td class="cfont2">35</td><td class="cfont4">03</td><td class="cfont4">09</td><td class="t_tr1">1,382,155,015</td><td class="t_tr1">9</td><td class="t_tr1">7,976,833</td><td class="t_tr1">45</td><td class="t_tr1">278,794</td><td class="t_tr1">210,216,729</td><td class="t_tr1">2020-03-21</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20012</td><td class="cfont2">01</td><td class="cfont2">06</td><td class="cfont2">18</td><td class="cfont2">32</td><td class="cfont2">34</td><td class="cfont4">01</td><td class="cfont4">03</td><td class="t_tr1">1,417,484,531</td><td class="t_tr1">2</td><td class="t_tr1">10,000,000</td><td class="t_tr1">35</td><td class="t_tr1">268,738</td><td class="t_tr1">180,512,101</td><td class="t_tr1">2020-03-18</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20011</td><td class="cfont2">02</td><td class="cfont2">13</td><td class="cfont2">19</td><td class="cfont2">22</td><td class="cfont2">23</td><td class="cfont4">02</td><td class="cfont4">07</td><td class="t_tr1">1,392,702,843</td><td class="t_tr1">5</td><td class="t_tr1">9,096,183</td><td class="t_tr1">58</td><td class="t_tr1">158,385</td><td class="t_tr1">169,402,246</td><td class="t_tr1">2020-03-16</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20010</td><td class="cfont2">01</td><td class="cfont2">08</td><td class="cfont2">13</td><td class="cfont2">24</td><td class="cfont2">32</td><td class="cfont4">02</td><td class="cfont4">09</td><td class="t_tr1">1,408,398,439</td><td class="t_tr1">6</td><td class="t_tr1">8,599,471</td><td class="t_tr1">70</td><td class="t_tr1">114,765</td><td class="t_tr1">193,022,506</td><td class="t_tr1">2020-03-14</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20009</td><td 
class="cfont2">19</td><td class="cfont2">29</td><td class="cfont2">31</td><td class="cfont2">34</td><td class="cfont2">35</td><td class="cfont4">06</td><td class="cfont4">10</td><td class="t_tr1">1,426,304,213</td><td class="t_tr1">6</td><td class="t_tr1">10,000,000</td><td class="t_tr1">102</td><td class="t_tr1">152,272</td><td class="t_tr1">306,054,010</td><td class="t_tr1">2020-01-20</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20008</td><td class="cfont2">14</td><td class="cfont2">17</td><td class="cfont2">19</td><td class="cfont2">24</td><td class="cfont2">32</td><td class="cfont4">01</td><td class="cfont4">06</td><td class="t_tr1">1,436,007,521</td><td class="t_tr1">4</td><td class="t_tr1">10,000,000</td><td class="t_tr1">57</td><td class="t_tr1">344,283</td><td class="t_tr1">312,604,530</td><td class="t_tr1">2020-01-18</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20007</td><td class="cfont2">02</td><td class="cfont2">10</td><td class="cfont2">19</td><td class="cfont2">24</td><td class="cfont2">30</td><td class="cfont4">05</td><td class="cfont4">08</td><td class="t_tr1">1,413,642,399</td><td class="t_tr1">12</td><td class="t_tr1">7,162,433</td><td class="t_tr1">131</td><td class="t_tr1">96,229</td><td class="t_tr1">275,894,030</td><td class="t_tr1">2020-01-15</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20006</td><td class="cfont2">09</td><td class="cfont2">22</td><td class="cfont2">25</td><td class="cfont2">31</td><td class="cfont2">32</td><td class="cfont4">08</td><td class="cfont4">12</td><td class="t_tr1">1,474,266,994</td><td class="t_tr1">1</td><td class="t_tr1">10,000,000</td><td class="t_tr1">106</td><td class="t_tr1">118,123</td><td class="t_tr1">274,628,114</td><td class="t_tr1">2020-01-13</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20005</td><td class="cfont2">06</td><td class="cfont2">10</td><td class="cfont2">33</td><td class="cfont2">34</td><td class="cfont2">35</td><td 
class="cfont4">01</td><td class="cfont4">03</td><td class="t_tr1">1,430,492,846</td><td class="t_tr1">7</td><td class="t_tr1">10,000,000</td><td class="t_tr1">198</td><td class="t_tr1">70,540</td><td class="t_tr1">306,743,622</td><td class="t_tr1">2020-01-11</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20004</td><td class="cfont2">17</td><td class="cfont2">20</td><td class="cfont2">21</td><td class="cfont2">29</td><td class="cfont2">30</td><td class="cfont4">05</td><td class="cfont4">09</td><td class="t_tr1">1,439,766,324</td><td class="t_tr1">4</td><td class="t_tr1">10,000,000</td><td class="t_tr1">129</td><td class="t_tr1">80,526</td><td class="t_tr1">279,672,368</td><td class="t_tr1">2020-01-08</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20003</td><td class="cfont2">23</td><td class="cfont2">25</td><td class="cfont2">26</td><td class="cfont2">30</td><td class="cfont2">34</td><td class="cfont4">03</td><td class="cfont4">07</td><td class="t_tr1">1,441,800,542</td><td class="t_tr1">0</td><td class="t_tr1">0</td><td class="t_tr1">52</td><td class="t_tr1">333,491</td><td class="t_tr1">284,089,915</td><td class="t_tr1">2020-01-06</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20002</td><td class="cfont2">03</td><td class="cfont2">07</td><td class="cfont2">18</td><td class="cfont2">25</td><td class="cfont2">30</td><td class="cfont4">02</td><td class="cfont4">07</td><td class="t_tr1">1,369,911,816</td><td class="t_tr1">6</td><td class="t_tr1">9,908,421</td><td class="t_tr1">152</td><td class="t_tr1">89,594</td><td class="t_tr1">314,707,905</td><td class="t_tr1">2020-01-04</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20001</td><td class="cfont2">17</td><td class="cfont2">25</td><td class="cfont2">26</td><td class="cfont2">32</td><td class="cfont2">34</td><td class="cfont4">04</td><td class="cfont4">07</td><td class="t_tr1">1,387,021,575</td><td class="t_tr1">2</td><td class="t_tr1">10,000,000</td><td 
class="t_tr1">95</td><td class="t_tr1">145,642</td><td class="t_tr1">290,178,486</td><td class="t_tr1">2020-01-01</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">19150</td><td class="cfont2">07</td><td class="cfont2">11</td><td class="cfont2">12</td><td class="cfont2">16</td><td class="cfont2">33</td><td class="cfont4">05</td><td class="cfont4">07</td><td class="t_tr1">1,347,226,024</td><td class="t_tr1">19</td><td class="t_tr1">6,819,667</td><td class="t_tr1">115</td><td class="t_tr1">114,036</td><td class="t_tr1">296,427,458</td><td class="t_tr1">2019-12-30</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">19149</td><td class="cfont2">01</td><td class="cfont2">02</td><td class="cfont2">07</td><td class="cfont2">33</td><td class="cfont2">35</td><td class="cfont4">06</td><td class="cfont4">10</td><td class="t_tr1">1,422,864,762</td><td class="t_tr1">0</td><td class="t_tr1">0</td><td class="t_tr1">85</td><td class="t_tr1">177,509</td><td class="t_tr1">317,712,338</td><td class="t_tr1">2019-12-28</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">19148</td><td class="cfont2">03</td><td class="cfont2">04</td><td class="cfont2">07</td><td class="cfont2">11</td><td class="cfont2">30</td><td class="cfont4">08</td><td class="cfont4">09</td><td class="t_tr1">1,357,789,884</td><td class="t_tr1">5</td><td class="t_tr1">10,000,000</td><td class="t_tr1">91</td><td class="t_tr1">119,722</td><td class="t_tr1">289,764,338</td><td class="t_tr1">2019-12-25</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">19147</td><td class="cfont2">09</td><td class="cfont2">12</td><td class="cfont2">19</td><td class="cfont2">22</td><td class="cfont2">33</td><td class="cfont4">07</td><td class="cfont4">08</td><td class="t_tr1">1,365,277,988</td><td class="t_tr1">6</td><td class="t_tr1">9,568,706</td><td class="t_tr1">101</td><td class="t_tr1">108,661</td><td class="t_tr1">291,104,886</td><td class="t_tr1">2019-12-23</td></tr> </tbody>
</table>
</div>
</div>
</td>
</tr>
</table>
<div class="chartIntro" id="chartbottom"> <span class="cfont4 intro_left"><!--~ad~247~removed--></span><br></span><span class="intro_right dlt_ad"><!--~ad~248~removed--></span>
<div class="clear"></div>
<br />
</div></div></div>
``
仅供参考