抓彩票信息.md

import json
import os
import sys
from collections import defaultdict
from functools import reduce

from lxml import etree
import requests

# Directory that contains the script being run (sys.argv[0], symlinks resolved).
base_path = os.path.dirname(os.path.realpath(sys.argv[0]))

# Bare file names of the cached results page and of the parsed history.
file_html, file_json = "lottery.html", "lottery.json"

# The same two files, located next to the script.
file_html_path, file_json_path = (
    os.path.join(base_path, name) for name in (file_html, file_json)
)


def handler_data():
    """Parse the saved results page into a list of draw records.

    Reads ``./lottery.html`` (relative to the current working directory) and
    collects every ``<td>`` found under ``tbody#tdata`` in rows of class
    ``t_tr1``. The flat cell list is consumed in groups of 15, one group per
    draw.

    Returns:
        list[dict]: one dict per draw, with keys ``'qihao'`` (cell 0),
        ``'1'`` .. ``'7'`` (cells 1-7) and ``'day'`` (cell 14), each holding
        the text of the corresponding cell. Trailing cells that do not fill a
        complete group of 15 are ignored.
    """
    cells_per_row = 15

    html = etree.parse('./lottery.html', etree.HTMLParser())
    data = html.xpath("//tbody[@id='tdata']/tr[@class='t_tr1']/td")

    result = []
    # Begin at offset 0. Advancing the window before the first read would
    # silently drop the first row of the table.
    for start in range(0, len(data) - cells_per_row + 1, cells_per_row):
        cols = data[start:start + cells_per_row]
        record = {'qihao': cols[0].text}
        # Cells 1-7 hold the drawn numbers and are stored under '1'..'7'.
        record.update((str(i), cols[i].text) for i in range(1, 8))
        record['day'] = cols[14].text
        result.append(record)
    return result


def init_data():
    """Build the JSON history file on first run.

    When the JSON file at ``file_json_path`` does not exist yet, download the
    full history page (``limit=100000``), save it as ``lottery.html`` in the
    current working directory, parse it with ``handler_data`` and write the
    parsed records to ``file_json_path``. Does nothing when the JSON file is
    already present.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server answers with an HTTP error status.
    """
    # Check the same path that is written below. Testing the bare file name
    # would look in the working directory and re-download the whole history
    # whenever the script is started from another directory.
    if os.path.exists(file_json_path):
        return

    url = 'https://datachart.500.com/dlt/history/newinc/history.php?limit=100000&sort=0'
    ret = requests.get(url, timeout=30)
    # Fail loudly instead of caching an error page as an empty history.
    ret.raise_for_status()
    with open("lottery.html", "w", encoding='utf-8') as f:
        f.write(ret.text)

    r = handler_data()

    with open(file_json_path, "w") as f:
        f.write(json.dumps(r, ensure_ascii=False) + '\n')


def get_new_data():
    """Download the page with the latest 30 draws into ``lottery.html``.

    The file is written to the current working directory, overwriting any
    previous copy.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://datachart.500.com/dlt/history/newinc/history.php?limit=30&sort=0'
    # A timeout keeps the script from hanging forever on a stalled connection.
    ret = requests.get(url, timeout=30)
    # Do not overwrite the cached page with an HTTP error body.
    ret.raise_for_status()
    with open("lottery.html", "w", encoding='utf-8') as f:
        f.write(ret.text)


def read_data():
    """Return the parsed content of the JSON file at ``file_json_path``."""
    with open(file_json_path, 'r') as fh:
        return json.load(fh)


def add_data():
    """Fetch the latest draws and append the unseen ones to the JSON history.

    Downloads the recent-results page, parses it, loads the stored history and
    appends every parsed record that is not already present. The history file
    at ``file_json_path`` is then rewritten in full.
    """
    get_new_data()
    fresh_records = handler_data()
    history = read_data()

    # Membership is tested against the growing list, so a record repeated
    # within the fresh batch is only stored once.
    for record in fresh_records:
        if record in history:
            continue
        history.append(record)

    with open(file_json_path, "w") as out:
        out.write(json.dumps(history, ensure_ascii=False))
        out.write('\n')


def random_weight(weight_data):
    """Pick one key of ``weight_data`` at random, weighted by its value.

    Args:
        weight_data: mapping of key -> numeric weight.

    Returns:
        The first key whose cumulative weight reaches the random point, or
        ``None`` when no key qualifies (for example, an empty mapping).
    """
    import random

    # Draw a random point between 0 and the sum of all weights.
    point = random.uniform(0, sum(weight_data.values()))

    # Walk the keys in mapping order, accumulating weights; the first key
    # whose running total reaches the point is the pick.
    running_total = 0
    for key, weight in weight_data.items():
        running_total += weight
        if point <= running_total:
            return key
    return None


def gen_num_queue(src, count):
    """Draw ``count`` distinct keys from ``src`` using weighted random picks.

    Keys are drawn one at a time with ``random_weight`` and repeated picks
    are discarded until ``count`` different keys have been collected.

    Args:
        src: mapping of key -> numeric weight, passed to ``random_weight``.
        count: number of distinct keys wanted.

    Returns:
        list: the chosen keys in the order they were drawn; an empty list
        when ``count`` is zero or negative.

    Raises:
        ValueError: if ``src`` has fewer than ``count`` keys with a positive
            weight. Without this check the rejection loop below could never
            finish.
    """
    if count <= 0:
        return []

    # Only keys with a positive weight are counted as drawable here.
    drawable = sum(1 for weight in src.values() if weight > 0)
    if count > drawable:
        raise ValueError(
            "cannot draw %d distinct keys from %d weighted candidates"
            % (count, drawable)
        )

    ret = []
    while len(ret) < count:
        num = random_weight(src)
        if num not in ret:
            ret.append(num)
    return ret


def handler():
    """Print five weighted-random number suggestions from the stored history.

    Counts how often each value occurs in fields ``'1'``-``'5'`` and,
    separately, in fields ``'6'``-``'7'`` of the stored records. It then
    prints five lines, each made of 5 distinct picks from the first group
    followed by 2 distinct picks from the second, separated by spaces and
    followed by a blank line.
    """
    front_fields = ('1', '2', '3', '4', '5')
    back_fields = ('6', '7')

    history = read_data()

    # Occurrence counts serve as the weights for the random picks.
    front_counts = defaultdict(int)
    for draw in history:
        for field in front_fields:
            front_counts[draw[field]] += 1

    back_counts = defaultdict(int)
    for draw in history:
        for field in back_fields:
            back_counts[draw[field]] += 1

    for _ in range(5):
        picks = gen_num_queue(front_counts, 5) + gen_num_queue(back_counts, 2)
        print(" ".join(str(pick) for pick in picks) + '\n')


if __name__ == '__main__':
    # Pipeline, in order: create the history file if it is missing, merge in
    # the latest draws, then print the suggestions.
    for step in (init_data, add_data, handler):
        step()

lottery.html


<div id="container">
<div class="warp">
        <div class="wrap_datachart">
    <table width="100%" border="0" cellspacing="0" cellpadding="0">        
        <tr>
            <td>
                <div class="wrap_datachart">
                    <div class="tubiao_box">
                        <div class="tubiao_box_t">
                            <table width="81%" border="0" cellspacing="0" cellpadding="0">
                                <tr>
                                    <td width="200">
                                        <span class="span_left">超级大乐透:<span class="cfont2">开奖信息</span></span></td>
                                    <td align="center" style="font-weight: normal">
                                                                            <a href="javascript:;" style="color:red">最近30期</a>
                                                                        <a href="javascript:;" onclick="getChartdata(50,0)">最近50期</a>
                                                                        <a href="javascript:;" onclick="getChartdata(100,0)">最近100期</a>
                                                                            <input id="start" name="start" value="19147" size="10" />
                                        期 至<input id="end" name="end" value="20026" size="10" />
                                        期 <img src="/images/info/tubiao/cx01.gif" align="absmiddle" border="0" style="cursor:pointer;" onclick="getChartdata(30,0,1)" />
                                    </td>
                                </tr>
                            </table>
                            <div style="width: 950px; height: 0px; clear: both; line-height: 0px; font-size: 0px;"></div>
                        </div>
                    </div>
                    <div class="chart">
                        <table width="100%" border="1" cellpadding="3" cellspacing="0" bordercolor="#C0DCF8" id="tablelist">
                            <tr>
                                <!--<td class="td_title1 dot_paixu2">摇奖用球/套</td>-->
                                <td  class="td_title1" style="cursor:pointer;"width="80" align="center" onclick="getChartdata(30,1);" rowspan="2"><strong>期号</strong><img src="/images/info/tubiao/dot_paixu2.gif" border="0" align="absmiddle" id="imgorder">&nbsp;<img src="/images/info/tubiao/paixu.gif" border="0" align="top"  /></td>
                                <td class="td_title1" colspan="5" >前区号码</td>
                                <td class="td_title1" colspan="2" rowspan="2">后区</td>
                                <td class="td_title1" rowspan="2">奖池奖金(元)</td>
                                <td class="td_title1"  colspan="2">一等奖</td>
                                <td class="td_title1"  colspan="2">二等奖</td>
                                <td class="td_title1" rowspan="2">总投注额(元)</td>
                                <td class="td_title1" rowspan="2">开奖日期</td>
                            </tr>
                            <tr class="t_tr1">

                                <td class="t_tr1">1</td>
                                <td class="t_tr1">2</td>
                                <td class="t_tr1">3</td>
                                <td class="t_tr1">4</td>
                                <td class="t_tr1">5</td>


                                <td class="t_tr1">注数</td>
                                <td class="t_tr1">奖金(元)</td>
                                <td class="t_tr1">注数</td>
                                <td class="t_tr1">奖金(元)</td>

                             </tr>
                             <tbody id="tdata">
                            <tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20026</td><td class="cfont2">03</td><td class="cfont2">15</td><td class="cfont2">26</td><td class="cfont2">32</td><td class="cfont2">34</td><td class="cfont4">08</td><td class="cfont4">12</td><td class="t_tr1">1,423,923,368</td><td class="t_tr1">7</td><td class="t_tr1">9,777,969</td><td class="t_tr1">97</td><td class="t_tr1">148,263</td><td class="t_tr1">267,647,668</td><td class="t_tr1">2020-04-20</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20025</td><td class="cfont2">02</td><td class="cfont2">14</td><td class="cfont2">21</td><td class="cfont2">22</td><td class="cfont2">26</td><td class="cfont4">05</td><td class="cfont4">07</td><td class="t_tr1">1,442,053,199</td><td class="t_tr1">5</td><td class="t_tr1">10,000,000</td><td class="t_tr1">81</td><td class="t_tr1">158,628</td><td class="t_tr1">290,980,102</td><td class="t_tr1">2020-04-18</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20024</td><td class="cfont2">03</td><td class="cfont2">05</td><td class="cfont2">06</td><td class="cfont2">24</td><td class="cfont2">30</td><td class="cfont4">03</td><td class="cfont4">07</td><td class="t_tr1">1,437,049,655</td><td class="t_tr1">4</td><td class="t_tr1">9,810,302</td><td class="t_tr1">143</td><td class="t_tr1">56,756</td><td class="t_tr1">274,944,706</td><td class="t_tr1">2020-04-15</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20023</td><td class="cfont2">04</td><td class="cfont2">11</td><td class="cfont2">23</td><td class="cfont2">26</td><td class="cfont2">30</td><td class="cfont4">01</td><td class="cfont4">06</td><td class="t_tr1">1,448,119,562</td><td class="t_tr1">5</td><td class="t_tr1">10,000,000</td><td class="t_tr1">76</td><td class="t_tr1">144,157</td><td class="t_tr1">266,526,290</td><td class="t_tr1">2020-04-13</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20022</td><td class="cfont2">04</td><td 
class="cfont2">14</td><td class="cfont2">20</td><td class="cfont2">28</td><td class="cfont2">35</td><td class="cfont4">02</td><td class="cfont4">03</td><td class="t_tr1">1,459,915,832</td><td class="t_tr1">1</td><td class="t_tr1">10,000,000</td><td class="t_tr1">85</td><td class="t_tr1">132,699</td><td class="t_tr1">279,808,308</td><td class="t_tr1">2020-04-11</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20021</td><td class="cfont2">10</td><td class="cfont2">12</td><td class="cfont2">24</td><td class="cfont2">31</td><td class="cfont2">33</td><td class="cfont4">04</td><td class="cfont4">09</td><td class="t_tr1">1,418,256,965</td><td class="t_tr1">9</td><td class="t_tr1">8,087,069</td><td class="t_tr1">129</td><td class="t_tr1">94,599</td><td class="t_tr1">255,667,147</td><td class="t_tr1">2020-04-08</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20020</td><td class="cfont2">08</td><td class="cfont2">10</td><td class="cfont2">12</td><td class="cfont2">26</td><td class="cfont2">32</td><td class="cfont4">05</td><td class="cfont4">09</td><td class="t_tr1">1,452,932,120</td><td class="t_tr1">3</td><td class="t_tr1">10,000,000</td><td class="t_tr1">76</td><td class="t_tr1">130,721</td><td class="t_tr1">229,103,689</td><td class="t_tr1">2020-04-06</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20019</td><td class="cfont2">01</td><td class="cfont2">12</td><td class="cfont2">22</td><td class="cfont2">23</td><td class="cfont2">33</td><td class="cfont4">01</td><td class="cfont4">12</td><td class="t_tr1">1,445,697,714</td><td class="t_tr1">4</td><td class="t_tr1">10,000,000</td><td class="t_tr1">65</td><td class="t_tr1">206,046</td><td class="t_tr1">251,571,436</td><td class="t_tr1">2020-04-04</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20018</td><td class="cfont2">09</td><td class="cfont2">14</td><td class="cfont2">16</td><td class="cfont2">24</td><td class="cfont2">26</td><td class="cfont4">04</td><td 
class="cfont4">12</td><td class="t_tr1">1,443,693,869</td><td class="t_tr1">0</td><td class="t_tr1">0</td><td class="t_tr1">69</td><td class="t_tr1">160,807</td><td class="t_tr1">230,454,828</td><td class="t_tr1">2020-04-01</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20017</td><td class="cfont2">01</td><td class="cfont2">06</td><td class="cfont2">07</td><td class="cfont2">09</td><td class="cfont2">10</td><td class="cfont4">02</td><td class="cfont4">11</td><td class="t_tr1">1,394,320,154</td><td class="t_tr1">6</td><td class="t_tr1">8,730,653</td><td class="t_tr1">150</td><td class="t_tr1">56,374</td><td class="t_tr1">220,830,654</td><td class="t_tr1">2020-03-30</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20016</td><td class="cfont2">04</td><td class="cfont2">08</td><td class="cfont2">19</td><td class="cfont2">31</td><td class="cfont2">35</td><td class="cfont4">01</td><td class="cfont4">02</td><td class="t_tr1">1,414,113,824</td><td class="t_tr1">3</td><td class="t_tr1">10,000,000</td><td class="t_tr1">61</td><td class="t_tr1">217,312</td><td class="t_tr1">235,862,341</td><td class="t_tr1">2020-03-28</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20015</td><td class="cfont2">01</td><td class="cfont2">07</td><td class="cfont2">23</td><td class="cfont2">24</td><td class="cfont2">26</td><td class="cfont4">03</td><td class="cfont4">07</td><td class="t_tr1">1,389,102,104</td><td class="t_tr1">2</td><td class="t_tr1">10,000,000</td><td class="t_tr1">42</td><td class="t_tr1">280,807</td><td class="t_tr1">213,742,602</td><td class="t_tr1">2020-03-25</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20014</td><td class="cfont2">03</td><td class="cfont2">07</td><td class="cfont2">09</td><td class="cfont2">17</td><td class="cfont2">21</td><td class="cfont4">03</td><td class="cfont4">06</td><td class="t_tr1">1,368,119,121</td><td class="t_tr1">4</td><td class="t_tr1">8,706,884</td><td class="t_tr1">149</td><td 
class="t_tr1">44,032</td><td class="t_tr1">203,414,775</td><td class="t_tr1">2020-03-23</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20013</td><td class="cfont2">01</td><td class="cfont2">04</td><td class="cfont2">05</td><td class="cfont2">11</td><td class="cfont2">35</td><td class="cfont4">03</td><td class="cfont4">09</td><td class="t_tr1">1,382,155,015</td><td class="t_tr1">9</td><td class="t_tr1">7,976,833</td><td class="t_tr1">45</td><td class="t_tr1">278,794</td><td class="t_tr1">210,216,729</td><td class="t_tr1">2020-03-21</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20012</td><td class="cfont2">01</td><td class="cfont2">06</td><td class="cfont2">18</td><td class="cfont2">32</td><td class="cfont2">34</td><td class="cfont4">01</td><td class="cfont4">03</td><td class="t_tr1">1,417,484,531</td><td class="t_tr1">2</td><td class="t_tr1">10,000,000</td><td class="t_tr1">35</td><td class="t_tr1">268,738</td><td class="t_tr1">180,512,101</td><td class="t_tr1">2020-03-18</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20011</td><td class="cfont2">02</td><td class="cfont2">13</td><td class="cfont2">19</td><td class="cfont2">22</td><td class="cfont2">23</td><td class="cfont4">02</td><td class="cfont4">07</td><td class="t_tr1">1,392,702,843</td><td class="t_tr1">5</td><td class="t_tr1">9,096,183</td><td class="t_tr1">58</td><td class="t_tr1">158,385</td><td class="t_tr1">169,402,246</td><td class="t_tr1">2020-03-16</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20010</td><td class="cfont2">01</td><td class="cfont2">08</td><td class="cfont2">13</td><td class="cfont2">24</td><td class="cfont2">32</td><td class="cfont4">02</td><td class="cfont4">09</td><td class="t_tr1">1,408,398,439</td><td class="t_tr1">6</td><td class="t_tr1">8,599,471</td><td class="t_tr1">70</td><td class="t_tr1">114,765</td><td class="t_tr1">193,022,506</td><td class="t_tr1">2020-03-14</td></tr><tr class="t_tr1"><!--<td>2</td>--><td 
class="t_tr1">20009</td><td class="cfont2">19</td><td class="cfont2">29</td><td class="cfont2">31</td><td class="cfont2">34</td><td class="cfont2">35</td><td class="cfont4">06</td><td class="cfont4">10</td><td class="t_tr1">1,426,304,213</td><td class="t_tr1">6</td><td class="t_tr1">10,000,000</td><td class="t_tr1">102</td><td class="t_tr1">152,272</td><td class="t_tr1">306,054,010</td><td class="t_tr1">2020-01-20</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20008</td><td class="cfont2">14</td><td class="cfont2">17</td><td class="cfont2">19</td><td class="cfont2">24</td><td class="cfont2">32</td><td class="cfont4">01</td><td class="cfont4">06</td><td class="t_tr1">1,436,007,521</td><td class="t_tr1">4</td><td class="t_tr1">10,000,000</td><td class="t_tr1">57</td><td class="t_tr1">344,283</td><td class="t_tr1">312,604,530</td><td class="t_tr1">2020-01-18</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20007</td><td class="cfont2">02</td><td class="cfont2">10</td><td class="cfont2">19</td><td class="cfont2">24</td><td class="cfont2">30</td><td class="cfont4">05</td><td class="cfont4">08</td><td class="t_tr1">1,413,642,399</td><td class="t_tr1">12</td><td class="t_tr1">7,162,433</td><td class="t_tr1">131</td><td class="t_tr1">96,229</td><td class="t_tr1">275,894,030</td><td class="t_tr1">2020-01-15</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20006</td><td class="cfont2">09</td><td class="cfont2">22</td><td class="cfont2">25</td><td class="cfont2">31</td><td class="cfont2">32</td><td class="cfont4">08</td><td class="cfont4">12</td><td class="t_tr1">1,474,266,994</td><td class="t_tr1">1</td><td class="t_tr1">10,000,000</td><td class="t_tr1">106</td><td class="t_tr1">118,123</td><td class="t_tr1">274,628,114</td><td class="t_tr1">2020-01-13</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20005</td><td class="cfont2">06</td><td class="cfont2">10</td><td class="cfont2">33</td><td class="cfont2">34</td><td 
class="cfont2">35</td><td class="cfont4">01</td><td class="cfont4">03</td><td class="t_tr1">1,430,492,846</td><td class="t_tr1">7</td><td class="t_tr1">10,000,000</td><td class="t_tr1">198</td><td class="t_tr1">70,540</td><td class="t_tr1">306,743,622</td><td class="t_tr1">2020-01-11</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20004</td><td class="cfont2">17</td><td class="cfont2">20</td><td class="cfont2">21</td><td class="cfont2">29</td><td class="cfont2">30</td><td class="cfont4">05</td><td class="cfont4">09</td><td class="t_tr1">1,439,766,324</td><td class="t_tr1">4</td><td class="t_tr1">10,000,000</td><td class="t_tr1">129</td><td class="t_tr1">80,526</td><td class="t_tr1">279,672,368</td><td class="t_tr1">2020-01-08</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20003</td><td class="cfont2">23</td><td class="cfont2">25</td><td class="cfont2">26</td><td class="cfont2">30</td><td class="cfont2">34</td><td class="cfont4">03</td><td class="cfont4">07</td><td class="t_tr1">1,441,800,542</td><td class="t_tr1">0</td><td class="t_tr1">0</td><td class="t_tr1">52</td><td class="t_tr1">333,491</td><td class="t_tr1">284,089,915</td><td class="t_tr1">2020-01-06</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20002</td><td class="cfont2">03</td><td class="cfont2">07</td><td class="cfont2">18</td><td class="cfont2">25</td><td class="cfont2">30</td><td class="cfont4">02</td><td class="cfont4">07</td><td class="t_tr1">1,369,911,816</td><td class="t_tr1">6</td><td class="t_tr1">9,908,421</td><td class="t_tr1">152</td><td class="t_tr1">89,594</td><td class="t_tr1">314,707,905</td><td class="t_tr1">2020-01-04</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">20001</td><td class="cfont2">17</td><td class="cfont2">25</td><td class="cfont2">26</td><td class="cfont2">32</td><td class="cfont2">34</td><td class="cfont4">04</td><td class="cfont4">07</td><td class="t_tr1">1,387,021,575</td><td class="t_tr1">2</td><td 
class="t_tr1">10,000,000</td><td class="t_tr1">95</td><td class="t_tr1">145,642</td><td class="t_tr1">290,178,486</td><td class="t_tr1">2020-01-01</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">19150</td><td class="cfont2">07</td><td class="cfont2">11</td><td class="cfont2">12</td><td class="cfont2">16</td><td class="cfont2">33</td><td class="cfont4">05</td><td class="cfont4">07</td><td class="t_tr1">1,347,226,024</td><td class="t_tr1">19</td><td class="t_tr1">6,819,667</td><td class="t_tr1">115</td><td class="t_tr1">114,036</td><td class="t_tr1">296,427,458</td><td class="t_tr1">2019-12-30</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">19149</td><td class="cfont2">01</td><td class="cfont2">02</td><td class="cfont2">07</td><td class="cfont2">33</td><td class="cfont2">35</td><td class="cfont4">06</td><td class="cfont4">10</td><td class="t_tr1">1,422,864,762</td><td class="t_tr1">0</td><td class="t_tr1">0</td><td class="t_tr1">85</td><td class="t_tr1">177,509</td><td class="t_tr1">317,712,338</td><td class="t_tr1">2019-12-28</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">19148</td><td class="cfont2">03</td><td class="cfont2">04</td><td class="cfont2">07</td><td class="cfont2">11</td><td class="cfont2">30</td><td class="cfont4">08</td><td class="cfont4">09</td><td class="t_tr1">1,357,789,884</td><td class="t_tr1">5</td><td class="t_tr1">10,000,000</td><td class="t_tr1">91</td><td class="t_tr1">119,722</td><td class="t_tr1">289,764,338</td><td class="t_tr1">2019-12-25</td></tr><tr class="t_tr1"><!--<td>2</td>--><td class="t_tr1">19147</td><td class="cfont2">09</td><td class="cfont2">12</td><td class="cfont2">19</td><td class="cfont2">22</td><td class="cfont2">33</td><td class="cfont4">07</td><td class="cfont4">08</td><td class="t_tr1">1,365,277,988</td><td class="t_tr1">6</td><td class="t_tr1">9,568,706</td><td class="t_tr1">101</td><td class="t_tr1">108,661</td><td class="t_tr1">291,104,886</td><td 
class="t_tr1">2019-12-23</td></tr>                            </tbody>
                        </table>
                    </div>
                </div>
            </td>
        </tr>
    </table>
    <div class="chartIntro" id="chartbottom"> <span class="cfont4 intro_left"><!--~ad~247~removed--></span><br></span><span class="intro_right dlt_ad"><!--~ad~248~removed--></span>
      <div class="clear"></div>
      <br />
    </div></div></div>

``

仅供参考
目录