# tickdownload/read_sql_data.py (hyphenOs/tickdownload, master branch)
#pylint: disable-msg=broad-except, global-statement

import pandas as pd
from sqlalchemy import desc

from tickerplot.sql.sqlalchemy_wrapper import execute_one
from tickerplot.sql.sqlalchemy_wrapper import create_or_get_all_scrips_table
from tickerplot.sql.sqlalchemy_wrapper import create_or_get_nse_equities_hist_data
from tickerplot.sql.sqlalchemy_wrapper import select_expr
from tickerplot.sql.sqlalchemy_wrapper import get_metadata
# Database metadata shared across this module. It stays None until main()
# assigns it from the --dbpath command line option.
_DB_METADATA = None

def get_all_scrips_names_in_db(metadata=None):
    """
    Return the NSE symbols of all scrips flagged as NSE-traded in the DB.

    metadata: database metadata object; its ``bind`` attribute is used as the
              engine the query is executed on. When None, the module-level
              _DB_METADATA is used instead.

    Returns a list with the first column (``nse_symbol``) of every row the
    query yields.

    Raises ValueError when no metadata is available at all, rather than
    failing later with an AttributeError on ``None.bind``.
    """
    if metadata is None:
        metadata = _DB_METADATA
    if metadata is None:
        raise ValueError("No DB metadata available: pass 'metadata' "
                         "explicitly or set up _DB_METADATA first.")

    all_scrips_table = create_or_get_all_scrips_table(metadata=metadata)

    # '== True' is deliberate: the column object (presumably a SQLAlchemy
    # column) turns '==' into a SQL comparison, which 'is True' would not do.
    scrips_select_st = select_expr([all_scrips_table.c.nse_symbol]).where(
        all_scrips_table.c.nse_traded == True)  # noqa: E712

    result = execute_one(scrips_select_st, engine=metadata.bind)

    return [row[0] for row in result.fetchall()]

def get_hist_data_as_dataframes_dict(metadata=None, limit=0, max_scrips=16000):
    """
    Load historical equities data from the DB, one DataFrame per scrip.

    metadata:   database metadata object; its ``bind`` attribute is handed to
                pandas as the connection. When None, the module-level
                _DB_METADATA is used instead.
    limit:      when a positive int, at most this many rows are read per
                scrip. Rows are ordered by date descending, so these are the
                most recent ones. Any other value means no row limit.
    max_scrips: stop once this many scrips have been read. A value the running
                count never equals (e.g. 0 or a negative number) means all
                scrips are read.

    Returns a dict mapping each scrip symbol to a DataFrame indexed by a
    DatetimeIndex built from the ``date`` column, with that column dropped.

    Raises ValueError when no metadata is available at all, rather than
    failing later with an AttributeError on ``None.bind``.
    """
    if metadata is None:
        metadata = _DB_METADATA
    if metadata is None:
        raise ValueError("No DB metadata available: pass 'metadata' "
                         "explicitly or set up _DB_METADATA first.")

    lscrips = get_all_scrips_names_in_db(metadata=metadata)

    engine = metadata.bind
    hist_data = create_or_get_nse_equities_hist_data(metadata=metadata)

    # Loop invariants: the output column names and whether a row limit applies.
    column_names = ['date', 'open', 'high', 'low', 'close', 'volume',
                    'delivery']
    apply_limit = bool(limit and isinstance(limit, int) and limit > 0)

    scripdata_dict = {}
    for count, scrip in enumerate(lscrips, 1):
        sql_st = select_expr([hist_data.c.date,
                              hist_data.c.open, hist_data.c.high,
                              hist_data.c.low, hist_data.c.close,
                              hist_data.c.volume, hist_data.c.delivery]).\
            where(hist_data.c.symbol == scrip).\
            order_by(desc(hist_data.c.date))

        if apply_limit:
            sql_st = sql_st.limit(limit)

        scripdata = pd.io.sql.read_sql(sql_st, engine)

        scripdata.columns = list(column_names)
        # NOTE(review): reset_index() keeps the old positional index as an
        # extra 'index' column in the returned frame. This looks unintended
        # but is preserved here so existing consumers see the same columns.
        scripdata.reset_index(inplace=True)
        scripdata.set_index(pd.DatetimeIndex(scripdata['date']), inplace=True)
        scripdata.drop('date', axis=1, inplace=True)
        scripdata_dict[scrip] = scripdata

        if count == max_scrips:
            break

    return scripdata_dict

def main(args):
    """
    Command line entry point: read all historical data from the given DB.

    args: argument vector including the program name as its first element
          (e.g. ``sys.argv``). When empty or None, argparse falls back to
          ``sys.argv``.

    Returns 0 on success and -1 when no DB URL was given or the given URL
    could not be turned into DB metadata.
    """
    import argparse
    global _DB_METADATA

    parser = argparse.ArgumentParser()

    # --dbpath option
    parser.add_argument("--dbpath",
                        help="Database URL to be used.",
                        dest="dbpath")

    # Parse the caller-supplied arguments (minus the program name) instead of
    # silently ignoring them in favour of sys.argv.
    parsed = parser.parse_args(args[1:] if args else None)

    # Without a DB URL there is no metadata to query with, and the data
    # readers need one, so stop here with a clear message.
    if not parsed.dbpath:
        print ("No DB URL given, use --dbpath to specify one.")
        return -1

    # Make sure we can access the DB path or else exit right here.
    try:
        _DB_METADATA = get_metadata(parsed.dbpath)
    except Exception as e:
        print ("Not a valid DB URL: {} (Exception: {})".format(
                                                        parsed.dbpath, e))
        return -1

    # Pass the metadata explicitly; the reader's default of None is not usable.
    get_hist_data_as_dataframes_dict(metadata=_DB_METADATA)

    return 0

if __name__ == '__main__':
    # Executed as a script: give main() the raw command line and use its
    # return value as the process exit status.
    import sys

    raise SystemExit(main(sys.argv))