1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
| import time, datetime from spark_util import spark_start
# Create the Spark session only once: when this code is re-run in a session
# where a global named `spark` already exists, that object is reused as-is.
if 'spark' not in globals():
    spark = spark_start(
        'data_explore_zt',
        executor_core=2,
    )
    # Only ERROR-level messages from Spark are logged.
    # NOTE(review): the original listing lost its indentation; this call is
    # assumed to belong inside the `if` -- confirm against the notebook.
    spark.sparkContext.setLogLevel("ERROR")
def show(sdf, n=None, truncate=True):
    """Display the first rows of ``sdf`` as a pandas DataFrame.

    Args:
        sdf: Object exposing ``.columns`` and ``.take(n)`` whose rows provide
            ``asDict()`` (presumably a PySpark DataFrame -- confirm).
        n: Number of rows to fetch. Any falsy value (``None`` or ``0``)
            selects a default: 5 rows when ``sdf`` has more than 10 columns,
            otherwise 10 rows.
        truncate: When false, the pandas column width limit is raised to 999
            so long cell values are shown in full.

    Returns:
        Whatever ``display`` returns.

    Note:
        The pandas display options are changed globally and are not restored
        afterwards. A ``display`` callable must be resolvable at call time
        (presumably supplied by the IPython/Jupyter session).
    """
    import pandas as pd

    pd.set_option('display.max_columns', 100)
    if not truncate:
        pd.set_option('display.max_colwidth', 999)

    if not n:
        # Wide frames get fewer rows to keep the rendered table manageable.
        n = 5 if len(sdf.columns) > 10 else 10

    records = [row.asDict() for row in sdf.take(n)]
    return display(pd.DataFrame(records))
def get_date(n=-1, base_date=None, base_date_FMT=None, FMT='%Y%m%d'):
    """Return the date ``n`` days away from a base date, formatted with ``FMT``.

    Args:
        n: Day offset added to the base date; negative values go back in
            time. The default of -1 yields the day before the base date.
        base_date: Date string to offset from. When ``None``, the current
            moment in the Asia/Shanghai timezone (UTC+8) is used.
        base_date_FMT: ``strptime`` format of ``base_date``. When falsy,
            ``FMT`` is used for parsing as well.
        FMT: ``strftime`` format of the returned string.

    Returns:
        The shifted date as a string in ``FMT``.

    Raises:
        ValueError: If ``base_date`` does not match its parsing format.
    """
    if base_date is None:
        try:
            import pytz
            base_datetime = datetime.datetime.now(pytz.timezone('Asia/Shanghai'))
        except Exception:
            # pytz is optional. Fall back to a fixed UTC+8 offset instead of
            # naive machine-local time, so the result does not depend on the
            # timezone of the host running the code.
            shanghai_offset = datetime.timezone(datetime.timedelta(hours=8))
            base_datetime = datetime.datetime.now(shanghai_offset)
    else:
        base_datetime = datetime.datetime.strptime(base_date, base_date_FMT or FMT)

    shifted = base_datetime + datetime.timedelta(days=n)
    return shifted.strftime(FMT)
def date_range(start, end, end_include=False, step=1, FMT="%Y%m%d"):
    """Return the formatted dates from ``start`` towards ``end`` every ``step`` days.

    Args:
        start: First date of the range, as a string in ``FMT``.
        end: Bound of the range, as a string in ``FMT``. Exclusive unless
            ``end_include`` is true.
        end_include: When true, ``end`` itself is produced if it lies on the
            step grid (i.e. it is a whole number of steps from ``start``).
        step: Day increment between consecutive dates. Use a negative value
            to walk backwards from a later ``start`` to an earlier ``end``.
        FMT: ``strptime``/``strftime`` format used for both input and output.

    Returns:
        A list of date strings in ``FMT``; empty when ``end`` is not ahead of
        ``start`` in the direction of ``step``.

    Raises:
        ValueError: If ``step`` is zero, or if ``start``/``end`` do not match
            ``FMT``.
    """
    if step == 0:
        # Rejected up front so the error type does not depend on end_include.
        raise ValueError("step must not be zero")

    # Parse once; the start date is invariant across the generated offsets.
    first_day = datetime.datetime.strptime(start, FMT)
    span = (datetime.datetime.strptime(end, FMT) - first_day).days
    if end_include:
        # Push the exclusive bound one day further in the direction of travel
        # so that `end` becomes reachable.
        span += 1 if step > 0 else -1

    return [
        (first_day + datetime.timedelta(days=offset)).strftime(FMT)
        for offset in range(0, span, step)
    ]
|