Example program: Plotter

import sys
if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO

from tectonic import TectonicDB
import pandas as pd
import numpy as np
from math import floor, ceil
import copy
import time

from matplotlib.ticker import FormatStrFormatter
import matplotlib.ticker as ticker


class OrderBookPlot(object):
    """Fetch order-book events from TectonicDB and draw them with matplotlib.

    On construction the object connects to the database, selects ``market``,
    downloads every event between ``start`` and ``finish`` as CSV and parses
    it into ``self.df`` (columns ``ts, seq, is_trade, is_bid, price, size``,
    with ``ts`` scaled by 1000 so it matches the x axis, which is formatted
    by dividing by 1000 and passing the result to ``time.localtime``).

    The public ``plot_*`` methods each add one layer to the current figure of
    the ``plt`` object handed to the constructor.
    """

    def __init__(self, plt, market, start, finish,
                 host="35.196.130.153", port=9001):
        """Connect, configure the figure and load the events.

        Args:
            plt: pyplot-like module/object used for all drawing calls.
            market: name substituted into the ``USE <market>`` command and
                used as the plot title.
            start: lower bound passed to ``GET ALL FROM <start> TO <finish>``;
                multiplied by 1000 for the x-axis limit.
            finish: upper bound of the same query / x-axis limit.
            host: TectonicDB host (defaults to the previously hard-coded one).
            port: TectonicDB port (defaults to the previously hard-coded one).
        """
        self.db = TectonicDB(host=host, port=port)
        self.db.cmd("USE {}".format(market).encode())
        self.start = start
        self.finish = finish

        self.plt = plt
        # The style has to be active before the figure is created.
        self.plt.style.use('dark_background')

        fig, ax = self.plt.subplots(figsize=(20, 10), dpi=100)
        self.fig = fig
        self.ax = ax
        # Configure the axes we actually draw on; calling plt.grid()/plt.axis()
        # before subplots() would act on a separate, implicitly created figure.
        ax.grid(False)
        ax.axis('on')
        # 1 btc = 1e8 satoshi
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.8f'))

        # set x axis as date
        def format_date(x, pos=None):
            seconds = int(x / 1000.)
            return time.strftime('%m-%d %H:%M:%S', time.localtime(seconds))

        ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_date))
        ax.set_xlim(self.start * 1000, self.finish * 1000)
        ax.set_title(market)

        # Number of price bins / time bins used by to_updates().
        self.tick_bins_cnt = 2000
        self.step_bins_cnt = 2000

        query = "GET ALL FROM {} TO {} AS CSV".format(self.start, self.finish)
        data = self.db.cmd(query.encode())[1]
        self.df = self.__csv_to_df(data)
        print(len(self.df))

    def plot_trades(self):
        """Split the events into created/cancelled/trades and scatter the trades."""
        self.__separate()
        self.__plot_trades()

    def plot_pl(self):
        """Draw the binned price levels as horizontal segments."""
        self.prices = np.array(self.df["price"])
        self.rejected = self.__reject_outliers(self.prices, m=4)
        self.updates = self.to_updates(self.df)
        self.__plot_price_levels()

    def plot_ba(self):
        """Draw the best bid (green) and best ask (red) as step lines."""
        self.ob = self.__get_ob()  # expensive
        self.best_ba_df = self.__best_ba()  # expensive
        self.__plot_best_ba()

    def to_updates(self, events):
        """Aggregate event sizes into a ``{price_bin: {time_bin: size}}`` dict.

        Price bins are the edges of ``np.histogram(self.rejected,
        self.tick_bins_cnt)``; an event is keyed by the lower edge of the bin
        that strictly contains its price.  Time bins are multiples of a step
        derived from the ``ts`` range of ``self.df`` and
        ``self.step_bins_cnt``; an event is keyed by the upper threshold of
        the interval that strictly contains its timestamp.  Events falling
        exactly on an edge, or outside every bin, are skipped.

        For every price bin that holds at least one non-zero size, an extra
        entry keyed ``self.finish * 1000`` is added carrying the last such
        size, so the level extends to the right edge of the plot.

        Args:
            events: accepted for backward compatibility but not read; the
                method always works on ``self.df``.

        Returns:
            dict mapping price-bin lower edge -> {time-bin threshold -> summed
            size}.  Empty when ``self.df`` has no rows or a zero time span.
        """
        updates = {}
        if self.df.empty:
            return updates

        _counts, boundaries = np.histogram(self.rejected, self.tick_bins_cnt)

        min_ts = int(floor(float(self.df['ts'].min())))
        max_ts = int(ceil(float(self.df['ts'].max())))
        step = int(ceil((max_ts - min_ts) / float(self.step_bins_cnt)))
        if step > 0:
            thresholds = np.arange(min_ts, max_ts, step, dtype=np.int64)
        else:
            # All events share one timestamp: there is no interval to bin into.
            thresholds = np.array([], dtype=np.int64)

        def into_tick_bin(price):
            # Binary search instead of scanning every bin for every row.
            idx = np.searchsorted(boundaries, price, side='left')
            if idx == 0 or idx == len(boundaries) or boundaries[idx] == price:
                return None
            return boundaries[idx - 1]

        def into_step_bin(ts):
            idx = np.searchsorted(thresholds, ts, side='left')
            if idx == 0 or idx == len(thresholds) or thresholds[idx] == ts:
                return None
            return int(thresholds[idx])

        for (_i, row) in self.df.iterrows():
            ts, seq, is_trade, is_bid, price, size = row
            price_bin = into_tick_bin(price)
            time_bin = into_step_bin(ts)
            # None (not falsiness) marks "no bin", so a bin whose lower edge
            # is 0.0 is no longer dropped by accident.
            if price_bin is None or time_bin is None:
                continue
            by_time = updates.setdefault(price_bin, {})
            by_time[time_bin] = by_time.get(time_bin, 0) + size

        end_ts = self.finish * 1000
        for by_time in updates.values():
            nonzero = [s for s in by_time.values() if s != 0]
            if nonzero:
                by_time[end_ts] = nonzero[-1]

        return updates

    def __reject_outliers(self, data, m=2.):
        """Return the elements of ``data`` within ``m`` median deviations.

        The distance of each element from the median is divided by the median
        of those distances; elements whose ratio is below ``m`` are kept.
        When the median deviation is 0 every ratio is treated as 0, so (for
        positive ``m``) the data is returned with its shape intact.
        """
        data = np.asarray(data)
        d = np.abs(data - np.median(data))
        mdev = np.median(d)
        # A scalar 0. here would turn the mask into a plain bool and
        # ``data[True]`` adds an axis; keep the mask array-shaped instead.
        s = d / mdev if mdev else np.zeros_like(d)
        return data[s < m]

    def __separate(self):
        """Classify events into ``self.created``, ``self.cancelled``, ``self.trades``.

        For each non-trade event the new size is compared with the size last
        recorded at that price: a decrease (or equal size) is recorded as a
        cancellation of ``prev - size``, an increase as a creation of
        ``size - prev``.  Records are ``(ts, seq, delta, price, is_bid,
        is_trade)`` tuples turned into un-named DataFrames.
        """
        cancelled = []
        created = []
        current_level = {}

        for _, (ts, seq, is_trade, is_bid, price, size) in self.df.iterrows():
            if not is_trade:
                prev = current_level.get(price, 0)
                if size == 0 or size <= prev:
                    cancelled.append((ts, seq, prev - size, price, is_bid, is_trade))
                elif size > prev:
                    created.append((ts, seq, size - prev, price, is_bid, is_trade))
                else:
                    # Only reachable when size and prev are unordered
                    # (neither <= nor > holds, e.g. a NaN size).
                    raise Exception("Impossible")

            # NOTE(review): this also runs for trade rows, so a trade's size
            # replaces the remembered level size - confirm that is intended.
            current_level[price] = size

        self.cancelled = pd.DataFrame.from_records(cancelled)
        self.created = pd.DataFrame.from_records(created)
        self.trades = self.df[self.df['is_trade']]

        # sanity check
        assert len(cancelled) + len(created) + len(self.trades) == len(self.df)

    def __csv_to_df(self, raw_data):
        """Parse the header-less CSV payload returned by the database.

        Args:
            raw_data: bytes-like CSV text with the columns
                ``ts,seq,is_trade,is_bid,price,size`` and no header row.

        Returns:
            DataFrame with those six columns, the last parsed row dropped (as
            the original code did) and ``ts`` multiplied by 1000 and
            truncated to int64.
        """
        raw_data = str(raw_data, 'utf-8')
        csv = StringIO("ts,seq,is_trade,is_bid,price,size\n" + raw_data)
        # Builtin float/bool replace the np.float/np.bool aliases that newer
        # NumPy releases no longer provide; seq is widened from int16 so that
        # large sequence numbers fit.
        df = pd.read_csv(csv, dtype={
            'ts': float,
            'seq': np.int64,
            'is_trade': bool,
            'is_bid': bool,
            'price': float,
            'size': np.float32,
        })
        # ts deliberately stays a regular column: the other methods read
        # df['ts'] and unpack six values per row.
        df = df.iloc[:-1].copy()
        # Explicit int64 so the scaled timestamps do not depend on the
        # platform's default integer width.
        df["ts"] = (df["ts"] * 1000).astype(np.int64)
        return df

    def __get_ob(self):
        """Build ``{ts: {"bids": {price: size}, "asks": {price: size}}}`` snapshots.

        Trades are ignored.  For the first non-trade event seen at each
        timestamp, levels whose size is 0 are pruned from the running book,
        the event is applied, and a copy of the book is stored under that
        timestamp.
        """
        book = {"bids": {}, "asks": {}}
        snapshots = {}
        for _, event in self.df.iterrows():
            if event.is_trade:
                continue
            if event.ts in snapshots:
                # NOTE(review): later events sharing a timestamp are skipped
                # entirely (they never reach the running book) - this mirrors
                # the original behaviour; confirm it is intended.
                continue
            for levels in book.values():
                # Collect first, then delete: removing keys while iterating
                # the dict raises RuntimeError.
                for price in [p for p, size in levels.items() if size == 0]:
                    del levels[price]
            side = "bids" if event.is_bid else "asks"
            book[side][event.price] = event["size"]
            # Values are scalars, so copying each side's dict is a full copy.
            snapshots[event.ts] = dict(
                (name, dict(levels)) for name, levels in book.items())
        return snapshots

    def __best_ba(self):
        """Return a ``ts``-indexed DataFrame of ``best_bid`` / ``best_ask``.

        The best bid is the highest bid price and the best ask the lowest ask
        price of each snapshot in ``self.ob``; snapshots with an empty side
        are skipped.
        """
        best_bids_asks = []
        for ts, ob in self.ob.items():
            bids, asks = ob["bids"], ob["asks"]
            if not bids or not asks:
                continue
            best_bids_asks.append((ts, max(bids), min(asks)))

        frame = pd.DataFrame.from_records(
            best_bids_asks, columns=["ts", "best_bid", "best_ask"], index="ts")
        return frame.sort_index()

    def __plot_best_ba(self):
        """Draw best bid (green) and best ask (red) as step lines.

        Each consecutive pair of rows of ``self.best_ba_df`` yields one
        horizontal segment at the earlier price and one vertical segment at
        the later timestamp joining the two prices.
        """
        ts = self.best_ba_df.index.values
        for column, colour in (("best_bid", "green"), ("best_ask", "red")):
            levels = self.best_ba_df[column].values
            self.plt.hlines(levels[:-1], ts[:-1], ts[1:],
                            color=colour, lw=3, alpha=1)
            self.plt.vlines(ts[1:], levels[:-1], levels[1:],
                            color=colour, lw=3, alpha=1)

    def __plot_price_levels(self, zorder=0, max_threshold=5000, min_threshold=500):
        """Draw ``self.updates`` as horizontal segments shaded by size.

        For each price bin the time-sorted entries are taken pairwise; every
        pair gives a segment from the first time to the second, coloured by
        the first size: black below ``min_threshold``, full cyan above
        ``max_threshold``, otherwise cyan scaled by ``size / max_threshold``.
        """
        ys = []
        xmins = []
        xmaxs = []
        colors = []

        for price, by_time in self.updates.items():
            timeline = sorted(by_time.items(), key=lambda item: item[0])
            for (t1, s1), (t2, _s2) in zip(timeline, timeline[1:]):  # bigram
                xmins.append(t1)
                xmaxs.append(t2)
                ys.append(price)
                if s1 < min_threshold:
                    colors.append((0, 0, 0))
                elif s1 > max_threshold:
                    colors.append((0, 1, 1))
                else:
                    shade = s1 / max_threshold
                    colors.append((0, shade, shade))
        self.plt.hlines(ys, xmins, xmaxs, color=colors, lw=3, alpha=1, zorder=zorder)

    def __plot_trades(self, zorder=10):
        """Scatter ``self.trades``; marker area is proportional to trade size.

        Rows with ``is_bid`` true are drawn in ``#ff0000``, the others in
        ``#00ff00``.
        """
        max_size = self.trades["size"].max()
        trades_colors = ["#ff0000" if is_bid else "#00ff00"
                         for is_bid in self.trades.is_bid]
        self.plt.scatter(self.trades["ts"], self.trades["price"],
                         s=self.trades["size"] / max_size * 100000,
                         color=trades_colors, zorder=zorder)

if __name__ == "__main__":
    # NOTE(review): plt, market, current_time, minutes and io are not defined
    # or imported anywhere in the visible source - they must be supplied by
    # the surrounding program before this block can run.
    window_seconds = 60 * int(minutes)
    window_start = current_time - window_seconds

    # Plot the trailing `minutes`-long window ending at current_time.
    ob = OrderBookPlot(plt, market, window_start, current_time)
    ob.plot_pl()
    # The best bid/ask layer (ob.plot_ba()) is left disabled here.
    ob.plot_trades()
    print('done plotting')

    # Render the current figure as PNG into an in-memory buffer.
    buf = io.BytesIO()
    plt.savefig(buf, format='png')

This generates plots like the ones on my blog post.

results matching ""

    No results matching ""