全銘柄、全期間の日ごとの各種平均値、標準偏差を計算するSQL
stockdb のデータをもとに、全銘柄、全期間の日ごとの各種平均値、標準偏差を計算するSQLを書いたのでメモ。
- 終値、出来高、価格変動率のそれぞれについて、過去3,5,10,15,25,50,75日の平均値と標準偏差を一気に算出します。
- それなりに時間はかかります。確認した環境だと1時間くらいでした。
- Window関数すごい。
-- 市場が開いている日の一覧 CREATE MATERIALIZED VIEW days as ( WITH ds AS ( SELECT distinct date FROM rates ), x AS ( SELECT date, row_number() OVER (ORDER BY date) AS index, lag(date, 1, null) OVER (ORDER BY date) AS prev, lag(date, 3, null) OVER (ORDER BY date) AS before_3_days, lag(date, 5, null) OVER (ORDER BY date) AS before_5_days, lag(date, 10, null) OVER (ORDER BY date) AS before_10_days, lag(date, 15, null) OVER (ORDER BY date) AS before_15_days, lag(date, 25, null) OVER (ORDER BY date) AS before_25_days, lag(date, 50, null) OVER (ORDER BY date) AS before_50_days, lag(date, 75, null) OVER (ORDER BY date) AS before_75_days FROM ds ORDER BY date desc ) SELECT * FROM x WHERE prev IS NOT NULL ); -- 出来高0(volume=0)のデータが抜けているのでそれをを補完したビューを作る CREATE MATERIALIZED VIEW rates_filled AS ( WITH all_stock_and_days as ( SELECT d.*, s.id as stock_id FROM days as d, stocks as s ), x as ( SELECT a.stock_id, a.date, CASE WHEN r.date IS NOT NULL THEN r.date ELSE (SELECT max(date) FROM rates WHERE stock_id = a.stock_id and date <= a.date ) END as actual FROM all_stock_and_days AS a LEFT JOIN rates as r ON a.stock_id = r.stock_id and a.date = r.date ) SELECT x.stock_id, x.date, CASE WHEN x.actual = x.date THEN r.open ELSE r.close END as open, CASE WHEN x.actual = x.date THEN r.close ELSE r.close END as close, CASE WHEN x.actual = x.date THEN r.high ELSE r.close END as high, CASE WHEN x.actual = x.date THEN r.low ELSE r.close END as low, CASE WHEN x.actual = x.date THEN r.volume ELSE 0 END as volume FROM x LEFT JOIN rates as r ON x.stock_id = r.stock_id and x.actual = r.date ); CREATE UNIQUE INDEX rates_filled_stock_id_date_index ON rates_filled (stock_id, date); -- 前日からの価格変動率を計算 CREATE MATERIALIZED VIEW ratios AS ( WITH x AS ( SELECT d.*, s.id FROM days AS d, stocks AS s ) SELECT x.id as stock_id, x.date, (r1.close-r2.close)/r2.close as ratio FROM x LEFT JOIN rates_filled as r1 on x.id = r1.stock_id AND r1.date = x.date LEFT JOIN rates_filled as r2 on x.id = r2.stock_id AND r2.date = x.prev WHERE r2.close IS NOT NULL ); CREATE UNIQUE INDEX ratios_stock_id_date_index ON ratios (stock_id, date); -- 全銘柄、全期間の日ごとの各種平均値、標準偏差をを計算 -- 終値、出来高、価格変動率のそれぞれについて、過去3,5,10,15,25,50,75日の平均値と標準偏差を算出する CREATE MATERIALIZED VIEW ma AS ( SELECT r.stock_id, r.date, r.volume, avg(r.close) OVER from_3_days_ago as avg_close_3days, stddev(r.close) OVER from_3_days_ago as sd_close_3days, avg(r.close) OVER from_5_days_ago as avg_close_5days, stddev(r.close) OVER from_5_days_ago as sd_close_5days, avg(r.close) OVER from_10_days_ago as avg_close_10days, stddev(r.close) OVER from_10_days_ago as sd_close_10days, avg(r.close) OVER from_15_days_ago as avg_close_15days, stddev(r.close) OVER from_15_days_ago as sd_close_15days, avg(r.close) OVER from_25_days_ago as avg_close_25days, stddev(r.close) OVER from_25_days_ago as sd_close_25days, avg(r.close) OVER from_50_days_ago as avg_close_50days, stddev(r.close) OVER from_50_days_ago as sd_close_50days, avg(r.close) OVER from_75_days_ago as avg_close_75days, stddev(r.close) OVER from_75_days_ago as sd_close_75days, avg(ra.ratio) OVER from_3_days_ago as avg_ratio_3days, stddev(ra.ratio) OVER from_3_days_ago as sd_ratio_3days, avg(ra.ratio) OVER from_5_days_ago as avg_ratio_5days, stddev(ra.ratio) OVER from_5_days_ago as sd_ratio_5days, avg(ra.ratio) OVER from_10_days_ago as avg_ratio_10days, stddev(ra.ratio) OVER from_10_days_ago as sd_ratio_10days, avg(ra.ratio) OVER from_15_days_ago as avg_ratio_15days, stddev(ra.ratio) OVER from_15_days_ago as sd_ratio_15days, avg(ra.ratio) OVER from_25_days_ago as avg_ratio_25days, stddev(ra.ratio) OVER from_25_days_ago as sd_ratio_25days, avg(ra.ratio) OVER from_50_days_ago as avg_ratio_50days, stddev(ra.ratio) OVER from_50_days_ago as sd_ratio_50days, avg(ra.ratio) OVER from_75_days_ago as avg_ratio_75days, stddev(ra.ratio) OVER from_75_days_ago as sd_ratio_75days, avg(r.volume) OVER from_3_days_ago as avg_volume_3days, stddev(r.volume) OVER from_3_days_ago as sd_volume_3days, avg(r.volume) OVER from_5_days_ago as avg_volume_5days, stddev(r.volume) OVER from_5_days_ago as sd_volume_5days, avg(r.volume) OVER from_10_days_ago as avg_volume_10days, stddev(r.volume) OVER from_10_days_ago as sd_volume_10days, avg(r.volume) OVER from_15_days_ago as avg_volume_15days, stddev(r.volume) OVER from_15_days_ago as sd_volume_15days, avg(r.volume) OVER from_25_days_ago as avg_volume_25days, stddev(r.volume) OVER from_25_days_ago as sd_volume_25days, avg(r.volume) OVER from_50_days_ago as avg_volume_50days, stddev(r.volume) OVER from_50_days_ago as sd_volume_50days, avg(r.volume) OVER from_75_days_ago as avg_volume_75days, stddev(r.volume) OVER from_75_days_ago as sd_volume_75days FROM rates_filled as r LEFT JOIN ratios AS ra ON ra.stock_id = r.stock_id AND ra.date = r.date WINDOW from_3_days_ago AS ( PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING ), from_5_days_ago AS ( PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING ), from_10_days_ago AS ( PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING ), from_15_days_ago AS ( PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 14 FOLLOWING ), from_25_days_ago AS ( PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 24 FOLLOWING ), from_50_days_ago AS ( PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 49 FOLLOWING ), from_75_days_ago AS ( PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 74 FOLLOWING ) order by r.date desc ); CREATE UNIQUE INDEX ma_stock_id_date_index ON ma(stock_id, date);