@@ -496,8 +496,8 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1):
496
496
# Limit max reconstruction depth to 2:
497
497
if self ._reconstruct_start_interval is None :
498
498
self ._reconstruct_start_interval = interval
499
- if interval != self ._reconstruct_start_interval :
500
- logger .debug (f"{ self .ticker } : Price repair has hit max depth of 1 ('%s'->'%s')" , self ._reconstruct_start_interval , interval )
499
+ if interval != self ._reconstruct_start_interval and interval != nexts [ self . _reconstruct_start_interval ] :
500
+ logger .debug (f"{ self .ticker } : Price repair has hit max depth of 2 ('%s'->'%s'->'%s' )" , self ._reconstruct_start_interval , nexts [ self . _reconstruct_start_interval ] , interval )
501
501
return df
502
502
503
503
df = df .sort_index ()
@@ -760,7 +760,12 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1):
760
760
weights = weights [:, None ] # transpose
761
761
weights = np .tile (weights , len (calib_cols )) # 1D -> 2D
762
762
weights = weights [calib_filter ] # flatten
763
- ratio = np .average (ratios , weights = weights )
763
+ not1 = ~ np .isclose (ratios , 1.0 , rtol = 0.00001 )
764
+ if np .sum (not1 ) == len (calib_cols ):
765
+ # Only 1 calibration row in df_new is different to df_block so ignore
766
+ ratio = 1.0
767
+ else :
768
+ ratio = np .average (ratios , weights = weights )
764
769
logger .debug (f"Price calibration ratio (raw) = { ratio :6f} " )
765
770
ratio_rcp = round (1.0 / ratio , 1 )
766
771
ratio = round (ratio , 1 )
@@ -1235,19 +1240,26 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v
1235
1240
# 100x errors into suspended intervals. Clue is no price change and 0 volume.
1236
1241
# Better to use last active trading interval as baseline.
1237
1242
f_no_activity = (df2 ['Low' ] == df2 ['High' ]) & (df2 ['Volume' ]== 0 )
1243
+ f_no_activity = f_no_activity | df2 [OHLC ].isna ().all (axis = 1 )
1238
1244
appears_suspended = f_no_activity .any () and np .where (f_no_activity )[0 ][0 ]== 0
1239
1245
f_active = ~ f_no_activity
1240
1246
idx_latest_active = np .where (f_active & np .roll (f_active , 1 ))[0 ]
1241
1247
if len (idx_latest_active ) == 0 :
1242
1248
idx_latest_active = None
1243
1249
else :
1244
- idx_latest_active = idx_latest_active [0 ]
1245
- logger .debug (f'price-repair-split: appears_suspended={ appears_suspended } , idx_latest_active={ idx_latest_active } ' )
1250
+ idx_latest_active = int (idx_latest_active [0 ])
1251
+ log_msg = f'price-repair-split: appears_suspended={ appears_suspended } , idx_latest_active={ idx_latest_active } '
1252
+ if idx_latest_active is not None :
1253
+ log_msg += f' ({ df .index [idx_latest_active ].date ()} )'
1254
+ logger .debug (log_msg )
1246
1255
1247
1256
if logger .isEnabledFor (logging .DEBUG ):
1248
1257
df_debug = df2 .copy ()
1249
- df_debug = df_debug .drop (['Adj Close' , 'Low' , 'High' , ' Volume' , 'Dividends' , 'Repaired?' ], axis = 1 , errors = 'ignore' )
1258
+ df_debug = df_debug .drop (['Adj Close' , 'Volume' , 'Dividends' , 'Repaired?' ], axis = 1 , errors = 'ignore' )
1250
1259
debug_cols = ['Low' , 'High' ]
1260
+ df_debug = df_debug .drop ([c for c in OHLC if c not in debug_cols ], axis = 1 , errors = 'ignore' )
1261
+ else :
1262
+ debug_cols = []
1251
1263
1252
1264
# Calculate daily price % change. To reduce effect of price volatility,
1253
1265
# calculate change for each OHLC column.
@@ -1359,87 +1371,44 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v
1359
1371
return df
1360
1372
1361
1373
# if logger.isEnabledFor(logging.DEBUG):
1362
- # logger.debug(f"price-repair-split: my workings:")
1363
- # logger.debug('\n' + str(df_debug))
1374
+ # df_debug['i'] = list(range(0, df_debug.shape[0]))
1375
+ # df_debug['i_rev'] = df_debug.shape[0]-1 - df_debug['i']
1376
+ # with pd.option_context('display.max_rows', None, 'display.max_columns', 10, 'display.width', 1000): # more options can be specified also
1377
+ # logger.debug(f"price-repair-split: my workings:" + '\n' + str(df_debug))
1364
1378
1365
1379
def map_signals_to_ranges (f , f_up , f_down ):
1380
+ # Ensure 0th element is False, because True is nonsense
1381
+ if f [0 ]:
1382
+ f = np .copy (f ) ; f [0 ] = False
1383
+ f_up = np .copy (f_up ) ; f_up [0 ] = False
1384
+ f_down = np .copy (f_down ) ; f_down [0 ] = False
1385
+
1366
1386
if not f .any ():
1367
1387
return []
1368
1388
1369
1389
true_indices = np .where (f )[0 ]
1370
1390
ranges = []
1371
1391
1372
- idx_first_f = np .where (f )[0 ][0 ]
1373
- logger .debug (f'idx_latest_active={ idx_latest_active } idx_first_f={ idx_first_f } ' )
1374
- if appears_suspended and (idx_latest_active is None or idx_latest_active >= idx_first_f ):
1375
- # baseline = 2nd index, because no active trading since latest split error
1376
-
1377
- # First, process prices older than idx_latest_active:
1378
- if idx_latest_active is None :
1379
- true_indices_old = []
1380
- else :
1381
- true_indices_old = [i for i in true_indices if i > idx_latest_active ]
1382
- if len (true_indices_old ) > 0 :
1383
- for i in range (len (true_indices_old ) - 1 ):
1384
- if i % 2 == 0 :
1385
- if split > 1.0 :
1386
- adj = 'split' if f_down [true_indices_old [i ]] else '1.0/split'
1387
- else :
1388
- adj = '1.0/split' if f_down [true_indices_old [i ]] else 'split'
1389
- ranges .append ((true_indices_old [i ], true_indices_old [i + 1 ], adj ))
1390
-
1391
- if len (true_indices_old ) % 2 != 0 :
1392
- if split > 1.0 :
1393
- adj = 'split' if f_down [true_indices_old [- 1 ]] else '1.0/split'
1394
- else :
1395
- adj = '1.0/split' if f_down [true_indices_old [- 1 ]] else 'split'
1396
- ranges .append ((true_indices_old [- 1 ], len (f ), adj ))
1397
-
1398
- # Next, process prices more recent than idx_latest_active:
1399
- true_indices_recent = [i for i in true_indices if i not in true_indices_old ]
1400
- if len (true_indices_recent ) > 0 :
1392
+ for i in range (len (true_indices ) - 1 ):
1393
+ if i % 2 == 0 :
1401
1394
if split > 1.0 :
1402
- adj = 'split' if f_up [ true_indices_recent [ 0 ]] else '1.0/split'
1395
+ adj = 'split' if f_down [ true_indices [ i ]] else '1.0/split'
1403
1396
else :
1404
- adj = '1.0/split' if f_up [true_indices_recent [0 ]] else 'split'
1405
- ranges .append ((0 , true_indices_recent [0 ], adj ))
1406
-
1407
- for i in range (1 , len (true_indices_recent ) - 1 ):
1408
- if i % 2 == 1 :
1409
- if split > 1.0 :
1410
- adj = '1.0/split' if f_up [true_indices_recent [i ]] else 'split'
1411
- else :
1412
- adj = 'split' if f_up [true_indices_recent [i ]] else '1.0/split'
1413
- ranges .append ((true_indices_recent [i ], true_indices_recent [i + 1 ], adj ))
1414
-
1415
- if len (true_indices_recent ) % 2 == 0 :
1416
- if split > 1.0 :
1417
- adj = 'split' if f_down [true_indices_recent [- 1 ]] else '1.0/split'
1418
- else :
1419
- adj = '1.0/split' if f_down [true_indices_recent [- 1 ]] else 'split'
1420
- ranges .append ((true_indices_recent [- 1 ], len (f ), adj ))
1421
-
1422
- ranges = sorted (ranges , key = lambda x : x [0 ])
1423
-
1424
- else :
1425
- # baseline = 2nd index
1426
- for i in range (len (true_indices ) - 1 ):
1427
- if i % 2 == 0 :
1428
- if split > 1.0 :
1429
- adj = 'split' if f_down [true_indices [i ]] else '1.0/split'
1430
- else :
1431
- adj = '1.0/split' if f_down [true_indices [i ]] else 'split'
1432
- ranges .append ((true_indices [i ], true_indices [i + 1 ], adj ))
1397
+ adj = '1.0/split' if f_down [true_indices [i ]] else 'split'
1398
+ ranges .append ((true_indices [i ], true_indices [i + 1 ], adj ))
1433
1399
1434
- if len (true_indices ) % 2 != 0 :
1435
- if split > 1.0 :
1436
- adj = 'split' if f_down [true_indices [- 1 ]] else '1.0/split'
1437
- else :
1438
- adj = '1.0/split' if f_down [true_indices [- 1 ]] else 'split'
1439
- ranges .append ((true_indices [- 1 ], len (f ), adj ))
1400
+ if len (true_indices ) % 2 != 0 :
1401
+ if split > 1.0 :
1402
+ adj = 'split' if f_down [true_indices [- 1 ]] else '1.0/split'
1403
+ else :
1404
+ adj = '1.0/split' if f_down [true_indices [- 1 ]] else 'split'
1405
+ ranges .append ((true_indices [- 1 ], len (f ), adj ))
1440
1406
1441
1407
return ranges
1442
1408
1409
+ if idx_latest_active is not None :
1410
+ idx_rev_latest_active = df .shape [0 ] - 1 - idx_latest_active
1411
+ logger .debug (f'price-repair-split: idx_latest_active={ idx_latest_active } , idx_rev_latest_active={ idx_rev_latest_active } ' )
1443
1412
if correct_columns_individually :
1444
1413
f_corrected = np .full (n , False )
1445
1414
if correct_volume :
@@ -1455,7 +1424,38 @@ def map_signals_to_ranges(f, f_up, f_down):
1455
1424
OHLC_correct_ranges = [None , None , None , None ]
1456
1425
for j in range (len (OHLC )):
1457
1426
c = OHLC [j ]
1458
- ranges = map_signals_to_ranges (f [:, j ], f_up [:, j ], f_down [:, j ])
1427
+ idx_first_f = np .where (f )[0 ][0 ]
1428
+ if appears_suspended and (idx_latest_active is not None and idx_latest_active >= idx_first_f ):
1429
+ # Suspended midway during data date range.
1430
+ # 1: process data before suspension in index-ascending (date-descending) order.
1431
+ # 2: process data after suspension in index-descending order. Requires signals to be reversed,
1432
+ # then returned ranges to also be reversed, because this logic was originally written for
1433
+ # index-ascending (date-descending) order.
1434
+ fj = f [:, j ]
1435
+ f_upj = f_up [:, j ]
1436
+ f_downj = f_down [:, j ]
1437
+ ranges_before = map_signals_to_ranges (fj [idx_latest_active :], f_upj [idx_latest_active :], f_downj [idx_latest_active :])
1438
+ if len (ranges_before ) > 0 :
1439
+ # Shift each range back to global indexing
1440
+ for i in range (len (ranges_before )):
1441
+ r = ranges_before [i ]
1442
+ ranges_before [i ] = (r [0 ] + idx_latest_active , r [1 ] + idx_latest_active , r [2 ])
1443
+ f_rev_downj = np .flip (np .roll (f_upj , - 1 )) # correct
1444
+ f_rev_upj = np .flip (np .roll (f_downj , - 1 )) # correct
1445
+ f_revj = f_rev_upj | f_rev_downj
1446
+ ranges_after = map_signals_to_ranges (f_revj [idx_rev_latest_active :], f_rev_upj [idx_rev_latest_active :], f_rev_downj [idx_rev_latest_active :])
1447
+ if len (ranges_after ) > 0 :
1448
+ # Shift each range back to global indexing:
1449
+ for i in range (len (ranges_after )):
1450
+ r = ranges_after [i ]
1451
+ ranges_after [i ] = (r [0 ] + idx_rev_latest_active , r [1 ] + idx_rev_latest_active , r [2 ])
1452
+ # Flip range to normal ordering
1453
+ for i in range (len (ranges_after )):
1454
+ r = ranges_after [i ]
1455
+ ranges_after [i ] = (n - r [1 ], n - r [0 ], r [2 ])
1456
+ ranges = ranges_before ; ranges .extend (ranges_after )
1457
+ else :
1458
+ ranges = map_signals_to_ranges (f [:, j ], f_up [:, j ], f_down [:, j ])
1459
1459
logger .debug (f"column '{ c } ' ranges: { ranges } " )
1460
1460
if start_min is not None :
1461
1461
# Prune ranges that are older than start_min
@@ -1514,7 +1514,35 @@ def map_signals_to_ranges(f, f_up, f_down):
1514
1514
df2 .loc [f_corrected , 'Repaired?' ] = True
1515
1515
1516
1516
else :
1517
- ranges = map_signals_to_ranges (f , f_up , f_down )
1517
+ idx_first_f = np .where (f )[0 ][0 ]
1518
+ if appears_suspended and (idx_latest_active is not None and idx_latest_active >= idx_first_f ):
1519
+ # Suspended midway during data date range.
1520
+ # 1: process data before suspension in index-ascending (date-descending) order.
1521
+ # 2: process data after suspension in index-descending order. Requires signals to be reversed,
1522
+ # then returned ranges to also be reversed, because this logic was originally written for
1523
+ # index-ascending (date-descending) order.
1524
+ ranges_before = map_signals_to_ranges (f [idx_latest_active :], f_up [idx_latest_active :], f_down [idx_latest_active :])
1525
+ if len (ranges_before ) > 0 :
1526
+ # Shift each range back to global indexing
1527
+ for i in range (len (ranges_before )):
1528
+ r = ranges_before [i ]
1529
+ ranges_before [i ] = (r [0 ] + idx_latest_active , r [1 ] + idx_latest_active , r [2 ])
1530
+ f_rev_down = np .flip (np .roll (f_up , - 1 ))
1531
+ f_rev_up = np .flip (np .roll (f_down , - 1 ))
1532
+ f_rev = f_rev_up | f_rev_down
1533
+ ranges_after = map_signals_to_ranges (f_rev [idx_rev_latest_active :], f_rev_up [idx_rev_latest_active :], f_rev_down [idx_rev_latest_active :])
1534
+ if len (ranges_after ) > 0 :
1535
+ # Shift each range back to global indexing:
1536
+ for i in range (len (ranges_after )):
1537
+ r = ranges_after [i ]
1538
+ ranges_after [i ] = (r [0 ] + idx_rev_latest_active , r [1 ] + idx_rev_latest_active , r [2 ])
1539
+ # Flip range to normal ordering
1540
+ for i in range (len (ranges_after )):
1541
+ r = ranges_after [i ]
1542
+ ranges_after [i ] = (n - r [1 ], n - r [0 ], r [2 ])
1543
+ ranges = ranges_before ; ranges .extend (ranges_after )
1544
+ else :
1545
+ ranges = map_signals_to_ranges (f , f_up , f_down )
1518
1546
if start_min is not None :
1519
1547
# Prune ranges that are older than start_min
1520
1548
for i in range (len (ranges )- 1 , - 1 , - 1 ):
0 commit comments