forked from gnarayan/decat_pointings
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pdastro.py
1361 lines (1101 loc) · 57.9 KB
/
pdastro.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
'''
wrapper around pandas with convenience functions to ease handling of tables
A. Rest
'''
import sys,os,re,types,copy,io
import numpy as np
from astropy.time import Time
import astropy.io.fits as fits
import pandas as pd
from pandas.core.dtypes.common import is_object_dtype,is_float_dtype,is_string_dtype,is_integer_dtype
from astropy.nddata import bitmask
from astropy import units as u
from astropy.coordinates import SkyCoord
from scipy.interpolate import interp1d
def makepath(path, raiseError=True):
    """Create directory `path` (including parents) if it does not exist.

    Returns 0 on success or if `path` is empty. On failure, raises
    RuntimeError (default) or returns 1 if raiseError is False.
    """
    if path == '':
        return(0)
    if not os.path.isdir(path):
        # fix: exist_ok avoids a race if another process creates the dir in
        # between; catching OSError makes the raiseError/return-1 contract
        # hold instead of leaking an unrelated exception type.
        try:
            os.makedirs(path, exist_ok=True)
        except OSError:
            pass
        if not os.path.isdir(path):
            if raiseError:
                raise RuntimeError('ERROR: Cannot create directory %s' % path)
            else:
                return(1)
    return(0)
def makepath4file(filename, raiseError=True):
    """Ensure the directory that will contain `filename` exists (see makepath)."""
    dirname = os.path.dirname(filename)
    if os.path.isdir(dirname):
        return(0)
    return(makepath(dirname, raiseError=raiseError))
def rmfile(filename, raiseError=1, gzip=False):
    """Remove `filename` if it exists; with gzip=True also remove `filename`.gz.

    Returns 0 on success. If a file survives removal: raises RuntimeError
    when raiseError == 1, otherwise returns 1 (plain file) or 2 (.gz file).
    """
    targets = [(filename, 1)]
    if gzip:
        targets.append((filename + '.gz', 2))
    for target, errcode in targets:
        if not os.path.lexists(target):
            continue
        os.remove(target)
        if os.path.isfile(target):
            if raiseError == 1:
                raise RuntimeError('ERROR: Cannot remove %s' % target)
            else:
                return(errcode)
    return(0)
def rmfiles(filenames, raiseError=1, gzip=False):
    """Remove every file in the list `filenames`; OR together rmfile error codes."""
    if type(filenames) is not list:
        raise RuntimeError("List type expected as input to rmfiles")
    errorflag = 0
    for fname in filenames:
        errorflag |= rmfile(fname, raiseError=raiseError, gzip=gzip)
    return(errorflag)
#https://numpy.org/doc/stable/reference/routines.set.html
def AorB(A, B):
    """Set union of A and B (np.union1d); an empty input returns the other unchanged."""
    if len(A) == 0 or len(B) == 0:
        return B if len(A) == 0 else A
    return np.union1d(A, B)
def AandB(A, B, assume_unique=False, keeporder=False):
    """Intersection of A and B. keeporder=True keeps A's ordering (and A's
    duplicates) at the cost of an O(len(A)*len(B)) scan; otherwise
    np.intersect1d returns sorted unique values."""
    if keeporder:
        return [item for item in A if item in B]
    return np.intersect1d(A, B, assume_unique=assume_unique)
def AnotB(A, B, keeporder=False):
    """Elements of A that are not in B. keeporder=True keeps A's ordering
    (and duplicates); otherwise np.setdiff1d returns sorted unique values."""
    if keeporder:
        return [item for item in A if item not in B]
    return np.setdiff1d(A, B)
def not_AandB(A, B):
    """Symmetric difference: values in exactly one of A and B (sorted, unique)."""
    return np.setxor1d(A, B)
def unique(A):
    """Return A's elements in first-seen order with duplicates dropped."""
    seen = []
    for item in A:
        if item not in seen:
            seen.append(item)
    return seen
def radec2coord(ra, dec):
    """Build an ICRS SkyCoord from ra/dec.

    A ':' in ra implies sexagesimal hour/degree input. Returns None (after
    printing an error) if the values cannot be interpreted.
    """
    units = [u.deg, u.deg]
    if ':' in str(ra):
        # Assume input RA/DEC are hour/degree
        units[0] = u.hourangle
    try:
        return(SkyCoord(ra, dec, frame='icrs', unit=units))
    except ValueError:
        print(f'ERROR: cannot interpret: RA={ra} DEC={dec}')
        return(None)
class pdastroclass:
    """Wrapper around a pandas DataFrame (self.t) with convenience functions
    for loading, writing, index-based filtering, and astronomy helpers."""
    def __init__(self,hexcols=[],hexcol_formatters={},**kwargs):
        # NOTE(review): mutable default arguments (hexcols=[], hexcol_formatters={})
        # are shared between instances — confirm no caller mutates them.
        self.t = pd.DataFrame(**kwargs)
        # verbosity level used by some methods for progress output
        self.verbose = 0
        # if self.auto_convert_dtypes==True, then before .to_string() is called in self.write, self.t.convert_dtypes() is run
        #self.auto_convert_dtypes = True
        # special cases: columns in hexadecimal format.
        self.hexcols=hexcols
        self.hexcol_formatters=hexcol_formatters
        # if a file is successfully loaded, the filename is saved in this variable
        self.filename = None
        # if self.default_dtypeMapping != None, then the dtype mapping is applied to the table .to_string() is called in self.write
        # This makes sure that formatters don't throw errors if the type of a column got changed to float or object during
        # one of the table operations
        self.default_dtypeMapping = None
        # example:
        # self.default_dtypeMapping = {'counter':np.int64}
        # default_formatters are passed to to_string() in self.write
        self.default_formatters = {}
        # example:
        # self.default_formatters = {'MJD':'{:.6f}'.format,'counter':'{:05d}'.format}
        # add list of columns to be skipped when using write function
        self.skipcols = []
        # dictionary for the splines. keys are the y columns of the spline
        self.spline={}
def load_lines(self,lines,sep='\s+',**kwargs):
#errorflag = self.load_spacesep(io.StringIO('\n'.join(lines)),sep=sep,**kwargs)
errorflag = self.load_spacesep(io.StringIO('\n'.join(lines)),**kwargs)
return(errorflag)
def load_cmpfile(self,filename,**kwargs):
"""
load old frankenstein format of cmp file
Parameters
----------
filename :
cmp filename.
Returns
-------
errorflag and fits header
"""
cmphdr = fits.getheader(filename)
s = ''
for i in range(1,int(cmphdr['NCOLTBL'])+1):
s+=' %s' % cmphdr['COLTBL%d' % i]
s = re.sub('Xpos','X',s)
s = re.sub('Ypos','Y',s)
lines = open(filename,'r').readlines()
lines[0]=s
errorflag = self.load_spacesep(io.StringIO('\n'.join(lines)))
return(errorflag,cmphdr)
    def load_spacesep(self,filename,test4commentedheader=True,namesMapping=None,roundingMapping=None,
                      hexcols=None,auto_find_hexcols=True, delim_whitespace=True,
                      na_values=['None','-','--'],verbose=False,**kwargs):
        """Load a whitespace-separated table into self.t (thin wrapper around self.load).

        test4commentedheader=True also accepts the old '#'-commented header format.
        NOTE(review): always returns 0 — the errorflag from self.load is not
        propagated; confirm whether `return self.load(...)` was intended.
        """
        #kwargs['delim_whitespace']=True
        #also test for commented header to make it compatible to old format.
        self.load(filename,na_values=na_values,test4commentedheader=test4commentedheader,
                  namesMapping=namesMapping,roundingMapping=roundingMapping,delim_whitespace=delim_whitespace,
                  hexcols=hexcols,auto_find_hexcols=auto_find_hexcols,verbose=verbose,**kwargs)
        return(0)
    def load(self,filename,raiseError=True,test4commentedheader=False,namesMapping=None,roundingMapping=None,
             hexcols=None,auto_find_hexcols=True,verbose=False,delim_whitespace=True,check4csv=True,**kwargs):
        """Load `filename` into self.t with pandas, then apply formattable().

        Files ending in 'csv' go through pd.read_csv, everything else through
        pd.read_table. Returns 0 on success; on read failure raises
        RuntimeError (default) or returns 1 if raiseError is False.
        """
        #self.t = ascii.read(filename,format='commented_header',delimiter='\s',fill_values=[('-',0),('--',0)])
        try:
            if verbose: print('Loading %s' % filename)
            if check4csv and re.search('csv$',filename):
                # NOTE(review): delim_whitespace=True is passed to read_csv even
                # for .csv files, which overrides comma separation — confirm intent.
                self.t = pd.read_csv(filename,delim_whitespace=delim_whitespace,comment='#',**kwargs)
            else:
                self.t = pd.read_table(filename,delim_whitespace=delim_whitespace,**kwargs)
            self.filename = filename
        except Exception as e:
            print('ERROR: could not read %s!' % filename)
            if raiseError:
                raise RuntimeError(str(e))
            self.filename = None
            return(1)
        if test4commentedheader:
            # This is to make it compatible to my old-style commented header files!
            # commented header: make sure it doesn't count the '#' as a column!
            if self.t.columns[0]=='#':
                # shift all column names one to the left and drop the last one
                renamemapping = {}
                for i in range(len(self.t.columns)-1):
                    renamemapping[self.t.columns[i]]=self.t.columns[i+1]
                renamemapping[self.t.columns[-1]]='__delme'
                self.t = self.t.rename(columns=renamemapping)
                self.t = self.t.drop(columns=['__delme'])
            # make sure the # is not kept in column name!
            if self.t.columns[0][0]=='#':
                self.t = self.t.rename(columns={self.t.columns[0]:self.t.columns[0][1:]})
        if hexcols is None: hexcols=[]
        hexcols.extend(self.hexcols)
        self.formattable(namesMapping=namesMapping,roundingMapping=roundingMapping,hexcols=hexcols,auto_find_hexcols=auto_find_hexcols)
        return(0)
    def write(self,filename=None,indices=None,columns=None,formatters=None,
              raiseError=True,overwrite=True,verbose=False,
              commentedheader=False,
              index=False, makepathFlag=True,convert_dtypes=False,
              hexcols=None,skipcols=None,**kwargs):
        """Write the selected rows/columns of self.t to `filename` (or to
        stdout if filename is None) via DataFrame.to_string.

        Returns 0 on success; when raiseError is False, returns 1 (could not
        make directory), 2 (could not clobber), or 3 (file not written).
        commentedheader=True temporarily prefixes the first column name with
        '#' for backwards compatibility with the old text format.
        """
        # make sure indices are converted into a valid list
        indices=self.getindices(indices)
        # make sure columns are converted into a valid list
        columns=self.getcolnames(columns)
        if skipcols is None: skipcols = self.skipcols
        if len(skipcols)>0:
            columns=AnotB(columns,skipcols,keeporder=True)
        # make the path to the file if necessary
        if not (filename is None):
            if makepathFlag:
                if makepath4file(filename,raiseError=False):
                    errorstring='ERROR: could not make directory for %s' % filename
                    if raiseError:
                        raise RuntimeError(errorstring)
                    #print(errorstring)
                    return(1)
            # if overwrite, then remove the old file first
            if os.path.isfile(filename):
                if overwrite:
                    os.remove(filename)
                    if os.path.isfile(filename):
                        errorstring='ERROR: could not clobber %s' % filename
                        if raiseError:
                            raise RuntimeError(errorstring)
                        #print(errorstring)
                        return(2)
                else:
                    # NOTE(review): these messages print '(unknown)' instead of
                    # the filename — looks like a mangled f-string; confirm.
                    if raiseError:
                        raise RuntimeError(f'file (unknown) already exists! use overwrite option...')
                    print(f'Warning: file (unknown) already exists, not deleting it, skipping! if you want to overwrite, use overwrite option!')
                    return(0)
        # Fix the dtypes if wanted. THIS IS DANGEROUS!!!
        if convert_dtypes:
            # Using pandas convert_dtypes converts the columns into panda types, e.g. Int64. These types
            # are different than the numpy types (e.g., int64). The numpy types work with '{}'.format, the panda
            # int types do not!!! Therefore I set convert_dtype to False by default.
            self.t=self.t.convert_dtypes()
        if not(self.default_dtypeMapping is None):
            self.formattable(dtypeMapping=self.default_dtypeMapping)
        # if both formatters and self.defaultformatters are None, then no formatting. formatters supersedes self.defaultformatters
        if formatters is None:
            formatters = self.default_formatters
        # hexcols
        if hexcols is None: hexcols=[]
        hexcols.extend(self.hexcols)
        if len(hexcols)>0:
            if formatters is None: formatters={}
            # NOTE(review): this loop iterates self.hexcols, so extra columns
            # passed via the hexcols argument get no formatter — confirm intent.
            for hexcol in self.hexcols:
                if not(hexcol in self.t.columns):
                    #Nothing to do yet!
                    continue
                if not (hexcol in formatters):
                    # NOTE(review): a dtype is never an *instance* of np.int64,
                    # so this test is always False and the 0x{:04x} branch
                    # always runs — confirm intent.
                    if isinstance(self.t[hexcol].dtype,np.int64):
                        formatters[hexcol]='0x{:08x}'.format
                    else:
                        formatters[hexcol]='0x{:04x}'.format
        if verbose>1 and not(filename is None): print('Saving %d rows into %s' % (len(indices),filename))
        # ugly hack: backwards compatibility to old commented header texttable format:
        # rename first column temporarily to have a '#'
        if commentedheader:
            columns=list(columns)
            self.t.rename(columns={columns[0]:f'#{columns[0]}'},inplace=True)
            columns[0]=f'#{columns[0]}'
        if len(indices)==0:
            # just save the header
            if filename is None:
                print(' '.join(columns)+'\n')
            else:
                if columns is None:
                    columns = []
                # NOTE(review): file handle not closed explicitly
                open(filename,'w').writelines(' '.join(columns)+'\n')
        else:
            if filename is None:
                print(self.t.loc[indices].to_string(index=index, columns=columns, formatters=formatters, **kwargs))
            else:
                self.t.loc[indices].to_string(filename, index=index, columns=columns, formatters=formatters, **kwargs)
        # reverse ugly hack
        if commentedheader:
            columns[0]=re.sub('^\#','',columns[0])
            self.t.rename(columns={"#"+columns[0]:columns[0]},inplace=True)
        if not (filename is None):
            # some extra error checking...
            if not os.path.isfile(filename):
                errorstring='ERROR: could not save %s' % filename
                if raiseError:
                    raise RuntimeError(errorstring)
                #print(errorstring)
                return(3)
        return(0)
    def formattable(self,namesMapping=None,roundingMapping=None,dtypeMapping=None,hexcols=None,auto_find_hexcols=False):
        """Apply column renames, rounding, dtype conversions, and hex-column
        detection/conversion to self.t. Always returns 0."""
        if not(namesMapping is None):
            self.t = self.t.rename(columns=namesMapping)
        if not(roundingMapping is None):
            self.t = self.t.round(roundingMapping)
        if not(dtypeMapping is None):
            for col in dtypeMapping:
                self.t[col] = self.t[col].astype(dtypeMapping[col])
        # hexadecimal columns
        hexpattern = re.compile('^0x[a-fA-F0-9]+$')
        # if wanted, find columns that are in hexadecimal string format, and convert to integer.
        # These columns are added to self.hexcols
        if auto_find_hexcols and len(self.t)>0:
            for col in self.t.columns:
                # only the first row (label 0) is inspected to classify a column as hex
                if self.t[col].dtype == object and isinstance(self.t.at[0,col],str):
                    if not(hexpattern.search(self.t.at[0,col]) is None):
                        # add column to self.hexcols, so that it stays a hexcol when writing and saving
                        if not(col in self.hexcols):
                            self.hexcols.append(col)
                        # convert it to int
                        self.t[col] = self.t[col].apply(int, base=16)
        # Go through hexcols, and check if they need conversion.
        # These columns are also added to self.hexcols
        if not(hexcols is None):
            for hexcol in hexcols:
                # add column to self.hexcols, so that it stays a hexcol when writing and saving
                if not(hexcol in self.hexcols):
                    self.hexcols.append(hexcol)
                if not(hexcol in self.t.columns):
                    # nothing to do (yet)!
                    continue
                # if the column is still a string starting with '0x', convert it to int
                if self.t[hexcol].dtype == object and isinstance(self.t.at[0,hexcol],str):
                    if not(hexpattern.search(self.t.at[0,hexcol]) is None):
                        self.t[hexcol] = self.t[hexcol].apply(int, base=16)
        return(0)
def getindices(self,indices=None):
"""make indices conform (input can be None,([list],), int, str, or list). The output is a list """
#If indices is None, return all values
if indices is None:
return(self.t.index.values)
# If indices=([indiceslist],), then it needs to be changed to indices=indiceslist
if isinstance(indices,tuple):
if len(indices)==0:
#return an empty list instead of empty tuple
return([])
# teh first entry is a list, then these are the relevant indices!
if isinstance(indices[0],list) or isinstance(indices[0],np.ndarray):
return(indices[0])
else:
return(list(indices))
# if the passed value is an integer or str, make it a list!
if isinstance(indices,int) or isinstance(indices,str) or isinstance(indices,float):
return([indices])
indices=np.array(indices)
#if not (isinstance(indices,list) or isinstance(indices,np.ndarray)):
# raise RuntimeError("Can't convert this to an indices list!",type(indices),indices)
return(indices)
def getcolnames(self,colnames=None):
"""Return a list of all colnames of colnames=None. If colnames=string, return a list"""
if (colnames is None):
colnames = self.t.columns[:]
elif isinstance(colnames,str):
if colnames.lower()=='all':
colnames = self.t.columns[:]
else:
colnames=[colnames]
return(colnames)
def ix_remove_null(self,colnames=None,indices=None):
print('ix_remove_null deprecated, replace with ix_not_null')
return(self.ix_not_null(colnames=colnames,indices=indices))
def ix_not_null(self,colnames=None,indices=None):
# get the indices based on input.
indices=self.getindices(indices)
# get the column names over which to iterate
colnames=self.getcolnames(colnames)
for colname in colnames:
#print('XXX',indices)
(notnull,) = np.where(pd.notnull(self.t.loc[indices,colname]))
indices = indices[notnull]
#print('YYY',notnull)
return(indices)
def ix_is_null(self,colnames=None,indices=None):
# get the indices based on input.
indices=self.getindices(indices)
# get the column names over which to iterate
colnames=self.getcolnames(colnames)
for colname in colnames:
#print('XXX',indices)
(null,) = np.where(pd.isnull(self.t.loc[indices,colname]))
indices = indices[null]
#print('YYY',null)
return(indices)
def ix_equal(self,colnames,val,indices=None):
# get the indices based on input.
# use isnull() if val is None
if val is None:
indices = self.ix_is_null(colnames,indices=indices)
return(indices)
indices=self.getindices(indices)
# get the column names over which to iterate
colnames=self.getcolnames(colnames)
for colname in colnames:
(keep,) = np.where(self.t.loc[indices,colname].eq(val))
indices = indices[keep]
return(indices)
def ix_not_equal(self,colnames,val,indices=None):
# get the indices based on input.
# use notnull() if val is None
if val is None:
indices = self.ix_not_null(colnames,indices=indices)
return(indices)
indices=self.getindices(indices)
# get the column names over which to iterate
colnames=self.getcolnames(colnames)
for colname in colnames:
(keep,) = np.where(self.t.loc[indices,colname].ne(val))
indices = indices[keep]
return(indices)
def ix_inrange(self,colnames=None,lowlim=None,uplim=None,indices=None,
exclude_lowlim=False,exclude_uplim=False):
# get the indices based on input.
indices=self.getindices(indices)
# get the column names over which to iterate
colnames=self.getcolnames(colnames)
#print(colnames)
for colname in colnames:
print(colname,uplim)
if not(lowlim is None):
if exclude_lowlim:
(keep,) = np.where(self.t.loc[indices,colname].gt(lowlim))
else:
(keep,) = np.where(self.t.loc[indices,colname].ge(lowlim))
indices = indices[keep]
#print('lowlim cut:',keep)
if not(uplim is None):
if exclude_uplim:
(keep,) = np.where(self.t.loc[indices,colname].lt(uplim))
else:
(keep,) = np.where(self.t.loc[indices,colname].le(uplim))
indices = indices[keep]
#print('uplim cut:',keep)
return(indices)
    def ix_outrange(self,colnames=None,lowlim=None,uplim=None,indices=None,
                    exclude_lowlim=False,exclude_uplim=False):
        """Return the subset of `indices` whose column values lie OUTSIDE
        [lowlim, uplim]: at/below lowlim or at/above uplim. A limit of None
        disables that side; exclude_* makes the comparison strict (< or >)."""
        # get the indices based on input.
        indices=self.getindices(indices)
        # get the column names over which to iterate
        colnames=self.getcolnames(colnames)
        for colname in colnames:
            if not(lowlim is None):
                if exclude_lowlim:
                    (keeplow,) = np.where(self.t.loc[indices,colname].lt(lowlim))
                else:
                    (keeplow,) = np.where(self.t.loc[indices,colname].le(lowlim))
            else:
                keeplow=[]
            if not(uplim is None):
                if exclude_uplim:
                    (keepup,) = np.where(self.t.loc[indices,colname].gt(uplim))
                else:
                    (keepup,) = np.where(self.t.loc[indices,colname].ge(uplim))
            else:
                keepup=[]
            # keep a row if it is outside either limit (union of both cuts)
            indices = indices[AorB(keeplow,keepup)]
        return(indices)
    def ix_unmasked(self,maskcol,maskval=None,indices=None):
        """Return the subset of `indices` NOT masked in `maskcol`: value == 0
        if maskval is None, otherwise none of the bits of `maskval` set
        (via astropy bitmask.bitfield_to_boolean_mask)."""
        # get the indices based on input.
        indices=self.getindices(indices)
        if maskval is None:
            (keep,) = np.where(self.t.loc[indices,maskcol].eq(0))
        else:
            # ~maskval ignores all bits except those in maskval;
            # good_mask_value=True marks the unmasked rows as True
            (keep,) = np.where(bitmask.bitfield_to_boolean_mask(self.t.loc[indices,maskcol].astype('int'),ignore_flags=~maskval,good_mask_value=True))
        indices = indices[keep]
        return(indices)
    def ix_masked(self,maskcol,maskval=None,indices=None):
        """Return the subset of `indices` that ARE masked in `maskcol`:
        value != 0 if maskval is None, otherwise at least one bit of
        `maskval` set (via astropy bitmask.bitfield_to_boolean_mask)."""
        # get the indices based on input.
        indices=self.getindices(indices)
        if maskval is None:
            (keep,) = np.where(self.t.loc[indices,maskcol].ne(0))
        else:
            # ~maskval ignores all bits except those in maskval; default
            # good_mask_value marks masked rows as True
            (keep,) = np.where(bitmask.bitfield_to_boolean_mask(self.t.loc[indices,maskcol].astype('int'),ignore_flags=~maskval))
        indices = indices[keep]
        return(indices)
def ix_matchregex(self,col,regex,indices=None):
# get the indices based on input.
indices=self.getindices(indices)
(keep,) = np.where((self.t.loc[indices,col].str.contains(regex)==True))
#bla = self.t[col].str.contains(regex)==True
indices = indices[keep]
return(indices)
def ix_sort_by_cols(self,cols,indices=None,ascending=True):
# get the indices based on input.
indices=self.getindices(indices)
# get the column names (makes sure that it is a list)
cols=self.getcolnames(cols)
ix_sorted = self.t.loc[indices].sort_values(cols,ascending=ascending).index.values
return(ix_sorted)
def newrow(self,dicti=None):
#self.t = self.t.append(dicti,ignore_index=True)
self.t = pd.concat([self.t,pd.DataFrame([dicti])],axis=0, ignore_index=True)
return(self.t.index.values[-1])
def add2row(self,index,dicti):
self.t.loc[index,list(dicti.keys())]=list(dicti.values())
return(index)
    def fitsheader2table(self,fitsfilecolname,indices=None,requiredfitskeys=None,optionalfitskey=None,
                         raiseError=True,verify='silentfix',skipcolname=None,headercol=None,ext=None,extname=None,
                         prefix=None,suffix=None):
        """For each selected row, open the FITS file named in column
        `fitsfilecolname` and copy header keywords into table columns
        (column names optionally wrapped with prefix/suffix).

        requiredfitskeys: a missing key raises RuntimeError, or — when
        raiseError=False — stores None and sets skipcolname to True.
        optionalfitskey: missing keys just store None.
        headercol: column in which to store the header object itself.
        verify: passed to HDUList.verify (None disables verification).
        """
        def fitskey2col(fitskey,prefix=None,suffix=None):
            # map a fits keyword to its table column name
            col = fitskey
            if prefix is not None: col = prefix+col
            if suffix is not None: col += suffix
            return(col)
        indices = self.getindices(indices)
        # initialize columns if necessary
        if requiredfitskeys!=None:
            for fitskey in requiredfitskeys:
                col = fitskey2col(fitskey,prefix=prefix,suffix=suffix)
                if not (col in self.t.columns):
                    self.t[col]=None
        if optionalfitskey!=None:
            for fitskey in optionalfitskey:
                col = fitskey2col(fitskey,prefix=prefix,suffix=suffix)
                if not (col in self.t.columns):
                    self.t[col]=None
        if headercol!=None and (not (headercol in self.t.columns)):
            self.t[headercol]=None
        # loop through the images
        for index in indices:
            #header = fits.getheader(self.t.loc[index,fitsfilecolname],ext=ext,extname=extname)
            # It was impossible to use 'verify' with getheader...
            # NOTE(review): the HDUList is never closed — possible handle leak
            # for long tables; confirm.
            hdu = fits.open(self.t.loc[index,fitsfilecolname],ext=ext,extname=extname,output_verify="silentfix")
            if verify is not None: hdu.verify(verify)
            header = hdu[0].header
            if headercol!=None:
                # NOTE(review): this assigns to the WHOLE column each iteration
                # (self.t[headercol], not .loc[index,...]) — confirm intent.
                self.t[headercol]=header
            if skipcolname!=None:
                self.t.loc[index,skipcolname]=False
            if requiredfitskeys!=None:
                for fitskey in requiredfitskeys:
                    col = fitskey2col(fitskey,prefix=prefix,suffix=suffix)
                    if fitskey in header:
                        self.t.loc[index,col]=header[fitskey]
                    else:
                        if raiseError:
                            raise RuntimeError("fits key %s does not exist in file %s" % (fitskey,self.t[fitsfilecolname][index]))
                        else:
                            self.t.loc[index,col]=None
                            if skipcolname!=None:
                                self.t.loc[index,skipcolname]=True
            if optionalfitskey!=None:
                for fitskey in optionalfitskey:
                    col = fitskey2col(fitskey,prefix=prefix,suffix=suffix)
                    if fitskey in header:
                        self.t.loc[index,col]=header[fitskey]
                    else:
                        self.t.loc[index,col]=None
    def dateobs2mjd(self,dateobscol,mjdcol,timeobscol=None,indices=None,tformat='isot'):
        """Convert date strings in `dateobscol` (optionally joined with
        `timeobscol` as DATE+'T'+TIME) into MJDs written to `mjdcol`.
        tformat is the astropy Time input format; the time scale is UTC."""
        indices = self.getindices(indices)
        if len(indices)==0:
            return(0)
        if not (mjdcol in self.t.columns):
            self.t.loc[indices,mjdcol]=None
        if timeobscol!=None:
            dateobslist = list(self.t.loc[indices,dateobscol]+'T'+self.t.loc[indices,timeobscol])
        else:
            dateobslist = list(self.t.loc[indices,dateobscol])
        dateobjects = Time(dateobslist, format=tformat, scale='utc')
        mjds = dateobjects.mjd
        self.t.loc[indices,mjdcol]=mjds
def mjd2dateobs(self,mjdcol,dateobscol,indices=None,tformat='isot'):
indices = self.getindices(indices)
if len(indices)==0:
return(0)
if not (dateobscol in self.t.columns):
self.t.loc[indices,dateobscol]=None
mjdlist = list(self.t.loc[indices,mjdcol])
dateobjects = Time(mjdlist, format='mjd')
dateobs = dateobjects.to_value('isot')
self.t.loc[indices,dateobscol]=dateobs
def calc_color(self,f1,df1,f2,df2,outcolor=None,outcolor_err_nameformat='e(%s)',indices=None,color_formatter='{:.3f}'.format):
indices = self.getindices(indices)
if len(indices)==0:
return(0)
# get color and uncertainty column names
if outcolor is None: outcolor = '%s-%s' % (f1,f2)
outcolor_err = outcolor_err_nameformat % outcolor
# delete all previous results
self.t.loc[indices,outcolor]= np.nan
self.t.loc[indices,outcolor_err]= np.nan
# Only use indices for which both filters have uncertainties, i.e. they are not upper limits
ix_good = self.ix_not_null(df1,indices=indices)
ix_good = self.ix_not_null(df2,indices=ix_good)
self.t[outcolor] = self.t.loc[ix_good,f1] - self.t.loc[ix_good,f2]
self.t[outcolor_err] = np.sqrt(np.square(self.t.loc[ix_good,df1]) + np.square(self.t.loc[ix_good,df2]))
if not(color_formatter is None):
if self.default_formatters is None:
self.default_formatters = {}
self.default_formatters[outcolor]=color_formatter
self.default_formatters[outcolor_err]=color_formatter
return(0)
    def radeccols_to_SkyCoord(self,racol=None,deccol=None,indices=None, frame='icrs',
                              assert_0_360_limits=True,assert_pm90_limits=True):
        """Build astropy SkyCoord objects from the RA/Dec columns.

        Numeric columns are taken as decimal degrees (optionally wrapped into
        [0,360) and shifted into [-90,90]); string columns are parsed as
        sexagesimal, with a per-line check if a column mixes both formats.

        Returns (indices, coord); raises RuntimeError on null values or
        missing columns; returns ([],[]) for an empty selection.
        """
        if (racol is None) and (deccol is None):
            raise RuntimeError('You need to specify at least one of racol or deccol')
        indices = self.getindices(indices)
        for col in [racol,deccol]:
            if col is None: continue
            ixs_null = self.ix_is_null(col,indices)
            if len(ixs_null)>0:
                self.write(indices=ixs_null)
                raise RuntimeError(f'Null values for column {col} in above row(s)')
        if len(indices)==0:
            print('Warning, trying to assert ra/dec columns are decimal for 0 rows')
            return([],[])
        # fill ra and dec
        ra = np.full(len(indices),np.nan)
        dec = np.full(len(indices),np.nan)
        if racol is not None: ra = np.array(self.t.loc[indices,racol])
        if deccol is not None: dec = np.array(self.t.loc[indices,deccol])
        # check if all cols are already in numerical format. If yes, all good! This can speed things up!!
        numflag=True
        for col in [racol,deccol]:
            if col is None: continue
            if not(col in self.t.columns):
                raise RuntimeError(f'Column {col} is not in columns {self.t.columns}')
            if not(is_float_dtype(self.t[col]) or is_integer_dtype(self.t[col])):
                numflag=False
        if numflag:
            # no conversion needed!
            # check ra dec limits if wanted
            if (ra is not None) and assert_0_360_limits:
                ra = np.where(ra<0.0,ra+360.0,ra)
                ra = np.where(ra>=360.0,ra-360.0,ra)
            if (dec is not None) and assert_pm90_limits:
                # NOTE(review): shifting dec by +-180 for |dec|>90 moves the
                # point to the other hemisphere rather than clamping — confirm.
                dec = np.where(dec<-90.0,dec+180.0,dec)
                dec = np.where(dec> 90.0,dec-180.0,dec)
            coord = SkyCoord(ra, dec, frame=frame, unit=(u.deg,u.deg))
        else:
            # convert the strings into decimal degrees
            unit = [u.deg,u.deg]
            check_line_by_line = False
            # check format of racol if necessary
            if racol is not None:
                hexagesimal = unique([(re.search(':',x) is not None) for x in ra])
                if len(hexagesimal)==1:
                    # if hexagesimal true, set unit of RA to hourangle!!
                    if hexagesimal[0]:
                        unit[0] = u.hourangle
                elif len(hexagesimal)==2:
                    # both formats? then check line by line!
                    # NOTE(review): these messages are not f-strings, so
                    # '{racol}' is printed literally — confirm intent.
                    print('Warning: it looks like there are inconsistent formats in RA column {racol}! checking line by line for sexagesimal')
                    check_line_by_line = True
                else:
                    raise RuntimeError('Something is wrong here when trying to determine if RA col {racol} is sexagesimal! ')
            if check_line_by_line:
                # NOTE(review): coord is a float ndarray here, so assigning
                # SkyCoord objects into it will raise — this branch looks
                # untested; confirm.
                coord = np.full(len(indices),np.nan)
                hexagesimal = [(re.search(':',x) is not None) for x in ra]
                raunits=np.where(hexagesimal,u.hourangle,u.deg)
                for i in range(len(ra)):
                    coord[i] = SkyCoord(ra[i], dec[i], frame=frame, unit=(raunits[i],u.deg))
            else:
                coord = SkyCoord(ra, dec, frame=frame, unit=unit)
            # no need to check ra dec limits, already done in SkyCoord
        return(indices,coord)
    def assert_radec_cols_decimal_degrees(self,racol=None,deccol=None,
                                          outracol=None,outdeccol=None,
                                          indices=None,coordcol=None,
                                          assert_0_360_limits=True,
                                          assert_pm90_limits=True):
        """Ensure RA/Dec columns are decimal degrees, writing the converted
        values to outracol/outdeccol (defaulting to the input columns in
        place) and optionally the SkyCoord objects to `coordcol`, which is
        added to skipcols so write() never emits it. Returns 0."""
        (indices,coord) = self.radeccols_to_SkyCoord(racol=racol,deccol=deccol,indices=indices,
                                                     assert_0_360_limits=assert_0_360_limits,assert_pm90_limits=assert_pm90_limits)
        if outracol is None: outracol = racol
        if outdeccol is None: outdeccol = deccol
        if outracol is not None: self.t.loc[indices,outracol]=coord.ra.degree
        if outdeccol is not None: self.t.loc[indices,outdeccol]=coord.dec.degree
        if coordcol is not None:
            self.t.loc[indices,coordcol]=coord
            # Don't write coordcol
            if not (coordcol in self.skipcols): self.skipcols.append(coordcol)
        return(0)
    def assert_radec_cols_sexagesimal(self,racol=None,deccol=None,
                                      outracol=None,outdeccol=None,
                                      indices=None,coordcol=None,
                                      precision=3,
                                      assert_0_360_limits=True,
                                      assert_pm90_limits=True):
        """Convert RA/Dec columns to sexagesimal strings (hms/dms with ':'
        separators and the given precision), writing to outracol/outdeccol
        (defaulting to the input columns in place) and optionally the
        SkyCoord objects to `coordcol`, which is added to skipcols so
        write() never emits it. Returns 0."""
        (indices,coord) = self.radeccols_to_SkyCoord(racol=racol,deccol=deccol,indices=indices,
                                                     assert_0_360_limits=assert_0_360_limits,
                                                     assert_pm90_limits=assert_pm90_limits)
        if outracol is None: outracol = racol
        if outdeccol is None: outdeccol = deccol
        # list of ra/dec pairs
        hmsdms = map(lambda x: x.split(' '), coord.to_string(sep=':', style='hmsdms', precision=precision))
        # unzip pairs into a list of ra and a list of dec
        radeclist = list(zip(*hmsdms))
        if outracol is not None: self.t.loc[indices,outracol]=radeclist[0]
        if outdeccol is not None: self.t.loc[indices,outdeccol]=radeclist[1]
        if coordcol is not None:
            self.t.loc[indices,coordcol]=coord
            # Don't write coordcol
            if not (coordcol in self.skipcols): self.skipcols.append(coordcol)
        return(0)
def radeccols_to_coord(self,racol,deccol,coordcol,indices=None,
assert_0_360_limits=True,assert_pm90_limits=True):
(indices,coord) = self.radeccols_to_SkyCoord(racol=racol,deccol=deccol,indices=indices,
assert_0_360_limits=assert_0_360_limits,assert_pm90_limits=assert_pm90_limits)
self.t.loc[indices,coordcol]=coord
# Don't write coordcol
if not (coordcol in self.skipcols): self.skipcols.append(coordcol)
return(0)
    def flux2mag(self,fluxcol,dfluxcol,magcol,dmagcol,indices=None,
                 zpt=None,zptcol=None, upperlim_Nsigma=None):
        """Convert fluxes to magnitudes: magcol = -2.5*log10(flux) plus any
        zeropoint from `zpt` and/or `zptcol`; dmagcol = 2.5/ln(10)*dflux/flux.

        If upperlim_Nsigma is given, rows with S/N below it get an upper
        limit instead: -2.5*log10(flux + Nsigma*dflux) for positive flux,
        -2.5*log10(Nsigma*dflux) for negative flux, with dmagcol set to NaN.
        """
        indices = self.getindices(indices)
        if len(indices)==0:
            return(0)
        # delete all previous results
        self.t.loc[indices,magcol]= np.nan
        self.t.loc[indices,dmagcol]= np.nan
        # if upperlim_Nsigma is not None, then upper limits are calculated.
        if upperlim_Nsigma is None:
            indices_mag = indices
            # no upper limits
            indices_ul = indices_ul_negative = []
        else:
            # calculate the S/N
            self.t.loc[indices,'__tmp_SN']=self.t.loc[indices,fluxcol]/self.t.loc[indices,dfluxcol]
            # Get the indices with valid S/N
            indices_validSN = self.ix_not_null('__tmp_SN',indices=indices)
            # get the indices for which the S/N>=upperlim_Nsigma
            indices_mag = self.ix_inrange(['__tmp_SN'],upperlim_Nsigma,indices=indices_validSN)
            # all the other indices can only be used for upper limits
            indices_ul = AnotB(indices_validSN,indices_mag)
            # get the indices for which the S/N is negative
            indices_ul_negative = self.ix_inrange(['__tmp_SN'],None,0.0,indices=indices_ul)
            # NOTE(review): the temporary '__tmp_SN' column is left in the
            # table since the drop below is commented out — confirm intent.
            #self.t = self.t.drop(columns=['__tmp_SN'])
        # Calculate the mags and uncertainties
        self.t.loc[indices_mag,magcol]= -2.5*np.log10(self.t.loc[indices_mag,fluxcol])
        self.t.loc[indices_mag,dmagcol] = 2.5 / np.log(10.0) * self.t.loc[indices_mag,dfluxcol]/self.t.loc[indices_mag,fluxcol]
        # Are there upper limits to be calculated?
        if len(indices_ul)>0:
            # just to be sure, set dmags to nan
            self.t.loc[indices_ul,dmagcol] = np.nan
            indices_ul_positive = AnotB(indices_ul,indices_ul_negative)
            if len(indices_ul_positive)>0:
                # If flux is positive: upper limit = flux + Nsigma * dflux
                self.t.loc[indices_ul_positive,magcol] = -2.5*np.log10(self.t.loc[indices_ul_positive,fluxcol]+upperlim_Nsigma*self.t.loc[indices_ul_positive,dfluxcol])
            if len(indices_ul_negative)>0:
                # If flux is negative: upper limit = Nsigma * dflux
                self.t.loc[indices_ul_negative,magcol] = -2.5*np.log10(upperlim_Nsigma*self.t.loc[indices_ul_negative,dfluxcol])
        if not(zpt is None):
            self.t.loc[indices,magcol]+=zpt
        if not(zptcol is None):
            self.t.loc[indices,magcol]+=self.t.loc[indices,zptcol]
def initspline(self,xcol,ycol,indices=None,
kind='cubic',bounds_error=False,fill_value='extrapolate',
**kwargs):
if not (xcol in self.t.columns):
raise RuntimeError("spline: x column %s does not exist in table!" % xcol)
if not (ycol in self.t.columns):
raise RuntimeError("spline: y column %s does not exist in table!" % ycol)
# make sure there are no nan values
indices = self.ix_not_null(colnames=[xcol,ycol])
# initialize the spline and save it in self.spline with the key ycol
self.spline[ycol]= interp1d(self.t.loc[indices,xcol],self.t.loc[indices,ycol],
kind=kind,bounds_error=bounds_error,fill_value=fill_value,**kwargs)
def getspline(self,xval,ycol):
if not(ycol in self.spline):
raise RuntimeError('Spline for column %s is not defined!' % ycol)
return(self.spline[ycol](xval))
def plot(self,indices=None,*args,**kwargs):
indices = self.getindices(indices)
ax = self.t.loc[indices].plot(*args,**kwargs)
return(ax)
class pdastrostatsclass(pdastroclass):
    """pdastroclass extended with iterative sigma-clipped statistics
    (mean/stdev/X2norm) kept in self.statparams."""
    def __init__(self,**kwargs):
        pdastroclass.__init__(self,**kwargs)
        self.reset()
        self.set_statstring_format()
        # tabulated c4(n) bias-correction factors for n<=6; entries 0 and 1
        # are placeholders (c4 is undefined for n<2)
        self.c4_smalln = [0.0, 0.0, 0.7978845608028654, 0.8862269254527579, 0.9213177319235613, 0.9399856029866251, 0.9515328619481445]
def c4(self,n):
#http://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation
if n<=6:
return(self.c4_smalln[n])
else:
return(1.0 - 1.0/(4.0*n) - 7.0/(32.0*n*n) - 19.0/(128.0*n*n*n))
def reset(self):
self.statparams = {}
for k in ['mean','mean_err','stdev','stdev_err','X2norm','ix_good','ix_clip']:
self.statparams[k]=None
for k in ['Ngood','Nclip','Nchanged','Nmask','Nnan','converged','i']:
self.statparams[k]=0
self.calc_stdev_X2_flag = True
    def set_statstring_format(self,format_floats='{:f}',format_ints='{:d}',format_none='{}', format_X2norm='{:.2f}'):
        """Build the str.format templates used by statstring();
        str_format_none is the fallback when mean/stdev are undefined."""
        self.str_format1 = "mean:%s(%s) stdev:%s(%s) X2norm:%s Nchanged:%s Ngood:%s Nclip:%s" % (format_floats,format_floats,format_floats,format_floats,format_X2norm,format_ints,format_ints,format_ints)
        self.str_format_none = "mean:%s(%s) stdev:%s(%s) X2norm:%s Nchanged:%s Ngood:%s Nclip:%s" % (format_none,format_none,format_none,format_none,format_none,format_none,format_none,format_none)
        #self.str_format2 = "mean:%s(%s) stdev:%s X2norm:%s Nchanged:%s Ngood:%s Nclip:%s" % (format_floats,format_floats,format_floats,format_floats,format_ints,format_ints,format_ints)
    def statstring(self):
        """One-line summary of the current statparams; prefixed with
        'WARNING!' when mean or stdev is undefined."""
        if self.statparams['mean'] is None or self.statparams['stdev'] is None:
            formatstring = "WARNING! i:{:02d} "+ self.str_format_none
        else:
            formatstring = "i:{:02d} "+ self.str_format1
        s = formatstring.format(self.statparams['i'],self.statparams['mean'],self.statparams['mean_err'],
                                self.statparams['stdev'],self.statparams['stdev_err'],self.statparams['X2norm'],
                                self.statparams['Nchanged'],self.statparams['Ngood'],self.statparams['Nclip'])
        return(s)
def calcaverage_errorcut(self,datacol, noisecol, indices=None,
mean=None,Nsigma=None,medianflag=False,
return_ix=False, verbose=0):
# get the indices based on input.
indices=self.getindices(indices)
# If N-sigma cut and second iteration (i.e. we have a stdev from the first iteration), skip bad measurements.
if not(Nsigma is None) and not(mean is None):