Switch to faster read_csv() for Hipparcos catalog
Yes, the catalog file has fixed-width fields, but pandas is slow at parsing those with read_fwf(). Since the fields are also separated by '|' delimiters, we can parse the file with the much faster read_csv() instead.
This commit is contained in:
parent
a7c2794b60
commit
0af8e01b4a
|
@ -41,6 +41,21 @@ analysis toolkit. Try installing it using your usual Python package
|
|||
installer, like "pip install pandas" or "conda install pandas".
|
||||
"""
|
||||
|
||||
# Labels for the '|'-delimited fields of the catalog file, listed in the
# order the fields appear on each line.  `load_dataframe()` passes this
# tuple to pandas `read_csv()` as `names=` and then picks the few columns
# it needs by label through `usecols=`, so the order here must match the
# order of the fields in the file.
_COLUMN_NAMES = (
|
||||
'Catalog', 'HIP', 'Proxy', 'RAhms', 'DEdms', 'Vmag',
|
||||
'VarFlag', 'r_Vmag', 'RAdeg', 'DEdeg', 'AstroRef', 'Plx', 'pmRA',
|
||||
'pmDE', 'e_RAdeg', 'e_DEdeg', 'e_Plx', 'e_pmRA', 'e_pmDE', 'DE:RA',
|
||||
'Plx:RA', 'Plx:DE', 'pmRA:RA', 'pmRA:DE', 'pmRA:Plx', 'pmDE:RA',
|
||||
'pmDE:DE', 'pmDE:Plx', 'pmDE:pmRA', 'F1', 'F2', '---', 'BTmag',
|
||||
'e_BTmag', 'VTmag', 'e_VTmag', 'm_BTmag', 'B-V', 'e_B-V', 'r_B-V',
|
||||
'V-I', 'e_V-I', 'r_V-I', 'CombMag', 'Hpmag', 'e_Hpmag', 'Hpscat',
|
||||
'o_Hpmag', 'm_Hpmag', 'Hpmax', 'HPmin', 'Period', 'HvarType',
|
||||
'moreVar', 'morePhoto', 'CCDM', 'n_CCDM', 'Nsys', 'Ncomp',
|
||||
'MultFlag', 'Source', 'Qual', 'm_HIP', 'theta', 'rho', 'e_rho',
|
||||
'dHp', 'e_dHp', 'Survey', 'Chart', 'Notes', 'HD', 'BD', 'CoD',
|
||||
'CPD', '(V-I)red', 'SpType', 'r_SpType',
|
||||
|
||||
)
|
||||
|
||||
def load_dataframe(fobj, compression='gzip'):
|
||||
"""Given an open file for `hip_main.dat.gz`, return a parsed dataframe.
|
||||
|
||||
|
@ -49,21 +64,19 @@ def load_dataframe(fobj, compression='gzip'):
|
|||
|
||||
"""
|
||||
try:
|
||||
from pandas import read_fwf
|
||||
from pandas import read_csv
|
||||
except ImportError:
|
||||
raise ImportError(PANDAS_MESSAGE)
|
||||
|
||||
names, colspecs = zip(
|
||||
('hip', (2, 14)),
|
||||
('magnitude', (41, 46)),
|
||||
('ra_degrees', (51, 63)),
|
||||
('dec_degrees', (64, 76)),
|
||||
('parallax_mas', (79, 86)), # TODO: have Star load this
|
||||
('ra_mas_per_year', (87, 95)),
|
||||
('dec_mas_per_year', (96, 104)),
|
||||
df = read_csv(
|
||||
fobj, sep='|', compression=compression, names=_COLUMN_NAMES,
|
||||
usecols=['HIP', 'Vmag', 'RAdeg', 'DEdeg', 'Plx', 'pmRA', 'pmDE'],
|
||||
na_values=[' ', ' ', ' ', ' '],
|
||||
)
|
||||
df.columns = (
|
||||
'hip', 'magnitude', 'ra_degrees', 'dec_degrees',
|
||||
'parallax_mas', 'ra_mas_per_year', 'dec_mas_per_year',
|
||||
)
|
||||
|
||||
df = read_fwf(fobj, colspecs, names=names, compression=compression)
|
||||
df = df.assign(
|
||||
ra_hours = df['ra_degrees'] / 15.0,
|
||||
epoch_year = 1991.25,
|
||||
|
|
Loading…
Reference in New Issue