Source code for create_inputfile

"""
Make an input file for BASTA in XML format. Template version.
"""
import os
from basta.downloader import get_basta_dir

# Definition of the path to BASTA, just in case you need it
BASTADIR = get_basta_dir()


#
# THIS IS THE FUNCTION YOU ARE LOOKING FOR!
#
[docs] def define_input(define_io, define_fit, define_output, define_plots, define_intpol): """ Define user input for BASTA. Will fill the dictionaries with required information and pass it on to the XML-generation routines. PLEASE MODIFY THINGS IN THIS FUNCTION TO SUIT YOUR NEEDS """ # ================================================================================== # BLOCK 1: I/O # ================================================================================== # Name of the XML input file to produce # --> Use as input: BASTArun input_myfit.xml xmlfilename = "input_myfit.xml" # The path to the grid to be used by BASTA for the fitting. # --> If using isochrones, remember to also specify physics settings in BLOCK 3c # --> If you need the location of BASTA, it is in BASTADIR define_io["gridfile"] = os.path.join(BASTADIR, "grids", "Garstec_16CygA.hdf5") # Where to store the output of the BASTA run define_io["outputpath"] = os.path.join("output", "myfit") # BASTA is designed to fit multiple stars in the same run. To generate the input # file, a table in plain ascii with the observed stellar parameters must be # supplied. Please specify the path to the table and a tuple with the columns in the # table. # # Note that even when fitting a single star, such a file must be given! # # Regarding the columns: # --> The first entry should always be "starid" # --> The names must match entries in the parameter list (basta/constants.py) unless # it is "dnu". BASTA supports different dnu types and will automatically # translate "dnu" from the table into the correct one based on "fitparams" (see # Block 2 below). # --> Both value and error should be given for all quantities (add _err for errors) # --> It is important that this list matches the file contents!!! # # Also note that the file can contain more columns than required for the fit! # --> Only those relevant are included in the produced input file. # Location of the input file with the star(s) to be fitted and the columns included define_io["asciifile"] = os.path.join("data", "16CygA.ascii") define_io["params"] = ( "starid", "RA", "DEC", "numax", "numax_err", "dnu", "dnu_err", "Teff", "Teff_err", "FeH", "FeH_err", "logg", "logg_err", ) # Special option: Change the assumed delimiter for the ascii table # --> By default, it is None, meaning any consecutive whitespace act as delimiter # --> Modify to use e.g. a comma-separated file # define_io["delimiter"] = "," # Special option: Assumed placeholder for missing values in the ascii table # --> Generally it is advised to provide BASTA with a complete table with bad stars # removed, however BASTA can ignore missing values using this key. This might be # useful if using a large, pre-computer table, where some auxiliary data are not # available for certain stars. # --> PLEASE BE AWARE, that if any of the parameters you are including in the fit # are missing, the star will be skipped! # define_io["missingval"] = -999.999 # Using "overwriteparams" it is possible to assume a given (value, error) of any # parameter for all stars in the fit (e.g. assume all stars to have the same # temperature or dnu). This will overwrite whatever (value, error) is given for the # individual star! # define_io["overwriteparams"] = {"dnufit": (100, 2)} # ================================================================================== # BLOCK 2: Fitting control # ================================================================================== # A list of the parameters to fit must be given to BASTA in a tuple. # --> Must be present in the list of parameters in BLOCK 2 unless it is special # keyword (see below) # --> The names must match entries in the parameter list (basta/constants.py) # # Important note on keywords: # --> To fit frequencies add "freqs" # --> To fit ratios add "r012" (or whichever type you want to fit) # --> To fit epsilon differences add "e012" (or whichever type you want to fit) # --> To fit parallax/magnitude/distance, add "parallax" # --> If activating any special fits, remember to look in the corresponding block # below to set additional required settings!! E.g., for frequencies/ratios, look # in Block 2b and 2d. # # Important note on dnu: # --> BASTA can use different determinations of the large frequency separation # (dnu). The one provided must match the one you add to fitting parameters in # the next block. If present in the grid, "dnufit" is the most reliable one. # The full list is available in constants.py define_fit["fitparams"] = ("Teff", "FeH", "logg") # ------------------------------------------------------------ # BLOCK 2a: Fitting control, priors # ------------------------------------------------------------ # A key part of any Bayesian scheme is the specification of priors. By default, # BASTA assumes uninformative priors on all fitting parameters (more details in # the paper). In the following the priors can be specified further. # On class of priors is constrained, flat priors. In other words, it is restriction # of any standard grid parameters. This will reduce computation time, as BASTA # can safely ignore models in the grid with non-matching parameters. # --> These priors can be defined using a fixed absolute tolerance (abstol) or using # a number of sigmas (sigmacut), both of which will be applied based on the # observed value of the individual star. This is only avaiable for parameters in # fitparams. # --> Another option is to specify "min" and/or "max" of a parameter. This can be # done for all grid parameters. Remember that this range should encompass all # stars! An example of this is to restrict the mass for isochrones. define_fit["priors"] = {"Teff": {"sigmacut": "3"}, "FeH": {"abstol": "0.5"}} # If using asteroseismic data, it can be an advantage to apply a cut on dnu, as it # significantly reduces computation time. # define_fit["priors"] = {**define_fit["priors"], "dnufit": {"sigmacut": "3"}} # A different class of priors is to use an initial mass function (IMF). The full # list of available IMF's can be seen in basta/priors.py # define_fit["priors"] = {**define_fit["priors"], "IMF": "salpeter1955"} # A key functionality of BASTA is to use so-called Bayesian weights, which take the # sampling of the grid into account. These will also accommodate the different # evolutionary speed of stars in different phases. # --> IT IS NOT RECOMMENDED TO DISABLE THE USE OF THE WEIGHTS, but is can be done # for e.g. testing. # define_fit["bayweights"] = False # ------------------------------------------------------------ # BLOCK 2b: Fitting control, solar scaling # ------------------------------------------------------------ # When using asteroseismic observables, it is advantageous to make sure the observed # values are compatible with the quantities in the grid. This can be done by using # Sun to scale the input; dnu of the solar model in the grid is compared to the # assumed dnu of the Sun determined using the observational pipeline. # The dnu of the solar model is automatically extracted from the grid. If set to # False, the scaling functionality is switched off (not recommended unless testing). # define_fit["solarmodel"] = True # To perform the scaling, the observed values of dnu and numax for the Sun must be # assumed. By default BASTA uses the values from the SYD pipeline. # define_fit["sundnu"] = 135.1 # define_fit["sunnumax"] = 3090.0 # ------------------------------------------------------------ # BLOCK 2c: Fitting control, isochrones # ------------------------------------------------------------ # When fitting to (the BaSTI) isochrones, the input physics ("science case") MUST be # specified. Set in the tuple "odea": # - o: Overshooting efficiency [step] (0 = no overshooting). Activation value: 0.2 # - d: Diffusion (0 = no diffusion, 1 = diffusion) # - e: Mass loss [Reimers eta] (0 = no mass loss). Activation value: 0.3 # - a: Alpha enhancement [fixed] (0 = no alpha enhancement). Activation value: 0.4 # # Possible science cases: # define_fit["odea"] = (0, 0, 0, 0) # "Normal" # define_fit["odea"] = (0.2, 0, 0, 0) # Overshooting # define_fit["odea"] = (0.2, 0, 0.3, 0) # + mass loss # define_fit["odea"] = (0.2, 1, 0.3, 0) # + diffusion # define_fit["odea"] = (0.2, 1, 0.3, 0.4) # + alpha enhancement # ------------------------------------------------------------ # BLOCK 2d: Fitting control, frequencies # ------------------------------------------------------------ # When fitting individual frequencies or ratios additional information is required. # Please set in the dictionary below: # # - "freqpath": The path to the frequency files. IMPORTANT:: The names of the # frequency files are assumed to match the corresponding StarID's. # # - "fcor": The assumed correction prescription for the asteroseismic surface # effect. Choose between the following: # * "BG14": Two-term correction from Ball & Gizon (2014) # * "cubicBG14": One-term (cubic term) correction from Ball & Gizon (2014) # * "HK08": Power law from Kjeldsen et al. (2008). IMPORTANT: When using # this correction, "bexp" must be specified in the dictionary! # Typically a value of 4.5 is used. # * "None": No correction. PLEASE ONLY USE FOR TESTING/VALIDATION or if # input frequencies have already been corrected. # # - "correlations": To include the correlations between the frequencies/ratios in # the fit. Should always be set to True for surface-independent # fitting (ratios or epsilon differences)! # # - "nrealizations": Number of realizations of frequencies to derive covariances, # if these are not provided by the user. Default is 10000. # # - "dnufrac": Only model matching the lowest observed l=0 within this fraction is # considered. This is useful when fitting ratios. It is also for # computational efficiency purposes, as the model search is restricted. # # - "seismicweight": To balance the fit (and let the classical observables still # have an impact), it is customary to weight/scale the # contribution of the individual frequencies (or ratios). # Typcally, the chi2 term is divided by a given factor before # added to the total chi2. Choose between the following: # * "1/N": The default weighting scheme. The seismic chi2 is # divided/normalised by the total number of frequencies # (or ratios). It is possible to manually adjust the # scaling by specifying "N". # * "1/1": No scaling. Each frequency counts as one classical # parameter # * "1/N-dof": Normalisation by number of frequencies minus the # (user-specified) degrees of freedom. This option # must be supplemented by a specification of "dof". # # - "threepoint": Set 'True' to change to the three-point small frequency separation # formulation for the frequency ratios, instead of the five-point # small frequency separation. The default, if not set or set as # 'False', is to use the five-point formulation. # # - "dnuprior": Set to 'False' to disable the automatic prior on dnu (mimicking the # range defined by dnufrac), which speeds up the computation of the # fit. Recommended to keep as 'True' (default). # # - "dnufit_in_ratios": Set 'True' to include a fit between the observed large # frequency separation, and the model value calculated from # surface-corrected frequencies, when fitting ratios. # Default is 'False'. # # - "dnubias": Estimated systematic/bias to add to the error of the large frequency # separation determined from individual frequencies. Default is 0. # # - "readglitchfile": Set 'True' to look for an input (hdf5) file with precomputed # glitches (and ratios), to read data and options for. The # following 'glitchparams' dictionary (block 2f) is arbitrary # if 'True', as options are read from file for consistency. # Default is 'False'. # Example of typical settings for a frequency fit (with default seismic weights): # define_fit["freqparams"] = { # "freqpath": "data/freqs", # "fcor": "BG14", # "correlations": False, # "dnufrac": 0.15, # } # An example of manually forcing the weights with "N", and an example of using "dof" # define_fit["freqparams"] = { # **define_fit["freqparams"], # "seismicweight": "1/N", # "N": 2, # } # define_fit["freqparams"] = { # **define_fit["freqparams"], # "seismicweight": "1/N-dof", # "dof": 3, # } # ------------------------------------------------------------ # BLOCK 2e: Fitting control, distances # ------------------------------------------------------------ # If fitting parallax and/or predicting distance, additional information is required # below. # Which photometric filters to use. Must be avaiable for the stars (i.e. read from # the table in Block 1). The filter names must be valid (i.e. match entries in the # list of available parameters). # define_fit["filters"] = ("Mj_2MASS", "Mh_2MASS", "Mk_2MASS") # Assumed coordinate system. IMPORTANT: Remember to add coordinates to all stars in # reading in Block 1. Can be: # - "galactic": Input coordinates are expected in galactic coordinates longitude (l) # and latitude (b). # - "icrs": Input coordinates are expected in celestial right ascension (RA) and # declination (DEC). # define_fit["dustframe"] = "icrs" # ------------------------------------------------------------ # BLOCK 2f: Fitting control, glitches # ------------------------------------------------------------ # If fitting glitches (and ratios), from the given individual frequencies, # the following specifications are necessary, in addition to the frequency # parameters specificed in block 2d. If the glitches have been precomputed, and # provided in the correct format, set "readglitchfile: True" in block 2d, whereby # this block is arbitrary. # # - "method": In which frequency parameters the glitch signatures are fitted. "Freq" # fits the signatures in the oscillation frequencies (default), while # "SecDif" fits the signatures in the second differences. # # - "npoly_params": Number of parameters in the smooth component. Recommended are # 5 for "Freq" method (default), and 3 for "SecDif" method. # # - "nderiv": Order of derivative used in the regularization. Recommended are 3 for # "Freq" method (default), and 1 for "SecDif" method. # # - "tol_grad": Tolerance on gradients, typically between 1e-2 and 1e-5 depending on # quality of the data and the method used (1e-3 default) # # - "regu_param": Regularization parameter. Recommended are 7 for "Freq" method # (default), and 1000 for "SecDif" method. # # - "nguesses": Number of initial guesses in search for the global minimum. # (200 default) # define_fit["glitchparams"] = { # "method": "Freq", # "npoly_params": 5, # "nderiv": 3, # "tol_grad": 1e-3, # "regu_param": 7, # "nguesses": 200, # } # ================================================================================== # BLOCK 3: Output control # ================================================================================== # A list of quantities to output. Will be printed to the log of each individual star # and stored in the output/results file(s). # --> The names must match entries in the parameter list (basta/constants.py) # --> A reasonable choice is to (as a minimum) output the parameters used in the fit # --> If you want to predict distance, add the special keyword "distance". define_output["outparams"] = ("Teff", "FeH", "logg", "radPhot", "massfin", "age") # Name of the output file containing the results of the fit in ascii format. # --> A version in xml-format will be automatically created # --> The same name will be used for the {.err, .warn} files define_output["outputfile"] = "results.ascii" # A dump of the statistics (chi2, logPDF) for all models in the grids can be saved # to a .json file. One file is produced per star. define_output["optionaloutputs"] = True # BASTA is designed to work with the median and corresponding Bayesian credibility # intervals or quantiles (16th and 84th percentile). Thus, by default BASTA will # report the median of the posterior distribution the parameter value and the # quantiles as the (asymmetric) errors. # --> For some applications (e.g. to compare with other results), it can be useful # to instead report the mean and standard deviation of the distributions. # --> Note that this only is reasonable for normal distributions! Please inspect # the corner plots beforehand. # --> Be aware that switching output mode is not fully tested and might be unstable! # define_output["centroid"] = "mean" # define_output["uncert"] = "std" # ================================================================================== # BLOCK 4: Plotting control # ================================================================================== # BASTA can produce various plots. Include the various lines to produce the plots. # Corner plots of posteriors. Specify a list of parameters to plot. # --> Typically, plotting the same quantities as written to the output is a # reasonable choice, unless you want to output many parameters to ascii. # --> If the keyword "distance" is present, an additional distance corner plot is # produced. # --> To disable, use an empty list or tuple. define_plots["cornerplots"] = define_output["outparams"] # BASTA can produce a Kiel diagram (Teff vs logg) with the observations and the # model points from the grid. The latter will be color coded based on the fitting # parameters and their uncertainties/constraints. define_plots["kielplots"] = True # When fitting frequencies or frequency ratios, BASTA can generate echelle diagrams # and plots of the surface independent quantities (ratios, epsilon differences). # - Setting True will produce *all* plots. # - Setting "allechelle" will skip the plot of ratios and/or epsilon differences, # which might be useful to save time when fitting individual frequencies (as the # surface independent parameters can take a while to compute). # - It can be a bool, a string or a list. # --> Commonly used options: ("allechelle", "ratios", "epsdiff", True, False) define_plots["freqplots"] = False # By default BASTA will save plots in png format, as they are much faster to # produce. This can be changed to pdf to obtain higher quality plots at the cost of # speed. For fitting a single star (especially with frequencies/ratios), pdf is # usually preferred. # define_plots["plotfmt"] = "pdf" # The star identifier is normally only contained in the name of plot files. # However, depending on the preferred post-processing procedure of the user, # it can be beneficial to include in the plots itself, which can be turned # on using this key. # define_plots["nameinplot"] = True # ================================================================================== # BLOCK 5: Interpolation # ================================================================================== # Based on the input grid of models, BASTA can use interpolation to increase the # resolution. The new interpolated grid will be saved for easy re-fitting. # --> Only parameters in "fitparams" and/or "outparams" will be saved to the # interpolated grid to keep the file size down! # --> If an interpolated grid is already present, BASTA will not calculate it anew. # Please remove the old interpolated grid if you want a fresh one. # --> BE AWARE that grid interpolation can be a very time consuming process!! # Note that this is a special block, as many different settings must be specified # when interpolating. interpolation = False if interpolation: define_intpol["intpolparams"] = {} # As interpolation is time consuming, it is normal procedure to define a 'box' # around the target star and only construct the interpolated grid in this # box. This will yield higher resolution in the important region of the grid, # while reducing the computation time and keeping the size of the interpolated # grid as low as possible. # Please set limits for the parameters (same syntax as for the flat priors) to # control the grid creation. # --> In addition to the standard observed quantities, it is also possible to # specify a limit on all other parameters, e.g., "massini": {"min": 1.05} # to only consider models with a mass above 1.05 solar masses. define_intpol["intpolparams"]["limits"] = { "Teff": {"sigmacut": 1}, "FeH": {"abstol": 0.2}, } # The method for interpolation. Possible options: # - case = "along": Interpolation along each track # - case = "across": Interpolation across/between tracks # - case = "combined": Interpolation across/between tracks and then afterwards # interpolation along each track # # The method for constructing the interpolated grid *if fitting multiple stars*. # Possible options: # - construction = "bystar": An interpolated grid will be produced for each star # - construction = "encompass": One interpolated grid will be produced to which # all stars are fitted. The limits specified above # will be used to find the extreme values for all # stars to make them all fit in the same grid. # WARNING: Only do this stars very close together # in parameter space! define_intpol["intpolparams"]["method"] = { "case": "combined", "construction": "bystar", } # An identifier to be added included in the naming of the interpolated grids. # Assuming bystar-construction, the grids will be automatically named # "intpol_<identifier>_STARID.hdf5" and placed in the output folder. define_intpol["intpolparams"]["name"] = "testgrid" # Settings for interpolation across tracks. Set one of these depending on which # type of grid: # - "scale": For Sobol sampling only. The increase in number of tracks. E.g. a # scale of 2 will double the number of tracks. # - "resolution": For Cartesian sampling only. The increase in resolution in the # sampling parameters. E.g., setting "resolution": {"FeHini": 2} # will double number of metallicities. It is a dict! # # Additionally, it is possible to specify the following: # - "baseparam": Parameter forming the base of the interpolation. Two cases: # * tracks: Using central density "rhocen" is recommended if, # available in the grid. Otherwise use central hydrogen # content "xcen". # * isochrones: Using final mass "massfin" is recommended. define_intpol["intpolparams"]["gridresolution"] = { "scale": 1.5, "baseparam": "rhocen", } # Settings for interpolation along a track. Specify the following: # - "param": The parameter used to define the resolution. Typically "dnufit" is # a good choice for asteroseismic applications. # - "value": The required resolution in the specified parameter. For "dnufit", # the frequency resolution between two consecutive models will be # this value (in microHz) or better. Note that "dnufit" is *not* # available for ishcornes (use e.g. "dnuSer" instead). # # Additionally, it is possible to specify the following: # - "baseparam": Parameter forming the base of the interpolation. Two cases: # * tracks: Using central density "rhocen" is recommended if, # available in the grid. Otherwise use central hydrogen # content "xcen". # * isochrones: Using final mass "massfin" is recommended. define_intpol["intpolparams"]["trackresolution"] = { "param": "dnufit", "value": 0.01, "baseparam": "rhocen", } # END OF INTERPOLATION # Done! Nothing more to specify. return ( xmlfilename, define_io, define_fit, define_output, define_plots, define_intpol, )
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~ AUTOMATED PART OF THE SCRIPT BELOW ~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # New in version 1.1: Imports auxiliary functions from utils collection! if __name__ == "__main__": from basta.utils_examples import make_basta_input # Process using the user defined input function above make_basta_input(define_user_input=define_input)