#! /bin/sh
# ******************************************************************************
# elemcvt: convert NIST "Linear ASCII" table of elements to units(1) format
# Usage: elemcvt [options] [<file>]
# Author: Jeff Conrad
# Date: 2024-01-06
# ******************************************************************************
| 8 | |
| 9 | # Adjust PATH to suit. |
| 10 | # For Windows w/MKS Toolkit, this assumes /bin is a symbolic link to |
| 11 | # $ROOTDIR/mksnt. |
| 12 | |
| 13 | PATH=/bin |
| 14 | |
| 15 | progname=${0##*[/\\]} |
| 16 | progname=${progname%.sh} |
| 17 | export TITLEBAR=$progname |
| 18 | |
| 19 | umsg="Usage: $progname [options] [file] |
| 20 | Options: |
| 21 | -d Show elements for which no standard atomic mass is given and exit |
| 22 | -v Verbose" |
| 23 | |
| 24 | show_no_std_atomic_mass= # show elements for which no std atomic mass is given |
| 25 | verbose= |
| 26 | errors= |
| 27 | DUALCASE=1 # used in MKS Toolkit to make options case sensitive |
| 28 | |
| 29 | while getopts :dv arg |
| 30 | do |
| 31 | case $arg in |
| 32 | d) |
| 33 | show_no_std_atomic_mass=YES ;; |
| 34 | v) |
| 35 | verbose=YES ;; |
| 36 | :) |
| 37 | # OPTARG contains the option missing the argument |
| 38 | print -ru2 -- "$progname: option $OPTARG requires an argument" |
| 39 | errors=YES |
| 40 | ;; |
| 41 | [?]) |
| 42 | # OPTARG contains the invalid option |
| 43 | print -ru2 -- "$progname: unknown option $OPTARG" |
| 44 | errors=YES |
| 45 | ;; |
| 46 | esac |
| 47 | done |
| 48 | shift $((OPTIND - 1)) |
| 49 | unset DUALCASE |
| 50 | |
| 51 | if [ -n "$errors" ] |
| 52 | then |
| 53 | print -ru2 -- "$umsg" |
| 54 | exit 1 |
| 55 | fi |
| 56 | |
| 57 | awk ' |
| 58 | |
# Print the comment line that introduces an element:
#   # <name>: <symbol> (<atomic number>)
# with " std atomic weight: <text>" appended when std_atomic_mass_str is
# non-empty.  The name comes from the global names[] table.
function show_element_info(atomic_number, atomic_symbol, std_atomic_mass_str,    header)
{
    header = sprintf("# %s: %s (%d)", names[atomic_number], atomic_symbol, atomic_number)
    if (std_atomic_mass_str)
        header = header sprintf(" std atomic weight: %s", std_atomic_mass_str)
    print header
}
| 66 | |
# Print one isotope definition line:
#   <name>_<mass num> <mass> # <mole fraction>
# num is the mass number; it indexes the global mass[] and composition[]
# tables.  The mole fraction comment is left off when composition[num]
# is empty.
function show_isotope(name, num,    label, line)
{
    label = sprintf("%s_%d", name, num)
    line = sprintf("%-*s%*s%*.*f", max_isotope_len, label, sepwid, " ",
                   isoprecis + 4, isoprecis, mass[num])
    if (composition[num])
        line = line sprintf(" # %.*f", compprecis, composition[num])
    print line
}
| 76 | |
# Start an element definition line: the name left-justified in a field
# of max_name_len characters, followed by the column separation.
# No newline is written; the caller appends the definition.
function show_element_name(name,    gap)
{
    gap = sprintf("%*s", sepwid, " ")
    printf("%-*s%s", max_name_len, name, gap)
}
| 81 | |
# Print one term of an element definition (no newline):
#   <mole fraction> <name>_<mass num>
# When the mole fraction is exactly 1 the factor is left out and the
# isotope name is padded instead, using the width of the last mass
# number of the element.
# mass_wid is a scratch variable; callers pass three arguments.
function mole_fraction(atomic_number, names, mass_num, mass_wid,    isotope)
{
    isotope = sprintf("%s_%d", names[atomic_number], mass_num)

    if (composition[mass_num] != 1) {
        printf("%.*f %s", compprecis, composition[mass_num], isotope)
        return
    }

    # align with 1st digit of atomic mass
    mass_wid = length(int(mass_number[n_isotopes]))
    printf("%*s%s", sepwid + 2 - mass_wid, " ", isotope)
}
| 94 | |
# End the current output line with a backslash continuation and start
# the next one with padding followed by a plus sign, ready for the next
# term of the sum.
function add_continuation()
{
    printf(" \\\n%-*s+ ", max_name_len + sepwid - 2, " ")
}
| 101 | |
# Define the element as its most stable isotope (no newline):
#   <name>_<mass num> # most stable
# The name is always taken from the global names[] table.
# mass_wid is a scratch variable; callers pass two arguments.
function use_most_stable(atomic_number, mass, mass_wid,    isotope)
{
    isotope = sprintf("%s_%d", names[atomic_number], mass)
    mass_wid = length(int(mass_number[n_isotopes]))
    printf("%*s%-*s # most stable", sepwid + 2 - mass_wid, " ",
           isoprecis + 1 + mass_wid, isotope)
}
| 109 | |
# Define the element from its standard atomic mass (no newline):
#   <name>_<mass num> # standard atomic mass
# The name is always taken from the global names[] table.
# mass_wid is a scratch variable; callers pass two arguments.
function use_std_mass(atomic_number, mass, mass_wid,    isotope)
{
    isotope = sprintf("%s_%d", names[atomic_number], mass)
    mass_wid = length(int(mass_number[n_isotopes]))
    printf("%*s%-*s # standard atomic mass", sepwid + 2 - mass_wid, " ",
           isoprecis + 1 + mass_wid, isotope)
}
| 117 | |
# Show the isotopes of the current element, then the element definition.
#
#   atomic_number  element being written
#   names          name table to use (names[] or am_names[])
#
# The definition is the sum of <mole fraction> <isotope> terms for every
# isotope that has a mole fraction.  When no isotope has one, the most
# stable isotope is used if known, otherwise the standard atomic mass.
# When none of these is available only the isotope lines and one blank
# line are written.
#
# Reads the globals n_isotopes, mass_number[], composition[],
# total_composition, most_stable_mass and std_atomic_mass.
# Sets the global show_element, which output() tests afterwards.
function show_element_data(atomic_number, names,    ndx, mass_num, n_terms)
{
    # isotopes and relative abundances
    for (ndx = 1; ndx <= n_isotopes; ndx++)
        show_isotope(names[atomic_number], mass_number[ndx])

    # show a value for atomic mass if one of these is available;
    # otherwise, show only isotope masses.
    if (total_composition > 0 || most_stable_mass || std_atomic_mass)
        show_element = 1
    else
        show_element = 0

    if (show_element)
        show_element_name(names[atomic_number])

    # atomic mass: sum of mole fraction-mass products, one term per
    # isotope that has a mole fraction; terms after the first start on
    # a continuation line
    n_terms = 0
    for (ndx = 1; ndx <= n_isotopes; ndx++) {
        mass_num = mass_number[ndx]
        if (composition[mass_num] > 0) {
            if (n_terms++ > 0)
                add_continuation()
            mole_fraction(atomic_number, names, mass_num)
        }
    }

    # options if mole fraction is not given for any isotope
    if (total_composition == 0) {
        if (most_stable_mass)
            use_most_stable(atomic_number, most_stable_mass)
        else if (std_atomic_mass)
            use_std_mass(atomic_number, std_atomic_mass)
    }

    # Exactly one newline closes the definition (or leaves a blank line
    # when nothing was defined).  Writing it only here keeps a fallback
    # definition on the same line as the element name even when the
    # element has a single isotope.
    print ""
}
| 180 | |
# Write everything for one element: the header comment, the isotope and
# definition lines, and a second copy under the American spelling when
# am_names[] has an entry for the element.  Afterwards the per-element
# accumulators most_stable_mass and total_composition are cleared.
function output(atomic_number,    has_am_name)
{
    # layout settings (globals read by the show_* helpers)
    sepwid = 5          # width of column separation
    compprecis = 8      # for mole fraction
    isoprecis = 10      # for isotope mass

    # NIST shows H, D, and T; the symbol left over from the last
    # hydrogen record is replaced by H
    if (atomic_number == 1)
        atomic_symbol = "H"

    show_element_info(atomic_number, atomic_symbol, std_atomic_mass_str)

    has_am_name = am_names[atomic_number] ? 1 : 0

    if (has_am_name)
        print "# IUPAC spelling"
    show_element_data(atomic_number, names)

    # show American spelling if different from IUPAC
    if (has_am_name) {
        print "# American spelling"
        show_element_data(atomic_number, am_names)
    }

    # blank line between elements
    if (show_element)
        print ""

    most_stable_mass = 0
    total_composition = 0
}
| 209 | |
# Write the file banner and GPL notice as units(1) comment lines,
# followed by one blank line.
function gnu_notes()
{
    print "# This file is the elements database for use with GNU units, a units"
    print "# conversion program by Adrian Mariano adrianm@gnu.org"
    print "#"
    print "# January 2024 Version 1.0"
    print "#"
    print "# Copyright (C) 2024"
    print "# Free Software Foundation, Inc"
    print "#"
    print "# This program is free software; you can redistribute it and/or modify"
    print "# it under the terms of the GNU General Public License as published by"
    print "# the Free Software Foundation; either version 3 of the License, or"
    print "# (at your option) any later version."
    print "#"
    print "# This data is distributed in the hope that it will be useful,"
    print "# but WITHOUT ANY WARRANTY; without even the implied warranty of"
    print "# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"
    print "# GNU General Public License for more details."
    print "#"
    print "# You should have received a copy of the GNU General Public License"
    print "# along with this program; if not, write to the Free Software"
    print "# Foundation, Inc., 51 Franklin Street, Fifth Floor,"
    print "# Boston, MA 02110-1301 USA"
    print ""
}
| 236 | |
# Write the data source and the NIST column notes as units(1) comment
# lines; each of the two parts is followed by one blank line.
function nist_notes()
{
    print "# From https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-relative-atomic-masses"
    print ""

    # notes from https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-column-descriptions
    print "# For several elements, the standard atomic weight A_r is given as an"
    print "# atomic-weight interval with the symbol [a,b] to denote the set of"
    print "# atomic-weight values in normal materials; thus, [a <= A_r(E) <= b]."
    print "# The symbols a and b denote the lower and upper bounds of the"
    print "# interval [a,b], respectively. The values in parentheses, following"
    print "# the last significant digit to which they are attributed, are"
    print "# uncertainties."
    print "#"
    print "# Brackets [ ] enclosing a single value indicate the mass number of"
    print "# the most stable isotope. For radioactive elements with atomic"
    print "# numbers 95 or greater, the mass number of the most stable isotope is"
    print "# not specified, as the list of studied isotopes is still"
    print "# incomplete."
    print ""
}
| 257 | |
# Write the notes explaining how the element atomic masses in this file
# are derived, as units(1) comment lines followed by one blank line.
# Each line is printed separately so that no string constant has to be
# continued across source lines.
function units_notes()
{
    print "# When composition mole fractions of isotopes are given, the atomic mass"
    print "# of an element is given as the sum of the product(s) of mole"
    print "# fraction(s) and the atomic masses of the relevant isotopes. When composition"
    print "# mole fractions are not given, the atomic mass is given as"
    print "#"
    print "# * the mass of the most stable isotope, if available, or"
    print "# * the standard atomic mass of the element, if available."
    print "#"
    print "# If neither the most stable isotope nor a standard atomic mass is"
    print "# available, no atomic mass for the element is given; the user must"
    print "# select the isotope most suitable for their purposes."
    print "#"
    print "# If the standard atomic mass is a range, the value given is the"
    print "# midpoint of that range, which may differ from the value determined"
    print "# from the sum of the products of composition mole fraction and isotope"
    print "# atomic mass."
    print ""
}
| 278 | |
# Set-up: field separator, option flags handed in from the shell, the
# element name tables, column widths, and the explanatory notes that
# head the generated file.
BEGIN {
    # input lines have the form <label> = <value>
    FS = " *= *"

    # option flags: the shell variables are spliced into the program
    # text here, giving either "" or "YES"
    show_no_std_atomic_mass = "'"$show_no_std_atomic_mass"'"
    verbose = "'"$verbose"'"

    # NOTE(review): console is assigned but not read anywhere in the
    # visible program, and the verbose message below goes to standard
    # output along with the data -- confirm whether that is intended.
    console = "/dev/console"

    # IUPAC spellings, listed in order of atomic number so that split()
    # stores them as names[1] .. names[118]
    name_list = "hydrogen helium lithium beryllium boron carbon nitrogen oxygen fluorine neon"
    name_list = name_list " sodium magnesium aluminium silicon phosphorus sulfur chlorine argon potassium calcium"
    name_list = name_list " scandium titanium vanadium chromium manganese iron cobalt nickel copper zinc"
    name_list = name_list " gallium germanium arsenic selenium bromine krypton rubidium strontium yttrium zirconium"
    name_list = name_list " niobium molybdenum technetium ruthenium rhodium palladium silver cadmium indium tin"
    name_list = name_list " antimony tellurium iodine xenon caesium barium lanthanum cerium praseodymium neodymium"
    name_list = name_list " promethium samarium europium gadolinium terbium dysprosium holmium erbium thulium ytterbium"
    name_list = name_list " lutetium hafnium tantalum tungsten rhenium osmium iridium platinum gold mercury"
    name_list = name_list " thallium lead bismuth polonium astatine radon francium radium actinium thorium"
    name_list = name_list " protactinium uranium neptunium plutonium americium curium berkelium californium einsteinium fermium"
    name_list = name_list " mendelevium nobelium lawrencium rutherfordium dubnium seaborgium bohrium hassium meitnerium darmstadtium"
    name_list = name_list " roentgenium copernicium nihonium flerovium moscovium livermorium tennessine oganesson"
    n_names = split(name_list, names, " ")

    # American spellings
    am_names[13] = "aluminum"
    am_names[55] = "cesium"

    # length of longest element name; the first name of that length wins
    max_name_len = 0
    for (i = 1; i <= n_names; i++) {
        len = length(names[i])
        if (len > max_name_len) {
            max_name_len = len
            longestname = names[i]
        }
    }
    max_isotope_len = max_name_len + 4      # allow for "_xxx" suffix

    # the notes are left out when only listing elements without a
    # standard atomic mass
    if (! show_no_std_atomic_mass) {
        gnu_notes()
        nist_notes()
        units_notes()
    }

    if (verbose)
        printf("Longest element name: %s (%d)\n\n", longestname, max_name_len)

    # no isotopes collected yet
    n_isotopes = 0
    mass_number[1] = 0
}
| 431 | |
| 432 | # begin file processing |
| 433 | |
# skip JavaScript and HTML before data: every record from the first one
# through the line that contains the opening <pre tag
NR == 1, /<pre/ { next }
# skip HTML after data: stop reading at the closing tag (the END rule
# still runs)
/<\/pre>/ { exit }
| 438 | |
# Clean up every data record before the label rules below look at it:
# remove unpaddable (non-breaking) spaces, which the HTML page is
# presumed to carry as the entity &nbsp; -- NOTE(review): confirm
# against a current download -- and then remove trailing blanks.
# Ordinary blanks inside the record must stay, because the rules below
# match labels such as "Atomic Number" that contain one.
# Changing $0 makes awk split the fields again with the current FS.
{
    gsub(/&nbsp;/, "")
    gsub(/ +$/, "")
}
| 444 | |
# Start of a record.  When the atomic number differs from that of the
# previous record, the previous element is complete and is written out
# (or, when listing elements without a standard atomic mass, its name is
# printed if no such mass was given).  Nothing is written while the new
# atomic number is 1, since no earlier element exists yet.
$1 ~ /Atomic Number/ {
    last_atomic_number = atomic_number
    atomic_number = $2 + 0

    if (atomic_number != last_atomic_number && atomic_number > 1) {
        if (! show_no_std_atomic_mass)
            output(last_atomic_number)
        else if (! std_atomic_mass_str)
            print names[last_atomic_number]
    }
}
| 457 | |
# Remember the symbol given in the current record.
$1 ~ /Atomic Symbol/ { atomic_symbol = $2 }
| 461 | |
# Collect the mass number of the current isotope.  On the first isotope
# of a new element the list kept for the previous element is emptied
# first.
$1 ~ /Mass Number/ {
    if (atomic_number != last_atomic_number) {
        split("", mass_number)      # portable way to clear an array
        n_isotopes = 0
    }
    n_isotopes++
    mass_number[n_isotopes] = $2
}
| 470 | |
# Store the mass of the current isotope, indexed by its mass number,
# after dropping the parenthesized uncertainty, e.g. "(9)" or "(21#)".
$1 ~ /Relative Atomic Mass/ {
    cur_mass_num = mass_number[n_isotopes]
    mass[cur_mass_num] = $2
    sub(/\([[:digit:]#]+\)/, "", mass[cur_mass_num])
    atomic_mass = mass[cur_mass_num]
}
| 476 | |
# Store the mole fraction of the current isotope, indexed by its mass
# number, after dropping the parenthesized uncertainty, and add it to
# the running total for the element.
$1 ~ /Isotopic Composition/ {
    cur_mass_num = mass_number[n_isotopes]
    composition[cur_mass_num] = $2
    sub(/\([[:digit:]#]+\)/, "", composition[cur_mass_num])
    isotopic_composition = composition[cur_mass_num]
    total_composition += isotopic_composition
}
| 483 | |
# Standard atomic weight of the element.  The text is kept as given in
# std_atomic_mass_str; std_atomic_mass gets the value with parenthesized
# uncertainties and square brackets removed, replaced by the midpoint
# when the value is a range a,b.  A single bracketed value is also
# recorded as the most stable mass.  Finally the current atomic number
# is remembered so that further records of the same element are
# recognized as such.
$1 ~ /Standard Atomic Weight/ {
    std_atomic_mass_str = $2
    std_atomic_mass = std_atomic_mass_str

    gsub(/\([^)]+\)/, "", std_atomic_mass)
    gsub(/[][]/, "", std_atomic_mass)

    if (index(std_atomic_mass, ",") > 0) {
        split(std_atomic_mass, range, ",")
        std_atomic_mass = (range[1] + range[2]) / 2
    }

    if (std_atomic_mass_str ~ /\[[[:digit:].]+\]/)
        most_stable_mass = std_atomic_mass

    last_atomic_number = atomic_number
}
| 496 | |
# End of input: the last element has not been written yet, because an
# element is normally written when the record of the next one starts.
# Nothing is written when no isotope record was read at all (empty
# input, or no data between the pre tags); otherwise a header for a
# nonexistent element would be produced.
#
# The file operands are handed to awk as "$@" rather than $* so that
# names containing blanks or pattern characters are passed on intact;
# with no operands awk still reads standard input.
END {
    if (n_isotopes > 0) {
        if (! show_no_std_atomic_mass)
            output(last_atomic_number)
        else if (! std_atomic_mass_str)
            print names[last_atomic_number]
    }
} ' "$@"